In [6]:
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
import shap
import tensorflow as tf
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import RMSprop

class EnhancedADRPredictor:
    """Predict whether an adverse-drug-reaction (ADR) report is serious.

    The pipeline downloads adverse-event reports for one drug from the
    openFDA drug-event endpoint (falling back to a synthetic dataset when
    nothing usable is returned), turns each report into five numeric
    features, and trains a small dense Keras classifier on them.

    Typical use::

        predictor = EnhancedADRPredictor(drug_name="Aspirin", max_results=5000)
        predictor.full_pipeline()
    """

    # Model inputs, in the column order that is fed to the network.
    FEATURES = ('age', 'weight', 'dosage', 'reaction_count', 'bmi')
    # Single seed shared by NumPy, scikit-learn, SMOTE and TensorFlow.
    SEED = 42
    # openFDA rejects requests whose `limit` exceeds 1000, and paging with
    # `skip` is only supported up to 25000.
    API_PAGE_LIMIT = 1000
    API_MAX_SKIP = 25000
    # openFDA reports carry no height, so BMI cannot be computed for real
    # data. A constant placeholder keeps the 5-feature input shape; after
    # standard scaling a constant column contributes nothing to the model.
    DEFAULT_BMI = 25.0
    # openFDA `patientsex` codes: 0 = unknown, 1 = male, 2 = female.
    _SEX_CODES = {'1': 'male', '2': 'female'}
    # First integer or decimal number in a free-text dosage string.
    _NUMBER_RE = re.compile(r'\d+(?:\.\d+)?')

    def __init__(self, drug_name='Paracetamol', max_results=1000):
        """Store the query settings and create the scaler and RNG.

        Args:
            drug_name: Medicinal product name searched for in openFDA.
            max_results: Upper bound on the number of reports to download.
        """
        self.drug_name = drug_name
        self.max_results = max_results
        self.base_url = "https://api.fda.gov/drug/event.json"
        self.model = None
        self.scaler = StandardScaler()
        self.rng = np.random.default_rng(self.SEED)

    # ------------------------------------------------------------------
    # Data acquisition
    # ------------------------------------------------------------------
    def fetch_openfda_data(self):
        """Download up to ``max_results`` reports, one page at a time.

        Returns:
            A DataFrame with one row per usable report, or the synthetic
            dataset from :meth:`generate_sample_data` when the request
            fails or yields no usable rows.
        """
        query = f'patient.drug.medicinalproduct:"{self.drug_name}"'
        all_results = []
        skip = 0
        try:
            while len(all_results) < self.max_results and skip <= self.API_MAX_SKIP:
                page_size = min(self.API_PAGE_LIMIT, self.max_results - len(all_results))
                params = {'search': query, 'limit': page_size, 'skip': skip}
                response = requests.get(self.base_url, params=params, timeout=15)
                response.raise_for_status()
                page = response.json().get('results', [])
                all_results.extend(page)
                if len(page) < page_size:
                    # Short page: the server has nothing more to give.
                    break
                skip += len(page)
        except (requests.RequestException, ValueError) as e:
            # Keep whatever pages were fetched before the failure.
            print(f"Error fetching data: {e}")
        print(f"Fetched {len(all_results)} records.")

        processed = self._process_results(all_results) if all_results else []
        if not processed:
            return self.generate_sample_data()
        return pd.DataFrame(processed)

    @staticmethod
    def _to_float(value, default):
        """Return ``float(value)``, or ``default`` if it cannot be parsed."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    def _process_results(self, results):
        """Convert raw openFDA event dicts into flat feature records.

        Each record has the keys ``age``, ``sex``, ``weight``, ``dosage``,
        ``reactions``, ``reaction_count``, ``bmi`` and ``serious`` so that
        the frame built from it has every column ``preprocess_data`` needs.
        Events that cannot be parsed are reported and skipped.
        """
        processed = []
        for event in results:
            try:
                patient = event.get('patient') or {}
                reactions = [
                    reac.get('reactionmeddrapt')
                    for reac in (patient.get('reaction') or [])
                    if reac.get('reactionmeddrapt')
                ]
                entry = {
                    # NOTE(review): `patientonsetage` is used as-is; openFDA also
                    # reports a `patientonsetageunit`, which is not applied here.
                    'age': self._to_float(patient.get('patientonsetage'), 30.0),
                    'sex': self._SEX_CODES.get(str(patient.get('patientsex')), 'unknown'),
                    'weight': self._to_float(patient.get('patientweight'), 70.0),
                    'dosage': self._extract_dosage(event),
                    'reactions': reactions if reactions else ['unknown'],
                    'reaction_count': len(reactions),
                    'bmi': self.DEFAULT_BMI,
                    # openFDA encodes seriousness as "1" (serious) / "2" (not
                    # serious); a plain truthiness test would label both as 1.
                    'serious': 1 if str(event.get('serious')) == '1' else 0
                }
                processed.append(entry)
            except (AttributeError, KeyError, TypeError, ValueError) as e:
                print(f"Error processing entry: {e}")
        return processed

    def _extract_dosage(self, event):
        """Return the first number in the first drug's dosage text.

        Falls back to ``1.0`` when the text is missing or has no number.
        Only the first numeric token is used, so "500 MG, 2 TIMES DAILY"
        yields 500.0 rather than the digits glued together.
        """
        try:
            drugs = (event.get('patient') or {}).get('drug') or [{}]
            dosage_text = drugs[0].get('drugdosagetext') or ''
            match = self._NUMBER_RE.search(str(dosage_text))
            return float(match.group()) if match else 1.0
        except (AttributeError, IndexError, KeyError, TypeError, ValueError):
            return 1.0

    def generate_sample_data(self, num_samples=1000):
        """Build a random stand-in dataset with the same feature columns.

        Values are drawn from the instance RNG; roughly 30% of rows are
        labelled serious.
        """
        print("Generating synthetic dataset...")
        df = pd.DataFrame({
            'age': self.rng.integers(18, 80, num_samples),
            'sex': self.rng.choice(['male', 'female'], num_samples),
            'weight': self.rng.uniform(50, 120, num_samples),
            'dosage': self.rng.uniform(1, 10, num_samples),
            'reaction_count': self.rng.integers(1, 5, num_samples),
            'bmi': self.rng.uniform(15, 35, num_samples),
            'serious': self.rng.choice([0, 1], num_samples, p=[0.7, 0.3])
        })
        return df

    # ------------------------------------------------------------------
    # Preprocessing
    # ------------------------------------------------------------------
    def _balance(self, X, y):
        """Oversample the minority class with seeded SMOTE when possible.

        SMOTE needs at least two classes and more minority samples than
        neighbours, so the neighbour count is reduced for tiny classes and
        the data is returned unchanged when resampling is impossible.
        """
        classes, counts = np.unique(y, return_counts=True)
        if len(classes) < 2 or counts.min() < 2:
            print("Skipping SMOTE: need at least two classes with two samples each.")
            return X, y
        k_neighbors = min(5, int(counts.min()) - 1)
        smote = SMOTE(random_state=self.SEED, k_neighbors=k_neighbors)
        return smote.fit_resample(X, y)

    @staticmethod
    def _stratify_or_none(y):
        """Return ``y`` for stratified splitting, or None if a class is too small."""
        classes, counts = np.unique(y, return_counts=True)
        return y if len(classes) > 1 and counts.min() >= 2 else None

    def preprocess_data(self, df, resample=False):
        """Scale the feature columns and attach the ``target`` column.

        The input frame is not modified, so the cell can be re-run safely.

        Args:
            df: Frame holding every column in ``FEATURES`` plus ``serious``.
            resample: When True, SMOTE-balance the whole frame here. This is
                off by default because resampling before the train/test
                split leaks synthetic neighbours of training rows into the
                test set; ``train_and_save_model`` balances the training
                split itself.

        Returns:
            A new DataFrame with the scaled feature columns and ``target``.

        Raises:
            KeyError: If a required column is missing.
        """
        features = list(self.FEATURES)
        missing = [col for col in features + ['serious'] if col not in df.columns]
        if missing:
            raise KeyError(f"Missing required columns: {missing}")

        # NOTE(review): the scaler is fitted on every row, i.e. before the
        # train/test split, so test-set statistics influence the scaling.
        scaled = self.scaler.fit_transform(df[features].astype(float))
        out = pd.DataFrame(scaled, columns=features, index=df.index)
        out['target'] = df['serious']

        if resample:
            X_resampled, y_resampled = self._balance(out[features], out['target'])
            out = pd.DataFrame(X_resampled, columns=features)
            out['target'] = np.asarray(y_resampled)

        return out

    # ------------------------------------------------------------------
    # Model
    # ------------------------------------------------------------------
    def build_model(self):
        """Create and compile the dense binary classifier in ``self.model``."""
        input_features = Input(shape=(len(self.FEATURES),))
        x = Dense(64, activation='relu')(input_features)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        x = Dense(32, activation='relu')(x)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)
        output = Dense(1, activation='sigmoid')(x)

        self.model = Model(inputs=input_features, outputs=output)
        self.model.compile(optimizer=RMSprop(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

    def train_and_save_model(self, df, model_path="adr_model.keras"):
        """Train the classifier, save it, and report held-out metrics.

        The data is split into train / validation / test parts. Only the
        training part is SMOTE-balanced; early stopping watches the
        validation part; all reported metrics come from the untouched test
        part.

        Args:
            df: Output of ``preprocess_data``.
            model_path: Destination file for the trained Keras model.
        """
        X = df[list(self.FEATURES)].to_numpy()
        y = df['target'].to_numpy().astype(int)

        # Seed Python, NumPy and TensorFlow so repeated runs match.
        tf.keras.utils.set_random_seed(self.SEED)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=self.SEED,
            stratify=self._stratify_or_none(y))
        X_fit, X_val, y_fit, y_val = train_test_split(
            X_train, y_train, test_size=0.2, random_state=self.SEED,
            stratify=self._stratify_or_none(y_train))
        X_fit, y_fit = self._balance(X_fit, y_fit)

        self.build_model()
        early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
        self.model.fit(X_fit, y_fit, epochs=50, batch_size=32,
                       validation_data=(X_val, y_val), callbacks=[early_stop])
        self.model.save(model_path)
        print(f"✅ Model saved to {model_path}")

        y_prob = self.model.predict(X_test).ravel()
        y_pred = (y_prob > 0.5).astype(int)
        print("Classification Report:\n", classification_report(y_test, y_pred, zero_division=0))
        if len(np.unique(y_test)) > 1:
            # AUC must be computed from scores, not thresholded labels.
            print("ROC AUC Score:", roc_auc_score(y_test, y_prob))
        else:
            print("ROC AUC Score: undefined (test set contains a single class)")

        fig, ax = plt.subplots(figsize=(6, 6))
        sns.heatmap(confusion_matrix(y_test, y_pred, labels=[0, 1]),
                    annot=True, fmt='d', cmap='Blues', ax=ax)
        ax.set_title("Confusion Matrix")
        ax.set_xlabel("Predicted label")
        ax.set_ylabel("True label")
        plt.show()

    def full_pipeline(self):
        """Run fetch -> preprocess -> train/evaluate end to end."""
        print("Starting ADR detection pipeline...")
        df = self.fetch_openfda_data()
        df = self.preprocess_data(df)
        self.train_and_save_model(df)

if __name__ == "__main__":
    # Entry point: train and evaluate the ADR model on Aspirin reports.
    # `predictor` stays a module-level name so later cells can inspect it.
    predictor = EnhancedADRPredictor(
        drug_name="Aspirin",
        max_results=5000,
    )
    predictor.full_pipeline()


SyntaxError: invalid syntax (1749712030.py, line 1)

In [8]:
!pip install pandas numpy requests shap matplotlib seaborn scikit-learn tensorflow


PermissionError: [WinError 5] Access is denied