In [42]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


In [43]:
# Load Dataset 1 and prepare it for training: label-encode every categorical
# column, standardize the features, and convert the result to tensors.
# The fitted encoders AND the fitted scaler are pickled into ./models.
import os
import pickle

import torch
from sklearn.preprocessing import LabelEncoder, StandardScaler

# NOTE(review): absolute, machine-specific path -- this cell only runs on a
# machine where exactly this location exists.
df1 = pd.read_csv(r"D:\Internship_assignment\drug_discovery_ml\data\drugs_data.csv")

# NOTE(review): the preprocessing artifacts go to "models/" (relative to the
# current working directory) while the cells that save the network weights
# write to "../models/" -- confirm which folder is intended.
os.makedirs("models", exist_ok=True)  # create models folder if not exists

# Label Encoders -- one per categorical column, each kept under its own name
le_sex = LabelEncoder()
le_bp = LabelEncoder()
le_chol = LabelEncoder()
le_drug1 = LabelEncoder()

# Fit + transform + save, once per (column, encoder, pickle file) triple
for column, encoder, pkl_path in (
    ("Sex", le_sex, "models/le_sex.pkl"),
    ("BP", le_bp, "models/le_bp.pkl"),
    ("Cholesterol", le_chol, "models/le_chol.pkl"),
    ("Drug", le_drug1, "models/le_drug1.pkl"),
):
    df1[column] = encoder.fit_transform(df1[column])
    with open(pkl_path, "wb") as f:
        pickle.dump(encoder, f)

# Features & labels
X1 = df1.drop("Drug", axis=1)
y1 = df1["Drug"]

# Scale features
# NOTE(review): the scaler is fit on the full dataset, i.e. before the
# train/test split performed in the training cell, so test rows contribute to
# the mean/std used for scaling.
scaler1 = StandardScaler()
X1_scaled = scaler1.fit_transform(X1)

# Persist the fitted scaler next to the encoders: the model is trained on
# X1_scaled, so the same mean/std are needed to prepare any later input.
with open("models/scaler1.pkl", "wb") as f:
    pickle.dump(scaler1, f)

# Convert to tensors (float32 features, int64 class indices)
X1_tensor = torch.tensor(X1_scaled, dtype=torch.float32)
y1_tensor = torch.tensor(y1.values, dtype=torch.long)


In [44]:
# Load Dataset 2 and prepare it for training: keep the selected columns,
# label-encode the categorical ones, binarize the Cannabis target,
# standardize the features, and convert the result to tensors.
import pandas as pd
import torch
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load dataset (use raw string for safe path)
# NOTE(review): absolute, machine-specific path -- this cell only runs on a
# machine where exactly this location exists.
df2 = pd.read_csv(r"D:\Internship_assignment\drug_discovery_ml\data\Drug_Consumption.csv")

# Select features (must match your dataset); rows with any missing value in
# these columns are dropped.
features = ['Age', 'Gender', 'Education', 'Nscore', 'Escore', 'Oscore', 'Ascore', 'Cannabis']
df2 = df2[features].dropna()

# Label encode categorical features
le_age = LabelEncoder()
le_gender = LabelEncoder()
le_edu = LabelEncoder()

df2['Age'] = le_age.fit_transform(df2['Age'])
df2['Gender'] = le_gender.fit_transform(df2['Gender'])
df2['Education'] = le_edu.fit_transform(df2['Education'])

# Map Cannabis to binary: 0 = non-user, 1 = user
cannabis_mapping = {
    'CL0': 0,
    'CL1': 0,
    'CL2': 1, 'CL3': 1, 'CL4': 1, 'CL5': 1, 'CL6': 1
}
cannabis_binary = df2['Cannabis'].map(cannabis_mapping)

# Series.map yields NaN for any label that is not a key of the mapping.
# Missing values were already dropped above, so a NaN here can only come from
# an unexpected label; fail loudly instead of letting NaN be cast to an
# integer class index further down.
unmapped_labels = df2.loc[cannabis_binary.isna(), 'Cannabis'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        f"Cannabis column contains labels missing from cannabis_mapping: "
        f"{sorted(map(str, unmapped_labels))}"
    )
df2['Cannabis'] = cannabis_binary.astype('int64')

# Feature matrix and label
X2 = df2.drop("Cannabis", axis=1)
y2 = df2["Cannabis"]

# Scale features
# NOTE(review): the scaler is fit on the full dataset, i.e. before the
# train/test split performed in the training cell.
scaler2 = StandardScaler()
X2_scaled = scaler2.fit_transform(X2)

# Convert to PyTorch tensors (float32 features, int64 class indices)
X2_tensor = torch.tensor(X2_scaled, dtype=torch.float32)
y2_tensor = torch.tensor(y2.values, dtype=torch.long)


In [45]:
# Define PyTorch-based Neural Network
import torch.nn as nn

class DrugModel(nn.Module):
    """Feed-forward network with a single hidden layer of 32 units.

    Layer stack: ``Linear(input_size, 32) -> ReLU -> Linear(32, output_size)``.
    The output of the last linear layer is returned as-is; no softmax is
    applied inside the model.

    Args:
        input_size: Number of input features per sample.
        output_size: Number of values produced per sample.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        hidden_units = 32
        layers = [
            nn.Linear(input_size, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, output_size),
        ]
        # Registered under ``net`` so parameters are named net.0.* and net.2.*
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the result."""
        scores = self.net(x)
        return scores


In [46]:
# Train using Dataset 1
# Seed torch before the model is built: the split below is already fixed via
# random_state, but the layer weights are drawn from torch's RNG, so without a
# seed the trained model (and its accuracy) changes from run to run.
torch.manual_seed(42)

# 80/20 train/test split
X1_train, X1_test, y1_train, y1_test = train_test_split(X1_tensor, y1_tensor, test_size=0.2, random_state=42)

# One output per distinct encoded drug label
model1 = DrugModel(input_size=X1_train.shape[1], output_size=len(y1.unique()))
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model1.parameters(), lr=0.001)

# Training loop: 100 updates, each computed on the whole training set at once.
# Nothing inside the loop leaves training mode, so it is set once up front.
model1.train()
for epoch in range(100):
    optimizer.zero_grad()
    out = model1(X1_train)
    loss = loss_fn(out, y1_train)
    loss.backward()
    optimizer.step()

print("✅ Model trained on Dataset 1 (drugs_data.csv)")


✅ Model trained on Dataset 1 (drugs_data.csv)


In [47]:
# Evaluate on Dataset 1: predict the held-out rows and report accuracy
model1.eval()
with torch.no_grad():  # inference only, no gradients needed
    test_scores1 = model1(X1_test)
    # Predicted class = index of the largest score in each row
    y1_pred = test_scores1.argmax(dim=1)

acc1 = accuracy_score(y1_test.numpy(), y1_pred.numpy())
print(f"📊 Accuracy on Dataset 1: {acc1 * 100:.2f}%")


📊 Accuracy on Dataset 1: 56.67%


In [48]:
# Train using Dataset 2
# Seed torch before the model is built: the split below is already fixed via
# random_state, but the layer weights are drawn from torch's RNG, so without a
# seed the trained model (and its accuracy) changes from run to run.
torch.manual_seed(42)

# 80/20 train/test split
X2_train, X2_test, y2_train, y2_test = train_test_split(X2_tensor, y2_tensor, test_size=0.2, random_state=42)

model2 = DrugModel(input_size=X2_train.shape[1], output_size=2)  # Binary classification
loss_fn2 = nn.CrossEntropyLoss()
optimizer2 = optim.Adam(model2.parameters(), lr=0.001)

# Training loop: 100 updates, each computed on the whole training set at once.
# Nothing inside the loop leaves training mode, so it is set once up front.
model2.train()
for epoch in range(100):
    optimizer2.zero_grad()
    out = model2(X2_train)
    loss = loss_fn2(out, y2_train)
    loss.backward()
    optimizer2.step()

print("✅ Model trained on Dataset 2 (Drug_Consumption.csv)")


✅ Model trained on Dataset 2 (Drug_Consumption.csv)


In [49]:
# Evaluate on Dataset 2: predict the held-out rows and report accuracy
model2.eval()
with torch.no_grad():  # inference only, no gradients needed
    test_scores2 = model2(X2_test)
    # Predicted class = index of the largest score in each row
    y2_pred = test_scores2.argmax(dim=1)

acc2 = accuracy_score(y2_test.numpy(), y2_pred.numpy())
print(f"📊 Accuracy on Dataset 2: {acc2 * 100:.2f}%")


📊 Accuracy on Dataset 2: 77.72%


In [50]:
# Save model trained on drugs_data.csv
import os

# torch.save does not create missing parent folders, and up to this point the
# notebook has only created "models" (not "../models"), so make sure the
# target directory exists before writing into it.
os.makedirs("../models", exist_ok=True)

# Only the weights (state_dict) are stored, not the model object itself
torch.save(model1.state_dict(), "../models/drug_model.pth")
print("💾 Saved model1 to models/drug_model.pth")


💾 Saved model1 to models/drug_model.pth


In [51]:
import os
import torch

# Make sure the output folder is there before writing into it
os.makedirs("../models", exist_ok=True)

# Write the trained cannabis model's weights (state_dict only) to disk
cannabis_state = model2.state_dict()
torch.save(cannabis_state, "../models/cannabis_model.pth")
print("✅ cannabis_model.pth saved successfully!")


✅ cannabis_model.pth saved successfully!
