In [None]:
import pandas as pd
import requests
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import numpy as np
from io import StringIO
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, OneHotEncoder, StandardScaler, OneHotEncoder

dataset1_url = "https://raw.githubusercontent.com/hbedros/data622-assignment4/main/data/dataset1.csv"
dataset2_url = "https://raw.githubusercontent.com/hbedros/data622-assignment4/main/data/dataset2.csv"

response1 = requests.get(dataset1_url, verify=False)
response2 = requests.get(dataset2_url, verify=False)

data1 = StringIO(response1.text)
data2 = StringIO(response2.text)
dataset1 = pd.read_csv(data1)
dataset2 = pd.read_csv(data2)



## Cleaning Dataset 1 - The Depression Dataset

df1 = pd.DataFrame(dataset1)
df1.columns = df1.columns.str.strip()
df1 = df1.rename(columns = {'Have you ever had suicidal thoughts ?':'suicidal thoughts'})
df1['Sleep Duration'] = pd.Categorical(df1['Sleep Duration'], categories=['Less than 5 hours', '5-6 hours', '7-8 hours', 'More than 8 hours'])

numeric_cols_df1 = ['Age', 'Academic Pressure', 'Study Satisfaction', 'Study Hours', 'Financial Stress']
categorical_cols_df1 = ['Gender', 'Sleep Duration', 'Dietary Habits', 'suicidal thoughts', 'Family History of Mental Illness', 'Depression']
scaler = MinMaxScaler()
df1[numeric_cols_df1] = scaler.fit_transform(df1[numeric_cols_df1])

df2 = pd.DataFrame(dataset2)
df2.columns = df2.columns.str.strip()

def parse_range(val):
    if isinstance(val, str) and "-" in val:
        lower, upper = map(float, val.split("-"))
        return (lower + upper) / 2
    else:
        return float(val)

df2["cgpa"] = df2["cgpa"].apply(parse_range)

def parse_sleep(val):
    if isinstance(val, str) and "hrs" in val:
        val = val.replace(" hrs", "").strip()
    if "-" in val:
        lower, upper = map(float, val.split("-"))
        return (lower + upper) / 2
    else:
        return float(val)

if "average_sleep" in df2.columns:
    df2["average_sleep"] = df2["average_sleep"].apply(parse_sleep)

numerical_cols = [
    "cgpa", "study_satisfaction", "average_sleep"
]

missing_cols = [col for col in numerical_cols if col not in df2.columns]
if missing_cols:
    print("Missing columns after cleaning:", missing_cols)
else:
    scaler = MinMaxScaler()
    df2[numerical_cols] = scaler.fit_transform(df2[numerical_cols])

if "stress_relief_activities" in df2.columns:
    activities = df2["stress_relief_activities"].str.get_dummies(sep=",")
    
    df2 = pd.concat([df2, activities], axis=1)
    

def parse_sports_engagement(val):
    if isinstance(val, str):
        val = val.strip().lower()  # Normalize the input
        if "no sports" in val:
            return 0.0  # Assign 0 for 'No Sports'
        elif "-" in val:
            # Handle ranges like "1-3 times"
            try:
                lower, upper = map(float, val.replace(" times", "").split("-"))
                return (lower + upper) / 2
            except ValueError:
                return np.nan  # Handle malformed ranges
        elif "7+" in val or "7+s" in val:
            # Assign a numeric value for '7+' or '7+s'
            return 7.5
        elif "time" in val:
            # Handle single occurrences like "1 time"
            try:
                return float(val.replace(" time", "").strip())
            except ValueError:
                return np.nan
    try:
        return float(val)
    except ValueError:
        return np.nan

In [4]:

## Set seeds round 1

import random
import torch
import torch.nn as nn
import torch.optim as optim

seed_value = 23
np.random.seed(seed_value) 
random.seed(seed_value) 
torch.manual_seed(seed_value) 
torch.cuda.manual_seed_all(seed_value)



# Neural Networks dataset 1


from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score

X = df1.drop(columns=['Depression'])
y = df1['Depression'].map({'Yes': 1, 'No': 0})

numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(), categorical_cols)
    ]
)

X_preprocessed = preprocessor.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

class NeuralNet(nn.Module):
    def __init__(self, input_size):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.output = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.sigmoid(self.output(x))
        return x

model = NeuralNet(input_size=X_train_tensor.shape[1])
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

with torch.no_grad():
    model.eval()  
    predictions = model(X_test_tensor)
    predicted_labels = (predictions >= 0.5).float()  

    accuracy = accuracy_score(y_test_tensor.numpy(), predicted_labels.numpy())
    df1_NN = accuracy






# Neural Networks dataset 2

X = df2.drop(columns=['cgpa'])
y = df2['cgpa']

numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(), categorical_cols)
    ]
)

X_preprocessed = preprocessor.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

class NeuralNet(nn.Module):
    def __init__(self, input_size):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.output = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.sigmoid(self.output(x))
        return x

model = NeuralNet(input_size=X_train_tensor.shape[1])
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

with torch.no_grad():
    model.eval()  
    predictions = model(X_test_tensor)
    mae = torch.mean(torch.abs(predictions - y_test_tensor)).item()
    df2_NN_MAE = mae


with torch.no_grad():
    model.eval()
    test_predictions = model(X_test_tensor)
    mse = torch.mean((test_predictions - y_test_tensor) ** 2).item()
    rmse = torch.sqrt(torch.tensor(mse)).item()
    df2_NN_rmse = rmse





# DF1 PCA
from sklearn.decomposition import PCA


X = df1.drop(columns=['Depression'])
y = df1['Depression']

y_encoded = y.map({'Yes': 1, 'No': 0}).values

numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(), categorical_cols)
    ]
)

X_preprocessed = preprocessor.fit_transform(X)
pca = PCA(n_components=2)  
X_pca = pca.fit_transform(X_preprocessed)

X_train, X_test, y_train, y_test = train_test_split(X_pca, y_encoded, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

class NeuralNet(nn.Module):
    def __init__(self, input_size):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.output = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.sigmoid(self.output(x))
        return x

model = NeuralNet(input_size=X_train_tensor.shape[1])
criterion = nn.BCELoss()  
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    predictions = model(X_train_tensor)
    loss = criterion(predictions, y_train_tensor)
    loss.backward()
    optimizer.step()

with torch.no_grad():
    model.eval()
    test_predictions = model(X_test_tensor)
    test_predictions_class = (test_predictions > 0.5).float()  

    accuracy = accuracy_score(y_test, test_predictions_class.numpy())
    df1_PCA = accuracy



# DF2 PCA


X = df2.drop(columns=['cgpa'])  
y = df2['cgpa']  

numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(), categorical_cols)
    ]
)

X_preprocessed = preprocessor.fit_transform(X)
pca = PCA(n_components=2)  
X_pca = pca.fit_transform(X_preprocessed)

X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

class NeuralNet(nn.Module):
    def __init__(self, input_size):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.output = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.output(x)

model = NeuralNet(input_size=X_train_tensor.shape[1])
criterion = nn.MSELoss()  
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    predictions = model(X_train_tensor)
    loss = criterion(predictions, y_train_tensor)
    loss.backward()
    optimizer.step()

with torch.no_grad():
    model.eval()
    test_predictions = model(X_test_tensor)

    mse = torch.mean((test_predictions - y_test_tensor) ** 2).item()
    rmse = torch.sqrt(torch.tensor(mse)).item()
    df2_PCA_RMSE = rmse

    mae = torch.mean(torch.abs(test_predictions - y_test_tensor)).item()
    df2_PCA_MAE = mae














# DF1 Bootstrapping


X = df1.drop(columns=['Depression']) 
y = df1['Depression']

y_encoded = y.map({'Yes': 1, 'No': 0}).values

numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(), categorical_cols)
    ]
)

X_preprocessed = preprocessor.fit_transform(X)
pca = PCA(n_components=2) 
X_pca = pca.fit_transform(X_preprocessed)

X_train, X_test, y_train, y_test = train_test_split(X_pca, y_encoded, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)


n_bootstrap = 100
accuracy_list = []

class NeuralNet(nn.Module):
    def __init__(self, input_size):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.output = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.sigmoid(self.output(x))
        return x

for i in range(n_bootstrap):
    bootstrap_indices = np.random.choice(range(len(X_train_tensor)), size=len(X_train_tensor), replace=True)
    X_train_bootstrap = X_train_tensor[bootstrap_indices]
    y_train_bootstrap = y_train_tensor[bootstrap_indices]
    
    model = NeuralNet(input_size=X_train_tensor.shape[1])
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.BCELoss()  

    for epoch in range(10):  
        model.train()
        optimizer.zero_grad()
        predictions = model(X_train_bootstrap)
        loss = criterion(predictions, y_train_bootstrap)
        loss.backward()
        optimizer.step()

    with torch.no_grad():
        model.eval()
        test_predictions = model(X_test_tensor)
        test_predictions_class = (test_predictions > 0.5).float()  
        acc = accuracy_score(y_test_tensor.cpu().numpy(), test_predictions_class.cpu().numpy())
        accuracy_list.append(acc)

mean_accuracy = np.mean(accuracy_list)
std_accuracy = np.std(accuracy_list)
df1_boot_acc = mean_accuracy
df1_boot_stdev = std_accuracy









# DF2 Bootstrapping

X = df2.drop(columns=['cgpa'])
y = df2['cgpa']

numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(), categorical_cols)
    ]
)

X_preprocessed = preprocessor.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

n_bootstrap = 100
rmse_list = []
mae_list = []

for i in range(n_bootstrap):
    bootstrap_indices = np.random.choice(range(len(X_train)), size=len(X_train), replace=True)
    X_train_bootstrap = X_train_tensor[bootstrap_indices]
    y_train_bootstrap = y_train_tensor[bootstrap_indices]
    
    model = NeuralNet(input_size=X_train_tensor.shape[1])
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.MSELoss()
    
    for epoch in range(10):  
        model.train()
        optimizer.zero_grad()
        predictions = model(X_train_bootstrap)
        loss = criterion(predictions, y_train_bootstrap)
        loss.backward()
        optimizer.step()
    
    with torch.no_grad():
        model.eval()
        test_predictions = model(X_test_tensor)
        mse = torch.mean((test_predictions - y_test_tensor) ** 2).item()
        rmse = np.sqrt(mse)
        mae = torch.mean(torch.abs(test_predictions - y_test_tensor)).item()
        rmse_list.append(rmse)
        mae_list.append(mae)

df2_boot_rmse = np.mean(rmse_list)
df2_boot_mae = np.mean(mae_list)





# DF1 KNN

from sklearn.neighbors import KNeighborsClassifier

X = df1.drop(columns=['Depression'])
y = df1['Depression']

y_encoded = y.map({'Yes': 1, 'No': 0}).values

numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(), categorical_cols)
    ]
)

X_preprocessed = preprocessor.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y_encoded, test_size=0.2, random_state=42)

knn = KNeighborsClassifier(n_neighbors=5)  

knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
df1_KNN = accuracy




# DF2 KNN
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.neighbors import KNeighborsRegressor

X = df2.drop(columns=['cgpa'])  
y = df2['cgpa']  

numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),  
        ('cat', OneHotEncoder(), categorical_cols)   
    ]
)

X_preprocessed = preprocessor.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y, test_size=0.2, random_state=42)

knn = KNeighborsRegressor(n_neighbors=5)  

knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)

rmse = np.sqrt(mean_squared_error(y_test, y_pred))
df2_KNN_rmse = rmse

mae = mean_absolute_error(y_test, y_pred)
df2_KNN_mae = mae





from tabulate import tabulate

new_table = {
    'Data Table': ['DF 1', 'DF 1', 'DF 1', 'DF 1', 'DF 2', 'DF 2', 'DF 2', 'DF 2'],
    'Model': ['Neural Network', 'PCA', 'Bootstrapping', 'K-nearest neighbors', 'Neural Network', 'PCA', 'Bootstrapping', 'K-nearest neighbors'],
    'Accuracy': [df1_NN, df1_PCA, df1_boot_acc, df1_KNN, None, None, None, None],
    'MAE': [None, None, None, None, df2_NN_MAE, df2_PCA_MAE, df2_boot_mae, df2_KNN_mae],
    'RMSE': [None, None, None, None, df2_NN_rmse, df2_PCA_RMSE, df2_boot_rmse, df2_KNN_rmse]
}

df3 = pd.DataFrame(new_table)
df3[['Accuracy', 'MAE', 'RMSE']] = df3[['Accuracy', 'MAE', 'RMSE']].round(4)
df3 = df3.fillna("")


print(tabulate(df3, headers='keys', tablefmt='pretty', showindex=False))




+------------+---------------------+----------+--------+--------+
| Data Table |        Model        | Accuracy |  MAE   |  RMSE  |
+------------+---------------------+----------+--------+--------+
|    DF 1    |   Neural Network    |  0.8317  |        |        |
|    DF 1    |         PCA         |  0.7822  |        |        |
|    DF 1    |    Bootstrapping    |  0.7332  |        |        |
|    DF 1    | K-nearest neighbors |  0.8713  |        |        |
|    DF 2    |   Neural Network    |          | 0.3313 | 0.3564 |
|    DF 2    |         PCA         |          | 0.4274 | 0.4774 |
|    DF 2    |    Bootstrapping    |          | 0.3122 | 0.3408 |
|    DF 2    | K-nearest neighbors |          | 0.1785 | 0.2515 |
+------------+---------------------+----------+--------+--------+


In [6]:
## Set seeds round 2

import random
import torch
import torch.nn as nn
import torch.optim as optim

seed_value = 97
np.random.seed(seed_value) 
random.seed(seed_value) 
torch.manual_seed(seed_value) 
torch.cuda.manual_seed_all(seed_value)



# Neural Networks dataset 1


from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score

X = df1.drop(columns=['Depression'])
y = df1['Depression'].map({'Yes': 1, 'No': 0})

numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(), categorical_cols)
    ]
)

X_preprocessed = preprocessor.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

class NeuralNet(nn.Module):
    def __init__(self, input_size):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.output = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.sigmoid(self.output(x))
        return x

model = NeuralNet(input_size=X_train_tensor.shape[1])
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

with torch.no_grad():
    model.eval()  
    predictions = model(X_test_tensor)
    predicted_labels = (predictions >= 0.5).float()  

    accuracy = accuracy_score(y_test_tensor.numpy(), predicted_labels.numpy())
    df1_NN = accuracy






# Neural Networks dataset 2

X = df2.drop(columns=['cgpa'])
y = df2['cgpa']

numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(), categorical_cols)
    ]
)

X_preprocessed = preprocessor.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

class NeuralNet(nn.Module):
    def __init__(self, input_size):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.output = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.sigmoid(self.output(x))
        return x

model = NeuralNet(input_size=X_train_tensor.shape[1])
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

with torch.no_grad():
    model.eval()  
    predictions = model(X_test_tensor)
    mae = torch.mean(torch.abs(predictions - y_test_tensor)).item()
    df2_NN_MAE = mae


with torch.no_grad():
    model.eval()
    test_predictions = model(X_test_tensor)
    mse = torch.mean((test_predictions - y_test_tensor) ** 2).item()
    rmse = torch.sqrt(torch.tensor(mse)).item()
    df2_NN_rmse = rmse





# DF1 PCA
from sklearn.decomposition import PCA


X = df1.drop(columns=['Depression'])
y = df1['Depression']

y_encoded = y.map({'Yes': 1, 'No': 0}).values

numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(), categorical_cols)
    ]
)

X_preprocessed = preprocessor.fit_transform(X)
pca = PCA(n_components=2)  
X_pca = pca.fit_transform(X_preprocessed)

X_train, X_test, y_train, y_test = train_test_split(X_pca, y_encoded, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

class NeuralNet(nn.Module):
    def __init__(self, input_size):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.output = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.sigmoid(self.output(x))
        return x

model = NeuralNet(input_size=X_train_tensor.shape[1])
criterion = nn.BCELoss()  
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    predictions = model(X_train_tensor)
    loss = criterion(predictions, y_train_tensor)
    loss.backward()
    optimizer.step()

with torch.no_grad():
    model.eval()
    test_predictions = model(X_test_tensor)
    test_predictions_class = (test_predictions > 0.5).float()  

    accuracy = accuracy_score(y_test, test_predictions_class.numpy())
    df1_PCA = accuracy



# DF2 PCA


X = df2.drop(columns=['cgpa'])  
y = df2['cgpa']  

numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(), categorical_cols)
    ]
)

X_preprocessed = preprocessor.fit_transform(X)
pca = PCA(n_components=2)  
X_pca = pca.fit_transform(X_preprocessed)

X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

class NeuralNet(nn.Module):
    def __init__(self, input_size):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.output = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.output(x)

model = NeuralNet(input_size=X_train_tensor.shape[1])
criterion = nn.MSELoss()  
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    predictions = model(X_train_tensor)
    loss = criterion(predictions, y_train_tensor)
    loss.backward()
    optimizer.step()

with torch.no_grad():
    model.eval()
    test_predictions = model(X_test_tensor)

    mse = torch.mean((test_predictions - y_test_tensor) ** 2).item()
    rmse = torch.sqrt(torch.tensor(mse)).item()
    df2_PCA_RMSE = rmse

    mae = torch.mean(torch.abs(test_predictions - y_test_tensor)).item()
    df2_PCA_MAE = mae














# DF1 Bootstrapping


X = df1.drop(columns=['Depression']) 
y = df1['Depression']

y_encoded = y.map({'Yes': 1, 'No': 0}).values

numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(), categorical_cols)
    ]
)

X_preprocessed = preprocessor.fit_transform(X)
pca = PCA(n_components=2) 
X_pca = pca.fit_transform(X_preprocessed)

X_train, X_test, y_train, y_test = train_test_split(X_pca, y_encoded, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)


n_bootstrap = 100
accuracy_list = []

class NeuralNet(nn.Module):
    def __init__(self, input_size):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.output = nn.Linear(32, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.sigmoid(self.output(x))
        return x

for i in range(n_bootstrap):
    bootstrap_indices = np.random.choice(range(len(X_train_tensor)), size=len(X_train_tensor), replace=True)
    X_train_bootstrap = X_train_tensor[bootstrap_indices]
    y_train_bootstrap = y_train_tensor[bootstrap_indices]
    
    model = NeuralNet(input_size=X_train_tensor.shape[1])
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.BCELoss()  

    for epoch in range(10):  
        model.train()
        optimizer.zero_grad()
        predictions = model(X_train_bootstrap)
        loss = criterion(predictions, y_train_bootstrap)
        loss.backward()
        optimizer.step()

    with torch.no_grad():
        model.eval()
        test_predictions = model(X_test_tensor)
        test_predictions_class = (test_predictions > 0.5).float()  
        acc = accuracy_score(y_test_tensor.cpu().numpy(), test_predictions_class.cpu().numpy())
        accuracy_list.append(acc)

mean_accuracy = np.mean(accuracy_list)
std_accuracy = np.std(accuracy_list)
df1_boot_acc = mean_accuracy
df1_boot_stdev = std_accuracy









# DF2 Bootstrapping

X = df2.drop(columns=['cgpa'])
y = df2['cgpa']

numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(), categorical_cols)
    ]
)

X_preprocessed = preprocessor.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

n_bootstrap = 100
rmse_list = []
mae_list = []

for i in range(n_bootstrap):
    bootstrap_indices = np.random.choice(range(len(X_train)), size=len(X_train), replace=True)
    X_train_bootstrap = X_train_tensor[bootstrap_indices]
    y_train_bootstrap = y_train_tensor[bootstrap_indices]
    
    model = NeuralNet(input_size=X_train_tensor.shape[1])
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.MSELoss()
    
    for epoch in range(10):  
        model.train()
        optimizer.zero_grad()
        predictions = model(X_train_bootstrap)
        loss = criterion(predictions, y_train_bootstrap)
        loss.backward()
        optimizer.step()
    
    with torch.no_grad():
        model.eval()
        test_predictions = model(X_test_tensor)
        mse = torch.mean((test_predictions - y_test_tensor) ** 2).item()
        rmse = np.sqrt(mse)
        mae = torch.mean(torch.abs(test_predictions - y_test_tensor)).item()
        rmse_list.append(rmse)
        mae_list.append(mae)

df2_boot_rmse = np.mean(rmse_list)
df2_boot_mae = np.mean(mae_list)





# DF1 KNN

from sklearn.neighbors import KNeighborsClassifier

X = df1.drop(columns=['Depression'])
y = df1['Depression']

y_encoded = y.map({'Yes': 1, 'No': 0}).values

numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(), categorical_cols)
    ]
)

X_preprocessed = preprocessor.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y_encoded, test_size=0.2, random_state=42)

knn = KNeighborsClassifier(n_neighbors=5)  

knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
df1_KNN = accuracy




# DF2 KNN
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.neighbors import KNeighborsRegressor

X = df2.drop(columns=['cgpa'])  
y = df2['cgpa']  

numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = X.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),  
        ('cat', OneHotEncoder(), categorical_cols)   
    ]
)

X_preprocessed = preprocessor.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y, test_size=0.2, random_state=42)

knn = KNeighborsRegressor(n_neighbors=5)  

knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)

rmse = np.sqrt(mean_squared_error(y_test, y_pred))
df2_KNN_rmse = rmse

mae = mean_absolute_error(y_test, y_pred)
df2_KNN_mae = mae





from tabulate import tabulate

new_table2 = {
    'Data Table': ['DF 1', 'DF 1', 'DF 1', 'DF 1', 'DF 2', 'DF 2', 'DF 2', 'DF 2'],
    'Model': ['Neural Network', 'PCA', 'Bootstrapping', 'K-nearest neighbors', 'Neural Network', 'PCA', 'Bootstrapping', 'K-nearest neighbors'],
    'Accuracy': [df1_NN, df1_PCA, df1_boot_acc, df1_KNN, None, None, None, None],
    'MAE': [None, None, None, None, df2_NN_MAE, df2_PCA_MAE, df2_boot_mae, df2_KNN_mae],
    'RMSE': [None, None, None, None, df2_NN_rmse, df2_PCA_RMSE, df2_boot_rmse, df2_KNN_rmse]
}

df4 = pd.DataFrame(new_table2)
df4[['Accuracy', 'MAE', 'RMSE']] = df4[['Accuracy', 'MAE', 'RMSE']].round(4)
df4 = df4.fillna("")

print(tabulate(df4, headers='keys', tablefmt='pretty', showindex=False))

+------------+---------------------+----------+--------+--------+
| Data Table |        Model        | Accuracy |  MAE   |  RMSE  |
+------------+---------------------+----------+--------+--------+
|    DF 1    |   Neural Network    |  0.703   |        |        |
|    DF 1    |         PCA         |  0.7624  |        |        |
|    DF 1    |    Bootstrapping    |  0.753   |        |        |
|    DF 1    | K-nearest neighbors |  0.8713  |        |        |
|    DF 2    |   Neural Network    |          | 0.3283 | 0.3549 |
|    DF 2    |         PCA         |          | 0.4484 | 0.4966 |
|    DF 2    |    Bootstrapping    |          | 0.3186 | 0.3467 |
|    DF 2    | K-nearest neighbors |          | 0.1785 | 0.2515 |
+------------+---------------------+----------+--------+--------+
