In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:

# Load Dataset
data = pd.read_csv("Student Depression Dataset.csv")
print("Dataset Columns:", data.columns)

Dataset Columns: Index(['id', 'Gender', 'Age', 'City', 'Profession', 'Academic Pressure',
       'Work Pressure', 'CGPA', 'Study Satisfaction', 'Job Satisfaction',
       'Sleep Duration', 'Dietary Habits', 'Degree',
       'Have you ever had suicidal thoughts ?', 'Work/Study Hours',
       'Financial Stress', 'Family History of Mental Illness', 'Depression'],
      dtype='object')


In [3]:

# Assuming the last column is the target
target_column = "Depression"
print(f"Assuming target column: {target_column}")

Assuming target column: Depression


In [4]:

# Encode categorical columns
categorical_columns = data.select_dtypes(include=["object"]).columns.tolist()
if target_column in categorical_columns:
    categorical_columns.remove(target_column)

In [5]:

# One-hot encode categorical columns
data = pd.get_dummies(data, columns=categorical_columns, drop_first=True)

# Convert target column to numeric if necessary
if data[target_column].dtype == "object":
    data[target_column] = data[target_column].astype("category").cat.codes


In [6]:

# Features and Target
X = data.drop(columns=[target_column])
y = data[target_column]

In [7]:

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Bat Algorithm Parameters
num_bats = 10
max_iter = 10
loudness = 0.6
pulse_rate = 0.5
frequency_min, frequency_max = 0, 2

In [8]:

# Fitness Function with Additional Metrics
def fitness_function_with_metrics(solution):
    selected_features = np.where(solution > 0.5)[0]
    if len(selected_features) == 0:
        return 0, 0, 0, 0  # Return 0 for accuracy, precision, recall, and F1 if no features are selected
         # Train model on selected features
    clf = RandomForestClassifier(random_state=42)
    clf.fit(X_train.iloc[:, selected_features], y_train)
    preds = clf.predict(X_test.iloc[:, selected_features])
    
    # Calculate metrics
    accuracy = accuracy_score(y_test, preds)
    precision = precision_score(y_test, preds, average="weighted", zero_division=0)
    recall = recall_score(y_test, preds, average="weighted", zero_division=0)
    f1 = f1_score(y_test, preds, average="weighted", zero_division=0)

    return accuracy, precision, recall, f1


In [9]:

   
# Initialize bats
positions = np.random.rand(num_bats, X.shape[1])  # Random positions in [0,1]
velocities = np.random.uniform(-1, 1, (num_bats, X.shape[1]))  # Random velocities
best_global = positions[np.random.randint(0, num_bats)]  # Randomly select a bat
best_global_accuracy, best_global_precision, best_global_recall, best_global_f1 = fitness_function_with_metrics(best_global)



In [10]:

# Bat Algorithm
for t in range(max_iter):
    for i in range(num_bats):
        # Calculate frequency and update velocity and position
        frequency = frequency_min + (frequency_max - frequency_min) * np.random.rand()
        velocities[i] += (positions[i] - best_global) * frequency
        positions[i] = np.clip(positions[i] + velocities[i], 0, 1)  # Ensure positions stay in range
         # Local search
        if np.random.rand() > pulse_rate:
            positions[i] = np.clip(best_global + np.random.normal(0, 0.1, size=X.shape[1]), 0, 1)

        # Evaluate fitness
        accuracy, precision, recall, f1 = fitness_function_with_metrics(positions[i])
        if accuracy > best_global_accuracy and np.random.rand() < loudness:
            best_global = positions[i]
            best_global_accuracy, best_global_precision, best_global_recall, best_global_f1 = accuracy, precision, recall, f1

    # Update loudness and pulse rate
    loudness = max(0.1, loudness * 0.95)
    pulse_rate = min(1.0, pulse_rate * 1.05)
    # Print progress
    print(f"Iteration {t + 1}:")
    print(f"Best Accuracy = {best_global_accuracy:.4f}")
    print(f"Best Precision = {best_global_precision:.4f}")
    print(f"Best Recall = {best_global_recall:.4f}")
    print(f"Best F1 Score = {best_global_f1:.4f}")
    selected_features = np.where(best_global > 0.5)[0]
    print("Selected Features:", X.columns[selected_features].tolist())
    print()

Iteration 1:
Best Accuracy = 0.7772
Best Precision = 0.7760
Best Recall = 0.7772
Best F1 Score = 0.7759
Selected Features: ['Age', 'CGPA', 'Work/Study Hours', 'Gender_Male', 'City_Ahmedabad', 'City_Chennai', 'City_City', 'City_Faridabad', 'City_Gaurav', 'City_Harsha', 'City_Hyderabad', 'City_Kalyan', 'City_Khaziabad', 'City_Kolkata', 'City_Less Delhi', 'City_Less than 5 Kalyan', 'City_M.Tech', 'City_ME', 'City_Mihir', 'City_Mira', 'City_Mumbai', 'City_Rajkot', 'City_Saanvi', 'City_Vaanya', 'City_Varanasi', 'Profession_Chef', 'Profession_Content Writer', 'Profession_Lawyer', 'Profession_Manager', 'Profession_Student', 'Sleep Duration_7-8 hours', 'Sleep Duration_Less than 5 hours', 'Sleep Duration_Others', 'Dietary Habits_Others', 'Dietary Habits_Unhealthy', 'Degree_B.Tech', 'Degree_BA', 'Degree_Class 12', 'Degree_LLB', 'Degree_M.Ed', 'Degree_M.Pharm', 'Degree_MA', 'Degree_MBBS', 'Degree_MCA', 'Degree_MD', 'Degree_ME', 'Degree_Others', 'Degree_PhD', 'Have you ever had suicidal thoughts ?

In [11]:

# Final Results
print("Final Results:")
print(f"Best Accuracy: {best_global_accuracy:.4f}")
print(f"Best Precision: {best_global_precision:.4f}")
print(f"Best Recall: {best_global_recall:.4f}")
print(f"Best F1 Score: {best_global_f1:.4f}")
print("Selected Features:", X.columns[selected_features].tolist())


Final Results:
Best Accuracy: 0.8053
Best Precision: 0.8047
Best Recall: 0.8053
Best F1 Score: 0.8049
Selected Features: ['Age', 'Academic Pressure', 'Work/Study Hours', 'Gender_Male', 'City_Agra', 'City_Ahmedabad', 'City_Bhavna', 'City_Chennai', 'City_Faridabad', 'City_Harsha', 'City_Hyderabad', 'City_Kalyan', 'City_Khaziabad', 'City_M.Tech', 'City_ME', 'City_Mihir', 'City_Mira', 'City_Mumbai', 'City_Nalini', 'City_Rajkot', 'City_Rashi', 'City_Surat', 'City_Vaanya', 'Profession_Educational Consultant', 'Profession_Lawyer', 'Profession_Manager', 'Profession_Pharmacist', 'Profession_Student', 'Sleep Duration_7-8 hours', 'Sleep Duration_More than 8 hours', 'Sleep Duration_Others', 'Dietary Habits_Moderate', 'Dietary Habits_Unhealthy', 'Degree_B.Com', 'Degree_BA', 'Degree_BBA', 'Degree_Class 12', 'Degree_LLB', 'Degree_M.Pharm', 'Degree_M.Tech', 'Degree_MA', 'Degree_MBBS', 'Degree_MCA', 'Degree_MD', 'Degree_Others', 'Degree_PhD', 'Have you ever had suicidal thoughts ?_Yes', 'Family History