In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

In [6]:
df_val = pd.read_csv('../X_test_encoded.csv')
df_train = pd.read_csv('../X_train_encoded.csv')
X_val = df_val.drop('Depression', axis=1)
y_val = df_val['Depression']
X = df_train.drop('Depression', axis=1)
y = df_train['Depression']

In [14]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

# Create pipeline with MinMaxScaler and Logistic Regression
pipe = Pipeline([
    ('scaler', MinMaxScaler()),
    ('log_reg', LogisticRegression(max_iter=1000, random_state=42))
])

# Define the parameter grid for GridSearchCV
param_grid = {
    'log_reg__C': [0.01, 0.1, 1, 10, 20, 30, 100],  # Regularization strength
    'log_reg__solver': ['saga', 'lbfgs'],  # Solvers for optimization
}

# Perform GridSearchCV
grid_search = GridSearchCV(estimator=pipe, param_grid=param_grid, cv=5, scoring='accuracy', verbose=1)
grid_search.fit(X, y)

# Get the best model
best_model = grid_search.best_estimator_

# Print the best parameters and accuracy
print(f"Best Parameters: {grid_search.best_params_}")
print(f"Best Cross-Validation Accuracy: {grid_search.best_score_}")

# Predict on training and validation sets using the best model
y_train_pred = best_model.predict(X)
y_valid_pred = best_model.predict(X_val)
print("\nClassification Report (Validation Set):")
print(classification_report(y_val, y_valid_pred, digits=4))


Fitting 5 folds for each of 14 candidates, totalling 70 fits
Best Parameters: {'log_reg__C': 10, 'log_reg__solver': 'saga'}
Best Cross-Validation Accuracy: 0.8472979142724286

Classification Report (Validation Set):
              precision    recall  f1-score   support

           0     0.8305    0.7899    0.8097      2313
           1     0.8562    0.8858    0.8708      3267

    accuracy                         0.8461      5580
   macro avg     0.8433    0.8379    0.8402      5580
weighted avg     0.8455    0.8461    0.8454      5580



In [9]:
import pandas as pd
import os
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# After getting predictions and running classification_report
def save_metrics_to_csv(model_name, y_true, y_pred, filepath='../model_metrics.csv'):
    """
    Save model metrics to CSV file with each model as a row
    
    Parameters:
    -----------
    model_name : str
        Name of the model
    y_true : array-like
        True labels
    y_pred : array-like
        Predicted labels
    filepath : str
        Path to CSV file
    """
    # Calculate metrics
    accuracy = accuracy_score(y_true, y_pred)
    
    # Class 0 metrics
    precision_0 = precision_score(y_true, y_pred, pos_label=0)
    recall_0 = recall_score(y_true, y_pred, pos_label=0)
    f1_0 = f1_score(y_true, y_pred, pos_label=0)
    
    # Class 1 metrics
    precision_1 = precision_score(y_true, y_pred, pos_label=1)
    recall_1 = recall_score(y_true, y_pred, pos_label=1)
    f1_1 = f1_score(y_true, y_pred, pos_label=1)
    
    # Average metrics
    precision_avg = precision_score(y_true, y_pred, average='macro')
    recall_avg = recall_score(y_true, y_pred, average='macro')
    f1_avg = f1_score(y_true, y_pred, average='macro')
    
    # Create a dictionary with all metrics
    metrics_dict = {
        'model': model_name,
        'accuracy': accuracy,
        'precision_class0': precision_0,
        'recall_class0': recall_0,
        'f1_class0': f1_0,
        'precision_class1': precision_1,
        'recall_class1': recall_1,
        'f1_class1': f1_1,
        'precision_avg': precision_avg,
        'recall_avg': recall_avg,
        'f1_avg': f1_avg
    }
    
    # Check if file exists
    if os.path.exists(filepath):
        # Read existing data and append new row
        metrics_df = pd.read_csv(filepath)
        
        # Check if model already exists in the dataframe
        if model_name in metrics_df['model'].values:
            # Update existing row
            metrics_df.loc[metrics_df['model'] == model_name] = pd.Series(metrics_dict)
        else:
            # Append new row
            metrics_df = pd.concat([metrics_df, pd.DataFrame([metrics_dict])], ignore_index=True)
    else:
        # Create new dataframe
        metrics_df = pd.DataFrame([metrics_dict])
    
    # Save to CSV
    metrics_df.to_csv(filepath, index=False)
    print(f"Metrics saved to {filepath}")
    
    return metrics_df

# Use the function after evaluating your model
# Example usage after running the model:
save_metrics_to_csv("logictics_Regression_lib", y_val, y_valid_pred)

Metrics saved to ../model_metrics.csv


Unnamed: 0,model,accuracy,precision_class0,recall_class0,f1_class0,precision_class1,recall_class1,f1_class1,precision_avg,recall_avg,f1_avg
0,SVM_Linear,0.846953,0.833562,0.788154,0.810222,0.855585,0.888583,0.871772,0.844573,0.838368,0.840997
1,logictics_Regression_lib,0.846057,0.830455,0.789883,0.809661,0.856213,0.885828,0.870769,0.843334,0.837856,0.840215


In [11]:
from logistic_regression_scratch import LogisticRegression
# Initialize the custom logistic regression model
log_reg_scratch = LogisticRegression(max_iter=1000, learning_rate=0.01, random_state=42)
# Fit the model on the training data
log_reg_scratch.fit(X, y)
# Predict on the training and validation sets
y_train_pred_scratch = log_reg_scratch.predict(X)
y_valid_pred_scratch = log_reg_scratch.predict(X_val)
# Generate classification reports for the custom model
print("Classification Report (Training Set - Scratch):")
print(classification_report(y, y_train_pred_scratch))
print("\nClassification Report (Validation Set - Scratch):")
print(classification_report(y_val, y_valid_pred_scratch))
# Calculate and print accuracy scores
print(f"Training Set Accuracy (Scratch): {accuracy_score(y, y_train_pred_scratch)}")

TypeError: LogisticRegression.__init__() got an unexpected keyword argument 'max_iter'

In [None]:
save_metrics_to_csv("logictics_Regression_scratch", y_val, y_valid_pred_scratch)