In [2]:

import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

# Train and persist a Random Forest classifier that predicts the
# 'diagnosedwithPCOS' label from the other columns of static/pcos.csv.

# Load the dataset; abort with a non-zero exit status if the file is absent.
try:
    mental_data = pd.read_csv('static/pcos.csv')
except FileNotFoundError as e:
    raise SystemExit(f"Error loading file: {e}") from e

# Preview the data. DataFrame.info() prints its report itself and returns
# None, so it must not be wrapped in print().
print(mental_data.head())
mental_data.info()

# Handle missing values: drop every row that has a NaN in any column.
mental_data = mental_data.dropna()
if mental_data.empty:
    raise SystemExit("PCOS dataset is empty after dropping NaN values.")

# Define feature columns and target column.
# The target must NOT appear among the features: training on the label
# itself leaks the answer and makes the reported accuracy meaningless.
target_column = 'diagnosedwithPCOS'
feature_columns = [
    'Age',
    'Weight',
    'Height',
    'Bloodgroup',
    'Monthsperiod',
    'gainweight',
    'hairgrowth',
    'skindarkening',
    'hairloss',
    'pimples',
    'eatfast',
    'regularbasis',
    'moodswings',
    'periodsregular',
    'periodlast',
]

# Check that every required column exists before indexing into the frame.
missing_columns = [
    col for col in feature_columns + [target_column]
    if col not in mental_data.columns
]
if missing_columns:
    raise SystemExit(f"Missing columns in dataset: {missing_columns}")

# Encode categorical feature columns only, one encoder per column.
# Numeric columns keep their real values: label-encoding them would replace
# each value with its rank among the training values and would fail on any
# value not seen during training.
feature_encoders = {}
for col in feature_columns:
    if pd.api.types.is_numeric_dtype(mental_data[col]):
        continue
    column_encoder = LabelEncoder()
    mental_data[col] = column_encoder.fit_transform(mental_data[col])
    feature_encoders[col] = column_encoder

# Encode the target with its own encoder. `encoder` keeps its original
# meaning: after this step it is fitted on the target column, exactly as the
# object that was pickled before.
encoder = LabelEncoder()
mental_data[target_column] = encoder.fit_transform(mental_data[target_column])

# Split into features and target
X_mental = mental_data[feature_columns]
y_mental = mental_data[target_column]

# Split data into training and testing sets (80% / 20%, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X_mental, y_mental, test_size=0.2, random_state=42
)

# Train a Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate the model on the held-out split
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Save the model and the target encoder (same paths as before).
joblib.dump(model, 'static/models/pcos.pkl')
joblib.dump(encoder, 'static/encoders/pcos_encoder.pkl')
# Additionally persist the per-column feature encoders (a dict keyed by
# column name, possibly empty) so inference code can reproduce the encoding.
joblib.dump(feature_encoders, 'static/encoders/pcos_feature_encoders.pkl')

# Save the accuracy score.
# NOTE(review): the file name says "mental_disorder" although this is the
# PCOS model; kept unchanged because other code may read this path — confirm
# and rename if it is a copy-paste leftover.
with open('static/models/mental_disorder_metrics.txt', 'w', encoding='utf-8') as f:
    f.write(f"Accuracy: {accuracy}\n")


   Age  Weight  Height  Bloodgroup  Monthsperiod  gainweight  hairgrowth  \
0   21    47.0   168.0          13             1           0           0   
1   21    45.0   156.0          11             1           0           0   
2   17    37.0   162.0          15             1           0           0   
3   28    54.0   160.0          15             1           0           0   
4   45    40.0   150.0          13             2           0           0   

   skindarkening  hairloss  pimples  eatfast  regularbasis  diagnosedwithPCOS  \
0              0         0        0        0             0                  0   
1              1         1        1        0             0                  0   
2              0         0        1        1             0                  0   
3              0         0        0        0             0                  0   
4              0         1        0        0             0                  1   

   moodswings  periodsregular  periodlast  
0           