# Mental Health Score

In [184]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import statsmodels.api as sm
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier
import pickle
import numpy as np

In [185]:
# Load the digital-diet / mental-health CSV into a DataFrame.
# The path is relative, so it is resolved against the current working directory.
DATA_PATH = 'digital_diet_mental_health.csv'
data = pd.read_csv(DATA_PATH)

# Check the type of each column to find which ones are not numerical and must be encoded or dropped before modelling

In [186]:
# Report, one line per column, whether pandas classifies its dtype as numeric.
for column in data.columns:
    if pd.api.types.is_numeric_dtype(data[column]):
        print(f"Column '{column}' is numerical.")
        continue
    # Anything that reaches this point failed the numeric-dtype check.
    print(f"Column '{column}' is not numerical.")

Column 'user_id' is not numerical.
Column 'age' is numerical.
Column 'gender' is not numerical.
Column 'daily_screen_time_hours' is numerical.
Column 'phone_usage_hours' is numerical.
Column 'laptop_usage_hours' is numerical.
Column 'tablet_usage_hours' is numerical.
Column 'tv_usage_hours' is numerical.
Column 'social_media_hours' is numerical.
Column 'work_related_hours' is numerical.
Column 'entertainment_hours' is numerical.
Column 'gaming_hours' is numerical.
Column 'sleep_duration_hours' is numerical.
Column 'sleep_quality' is numerical.
Column 'mood_rating' is numerical.
Column 'stress_level' is numerical.
Column 'physical_activity_hours_per_week' is numerical.
Column 'location_type' is not numerical.
Column 'mental_health_score' is numerical.
Column 'uses_wellness_apps' is numerical.
Column 'eats_healthy' is numerical.
Column 'caffeine_intake_mg_per_day' is numerical.
Column 'weekly_anxiety_score' is numerical.
Column 'weekly_depression_score' is numerical.
Column 'mindfulness_minutes_per_day' is numerical.

## Create Model

In [259]:
# Encode the string-valued categorical columns as integer codes, then split
# the frame into the feature matrix X and the target y.
#
# Each column gets its OWN LabelEncoder. Re-fitting a single encoder on the
# second column would overwrite the mapping learned for the first, making it
# impossible to encode new data consistently or to invert the codes later.
#
# NOTE(review): scikit-learn documents LabelEncoder as a target (y) encoder;
# the integer codes it produces imply an ordering between categories that a
# linear model will treat as meaningful. Consider one-hot encoding instead --
# not changed here because it would alter the feature columns of X.
CATEGORICAL_COLUMNS = ['gender', 'location_type']

label_encoders = {}
for categorical_column in CATEGORICAL_COLUMNS:
    column_encoder = LabelEncoder()
    # `data` is overwritten in place, as before, so later cells that read
    # these columns keep seeing the integer codes.
    data[categorical_column] = column_encoder.fit_transform(data[categorical_column])
    label_encoders[categorical_column] = column_encoder

# Legacy alias: previously `label_encoder` ended up fitted on location_type.
label_encoder = label_encoders['location_type']

# Features are every column except the target and the non-numerical user_id.
X = data.drop(columns=['mental_health_score', 'user_id'])
y = data['mental_health_score']

In [260]:
# Hold out 25% of the rows for evaluation; the fixed random_state makes the
# shuffled split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    shuffle=True,
    random_state=202,
)

In [261]:
# Fit a linear regression on the training split.
# `results` holds whatever fit() returns; it is the object pickled later on.
model = LinearRegression()
results = model.fit(X=X_train, y=y_train)

In [262]:
# Predict the target for the held-out rows; used by the metrics cell below.
y_pred = model.predict(X=X_test)

In [264]:
# Model performance on the held-out split.
# Errors are computed first so the print statements stay simple; RMSE is the
# square root of the mean squared error.
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f'The MAE is {mae}')
print(f'The RMSE is {rmse}')

The MAE is 15.803456185516326
The RMSE is 18.101639844994555


In [265]:
# Saving the trained model.
# Only the estimator returned by fit() is written. The integer encodings
# applied to 'gender' and 'location_type' before training are not part of this
# file, so whoever loads it must reproduce them separately.
with open('model.pkl', 'wb') as f:
    pickle.dump(results, f)
# Report success only after the with-block has flushed and closed the file;
# printing inside the block would announce completion before the write is final.
print('Pickling completed')

Pickling completed


In [266]:
# Summary statistics for the mindfulness feature of the feature matrix.
X.loc[:, "mindfulness_minutes_per_day"].describe()

count    2000.000000
mean       10.753750
std         7.340269
min         0.000000
25%         4.900000
50%        10.400000
75%        15.800000
max        36.400000
Name: mindfulness_minutes_per_day, dtype: float64

In [267]:
# Display the feature columns of X, in order. X is the frame that was split
# into the training and test sets used to fit and evaluate the model.
X.columns

Index(['age', 'gender', 'daily_screen_time_hours', 'phone_usage_hours',
       'laptop_usage_hours', 'tablet_usage_hours', 'tv_usage_hours',
       'social_media_hours', 'work_related_hours', 'entertainment_hours',
       'gaming_hours', 'sleep_duration_hours', 'sleep_quality', 'mood_rating',
       'stress_level', 'physical_activity_hours_per_week', 'location_type',
       'uses_wellness_apps', 'eats_healthy', 'caffeine_intake_mg_per_day',
       'weekly_anxiety_score', 'weekly_depression_score',
       'mindfulness_minutes_per_day'],
      dtype='object')

In [268]:
# Summary statistics for the regression target.
# NOTE(review): in the recorded outputs the target's standard deviation
# (about 17.5) is roughly the same as the test RMSE (about 18.1), which
# suggests the model does little better than predicting the mean -- confirm
# against a mean-only baseline on the same split.
data.loc[:, 'mental_health_score'].describe()

count    2000.000000
mean       49.650500
std        17.546717
min        20.000000
25%        35.000000
50%        49.000000
75%        64.250000
max        80.000000
Name: mental_health_score, dtype: float64