In [None]:
# Environment bootstrap. The shell test `[ -e /content ]` succeeds only when a
# /content path exists, so the quiet fastbook install/upgrade runs only there
# (presumably a Colab runtime - TODO confirm) and is skipped everywhere else.
! [ -e /content ] && pip install -Uqq fastbook
import fastbook
# NOTE(review): setup_book() is defined in fastbook, not in this notebook;
# presumably it prepares the notebook environment - confirm against fastbook.
fastbook.setup_book()

In [None]:
from fastbook import *
from fastai.vision.widgets import *

# Mental Health

> Data source: [Student Mental Health dataset on Kaggle](https://www.kaggle.com/datasets/shariful07/student-mental-health)

In [None]:
# Install the two extra packages imported by the next cell (opendatasets and
# dtreeviz). Each install is guarded by `[ -e /content ]`, so it only runs
# when a /content path exists.
# NOTE(review): neither package is version-pinned; consider pinning so a fresh
# run resolves the same releases.
! [ -e /content ] && pip install opendatasets
! [ -e /content ] && pip install dtreeviz

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import opendatasets as od
from pandas.api.types import is_string_dtype, is_numeric_dtype, is_categorical_dtype
from fastai.tabular.all import *
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from dtreeviz.trees import *
from IPython.display import Image, display_svg, SVG

# Keep rendered DataFrames compact: show at most 20 rows and 8 columns.
pd.set_option("display.max_rows", 20)
pd.set_option("display.max_columns", 8)

In [None]:
# Fetch the Kaggle dataset with opendatasets. The recorded output shows the
# files land in ./student-mental-health and that an existing download is
# skipped unless force=True is passed.
dataset_url = "https://www.kaggle.com/datasets/shariful07/student-mental-health?resource=download"
od.download(dataset_url)

Skipping, found downloaded files in "./student-mental-health" (use force=True to force download)


In [None]:
# Load the downloaded survey CSV into a pandas DataFrame, then list its
# column names to see which questions the survey asked.
csv_path = 'student-mental-health/Student Mental health.csv'
df = pd.read_csv(csv_path, low_memory=False)
df.columns

Index(['Timestamp', 'Choose your gender', 'Age', 'What is your course?',
       'Your current year of Study', 'What is your CGPA?', 'Marital status',
       'Do you have Depression?', 'Do you have Anxiety?',
       'Do you have Panic attack?',
       'Did you seek any specialist for a treatment?'],
      dtype='object')

In [None]:
## Data exploration
# Collect the distinct answers given in the course column. They are free
# text, so the same course can appear under several spellings.
course_answers = df["What is your course?"]
unique_topics = course_answers.unique()
unique_topics

array(['Engineering', 'Islamic education', 'BIT', 'Laws', 'Mathemathics', 'Pendidikan islam', 'BCS', 'Human Resources', 'Irkhs', 'Psychology', 'KENMS', 'Accounting ', 'ENM', 'Marine science', 'KOE',
       'Banking Studies', 'Business Administration', 'Law', 'KIRKHS', 'Usuluddin ', 'TAASL', 'Engine', 'ALA', 'Biomedical science', 'koe', 'Kirkhs', 'BENL', 'Benl', 'IT', 'CTS', 'engin', 'Econs',
       'MHSC', 'Malcom', 'Kop', 'Human Sciences ', 'Biotechnology', 'Communication ', 'Diploma Nursing', 'Pendidikan Islam ', 'Radiography', 'psychology', 'Fiqh fatwa ', 'DIPLOMA TESL', 'Koe',
       'Fiqh', 'Islamic Education', 'Nursing ', 'Pendidikan Islam'], dtype=object)

In [None]:
# Give every distinct course string an integer code equal to its position
# in unique_topics (first value -> 0, second -> 1, ...).
leng = len(unique_topics)
unique_topics_dict = dict(zip(unique_topics, range(leng)))

In [None]:
# Integer-encode the course column with unique_topics_dict. This is label
# encoding (one integer per distinct course string), not one-hot encoding.
#
# The encoded Series is assigned back to the frame instead of calling
# replace(..., inplace=True) on df[...]: that chained in-place form operates
# on an intermediate Series and leaves df unchanged under pandas
# Copy-on-Write. infer_objects() then gives the column a numeric dtype in
# pandas versions where replace() no longer downcasts an object column.
# Re-running the cell is harmless: already-encoded integers match no key.
df["What is your course?"] = (
    df["What is your course?"].replace(unique_topics_dict).infer_objects()
)
df

Unnamed: 0,Timestamp,Choose your gender,Age,What is your course?,...,Do you have Depression?,Do you have Anxiety?,Do you have Panic attack?,Did you seek any specialist for a treatment?
0,8/7/2020 12:02,Female,18.0,0,...,Yes,No,Yes,No
1,8/7/2020 12:04,Male,21.0,1,...,No,Yes,No,No
2,8/7/2020 12:05,Male,19.0,2,...,Yes,Yes,Yes,No
3,8/7/2020 12:06,Female,22.0,3,...,Yes,No,No,No
4,8/7/2020 12:13,Male,23.0,4,...,No,No,No,No
...,...,...,...,...,...,...,...,...,...
96,13/07/2020 19:56:49,Female,21.0,6,...,No,Yes,No,No
97,13/07/2020 21:21:42,Male,18.0,0,...,Yes,Yes,No,No
98,13/07/2020 21:22:56,Female,19.0,47,...,Yes,No,Yes,No
99,13/07/2020 21:23:57,Female,23.0,48,...,No,No,No,No


In [103]:
# Keep a handle on the CGPA column; the lookup-building cell further down
# iterates over `GPAs`.
GPAs = df['What is your CGPA?']

In [104]:
def calc_gpa(GPA):
  """Convert a CGPA range label into the midpoint of its two bounds.

  The label is split on whitespace. The first and third tokens are read as
  the lower and upper bound (e.g. "3.00 - 3.49"); the token between them is
  ignored.

  Args:
    GPA: range label string with at least three whitespace-separated tokens.

  Returns:
    Tuple ``(GPA, midpoint)``: the label exactly as passed in, and the float
    mean of the two bounds.

  Raises:
    IndexError: if the label has fewer than three tokens.
    ValueError: if either bound is not parseable as a float.
  """
  tokens = GPA.split()
  lower, upper = tokens[0], tokens[2]
  midpoint = (float(lower) + float(upper)) / 2
  return GPA, midpoint

In [109]:
# Build a lookup from each raw CGPA label to its numeric midpoint.
# calc_gpa returns (label, midpoint) pairs, which dict() consumes directly.
gpa_dict = dict(calc_gpa(label) for label in GPAs)

In [113]:
# Swap each CGPA range label for its numeric midpoint using gpa_dict.
#
# The result is assigned back to the frame rather than calling
# replace(..., inplace=True) on df[...]: that chained in-place form acts on
# an intermediate Series and does not update df under pandas Copy-on-Write.
# infer_objects() gives the column a float dtype in pandas versions where
# replace() leaves an object column as object.
df["What is your CGPA?"] = (
    df["What is your CGPA?"].replace(gpa_dict).infer_objects()
)
out = df["What is your CGPA?"]
out

0      3.245
1      3.245
2      3.245
3      3.245
4      3.245
       ...  
96     3.750
97     3.245
98     3.750
99     3.750
100    3.245
Name: What is your CGPA?, Length: 101, dtype: float64

In [114]:
# Encode the remaining categorical survey answers as numbers. These are
# binary / ordinal label encodings (one numeric column per question), not
# one-hot encodings.
#
# Every encoded Series is assigned back to the frame instead of using
# replace(..., inplace=True) on df[...]: that chained in-place form works on
# an intermediate Series and leaves df unchanged under pandas Copy-on-Write.
# infer_objects() gives each column a numeric dtype in pandas versions where
# replace() no longer downcasts an object column.
yes_no_codes = {"Yes": 1, "No": 0}
yes_no_columns = [
    "Do you have Depression?",
    "Do you have Anxiety?",
    "Do you have Panic attack?",
    "Did you seek any specialist for a treatment?",
    "Marital status",
]
for column in yes_no_columns:
    df[column] = df[column].replace(yes_no_codes).infer_objects()

gender_codes = {"Female": 1.0, "Male": 0.0}
df["Choose your gender"] = (
    df["Choose your gender"].replace(gender_codes).infer_objects()
)

# Year labels are matched case- and whitespace-insensitively. The free-text
# course column in this survey mixes spellings such as 'KOE' / 'koe' / 'Koe',
# so the year labels presumably can too (TODO confirm against the raw CSV).
# An exact match would silently leave such variants as strings. Values that
# match no label, including NaN and already-encoded ints, pass through as is.
year_codes = {"year 1": 1, "year 2": 2, "year 3": 3, "year 4": 4}
df["Your current year of Study"] = (
    df["Your current year of Study"]
    .map(lambda v: year_codes.get(v.strip().lower(), v) if isinstance(v, str) else v)
    .infer_objects()
)
df

Unnamed: 0,Timestamp,Choose your gender,Age,What is your course?,...,Do you have Anxiety?,Do you have Panic attack?,Did you seek any specialist for a treatment?,MENTAL HEALTH SCORE
0,8/7/2020 12:02,1,18.0,0,...,0,1,0,2
1,8/7/2020 12:04,0,21.0,1,...,1,0,0,1
2,8/7/2020 12:05,0,19.0,2,...,1,1,0,3
3,8/7/2020 12:06,1,22.0,3,...,0,0,0,1
4,8/7/2020 12:13,0,23.0,4,...,0,0,0,0
...,...,...,...,...,...,...,...,...,...
96,13/07/2020 19:56:49,1,21.0,6,...,1,0,0,1
97,13/07/2020 21:21:42,0,18.0,0,...,1,0,0,2
98,13/07/2020 21:22:56,1,19.0,47,...,0,1,0,2
99,13/07/2020 21:23:57,1,23.0,48,...,0,0,0,0


In [None]:
# To predict the likelihood of mental health issues we need a target
# metric. Each mental-health column is encoded as 1 for YES and 0 for NO;
# the per-row sum of those columns is then stored in a new
# 'MENTAL HEALTH SCORE' column.

In [None]:
def add_cols(x, y, z):
  """Return the three arguments combined with ``+``, left to right."""
  partial = x + y
  return partial + z

In [115]:
# Target metric: per-row sum of the three condition columns (0-3 once those
# columns hold 0/1 codes). The columns are added as whole Series through
# add_cols, which avoids a row-by-row df.apply with a Python lambda and
# yields the same element-wise x + y + z result.
df['MENTAL HEALTH SCORE'] = add_cols(
    df['Do you have Depression?'],
    df['Do you have Anxiety?'],
    df['Do you have Panic attack?'],
)
df

Unnamed: 0,Timestamp,Choose your gender,Age,What is your course?,...,Do you have Anxiety?,Do you have Panic attack?,Did you seek any specialist for a treatment?,MENTAL HEALTH SCORE
0,8/7/2020 12:02,1,18.0,0,...,0,1,0,2
1,8/7/2020 12:04,0,21.0,1,...,1,0,0,1
2,8/7/2020 12:05,0,19.0,2,...,1,1,0,3
3,8/7/2020 12:06,1,22.0,3,...,0,0,0,1
4,8/7/2020 12:13,0,23.0,4,...,0,0,0,0
...,...,...,...,...,...,...,...,...,...
96,13/07/2020 19:56:49,1,21.0,6,...,1,0,0,1
97,13/07/2020 21:21:42,0,18.0,0,...,1,0,0,2
98,13/07/2020 21:22:56,1,19.0,47,...,0,1,0,2
99,13/07/2020 21:23:57,1,23.0,48,...,0,0,0,0
