In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the input tree once and print the full path of each file, one per line.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/student-dataset/student_depression_dataset.csv


In [3]:
import kagglehub

# Fetch the latest version of the dataset; the returned value is the local
# directory that holds the files (printed below).
DATASET_HANDLE = "zubairdhuddi/student-dataset"
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/student-dataset


In [8]:
import littlelearn as ll 
from littlelearn import DeepLearning as dl

In [10]:
# Columns whose values are replaced by integer codes from littlelearn's
# LabelEncoder. Kept in one list so adding/removing a column is a one-line change
# instead of another copy-pasted statement.
CATEGORICAL_COLUMNS = [
    'Sleep Duration',
    'Gender',
    'Profession',
    'Dietary Habits',
    'Degree',
    'Have you ever had suicidal thoughts ?',
    'Family History of Mental Illness',
    'Financial Stress',
]

df = pd.read_csv(os.path.join(path, "student_depression_dataset.csv"))

# A fresh encoder per column, so every column gets its own independent code table.
for column in CATEGORICAL_COLUMNS:
    df[column] = ll.preprocessing.LabelEncoder().fit_encod(df[column].to_list())

# 'id' and 'City' are removed here and therefore never reach the model.
df = df.drop(['id','City'],axis=1)

# NOTE(review): judging by the displayed rows, the encoder appears to hand out
# codes in order of first appearance (e.g. 'Financial Stress' 1.0 -> 0, 2.0 -> 1,
# 5.0 -> 2, 3.0 -> 3). If so, ordered columns such as 'Financial Stress' and
# 'Sleep Duration' lose their natural order — consider an explicit ordered mapping.
df

Unnamed: 0,Gender,Age,Profession,Academic Pressure,Work Pressure,CGPA,Study Satisfaction,Job Satisfaction,Sleep Duration,Dietary Habits,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Family History of Mental Illness,Depression
0,0,33.0,0,5.0,0.0,8.97,2.0,0.0,0,0,0,0,3.0,0,0,1
1,1,24.0,0,2.0,0.0,5.90,5.0,0.0,0,1,1,1,3.0,1,1,0
2,0,31.0,0,3.0,0.0,7.03,5.0,0.0,1,0,2,1,9.0,0,1,0
3,1,28.0,0,3.0,0.0,5.59,2.0,0.0,2,1,3,0,4.0,2,1,1
4,1,25.0,0,4.0,0.0,8.13,3.0,0.0,0,1,4,0,1.0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27896,1,27.0,0,5.0,0.0,5.75,5.0,0.0,0,2,6,0,7.0,0,1,0
27897,0,27.0,0,2.0,0.0,9.40,3.0,0.0,1,0,11,1,0.0,3,1,0
27898,0,31.0,0,3.0,0.0,6.61,4.0,0.0,0,2,17,1,12.0,1,0,0
27899,1,18.0,0,5.0,0.0,6.88,2.0,0.0,1,0,6,0,10.0,2,0,1


In [11]:
# Split the encoded frame: every column except the target becomes the feature
# matrix, and the target becomes a column vector of shape (n_rows, 1).
# NOTE(review): the whole frame is used for training; no rows are held out.
TARGET_COLUMN = 'Depression'
x_train = df.drop(columns=[TARGET_COLUMN]).values
y_train = df[TARGET_COLUMN].values.reshape(-1, 1)

In [14]:
class Model:
    """Stack of three littlelearn Dense layers ending in a single sigmoid output.

    Layers, in forward order: ``Dense(32, 'relu')``, ``Dense(64, 'relu')``,
    ``Dense(1, 'sigmoid')``.
    """

    def __init__(self):
        self.layers1 = dl.layers.Dense(32, 'relu')
        self.layers2 = dl.layers.Dense(64, 'relu')
        self.fn = dl.layers.Dense(1, 'sigmoid')
        # Same three layers in forward order; get_weight() walks this list.
        self.node = [self.layers1, self.layers2, self.fn]

    def get_weight(self):
        """Return a single flat list holding every layer's weights, in layer order."""
        return [w for layer in self.node for w in layer.get_weight()]

    def __call__(self, x):
        """Feed ``x`` through layers1, layers2 and fn, and return the final output."""
        return self.fn(self.layers2(self.layers1(x)))

In [15]:
class Dataset:
    """Index-addressable pairing of a feature array with its labels.

    Parameters
    ----------
    features : array-like, optional
        Samples along the first axis. When omitted, the notebook-level
        ``x_train`` is used, so a bare ``Dataset()`` keeps working.
    labels : array-like, optional
        One entry per sample. When omitted, the notebook-level ``y_train``
        is used.

    Raises
    ------
    ValueError
        If the features and labels do not have the same length.
    """

    def __init__(self, features=None, labels=None):
        # The globals are looked up only when an argument is missing, and at
        # call time, so explicit data never depends on notebook state.
        self.x_train = x_train if features is None else features
        self.label = y_train if labels is None else labels
        if len(self.x_train) != len(self.label):
            raise ValueError(
                f"features and labels differ in length: "
                f"{len(self.x_train)} != {len(self.label)}"
            )

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.x_train[idx], self.label[idx]

    def __len__(self):
        """Return the number of samples held by this instance."""
        # Measure the stored array rather than the global `x_train`, so the
        # reported length always matches what __getitem__ can serve.
        return len(self.x_train)
        
class DataLoader:
    """Iterate over an indexable dataset of ``(x, y)`` pairs in mini-batches.

    Parameters
    ----------
    dataset : object supporting ``len()`` and integer indexing
        Each item is indexed as ``item[0]`` (input) and ``item[1]`` (label).
    batch_size : int, default 32
        Number of samples per batch; the last batch may be smaller.
    shuffle : bool, default True
        Reshuffle the sample order (via ``np.random.shuffle``) every time
        iteration starts.
    """

    def __init__(self, dataset, batch_size=32, shuffle=True):
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __len__(self):
        """Return the number of batches per pass, counting a trailing partial batch."""
        n_samples = len(self.dataset)
        return (n_samples + self.batch_size - 1) // self.batch_size

    def __iter__(self):
        """Yield ``(x_batch, y_batch)`` NumPy array pairs covering the dataset once."""
        # The visiting order is rebuilt (and optionally reshuffled) on every pass
        # and kept on the instance as `indices`.
        self.indices = np.arange(len(self.dataset))
        if self.shuffle:
            np.random.shuffle(self.indices)

        batch_starts = range(0, len(self.indices), self.batch_size)
        for first in batch_starts:
            last = first + self.batch_size
            yield self._collate([self.dataset[i] for i in self.indices[first:last]])

    def _collate(self, batch):
        """Stack a list of ``(x, y)`` samples into an ``(x_array, y_array)`` pair."""

        def stack(position):
            # Gather the element at `position` from every sample into one array.
            return np.array([sample[position] for sample in batch])

        return stack(0), stack(1)


In [16]:
# Wrap the training arrays and serve them in mini-batches of 128 samples
# (the shuffle setting is left at the loader's default).
dataset = Dataset()
dataloaded = DataLoader(dataset, batch_size=128)

In [18]:
# Build the network, the optimizer and the loss used by the training loop.
model = Model()
# One forward pass on a single row before the weights are collected.
# NOTE(review): presumably the Dense layers create their weights lazily on the
# first call, which is why this precedes get_weight() — confirm against littlelearn.
model(x_train[:1])
optim = dl.optimizers.Adam()
# Hand the model's flat weight list to the optimizer.
optim.apply_weight(model.get_weight())
loss_fn = dl.loss.BinaryCrossentropy()

In [19]:
from tqdm import tqdm

In [20]:
# Training loop: `epochs` full passes over the mini-batches served by `dataloaded`.
# NOTE(review): the recorded progress bars slow down every epoch (about 15 it/s in
# epoch 1 down to about 9 it/s in epoch 5) — worth checking whether state
# accumulates between steps despite kill_grad().
epochs = 5
for epoch in range(epochs) :
    # A fresh tqdm wrapper per epoch, so each epoch gets its own progress bar.
    iterator = tqdm(dataloaded)
    for data,label in iterator :
        # Forward pass and loss for the current mini-batch.
        y_pred = model(data)
        loss = loss_fn(label,y_pred)
        # Judging by the names: backpropagate, apply the optimizer step, then
        # clear gradients — littlelearn API, TODO confirm exact semantics.
        loss.backwardpass()
        optim.forward_in_weight()
        loss.kill_grad()
        # Description and postfix both show the loss of the current batch only,
        # not an average over the epoch.
        iterator.set_description(f"epoch : {epoch + 1} / {epochs} || loss : {loss.tensor}")
        iterator.set_postfix(loss = loss.tensor)

epoch : 1 / 5 || loss : 0.5397753715515137: 100%|██████████| 218/218 [00:14<00:00, 15.07it/s, loss=0.5397754]  
epoch : 2 / 5 || loss : 0.37498247623443604: 100%|██████████| 218/218 [00:16<00:00, 13.34it/s, loss=0.37498248]
epoch : 3 / 5 || loss : 0.34574049711227417: 100%|██████████| 218/218 [00:19<00:00, 11.40it/s, loss=0.3457405] 
epoch : 4 / 5 || loss : 0.38639941811561584: 100%|██████████| 218/218 [00:21<00:00, 10.11it/s, loss=0.38639942]
epoch : 5 / 5 || loss : 0.3524169325828552: 100%|██████████| 218/218 [00:24<00:00,  8.76it/s, loss=0.35241693] 


In [21]:
# Forward pass over the complete training matrix (the same rows the model was
# fitted on); as the cell's last expression, the returned Tensor is displayed.
model(x_train)

(Tensor with shape : ((27901, 1)) : 
  [[0.52474105]
 [0.02472413]
 [0.01686886]
 ...
 [0.21744113]
 [0.9658427 ]
 [0.8685922 ]])

In [24]:
# Score every training row with the fitted model.
result = model(x_train)
# Re-read the raw CSV so the original, un-encoded columns are shown, and place
# the model outputs in its 'Depression' column.
# NOTE(review): this replaces the ground-truth labels with the model outputs.
df = pd.read_csv(path+"/student_depression_dataset.csv").assign(Depression=result.tensor)
df

Unnamed: 0,id,Gender,Age,City,Profession,Academic Pressure,Work Pressure,CGPA,Study Satisfaction,Job Satisfaction,Sleep Duration,Dietary Habits,Degree,Have you ever had suicidal thoughts ?,Work/Study Hours,Financial Stress,Family History of Mental Illness,Depression
0,2,Male,33.0,Visakhapatnam,Student,5.0,0.0,8.97,2.0,0.0,'5-6 hours',Healthy,B.Pharm,Yes,3.0,1.0,No,0.524741
1,8,Female,24.0,Bangalore,Student,2.0,0.0,5.90,5.0,0.0,'5-6 hours',Moderate,BSc,No,3.0,2.0,Yes,0.024724
2,26,Male,31.0,Srinagar,Student,3.0,0.0,7.03,5.0,0.0,'Less than 5 hours',Healthy,BA,No,9.0,1.0,Yes,0.016869
3,30,Female,28.0,Varanasi,Student,3.0,0.0,5.59,2.0,0.0,'7-8 hours',Moderate,BCA,Yes,4.0,5.0,Yes,0.651752
4,32,Female,25.0,Jaipur,Student,4.0,0.0,8.13,3.0,0.0,'5-6 hours',Moderate,M.Tech,Yes,1.0,1.0,No,0.619730
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27896,140685,Female,27.0,Surat,Student,5.0,0.0,5.75,5.0,0.0,'5-6 hours',Unhealthy,'Class 12',Yes,7.0,1.0,Yes,0.814804
27897,140686,Male,27.0,Ludhiana,Student,2.0,0.0,9.40,3.0,0.0,'Less than 5 hours',Healthy,MSc,No,0.0,3.0,Yes,0.048199
27898,140689,Male,31.0,Faridabad,Student,3.0,0.0,6.61,4.0,0.0,'5-6 hours',Unhealthy,MD,No,12.0,2.0,No,0.217441
27899,140690,Female,18.0,Ludhiana,Student,5.0,0.0,6.88,2.0,0.0,'Less than 5 hours',Healthy,'Class 12',Yes,10.0,5.0,No,0.965843
