[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Venura-Shiromal/Ai-session-term1/blob/main/AI_MachineLearning/Stress%20Predicition/StressPrediction_CatBoost.ipynb)

In [1]:
# just in case
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


# Importing Modules

In [51]:
import pandas as pd
import numpy as np
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Loading Data

In [56]:
df = pd.read_csv("lifestyle_dataset.csv")
df.head()

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,126/83,77,4200,
1,2,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
2,3,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
3,4,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
4,5,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea


# Preprocessing

In [57]:
df[['Systolic Pressure', 'Diastolic Pressure']] = df['Blood Pressure'].str.split('/', expand=True)
df['Systolic Pressure'] = pd.to_numeric(df['Systolic Pressure'])
df['Diastolic Pressure'] = pd.to_numeric(df['Diastolic Pressure'])

df = df.drop('Blood Pressure', axis=1)

In [58]:
cat_cols = ["Occupation", "Gender"]

## Mapping

In [59]:
print(df["BMI Category"].unique())
print(df["Sleep Disorder"].unique())

['Overweight' 'Normal' 'Obese' 'Normal Weight']
[nan 'Sleep Apnea' 'Insomnia']


In [60]:
BMI_Map = {
    "Normal Weight": 1,
    "Normal": 1,
    "Overweight": 2,
    "Obese": 3
}

df["BMI Category"] = df["BMI Category"].map(BMI_Map)

Sleep_Disorder_Map = {
    "Sleep Apnea": 1,
    "Insomnia": 2
}

df["Sleep Disorder"] = df["Sleep Disorder"].map(Sleep_Disorder_Map)

### Filling Missing Values

In [61]:
df["Sleep Disorder"].fillna(0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Sleep Disorder"].fillna(0, inplace=True)


In [62]:
df.isna().sum()

Unnamed: 0,0
Person ID,0
Gender,0
Age,0
Occupation,0
Sleep Duration,0
Quality of Sleep,0
Physical Activity Level,0
Stress Level,0
BMI Category,0
Heart Rate,0


In [63]:
df.head(20)

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Heart Rate,Daily Steps,Sleep Disorder,Systolic Pressure,Diastolic Pressure
0,1,Male,27,Software Engineer,6.1,6,42,6,2,77,4200,0.0,126,83
1,2,Male,28,Doctor,6.2,6,60,8,1,75,10000,0.0,125,80
2,3,Male,28,Doctor,6.2,6,60,8,1,75,10000,0.0,125,80
3,4,Male,28,Sales Representative,5.9,4,30,8,3,85,3000,1.0,140,90
4,5,Male,28,Sales Representative,5.9,4,30,8,3,85,3000,1.0,140,90
5,6,Male,28,Software Engineer,5.9,4,30,8,3,85,3000,2.0,140,90
6,7,Male,29,Teacher,6.3,6,40,7,3,82,3500,2.0,140,90
7,8,Male,29,Doctor,7.8,7,75,6,1,70,8000,0.0,120,80
8,9,Male,29,Doctor,7.8,7,75,6,1,70,8000,0.0,120,80
9,10,Male,29,Doctor,7.8,7,75,6,1,70,8000,0.0,120,80


### X, Y Configuration

In [64]:
x = df.drop(["Stress Level"], axis=1)
y = df["Stress Level"]

In [65]:
x_train, x_test, y_train, y_test = train_test_split(x, y, stratify=y, test_size=0.2, random_state=42)

# Model Configuration

In [66]:
model = CatBoostClassifier(
    iterations=500,
    depth=6,
    learning_rate=0.04,
    l2_leaf_reg=5,
    border_count=50,
    random_state=42,
    verbose=50
)

# Training

In [67]:
model.fit(x_train, y_train, cat_features=cat_cols, eval_set=(x_test, y_test))

0:	learn: 1.7197959	test: 1.7216815	best: 1.7216815 (0)	total: 57.8ms	remaining: 28.9s
50:	learn: 0.4705001	test: 0.4718703	best: 0.4718703 (50)	total: 370ms	remaining: 3.26s
100:	learn: 0.2783842	test: 0.2884747	best: 0.2884747 (100)	total: 725ms	remaining: 2.87s
150:	learn: 0.2021024	test: 0.2238052	best: 0.2238052 (150)	total: 1.05s	remaining: 2.43s
200:	learn: 0.1611819	test: 0.1911793	best: 0.1911793 (200)	total: 1.36s	remaining: 2.02s
250:	learn: 0.1348685	test: 0.1709599	best: 0.1709599 (250)	total: 1.94s	remaining: 1.92s
300:	learn: 0.1152639	test: 0.1544504	best: 0.1544504 (300)	total: 2.56s	remaining: 1.7s
350:	learn: 0.0999696	test: 0.1410191	best: 0.1410191 (350)	total: 3.18s	remaining: 1.35s
400:	learn: 0.0878483	test: 0.1302849	best: 0.1302849 (400)	total: 3.73s	remaining: 922ms
450:	learn: 0.0791999	test: 0.1216797	best: 0.1216797 (450)	total: 4.22s	remaining: 459ms
499:	learn: 0.0712154	test: 0.1131445	best: 0.1131445 (499)	total: 4.5s	remaining: 0us

bestTest = 0.11314

<catboost.core.CatBoostClassifier at 0x785d220e0b60>

# Evaluation

In [72]:
y_pred = model.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {100*accuracy:.2f}%")we

Accuracy: 97.33%
