In [66]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR

In [67]:
# Source: https://www.kaggle.com/datasets/rxnach/student-stress-factors-a-comprehensive-analysis?resource=download
# The CSV is read from the notebook's working directory.
df = pd.read_csv('StressLevelDataset.csv')

In [68]:
# Preview the first two rows as a quick sanity check on the load.
df.head(2)

Unnamed: 0,anxiety_level,self_esteem,mental_health_history,depression,headache,blood_pressure,sleep_quality,breathing_problem,noise_level,living_conditions,...,basic_needs,academic_performance,study_load,teacher_student_relationship,future_career_concerns,social_support,peer_pressure,extracurricular_activities,bullying,stress_level
0,14,20,0,11,2,1,2,4,2,3,...,2,3,2,3,3,2,3,3,2,1
1,15,8,1,15,5,3,1,4,3,1,...,2,1,4,1,5,1,4,5,5,2


In [69]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(1100, 21)

In [70]:
# Column labels; the feature/target selections in later cells index by these names.
df.columns

Index(['anxiety_level', 'self_esteem', 'mental_health_history', 'depression',
       'headache', 'blood_pressure', 'sleep_quality', 'breathing_problem',
       'noise_level', 'living_conditions', 'safety', 'basic_needs',
       'academic_performance', 'study_load', 'teacher_student_relationship',
       'future_career_concerns', 'social_support', 'peer_pressure',
       'extracurricular_activities', 'bullying', 'stress_level'],
      dtype='object')

## ANOVA Feature Selection

In [71]:
from sklearn.feature_selection import SelectKBest, f_classif

In [72]:
# Candidate predictors: every column of `df` other than the target.
feature_columns = [
    'anxiety_level', 'self_esteem', 'mental_health_history', 'depression',
    'headache', 'blood_pressure', 'sleep_quality', 'breathing_problem',
    'noise_level', 'living_conditions', 'safety', 'basic_needs',
    'academic_performance', 'study_load', 'teacher_student_relationship',
    'future_career_concerns', 'social_support', 'peer_pressure',
    'extracurricular_activities', 'bullying',
]
X = df[feature_columns]
y = df['stress_level']  # target variable

# Univariate ANOVA F-test per feature; k=10 marks the ten highest-scoring ones.
# NOTE(review): the selected subset is not consumed by the later cells visible
# here -- they pick their columns by hand.
selector = SelectKBest(score_func=f_classif, k=10).fit(X, y)

# F statistics and p-values, in the same order as X.columns.
scores = selector.scores_
p_values = selector.pvalues_

# Tabulate the results and rank from strongest to weakest score.
feature_scores = pd.DataFrame(
    {'Feature': X.columns, 'ANOVA_Score': scores, 'p_value': p_values}
).sort_values(by='ANOVA_Score', ascending=False)
print(feature_scores)

                         Feature  ANOVA_Score        p_value
5                 blood_pressure  1106.223910  9.277323e-264
1                    self_esteem   775.386130  1.268559e-210
19                      bullying   712.871269  4.227007e-199
6                  sleep_quality   710.124696  1.397301e-198
15        future_career_concerns   684.270952  1.228543e-193
0                  anxiety_level   655.453225  5.296746e-188
3                     depression   652.629287  1.920442e-187
12          academic_performance   639.222921  9.060029e-185
10                        safety   613.634750  1.397009e-179
11                   basic_needs   600.120070  8.543327e-177
4                       headache   570.226316  1.634364e-170
18    extracurricular_activities   549.800151  4.007655e-166
17                 peer_pressure   539.338954  7.633016e-164
14  teacher_student_relationship   513.033251  5.173706e-158
8                    noise_level   440.800303  3.168813e-141
16                social

## Linear Regression

In [73]:
# Two hand-picked predictors for the linear baseline, plus the target column.
X = df.loc[:, ['self_esteem', 'anxiety_level']]
y = df.loc[:, 'stress_level']

In [74]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [75]:
# Ordinary least-squares baseline, fitted on the training split only.
model = LinearRegression()
model.fit(X_train, y_train)

In [76]:
# Predictions for the held-out rows, compared against y_test in the next cell.
y_pred = model.predict(X_test)

In [77]:
# Mean squared error of the linear model on the held-out test split.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error: {mse}')

Mean Squared Error: 0.17242117989940328


## Decision Tree Regressor

In [78]:
# Predictors for the decision tree: the two features used in this section.
X = df.loc[:, ['blood_pressure', 'sleep_quality']]
y = df.loc[:, 'stress_level']


In [79]:
# 80/20 train/test split with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [80]:
# Fit on the training split only. The original call fitted on the full X / y,
# which made the preceding train_test_split cell dead code and let the tree
# see the X_test / y_test rows, so any later evaluation on them would be
# optimistically biased. This also matches how the Linear Regression section
# fits its model.
model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train)

## Random Forest Regressor

In [81]:
# Predictors for the random forest: the same two columns as the decision tree section.
X = df.loc[:, ['blood_pressure', 'sleep_quality']]
y = df.loc[:, 'stress_level']

In [82]:
# Random forest fitted on the full X / y (no train/test split in this section).
# oob_score=True requests an out-of-bag score during fitting.
# NOTE(review): the out-of-bag score is never read in the cells visible here --
# confirm whether a later cell reports it.
rf_model = RandomForestRegressor(random_state=42, oob_score=True)
rf_model.fit(X, y)