In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score
import time
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('StressLevelDataset.csv')

In [3]:
df.head()

Unnamed: 0,anxiety_level,self_esteem,mental_health_history,depression,headache,blood_pressure,sleep_quality,breathing_problem,noise_level,living_conditions,...,basic_needs,academic_performance,study_load,teacher_student_relationship,future_career_concerns,social_support,peer_pressure,extracurricular_activities,bullying,stress_level
0,14,20,0,11,2,1,2,4,2,3,...,2,3,2,3,3,2,3,3,2,1
1,15,8,1,15,5,3,1,4,3,1,...,2,1,4,1,5,1,4,5,5,2
2,12,18,1,14,2,1,2,2,2,2,...,2,2,3,3,2,2,3,2,2,1
3,16,12,1,15,4,3,1,3,4,2,...,2,2,4,1,4,1,4,4,5,2
4,16,28,0,7,2,3,5,1,3,2,...,3,4,3,1,2,1,5,0,5,1


In [4]:
df.shape

(1100, 21)

In [5]:
df['stress_level'].value_counts()

stress_level
0    373
2    369
1    358
Name: count, dtype: int64

In [6]:
X = df.iloc[:,:20]
y = df.iloc[:,-1]


In [7]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2, random_state=2)

In [8]:
X_test.shape

(220, 20)

In [9]:
y_test.shape

(220,)

In [10]:
X.shape

(1100, 20)

## Standard Scaler

In [11]:
# scaler
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# PCA

In [49]:
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

pca = PCA(n_components=0.95)   # preserve  95% of the variance in feture
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)
print(f'{pca.n_components_} Number of feature that preserve 95% of the variance out of {df.shape[1]} columns.')


# select only 3 columns out of 21 columns
pca_1 = PCA(n_components=2)
X_train_1 = pca_1.fit_transform(X_train)
X_test_1 = pca_1.transform(X_test)

# make dataframe
pca_columns = [f'PC{i+1}' for i in range(X_train_1.shape[1])]

df_train_pca = pd.DataFrame(X_train_1, columns=pca_columns, index=X_train.index)
df_test_pca = pd.DataFrame(X_test_1, columns=pca_columns, index=X_test.index)
df_train_pca.head()

df_train_pca['target'] = y_train.values
df_test_pca['target'] = y_test.values

# visualization
import plotly.express as px

fig = px.scatter_3d(
    df_train_pca, 
    x='PC1',
    y = 'PC2',
    z = 'target',
    color='target',
    
    opacity=0.7
)

fig.update_traces(marker=dict(size=5))
plt.tight_layout()
fig.show()

lr = LinearRegression()
# checking the r2 score of occupy 95% of the variance feature 
lr.fit(X_train_pca, y_train)
y_pred_pca_1 = lr.predict(X_test_pca)

print(f'R2 score of 95% reserve variance feature: ', r2_score(y_test, y_pred_pca_1))

# checking the r2 score of 3 features only
lr.fit(X_train_1, y_train)

y_pred_pca = lr.predict(X_test_1)

print('R2 score of 3 feature out of 21: ', r2_score(y_test,y_pred_pca))



16 Number of feature that preserve 95% of the variance out of 21 columns.


R2 score of 95% reserve variance feature:  0.7790041343158587
R2 score of 3 feature out of 21:  0.6834921865569367


<Figure size 640x480 with 0 Axes>

In [31]:
df['stress_level'].value_counts()

stress_level
0    373
2    369
1    358
Name: count, dtype: int64

# 1. Linar Regression

In [318]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression()

lr.fit(X_train,y_train)

# not scaled
y_pred = lr.predict(X_test)
r2_raw = r2_score(y_test, y_pred)
# print('R2 score raw: ', r2_raw)

# scaled
lr.fit(X_train_scaled,y_train)
y_pred_scaled = lr.predict(X_test_scaled)
r2_scaled = r2_score(y_test, y_pred_scaled)
# print('R2 score scaled: ',r2_scaled)

# Train score
lr.fit(X_train, y_train)
y_pred = lr.predict(X_train)
r2_train = r2_score(y_train, y_pred)
# print('R2 train: ', r2_train)

lr.fit(X_train_pca, y_train)
y_pred_pca = lr.predict(X_test_pca)
r2_pca = r2_score(y_test, y_pred_pca)
# print('R2 score PCA: ', r2_pca)

results = pd.DataFrame({
    'Model': ['Linear Regression (raw)',
              'Linear Regression (scaled)',
              'Train Data (not scaled)',
              'Linear Regression (PCA)'],
    'R2 score': [r2_raw, r2_scaled, r2_train, r2_pca]

})
print('='*100)


print('Coefficient: ',lr.coef_)
print('Intercept: ',lr.intercept_)
print('='*100)
results.T


Coefficient:  [ 0.21000335 -0.03950435  0.04024978 -0.01929834  0.03723748  0.01464942
 -0.01238406 -0.070902   -0.01787236 -0.08801033 -0.03137921 -0.04537773
  0.02371959  0.06752427 -0.03509646 -0.03759818]
Intercept:  1.0045454545454546


Unnamed: 0,0,1,2,3
Model,Linear Regression (raw),Linear Regression (scaled),Train Data (not scaled),Linear Regression (PCA)
R2 score,0.761989,0.761989,0.799728,0.779004


# 2. Gradient Decent
### I. Batch Gradient Decent

In [319]:
class BGDRegressor:
    def __init__(self, learning_rate = 0.01, epochs=100):

        self.coef_ = None
        self.intercept_ = None
        self.epochs = epochs
        self.lr = learning_rate
    
    def fit(self, X_train, y_train):
        # init your coef 
        self.intercept_ = 0
        self.coef_ = np.ones(X_train.shape[1])

        for i in range(self.epochs):
            #update all the coefficient and intercept
            y_hat = np.dot(X_train, self.coef_) + self.intercept_

            intercept_der = -2 * np.mean(y_train - y_hat)
            self.intercept_ = self.intercept_ - (self.lr * intercept_der)

            coef_der = -2 * np.dot((y_train - y_hat), X_train)/X_train.shape[0]
            self.coef_ = self.coef_ - (self.lr*coef_der)

        print(self.intercept_, self.coef_)

    def predict(self, X_test):
        return np.dot(X_test, self.coef_) + self.intercept_

In [320]:
bgd = BGDRegressor(learning_rate=0.001, epochs=3000)
bgd.fit(X_train, y_train)
y_pred_bgd = bgd.predict(X_test)
r2_bgd = r2_score(y_test, y_pred_bgd)
print('R2 score of bgd', r2_bgd)

-0.2823199430712375 [ 0.00473381 -0.00453389  0.42442978  0.01105934  0.07130695  0.12501489
 -0.01334037  0.02276425  0.06880971  0.02329928 -0.08515757 -0.03177629
 -0.04517854  0.03328774  0.02271181 -0.00091009  0.11910931  0.02173597
  0.05230423  0.03842911]
R2 score of bgd 0.738303170954103


### II. Sochastic Gradient Decent

In [321]:
class SGDRegressor:
    def __init__(self, learning_rate=0.01, epochs=100):
        self.coef_ = None
        self.intercept_ = None
        self.lr = learning_rate
        self.epochs = epochs
    
    def fit(self,X_train,y_train):
        X_train = np.array(X_train)
        y_train = np.array(y_train)

        self.intercept_ = 0
        self.coef_ = np.ones(X_train.shape[1])
        
       

        for i in range(self.epochs):
            for j in range(X_train.shape[0]):
                idX = np.random.randint(0, X_train.shape[0])

                y_hat = np.dot(X_train[idX], self.coef_)+self.intercept_
                errors = y_train[idX] - y_hat
                coef_der = -2 * errors*X_train[idX]
                intercept_der = -2 * errors

                self.coef_ -= self.lr*coef_der
                self.intercept_ -= self.lr*intercept_der
        print(self.coef_, self.intercept_)
        return self
    
    def predict(self, X_test):
        X_test = np.array(X_test)
        return np.dot(X_test, self.coef_) + self.intercept_

In [322]:
start = time.time()
sgdr = SGDRegressor(learning_rate=0.0001, epochs=100)
sgdr.fit(X_train, y_train)

y_pred_sgdr = sgdr.predict(X_test)

print('Time taken is ', time.time() - start)
print('R2 score: ', r2_score(y_test, y_pred_sgdr))

[ 0.01313643 -0.00378826  0.09618018  0.01039053  0.08622861  0.01824201
 -0.01412192  0.02519062  0.05456451  0.0163405  -0.06079487 -0.03160753
 -0.04609926  0.04769482  0.054197    0.02255899  0.02642089  0.04900652
  0.07870405  0.04072048] -0.2200591997638437
Time taken is  0.8308029174804688
R2 score:  0.7420989474193502


### SGDRegression using OLS method also Hyperparameter tune

In [343]:
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error

pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('sgd', SGDRegressor(loss='squared_error', max_iter=1000, random_state=42))
])

# define hyperameter grid
param_grid = {
    'sgd__alpha': [0.0001,0.001,0.01,0.1],
    'sgd__penalty': ['l2', 'l1', 'elasticnet'],
    'sgd__eta0': [0.001,0.01,0.1,1],
    'sgd__learning_rate': ['constant', 'invscaling', 'adaptive']
}

grid = GridSearchCV(
    pipeline,
    param_grid,
    cv=5,
    scoring='r2',
    n_jobs=-1
)

grid.fit(X_train,y_train)



In [347]:
print('Best Parameters: ', grid.best_params_)
print('Best CV R2: ', grid.best_score_)

# Test score
y_pred_sgd_ols = grid.predict(X_test)
print('R2 score: ', r2_score(y_test, y_pred_sgd_ols))

Best Parameters:  {'sgd__alpha': 0.1, 'sgd__eta0': 0.001, 'sgd__learning_rate': 'constant', 'sgd__penalty': 'elasticnet'}
Best CV R2:  0.7595303285373435
R2 score:  0.7764320554356081


### III. Mini-Batch Gradient Decent

In [388]:
import random

class MBGDRegressor:
    def __init__(self, batch_size, learning_rate=0.01, epochs=100):
        self.coef_ = None
        self.intercept_ = None
        self.batch_size = batch_size
        self.lr = learning_rate
        self.epochs = epochs
    
    def fit(self, X_train_scaled,y_train):
        X_train_scaled = np.array(X_train_scaled)
        y_train = np.array(y_train)

        self.coef_= np.ones(X_train_scaled.shape[1])
        self.intercept_ = 0

        for i in range(self.epochs):
            for j in range(int(X_train_scaled.shape[0]/self.batch_size)):
                idX = random.sample(range(X_train_scaled.shape[0]),self.batch_size)

                y_hat = np.dot(X_train_scaled[idX], self.coef_) + self.intercept_
                error = y_train[idX] - y_hat

                intercept_der = -2 * np.mean(error)
                self.intercept_ = self.intercept_ - (self.lr * intercept_der)

                coef_der = -2 * np.dot(error, X_train_scaled[idX]) / self.batch_size
                self.coef_ = self.coef_ - (self.lr * coef_der)
                
        print(self.intercept_, self.coef_)
    
    def predict(self, X_test_scaled):
        X_test_scaled = np.array(X_test_scaled)
        return np.dot(X_test_scaled, self.coef_) + self.intercept_

In [393]:
mbgdr = MBGDRegressor(batch_size=10, learning_rate=0.001, epochs=1000)
mbgdr.fit(X_train_scaled,y_train)

y_pred_mbgd = mbgdr.predict(X_test_scaled)
print('R2 score: ', r2_score(y_test, y_pred_mbgd))

1.0060917485915923 [ 0.02253234 -0.10830342 -0.00783731  0.08786732  0.1041279  -0.05250833
 -0.03495701  0.00176019  0.06001058 -0.00717742 -0.0803268  -0.08753155
 -0.08657137  0.06193765  0.08004077  0.01431852 -0.0575754   0.06647511
  0.09303445  0.0549518 ]
R2 score:  0.765054840481696


# 3. Polynomial Regression

In [400]:
from sklearn.preprocessing import PolynomialFeatures

pipeline = Pipeline([
    ('poly', PolynomialFeatures(include_bias=False)),
    ('scaler', StandardScaler()),
    ('lr', LinearRegression())
])

param_grid = {
    'poly__degree': [1,2,3,4,5]
}

grid = GridSearchCV(
    pipeline, param_grid, cv=5, scoring='r2', n_jobs=-1
)

grid.fit(X_train,y_train)

In [410]:
y_pred_poly = grid.predict(X_test)
print('R2 score: ', r2_score(y_test, y_pred_poly))
best_model = grid.best_estimator_

intercept = best_model.named_steps['lr'].intercept_
coef = best_model.named_steps['lr'].coef_

print('Intercept: ', intercept)
print('Coefficient: ', coef)


R2 score:  0.7619894236717223
Intercept:  1.0045454545454546
Coefficient:  [ 0.01927276 -0.09330182 -0.01608164  0.09052705  0.10374348 -0.05772738
 -0.04526235  0.01406578  0.08292079 -0.01886884 -0.07398967 -0.08051055
 -0.07815902  0.05710592  0.08281161  0.0160257  -0.06776027  0.06562305
  0.09823088  0.05538597]


# Ridge Regression

In [None]:
from sklearn.linear_model import Ridge

ridge = Ridge()

param_grid = {
    'alpha': np.logspace(-3,3,20)
}

grid_search = GridSearchCV(ridge, param_grid, cv=5, scoring='r2')
grid_search.fit(X_train, y_train)

print('Best alpha: ', grid_search.best_params_['alpha'])
print('Best CV score (R2): ', grid_search.best_score_)
grid_search.best_estimator_
 

Best alpha:  483.2930238571752
Best CV score (R2):  0.7629090275110035
