In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('data/Book.csv')

In [3]:
df.head()

Unnamed: 0,No,Date,SO2,NOx,RSPM,SPM,AQI
0,1,18-06-2015,10,14,43,115,43
1,2,22-06-2015,10,12,40,88,40
2,3,25-06-2015,9,12,36,81,36
3,4,29-06-2015,11,16,51,133,51
4,5,02-07-2015,11,17,55,146,55


In [4]:
df = df.drop(columns=['No', 'Date'], axis=1)

In [5]:
df.head()

Unnamed: 0,SO2,NOx,RSPM,SPM,AQI
0,10,14,43,115,43
1,10,12,40,88,40
2,9,12,36,81,36
3,11,16,51,133,51
4,11,17,55,146,55


In [17]:
X = df.drop(labels=['AQI'], axis=1)
y = df[['AQI']]

In [19]:
numerical_columns = [cols for cols in X.columns if X.dtypes[cols] != 'object']
categorical_columns = [cols for cols in X.columns if X.dtypes[cols] == 'object']
print(numerical_columns)
print(categorical_columns)

['SO2', 'NOx', 'RSPM', 'SPM']
[]


In [8]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [20]:
num_pipeline = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scalar', StandardScaler())
    ]
)

preprocessor = ColumnTransformer(
    [
        ('numerical_pipeline', num_pipeline, numerical_columns)
    ]
)

In [11]:
df.shape

(2216, 5)

In [21]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [22]:
X_train.head()

Unnamed: 0,SO2,NOx,RSPM,SPM
198,10,19,33,70
49,13,29,71,166
173,15,29,76,203
2092,21,31,77,153
715,9,13,46,99


In [23]:
y_train.head()

Unnamed: 0,AQI
198,33
49,71
173,76
2092,77
715,46


In [24]:
X_train_scaled = preprocessor.fit_transform(X_train)
X_test_scaled = preprocessor.transform(X_test)

In [15]:
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [29]:
regressor = LinearRegression()
regressor.fit(X_train_scaled, y_train)

In [30]:
regressor.coef_

array([[ 2.28689443, -2.15273315, 26.98869332, -0.38466892]])

In [31]:
regressor.intercept_

array([86.26410835])

In [28]:
def Model_Evaluation(true, predicted):
    mae = mean_absolute_error(true, predicted)
    mse = mean_squared_error(true, predicted)
    rmse = np.sqrt(mean_squared_error(true, predicted))
    r_square = r2_score(true, predicted)
    
    return mae, mse, rmse, r_square

In [36]:
models = {
    'linear_regressor' : LinearRegression(),
    'lasso' : Lasso(),
    'ridge' : Ridge(),
    'elastic_net' : ElasticNet()
}

trained_model_list = []
model_list = []
r2_list = []

for i in range(len(list(models))):
    model = list(models.values())[i]
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    
    mae, mse, rmse, r_square = Model_Evaluation(y_test, y_pred)
    
    print(list(models.keys())[i])
    model_list.append(list(models.keys())[i])
    
    print("Model Training Performance")
    print("Mean Absolute Error : ", mae)
    print("Mean Squared Error : ", mse)
    print("Root Mean Squared Error : ", rmse)
    print("R2 Score : ", r_square)
    
    print("-"*40)
    print('\n')

linear_regressor
Model Training Performance
Mean Absolute Error :  2.0785006043347116
Mean Squared Error :  10.79127940543238
Root Mean Squared Error :  3.285008280877292
R2 Score :  0.9848765434089816
----------------------------------------


lasso
Model Training Performance
Mean Absolute Error :  2.1678844510891
Mean Squared Error :  10.989078714457502
Root Mean Squared Error :  3.3149779357421827
R2 Score :  0.984599337236165
----------------------------------------


ridge
Model Training Performance
Mean Absolute Error :  2.078508970488699
Mean Squared Error :  10.79122402531719
Root Mean Squared Error :  3.2849998516464485
R2 Score :  0.9848766210215367
----------------------------------------


elastic_net
Model Training Performance
Mean Absolute Error :  2.1206339094412123
Mean Squared Error :  10.771283317003576
Root Mean Squared Error :  3.2819633326720115
R2 Score :  0.9849045669605904
----------------------------------------


