In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression

In [4]:
# Read the simple-linear-regression CSV and preview its first five rows.
csv_path = 'data/sim_lin_reg.csv'
data = pd.read_csv(csv_path)
data.head(5)

Unnamed: 0,cgpa,package
0,6.89,3.26
1,5.12,1.98
2,7.82,3.25
3,7.42,3.67
4,6.94,3.57


In [5]:
# Features: the first column, kept as a one-column DataFrame (2-D).
X = data.iloc[:, [0]]
# Target: the last column, likewise kept as a one-column DataFrame.
y = data.iloc[:, [-1]]

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [6]:
# Fit a linear regression on the training split (fit returns the estimator itself),
# then predict the target for the held-out rows.
lr = LinearRegression().fit(X_train, y_train)
y_pred = lr.predict(X_test)

## Mean Absolute Error

In [11]:
# Mean absolute error between the held-out targets and the model's predictions.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print('Mean Absolute Error == ', mae)

Mean Absolute Error ==  0.2884710931878175


## Mean Squared Error

In [12]:
# Mean squared error between the held-out targets and the model's predictions.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print('Mean Squared Error == ', mse)

Mean Squared Error ==  0.12129235313495527


## Root Mean Squared Error

In [13]:
# Root mean squared error: square root of the MSE, so it is in the target's units.
squared_error = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(squared_error)
print('Root Mean Squared Error == ', rmse)

Root Mean Squared Error ==  0.34827051717731616


## R2_score

In [14]:
# Coefficient of determination (R2) on the held-out split.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print('R2 score == ', r2)

R2 score ==  0.780730147510384


## Adjusted R2_score

In [15]:
# Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - k - 1)
#   n: number of rows in the test split, k: number of feature columns.
n, k = X_test.shape
penalty = (1 - r2) * (n - 1) / (n - k - 1)
adjusted_r2 = 1 - penalty
print('Adjusted R2 score == ', adjusted_r2)

Adjusted R2 score ==  0.7749598882343415


# Practice

In [1]:
import pandas as pd
import numpy as np

In [7]:
# Load the housing dataset; the CSV read does not touch NumPy's random state,
# so seeding the global generator afterwards leaves the same state for later cells.
house_prices = pd.read_csv("data/HousingData.csv")
np.random.seed(2)

In [8]:
# Replace missing values in the listed columns with that column's mean.
#
# The fill is done as one assignment on `house_prices` itself rather than
# `house_prices[col].fillna(..., inplace=True)` per column: the latter is chained
# assignment through an inplace method, which emits a FutureWarning on pandas 2.x
# and, with Copy-on-Write (the default from pandas 3.0), modifies only a temporary
# object and leaves `house_prices` untouched.
# NOTE(review): CHAS is imputed with its mean like the other columns (as before);
# if it is a 0/1 indicator this produces fractional values - confirm that is intended.
cols_with_missing = ["CRIM", "ZN", "INDUS", "CHAS", "AGE", "LSTAT"]
column_means = house_prices[cols_with_missing].mean()
# DataFrame.fillna with a Series fills each column using the value at its label.
house_prices[cols_with_missing] = house_prices[cols_with_missing].fillna(column_means)

In [9]:
np.random.seed(2)
# MEDV is the prediction target; every other column is used as a feature.
y = house_prices["MEDV"]
X = house_prices.drop(columns="MEDV")

In [14]:
# Neither train_test_split nor RandomForestRegressor is given a random_state here,
# so the global NumPy seed set on the next line is what makes this cell repeatable.
np.random.seed(2)
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Fit a random forest with default hyperparameters and predict the held-out rows.
reg = RandomForestRegressor()
reg.fit(X_train, y_train)
y_preds = reg.predict(X_test)

# The regressor's score on the held-out split, printed as R2 below.
# Stored as `test_r2` rather than `r2_score` so the result does not overwrite the
# `r2_score` function imported from sklearn.metrics (which would make any later
# `r2_score(...)` call fail with "float object is not callable").
test_r2 = reg.score(X_test, y_test)
print(f"R2- SCORE : {test_r2*100:.2f}%")

R2- SCORE : 87.08%


In [18]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error of the random forest on the held-out split.
mae = mean_absolute_error(y_true=y_test, y_pred=y_preds)
print(f"Mean Absolute Error(MAE) : {mae : .2f}")

Mean Absolute Error(MAE) :  2.32


In [23]:
# Recompute the MAE by hand: tabulate actual vs. predicted, take the per-row
# difference, then average its absolute value.
df = pd.DataFrame({"Actual Values": y_test, "Predicted Values": y_preds})
df["Differences"] = df["Actual Values"].sub(df["Predicted Values"])
diff = df["Differences"].abs().mean()
diff    #MEAN ABSOLUTE ERROR

2.3241078431372517

In [26]:
from sklearn.metrics import mean_squared_error

# Mean squared error of the random forest on the held-out split.
mse = mean_squared_error(y_true=y_test, y_pred=y_preds)
print(f"Mean Squared Error(MSE) : {mse : .2f}")

Mean Squared Error(MSE) :  10.81


In [27]:
# Recompute the MSE by hand: tabulate actual vs. predicted, take the per-row
# difference, then average its square.
df = pd.DataFrame({"Actual Values": y_test, "Predicted Values": y_preds})
df["Differences"] = df["Actual Values"].sub(df["Predicted Values"])
diff = df["Differences"].pow(2).mean()
diff  #MEAN SQUARED ERROR

10.80527655882352

In [29]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validated R2 of `reg` over the full feature matrix and target;
# one score per fold is returned.
np.random.seed(2)
cv_r2_score = cross_val_score(estimator=reg, X=X, y=y, scoring="r2", cv=5)
cv_r2_score

array([0.77033873, 0.84576532, 0.70889646, 0.44088099, 0.4521411 ])

In [35]:
np.random.seed(2)
# The "neg_mean_absolute_error" scorer yields negated MAE values, so `cv_mae`
# holds negatives; the sign is flipped only for display.
cv_mae = cross_val_score(
    estimator=reg,
    X=X,
    y=y,
    scoring="neg_mean_absolute_error",
    cv=5,
)
-cv_mae

array([2.10104902, 2.70831683, 3.4410198 , 4.00074257, 3.02434653])