In [45]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split,KFold,LeaveOneOut
from sklearn.linear_model import LinearRegression,Lasso,Ridge,ElasticNet,LassoCV,RidgeCV,ElasticNetCV
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
from sklearn.preprocessing import PolynomialFeatures
import matplotlib.pyplot as plt

In [46]:
notebook_path = os.getcwd()
# Datasets path
# auto
csv_train_l_path = f"{notebook_path}/data/energy-train-l.csv"
csv_test_l_path= f"{notebook_path}/data/energy-test-l.csv"

In [47]:
df = pd.read_csv(csv_train_l_path)
train_set = np.array(df)

x_train = train_set[:, :-1]
y_train = train_set[:, -1:]

df = pd.read_csv(csv_test_l_path)
test_set = np.array(df)

x_test = test_set[:, :-1]
y_test = test_set[:, -1:]

x_train.shape, y_train.shape, x_test.shape, y_test.shape

((1168, 1), (1168, 1), (292, 1), (292, 1))

## Hold-out

In [48]:
x_train_spl,x_val_spl,y_train_spl,y_val_spl=train_test_split(x_train,y_train,test_size=0.2,random_state=2,shuffle=True,)

print(x_train_spl.shape,y_train_spl.shape,x_val_spl.shape,y_val_spl.shape)

(934, 1) (934, 1) (234, 1) (234, 1)


In [49]:
# plt.scatter(x_train_spl,y_train_spl,s=2)
# plt.scatter(x_val_spl,y_val_spl,s=2)
# plt.scatter(x_test,y_test,s=2)
# plt.show()

In [50]:
model=LinearRegression()
model.fit(x_train_spl,y_train_spl)
model.score(x_val_spl,y_val_spl)

0.12012137370995768

## KFold

In [51]:
kf=KFold(n_splits=5,random_state=2,shuffle=True)#3,5,10
# list(kf.split(x_train))
scores=[]
for i,(index_train_spl,index_val_spl) in enumerate(kf.split(x_train))  :
    x_train_spl=x_train[index_train_spl]
    y_train_spl=y_train[index_train_spl]
    
    x_val_spl=x_train[index_val_spl]
    y_val_spl=y_train[index_val_spl]
    
    # print(x_train_spl.shape,y_train_spl.shape,x_val_spl.shape,y_val_spl.shape, np.mean(x_train_spl))
    
    model=LinearRegression()
    model.fit(x_train_spl,y_train_spl)
    s=model.score(x_val_spl,y_val_spl)
    scores.append(s)
    # print(f"{i}- {s}")
print(f"Scores\nmean: {np.mean(scores)}, std: {np.std(scores)}")
print(scores)

Scores
mean: 0.10608457807363587, std: 0.02783433019374388
[0.12012137370995746, 0.05715297143125864, 0.09755915651902536, 0.13935053304782052, 0.11623885566011738]


## Leave One Out(LOO)

In [52]:
# import warnings
# # warnings.filterwarnings("ignore", category=UndefinedMetricWarning)
# warnings.filterwarnings("ignore")


degree=3

loo=LeaveOneOut()

polynomial=PolynomialFeatures(degree)

scores=[]
for i,(index_train_spl,index_val_spl) in enumerate(loo.split(x_train))  :
    x_train_spl=x_train[index_train_spl]
    y_train_spl=y_train[index_train_spl]
    x_train_spl_poly=polynomial.fit_transform(x_train_spl)
    
    x_val_spl=x_train[index_val_spl]
    y_val_spl=y_train[index_val_spl]
    x_val_spl_poly=polynomial.fit_transform(x_val_spl)
    
    # print(x_train_spl.shape,y_train_spl.shape,x_val_spl.shape,y_val_spl.shape, np.mean(x_train_spl))
    
    # model=LinearRegression()
    # model=Lasso(alpha=0.01,tol=.000000001)
    model=Ridge(alpha=0.5)
    # model=ElasticNet()
    model.fit(x_train_spl_poly,y_train_spl)
    y_pred=model.predict(x_val_spl_poly)
    # s=model.score(x_val_spl_poly,y_val_spl)
    s=mean_absolute_error(y_val_spl,y_pred)
    # s=mean_squared_error(y_val_spl,y_pred)
    # s=r2_score(y_val_spl,y_pred)
    scores.append(s)
    # print(f"{i}- {s}")
print(f"Scores\nmean: {np.mean(scores)}, std: {np.std(scores)}")
print(scores)

Scores
mean: 0.4062037146414321, std: 0.34989149815521003
[0.2734612397393372, 0.7453259609012526, 0.3573211618598203, 0.25141228918017094, 0.01935980058732889, 0.06417666610977257, 0.46694017780104524, 0.42037369134534597, 0.5307330514326165, 0.3444619216572924, 0.28466637306448694, 0.039579011919119056, 0.6012265944181906, 0.4164079285738993, 0.6359882068347043, 0.5472568199338359, 0.49740733527275444, 0.049902402044701644, 0.3972957088617023, 0.5157833782822682, 0.09438278766273767, 0.39958725554793234, 0.9947579502400624, 0.07743660093994123, 0.12119787421016126, 0.004207246893411787, 0.7056464781562077, 0.4674096114617699, 0.04887969066060696, 1.788187269687735, 1.037226835292943, 0.4654249795875759, 0.23911209939552058, 0.11978761887855138, 0.007193969505011777, 0.19593599968504682, 0.1519965515754948, 0.3478952208346646, 0.4192798548802137, 0.2644010789682158, 0.2732781894630043, 0.7163193137561155, 0.22517750547932647, 0.16183564051729338, 0.07658545243178816, 0.052016584859756

In [53]:
# retrain
model=Ridge(alpha=0.5)
x_train_poly=polynomial.fit_transform(x_train)
x_test_poly=polynomial.fit_transform(x_test)

model.fit(x_train_poly,y_train)

y_pred=model.predict(x_test_poly)
s=mean_absolute_error(y_test,y_pred)
print(s,r2_score(y_test,y_pred))

0.4234659782030165 0.699455490246452


# [Ridge,Lasso,ElasticNet] + CV

## RidgeCV

In [54]:
model=RidgeCV(alphas=[100,20,15,14,13,12,11,10,9,8,7,6,5,1,0.1,0.01,0.001],cv=5)
model.fit(x_train,y_train)
score=model.score(x_test,y_test)
print(f"score: {score}")
print(f"alpha: {model.alpha_}")

#retrain
model=Ridge(alpha=model.alpha_)
model.fit(x_train,y_train)
score=model.score(x_test,y_test)
print(f"score: {score}")

score: 0.11981972668352725
alpha: 9.0
score: 0.11981972668352725


## LassoCV

In [55]:
model=LassoCV(alphas=[100,20,15,14,13,12,11,10,9,8,7,6,5,1,0.1,0.01,0.001,0.0001,0.00001,0.000001],cv=5)
model.fit(x_train,y_train)
score=model.score(x_test,y_test)
print(f"score: {score}")
print(f"alpha: {model.alpha_}")

#retrain
model=Lasso(alpha=model.alpha_)
model.fit(x_train,y_train)
score=model.score(x_test,y_test)
print(f"score: {score}")

score: 0.11984440607250268
alpha: 1e-06
score: 0.11984440607250268


  y = column_or_1d(y, warn=True)


## ElasticNetCV

In [64]:
model=ElasticNetCV(alphas=[100,20,15,14,13,12,11,10,9,8,7,6,5,1,0.1,0.01,0.001,0.0001,0.00001,0.000001],cv=5,l1_ratio=[0.1,0.9])
model.fit(x_train,y_train)
score=model.score(x_test,y_test)
print(f"score: {score}")
print(f"alpha: {model.alpha_}")

#retrain
model=ElasticNet(alpha=model.alpha_)
model.fit(x_train,y_train)
score=model.score(x_test,y_test)
print(f"score: {score}")

score: 0.11984146245438954
alpha: 0.001
score: 0.11983934776176508


  y = column_or_1d(y, warn=True)
