In [1]:
# Importation des packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.linear_model import Lasso

In [2]:
# Importer les données
df = pd.read_csv("Fuel_Consumption.csv")
df.head()

Unnamed: 0,ENGINESIZE,CYLINDERS,FUELCONSUMPTION_COMB,CO2EMISSIONS
0,2.0,4,8.5,196
1,2.4,4,9.6,221
2,1.5,4,5.9,136
3,3.5,6,11.1,255
4,3.5,6,10.6,244


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1067 entries, 0 to 1066
Data columns (total 4 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   ENGINESIZE            1067 non-null   float64
 1   CYLINDERS             1067 non-null   int64  
 2   FUELCONSUMPTION_COMB  1067 non-null   float64
 3   CO2EMISSIONS          1067 non-null   int64  
dtypes: float64(2), int64(2)
memory usage: 33.5 KB


In [4]:
# Fractionner les données
x = df.drop("CO2EMISSIONS", axis = 1).values
y = df.CO2EMISSIONS.values

In [5]:
# Normalisation de x
x = StandardScaler().fit_transform(x)

In [6]:
# Splitter les données
x_train, x_test, y_train, y_test =  train_test_split(x,y, test_size=  0.2, random_state= 42)
x_val, x_test, y_val, y_test =  train_test_split(x_test, y_test,test_size=  0.5, random_state= 42)


In [7]:
# Afficher les dimensions
print("Dimension des données train: ", x_train.shape,y_train.shape)
print("Dimension des données de validation: ", x_val.shape,y_val.shape)
print("Dimension des données test: ", x_test.shape,y_test.shape)

Dimension des données train:  (853, 3) (853,)
Dimension des données de validation:  (107, 3) (107,)
Dimension des données test:  (107, 3) (107,)


In [8]:
# Initialisation du modèle
lr = LinearRegression()
# Entrainement du modèle
lr.fit(x_train, y_train)


In [9]:
# Calculer la metrique avec R2 score et Mean Squared Error
y_pred = lr.predict(x_val)
a = f"R2 score: {r2_score(y_pred, y_val)} ------- Mean squared error: {mean_squared_error(y_pred, y_val),}"
print(a)

R2 score: 0.8454049713860013 ------- Mean squared error: (589.7942470139345,)


In [10]:
# Recherche de paramètre optimal
params = {
    "alpha" : np.logspace(-8,8,100)
}
rr = Ridge()
grid_rr = GridSearchCV(rr,params, cv = 5)
grid_rr.fit(x_train, y_train)




In [11]:
# Print du best params
grid_rr.best_params_

{'alpha': 7.742636826811277}

In [12]:
# Initialiser avec l'alpha optimal
Rid_r = Ridge(alpha = 16.297508346206467 )
# Entrainer le modèle
Rid_r.fit(x_train, y_train)

In [13]:
# calculer le R2 et le mean squared error
y_pred = Rid_r.predict(x_val)
b =f"R2 score: {r2_score(y_pred, y_val)} ------- Mean squared error: {mean_squared_error(y_pred, y_val),}"
print(b)

R2 score: 0.8406207227831229 ------- Mean squared error: (595.3000428823991,)


In [14]:
# Recherche de paramètre optimal
params = {
    "alpha" : np.logspace(-8,8,100)
}
lar = Lasso()
grid_lar = GridSearchCV(lar,params, cv = 5)
grid_lar.fit(x_train, y_train)

# Afficher l'alpha optimal
grid_lar.best_params_


{'alpha': 0.12915496650148853}

In [15]:
# Initialiser le modèle
lar = Lasso(alpha = 0.12915496650148853)
# Entrainer le modèle
lar.fit(x_train, y_train)

In [16]:
# calculer le R2 et le mean squared error
y_pred = lar.predict(x_val)
c = f"R2 score: {r2_score(y_pred, y_val)} ------- Mean squared error: {mean_squared_error(y_pred, y_val)}"


### Prediction 

In [17]:
# Prediction
x_test_3 = x_test[:3]
# predire l'espérence de vie des 3 zones
y_pred_3 = lr.predict(x_test_3)
# Afficher les prédictions
for i in range(0,3):
  print(f"Le type {i} a emet {np.round(y_pred_3[i],2)}")

Le type 0 a emet 307.73
Le type 1 a emet 197.41
Le type 2 a emet 185.93


In [18]:
a,b,c

('R2 score: 0.8454049713860013 ------- Mean squared error: (589.7942470139345,)',
 'R2 score: 0.8406207227831229 ------- Mean squared error: (595.3000428823991,)',
 'R2 score: 0.8443816981739393 ------- Mean squared error: 590.9882326842314')