# Predicción con modelos de ml

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split as tts
from sklearn.linear_model import LinearRegression as LinReg

from sklearn import metrics
import numpy as np

## Preparamos los datos

In [2]:
df = pd.read_csv("datos_limpios.csv")

In [3]:
dicc_gas = {'E10':1,'SP98':0}

In [4]:
df['gas_num'] = df.gas_type.map(dicc_gas)

### Dividimos el data set en test y train y variable objetivo __CONSUMO__


In [5]:
X = df.drop(["consume","gas_type","refill gas"], axis=1)

In [6]:
X.head()

Unnamed: 0,distance,speed,temp_inside,temp_outside,AC,rain,sun,refill liters,snow,gas_num
0,28.0,26,21.5,12,0,0,0,45.0,0,1
1,12.0,30,21.5,13,0,0,0,0.0,0,1
2,11.2,38,21.5,15,0,0,0,0.0,0,1
3,12.9,36,21.5,14,0,0,0,0.0,0,1
4,18.5,46,21.5,15,0,0,0,0.0,0,1


In [7]:
y = df.consume

In [8]:
y.head()

0    5.0
1    4.2
2    5.5
3    3.9
4    4.5
Name: consume, dtype: float64

In [9]:
X_train, X_test, y_train, y_test = tts(X,y, test_size=0.2)

### Regresión lineal

In [10]:
linreg=LinReg()

In [11]:
linreg

LinearRegression()

In [12]:
linreg.fit(X_train, y_train)

LinearRegression()

In [13]:
linreg.coef_

array([ 1.97757845e-04, -1.66618731e-02, -2.32938281e-02, -3.63386599e-02,
        2.20376759e-01,  6.78086903e-01, -1.48729909e-01, -1.39331719e-02,
       -6.20388498e-01,  1.28095197e-01])

In [14]:
linreg.intercept_

6.393116107167958

In [15]:
y_pred = linreg.predict(X_test)

In [16]:
print('MAE - Error Medio Absoluto', metrics.mean_absolute_error(y_test, y_pred))
print('MSE - Error Cuadratico Medio', metrics.mean_squared_error(y_test, y_pred))
print('RMSE - Raiz Error Cuadratico Medio', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
print('R2 - Coeficiente de Determinacion', metrics.r2_score(y_test, y_pred))

MAE - Error Medio Absoluto 0.7356792679416676
MSE - Error Cuadratico Medio 1.454792770768292
RMSE - Raiz Error Cuadratico Medio 1.2061479058425182
R2 - Coeficiente de Determinacion 0.1752586502400073


### Intentamos otros modelos

In [17]:
from sklearn.linear_model import Ridge, Lasso
from sklearn.linear_model import SGDRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR

In [18]:
models={
    'ridge': Ridge(),
    'lasso': Lasso(),
    'sgd': SGDRegressor(),
    'knn': KNeighborsRegressor(),
    'grad': GradientBoostingRegressor(),
}

In [19]:
for name, model in models.items():
    print("ENTRENANDO: ", name)
    model.fit(X_train, y_train)

ENTRENANDO:  ridge
ENTRENANDO:  lasso
ENTRENANDO:  sgd
ENTRENANDO:  knn
ENTRENANDO:  grad


In [20]:
for name, model in models.items():
    y_pred = model.predict(X_test)
    print(f"------{name}------")
    print('MAE - ', metrics.mean_absolute_error(y_test, y_pred))
    print('MSE - ', metrics.mean_squared_error(y_test, y_pred))
    print('RMSE - ', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
    print('R2 - ', metrics.r2_score(y_test, y_pred))

------ridge------
MAE -  0.7300767974177708
MSE -  1.4454895985365335
RMSE -  1.2022851569143376
R2 -  0.18053274217780135
------lasso------
MAE -  0.7424374234004726
MSE -  1.5899314532927007
RMSE -  1.260924840461437
R2 -  0.09864673569831794
------sgd------
MAE -  881510283237.197
MSE -  1.9438513266773047e+24
RMSE -  1394220687939.074
R2 -  -1.1019951426139812e+24
------knn------
MAE -  0.5707692307692308
MSE -  0.8151794871794872
RMSE -  0.9028729075454015
R2 -  0.537863918447977
------grad------
MAE -  0.5231787826473275
MSE -  0.8836441606203987
RMSE -  0.9400234893982164
R2 -  0.4990503854698647


### Como conclusión no parece que sea posible predecir con precisión el consumo