In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import LassoCV
from sklearn.linear_model   import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import sklearn.metrics as metrics

In [2]:
data = pd.read_csv("iot_telemetry_data.csv")
data.head()

Unnamed: 0,ts,device,co,humidity,light,lpg,motion,smoke,temp
0,1594512000.0,b8:27:eb:bf:9d:51,0.004956,51.0,False,0.007651,False,0.020411,22.7
1,1594512000.0,00:0f:00:70:91:0a,0.00284,76.0,False,0.005114,False,0.013275,19.700001
2,1594512000.0,b8:27:eb:bf:9d:51,0.004976,50.9,False,0.007673,False,0.020475,22.6
3,1594512000.0,1c:bf:ce:15:ec:4d,0.004403,76.800003,True,0.007023,False,0.018628,27.0
4,1594512000.0,b8:27:eb:bf:9d:51,0.004967,50.9,False,0.007664,False,0.020448,22.6


In [3]:
from sklearn.preprocessing import LabelEncoder

encLab = LabelEncoder()
deviceEnc = pd.DataFrame(encLab.fit_transform(data['device']), columns=['device_condition'])
deviceEnc.index = data.index

dataEnc = pd.concat([data, deviceEnc], axis=1)
dataEnc.head()

Unnamed: 0,ts,device,co,humidity,light,lpg,motion,smoke,temp,device_condition
0,1594512000.0,b8:27:eb:bf:9d:51,0.004956,51.0,False,0.007651,False,0.020411,22.7,2
1,1594512000.0,00:0f:00:70:91:0a,0.00284,76.0,False,0.005114,False,0.013275,19.700001,0
2,1594512000.0,b8:27:eb:bf:9d:51,0.004976,50.9,False,0.007673,False,0.020475,22.6,2
3,1594512000.0,1c:bf:ce:15:ec:4d,0.004403,76.800003,True,0.007023,False,0.018628,27.0,1
4,1594512000.0,b8:27:eb:bf:9d:51,0.004967,50.9,False,0.007664,False,0.020448,22.6,2


In [4]:
encLab2 = LabelEncoder()
lightEnc = pd.DataFrame(encLab2.fit_transform(dataEnc['light']), columns=['lights'])
lightEnc.index = dataEnc.index

dataEnc2 = pd.concat([dataEnc, lightEnc], axis=1)
dataEnc2.head()

Unnamed: 0,ts,device,co,humidity,light,lpg,motion,smoke,temp,device_condition,lights
0,1594512000.0,b8:27:eb:bf:9d:51,0.004956,51.0,False,0.007651,False,0.020411,22.7,2,0
1,1594512000.0,00:0f:00:70:91:0a,0.00284,76.0,False,0.005114,False,0.013275,19.700001,0,0
2,1594512000.0,b8:27:eb:bf:9d:51,0.004976,50.9,False,0.007673,False,0.020475,22.6,2,0
3,1594512000.0,1c:bf:ce:15:ec:4d,0.004403,76.800003,True,0.007023,False,0.018628,27.0,1,1
4,1594512000.0,b8:27:eb:bf:9d:51,0.004967,50.9,False,0.007664,False,0.020448,22.6,2,0


In [5]:
dataProcessed = dataEnc2.drop(['device', 'light'], axis=1)
dataProcessed.head()

Unnamed: 0,ts,co,humidity,lpg,motion,smoke,temp,device_condition,lights
0,1594512000.0,0.004956,51.0,0.007651,False,0.020411,22.7,2,0
1,1594512000.0,0.00284,76.0,0.005114,False,0.013275,19.700001,0,0
2,1594512000.0,0.004976,50.9,0.007673,False,0.020475,22.6,2,0
3,1594512000.0,0.004403,76.800003,0.007023,False,0.018628,27.0,1,1
4,1594512000.0,0.004967,50.9,0.007664,False,0.020448,22.6,2,0


In [6]:
data_limpia_input = dataProcessed.copy()
removedCol = ['lpg', 'smoke', 'co']

data_limpia = data_limpia_input.drop(removedCol, axis=1)
data_limpia.head()

Unnamed: 0,ts,humidity,motion,temp,device_condition,lights
0,1594512000.0,51.0,False,22.7,2,0
1,1594512000.0,76.0,False,19.700001,0,0
2,1594512000.0,50.9,False,22.6,2,0
3,1594512000.0,76.800003,False,27.0,1,1
4,1594512000.0,50.9,False,22.6,2,0


In [8]:
X_cols = list(set(data_limpia.columns)-set(['ts','device_condition']))
y_cols = ['device_condition']

X = data_limpia[X_cols].values
y = data_limpia[y_cols].values

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=42)

sc_x = StandardScaler().fit(X)
sc_y = StandardScaler().fit(y)

X_train = sc_x.transform(X_train)
X_test = sc_x.transform(X_test)
y_train = sc_y.transform(y_train)
y_test = sc_y.transform(y_test)

In [12]:
modelo_lineal = LinearRegression().fit(X_train, y_train)
y_predict_linear = modelo_lineal.predict(X_test)

modelo_lasso = Lasso(alpha=0.02).fit(X_train, y_train)
y_predict_lasso = modelo_lasso.predict(X_test)

modelo_ridge = Ridge(alpha=1).fit(X_train, y_train)
y_predict_ridge = modelo_ridge.predict(X_test)

In [21]:
print("="*82)
print("Metricas para el modelo de LinearRegression")
print("")

linear_loss = mean_squared_error(y_test, y_predict_linear)
r2_linear = metrics.r2_score(y_test, y_predict_linear)
rmse_linear = mean_squared_error(y_test, y_predict_linear, squared = False)

print('Coeficiente de Determinacion R2 = ',r2_linear)
print('Error Cuadratico Medio (Mean Squared Error - MSE) = ',linear_loss)
print('Raiz Cuadrada del Error Cuadratico Medio (RMSE) = ',rmse_linear)

print("="*82)
print("Metricas para el modelo de Lasso")
print("")

lasso_loss = mean_squared_error(y_test, y_predict_lasso)
r2_lasso = metrics.r2_score(y_test, y_predict_lasso)
rmse_lasso = mean_squared_error(y_test, y_predict_lasso, squared = False)

print('Coeficiente de Determinacion R2 = ',r2_lasso)
print('Error Cuadratico Medio (Mean Squared Error - MSE) = ',lasso_loss)
print('Raiz Cuadrada del Error Cuadratico Medio (RMSE) = ',rmse_lasso)

print("="*82)
print("Metricas para el modelo de Ridge")
print("")

ridge_loss = mean_squared_error(y_test, y_predict_ridge)
r2 = metrics.r2_score(y_test, y_predict_ridge)
rmse_ridge = mean_squared_error(y_test, y_predict_ridge, squared = False)

print('Coeficiente de Determinacion R2 = ',r2)
print('Error Cuadratico Medio (Mean Squared Error - MSE) = ',ridge_loss)
print('Raiz Cuadrada del Error Cuadratico Medio (RMSE) = ',rmse_ridge)

Metricas para el modelo de LinearRegression

Coeficiente de Determinacion R2 =  0.8232507139737045
Error Cuadratico Medio (Mean Squared Error - MSE) =  0.17650359352984252
Raiz Cuadrada del Error Cuadratico Medio (RMSE) =  0.4201233075298757
Metricas para el modelo de Lasso

Coeficiente de Determinacion R2 =  0.8191629075160579
Error Cuadratico Medio (Mean Squared Error - MSE) =  0.1805857176823654
Raiz Cuadrada del Error Cuadratico Medio (RMSE) =  0.42495378299571046
Metricas para el modelo de Ridge

Coeficiente de Determinacion R2 =  0.823250713047305
Error Cuadratico Medio (Mean Squared Error - MSE) =  0.17650359445495425
Raiz Cuadrada del Error Cuadratico Medio (RMSE) =  0.42012330863087594


In [15]:
print("="*32)
print("coeficientes de lasso")
print(modelo_lasso.coef_)

print("="*32)
print("coeficientes de ridge")
print(modelo_ridge.coef_)


coeficientes de lasso
[ 0.         -0.8785011  -0.         -0.05146773]
coeficientes de ridge
[[ 0.11717151 -0.84155921 -0.00257962 -0.16197954]]
