In [1]:
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoCV

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
data = pd.read_csv("iot_telemetry_data.csv")
data.head()

Unnamed: 0,ts,device,co,humidity,light,lpg,motion,smoke,temp
0,1594512000.0,b8:27:eb:bf:9d:51,0.004956,51.0,False,0.007651,False,0.020411,22.7
1,1594512000.0,00:0f:00:70:91:0a,0.00284,76.0,False,0.005114,False,0.013275,19.700001
2,1594512000.0,b8:27:eb:bf:9d:51,0.004976,50.9,False,0.007673,False,0.020475,22.6
3,1594512000.0,1c:bf:ce:15:ec:4d,0.004403,76.800003,True,0.007023,False,0.018628,27.0
4,1594512000.0,b8:27:eb:bf:9d:51,0.004967,50.9,False,0.007664,False,0.020448,22.6


In [4]:
from sklearn.preprocessing import LabelEncoder

encLab = LabelEncoder()
deviceEnc = pd.DataFrame(encLab.fit_transform(data['device']), columns=['device_condition'])
deviceEnc.index = data.index

dataEnc = pd.concat([data, deviceEnc], axis=1)
dataEnc.head()

Unnamed: 0,ts,device,co,humidity,light,lpg,motion,smoke,temp,device_condition
0,1594512000.0,b8:27:eb:bf:9d:51,0.004956,51.0,False,0.007651,False,0.020411,22.7,2
1,1594512000.0,00:0f:00:70:91:0a,0.00284,76.0,False,0.005114,False,0.013275,19.700001,0
2,1594512000.0,b8:27:eb:bf:9d:51,0.004976,50.9,False,0.007673,False,0.020475,22.6,2
3,1594512000.0,1c:bf:ce:15:ec:4d,0.004403,76.800003,True,0.007023,False,0.018628,27.0,1
4,1594512000.0,b8:27:eb:bf:9d:51,0.004967,50.9,False,0.007664,False,0.020448,22.6,2


In [5]:
encLab2 = LabelEncoder()
lightEnc = pd.DataFrame(encLab2.fit_transform(dataEnc['light']), columns=['lights'])
lightEnc.index = dataEnc.index

dataEnc2 = pd.concat([dataEnc, lightEnc], axis=1)
dataEnc2.head()

Unnamed: 0,ts,device,co,humidity,light,lpg,motion,smoke,temp,device_condition,lights
0,1594512000.0,b8:27:eb:bf:9d:51,0.004956,51.0,False,0.007651,False,0.020411,22.7,2,0
1,1594512000.0,00:0f:00:70:91:0a,0.00284,76.0,False,0.005114,False,0.013275,19.700001,0,0
2,1594512000.0,b8:27:eb:bf:9d:51,0.004976,50.9,False,0.007673,False,0.020475,22.6,2,0
3,1594512000.0,1c:bf:ce:15:ec:4d,0.004403,76.800003,True,0.007023,False,0.018628,27.0,1,1
4,1594512000.0,b8:27:eb:bf:9d:51,0.004967,50.9,False,0.007664,False,0.020448,22.6,2,0


In [7]:
dataProcessed = dataEnc2.drop(['device', 'light'], axis=1)
dataProcessed.head()

Unnamed: 0,ts,co,humidity,lpg,motion,smoke,temp,device_condition,lights
0,1594512000.0,0.004956,51.0,0.007651,False,0.020411,22.7,2,0
1,1594512000.0,0.00284,76.0,0.005114,False,0.013275,19.700001,0,0
2,1594512000.0,0.004976,50.9,0.007673,False,0.020475,22.6,2,0
3,1594512000.0,0.004403,76.800003,0.007023,False,0.018628,27.0,1,1
4,1594512000.0,0.004967,50.9,0.007664,False,0.020448,22.6,2,0


In [8]:
data_limpia_input = dataProcessed.copy()
removedCol = ['lpg', 'smoke', 'co']

data_limpia = data_limpia_input.drop(removedCol, axis=1)
data_limpia.head()

Unnamed: 0,ts,humidity,motion,temp,device_condition,lights
0,1594512000.0,51.0,False,22.7,2,0
1,1594512000.0,76.0,False,19.700001,0,0
2,1594512000.0,50.9,False,22.6,2,0
3,1594512000.0,76.800003,False,27.0,1,1
4,1594512000.0,50.9,False,22.6,2,0


In [9]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [22]:
X_cols = list(set(data_limpia.columns)-set(['ts','device_condition']))
y_cols = ['device_condition']

X = data_limpia[X_cols].values
y = data_limpia[y_cols].values

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=42)

sc_x = StandardScaler().fit(X)
sc_y = StandardScaler().fit(y)

X_train = sc_x.transform(X_train)
X_test = sc_x.transform(X_test)
y_train = sc_y.transform(y_train)
y_test = sc_y.transform(y_test)

In [35]:
modelo_lasso = Lasso(alpha=0.02).fit(X_train, y_train)

In [40]:
y_predict_lasso = modelo_lasso.predict(X_test)
y_predict_lasso.reshape(-1, 1)

array([[ 0.30498092],
       [ 0.70586524],
       [-0.46790713],
       ...,
       [ 0.31270969],
       [-1.30364394],
       [ 0.73779812]])

In [37]:
from sklearn.metrics import mean_squared_error
import sklearn.metrics as metrics

ridge_loss = mean_squared_error(y_test, y_predict_lasso)
r2 = metrics.r2_score(y_test, y_predict_lasso)

print('r2 = ',r2)
print('mse = ',ridge_loss)


r2 =  0.819162915916624
mse =  0.18058570929347662
