In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import date, time, datetime, timedelta

from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
from sklearn.base import BaseEstimator, TransformerMixin

from sklearn.pipeline import FeatureUnion
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression

from sklearn import metrics

In [9]:
# Load the full sensor log; every split below is derived from this frame.
df = pd.read_csv("Occupancy_Estimation.csv")

# The day 2017/12/22 is held out as the test set.
df_test = df[df['Date'] == '2017/12/22']

# Train on every other day, keeping only rows with a non-zero occupancy count.
df_train = df[(df['Date'] != '2017/12/22') & (df['Room_Occupancy_Count'] != 0)]

x_train = df_train.drop('Room_Occupancy_Count', axis=1)
y_train = df_train.Room_Occupancy_Count

# Test on all instances of the held-out day.
x_test = df_test.drop('Room_Occupancy_Count', axis=1)
y_test = df_test.Room_Occupancy_Count

# Per-class test subsets, used to measure accuracy separately for each
# occupancy count (i.e. when a given number of people is in the room).

# Occupancy count == 0
zero = df_test[df_test['Room_Occupancy_Count'] == 0]
xzero_test = zero.drop('Room_Occupancy_Count', axis=1)
yzero_test = zero.Room_Occupancy_Count

# Occupancy count == 1
um = df_test[df_test['Room_Occupancy_Count'] == 1]
xum_test = um.drop('Room_Occupancy_Count', axis=1)
yum_test = um.Room_Occupancy_Count

# Occupancy count == 2
dois = df_test[df_test['Room_Occupancy_Count'] == 2]
xdois_test = dois.drop('Room_Occupancy_Count', axis=1)
ydois_test = dois.Room_Occupancy_Count

# Occupancy count == 3
tres = df_test[df_test['Room_Occupancy_Count'] == 3]
xtres_test = tres.drop('Room_Occupancy_Count', axis=1)
ytres_test = tres.Room_Occupancy_Count

In [10]:
class TransformLinearRegression(LinearRegression):
    """Linear regression that also exposes the transformer interface.

    ``transform`` returns the regression predictions as a single column, so
    the fitted model can be used wherever a transformer is expected (for
    example as a branch of a ``FeatureUnion``).

    The constructor is deliberately inherited from ``LinearRegression``
    instead of being wrapped in ``__init__(self, **kw)``: scikit-learn
    discovers hyperparameters by inspecting the ``__init__`` signature, and a
    ``**kw``-only signature makes ``get_params()`` return nothing, so
    ``clone()`` would silently drop settings such as ``fit_intercept``.
    Keyword construction (``TransformLinearRegression(fit_intercept=False)``)
    keeps working unchanged.
    """

    def transform(self, x, y=None):
        """Return ``self.predict(x)`` reshaped with ``(-1, 1)``.

        ``y`` is accepted for interface compatibility and ignored.
        """
        return np.reshape(self.predict(x), (-1, 1))

    def fit_transform(self, x, y0):
        """Fit on ``(x, y0)`` and return the predictions for ``x`` as a column."""
        self.fit(x, y0)
        return self.transform(x)
        

In [11]:
class PirAgg():
    """Replace the two PIR columns with their trailing rolling maximum.

    The input is a 2-D array whose columns are, in order: prediction, slope,
    S6 PIR, S7 PIR, date, time. Rows are ordered by ``(date, time)`` for the
    rolling computation only; the output keeps the input row order.

    Parameters
    ----------
    line_agg : int
        Window size (number of rows) handed to ``DataFrame.rolling``.
    """

    def __init__(self, line_agg):
        self.line_agg = line_agg

    def fit(self, x, y=None):
        """Stateless transformer: nothing is learned, ``self`` is returned."""
        return self

    def transform(self, x, y=None):
        """Return a float array with columns pred, slope, s6_pir_roll, s7_pir_roll.

        Each ``*_roll`` column is the maximum of the corresponding PIR column
        over the current row and the preceding ``line_agg - 1`` rows in
        ``(date, time)`` order (``min_periods=1``, so the first rows use a
        shorter window). The date and time columns are dropped from the
        output. ``y`` is ignored.
        """
        frame = pd.DataFrame({
            'pred': x[:, 0], 'slope': x[:, 1],
            's6_pir': x[:, 2], 's7_pir': x[:, 3],
            'date': x[:, 4], 'time': x[:, 5],
        })
        pir_cols = ['s6_pir', 's7_pir']

        # Cast to float before rolling so the window runs on numeric data
        # even when the incoming array has object dtype, and use the built-in
        # ``max`` reduction rather than passing ``np.max`` to ``agg`` (the
        # callable form is deprecated in recent pandas releases).
        rolled = (
            frame.sort_values(['date', 'time'])[pir_cols]
            .astype(float)
            .rolling(self.line_agg, min_periods=1, center=False)
            .max()
            .reindex(frame.index)  # back to the caller's row order
        )

        return np.column_stack([
            frame['pred'].to_numpy(dtype=float),
            frame['slope'].to_numpy(dtype=float),
            rolled['s6_pir'].to_numpy(),
            rolled['s7_pir'].to_numpy(),
        ])

    def fit_transform(self, x, y=None):
        """Equivalent to ``fit(x)`` followed by ``transform(x)``."""
        self.fit(x)
        return self.transform(x)


In [12]:
class SlopeRounder(BaseEstimator, TransformerMixin):
    """Turn continuous predictions into integer counts.

    The input is a 2-D array read by column position: column 0 holds the
    prediction, column 1 the slope value, columns 2 and 3 the two values whose
    sum is tested against zero (in this notebook's pipeline they are the
    rolled PIR columns produced by the preceding step).

    Parameters
    ----------
    slope_base : float
        Half-width of the band around a fractional part of 0.5. Predictions
        whose fractional part lies outside the band are rounded with
        ``np.round``; inside the band the slope decides the direction.
    slope_value : float
        Slope threshold used inside the band: ``ceil`` when the slope is
        greater than this value, ``floor`` otherwise.
    base : float
        Predictions greater than or equal to ``base`` are set to 0 when
        columns 2 and 3 sum to zero.
    """

    def __init__(self, slope_base, slope_value, base):
        self.slope_base = slope_base
        self.slope_value = slope_value
        self.base = base

    def fit(self, x, y=None):
        """Stateless transformer: nothing is learned, ``self`` is returned."""
        return self

    def transform(self, x, y=None):
        """Return the 1-D array of rounded predictions.

        The input array is left untouched: every step works on fresh arrays
        instead of writing the intermediate results back into column 0 of
        ``x``. ``y`` is ignored.
        """
        pred = x[:, 0]
        slope = x[:, 1]
        no_motion = (x[:, 2] + x[:, 3]) == 0

        # High predictions with no PIR activity are forced to zero.
        pred = np.where((pred >= self.base) & no_motion, 0, pred)

        # Distance of the fractional part from .5: large means the value is
        # clearly closer to one integer, small means it is ambiguous.
        distance_from_half = np.abs(pred % 1 - 0.5)
        by_slope = np.where(slope > self.slope_value,
                            np.ceil(pred),
                            np.floor(pred))

        return np.where(distance_from_half > self.slope_base,
                        np.round(pred),
                        by_slope)

    def fit_transform(self, x, y=None):
        """Equivalent to ``fit(x)`` followed by ``transform(x)``."""
        self.fit(x)
        return self.transform(x)

In [13]:
# Sensor columns grouped by the scaler that is applied to them.
minmax_features = ['S1_Temp', 'S2_Temp', 'S3_Temp', 'S4_Temp']

standard_features = [
    'S1_Light', 'S2_Light', 'S3_Light', 'S4_Light',
    'S1_Sound', 'S2_Sound', 'S3_Sound', 'S4_Sound',
]

robust_features = ['S5_CO2']
pir = ['PIR']

# Scale each group with its own scaler; columns not listed are dropped.
column_transformer = ColumnTransformer(
    transformers=[
        ('std_scaler', StandardScaler(), standard_features),
        ('minmax', MinMaxScaler(), minmax_features),
        ('robust', RobustScaler(), robust_features),
    ],
    remainder='drop',
)

# Scaling followed by the linear regression whose predictions are exposed
# through ``transform``.
pipe = Pipeline(
    steps=[
        ('preprocessor', column_transformer),
        ('classifier', TransformLinearRegression()),
    ]
)

In [14]:
# Pass-through selectors for the raw columns that must travel alongside the
# regression output.
slope_col = ColumnTransformer(
    transformers=[('slope', 'passthrough', ['S5_CO2_Slope'])]
)
pir_for_roll = ColumnTransformer(
    transformers=[('s6 pir and s7 pir', 'passthrough', ['S6_PIR', 'S7_PIR'])]
)
date_time = ColumnTransformer(
    transformers=[('date and time', 'passthrough', ['Date', 'Time'])]
)

# Branch order fixes the column order of the stacked output:
# prediction, slope, S6 PIR, S7 PIR, date, time.
regression_postprocess = [
    ('pipeline', pipe),
    ('slope_column', slope_col),
    ('pir_for_roll', pir_for_roll),
    ('Date and time', date_time),
]

combined = FeatureUnion(transformer_list=regression_postprocess)


In [15]:
# Full model: regression prediction plus pass-through columns, then the
# rolling PIR maximum, then rounding of the prediction to an integer count.
model = Pipeline(steps=[
    ('predictor', combined),
    ('pir', PirAgg(line_agg=6)),
    ('slope rounder', SlopeRounder(slope_base=0.25, slope_value=0.8, base=3)),
])
model = model.fit(x_train, y_train)

out_frame = pd.DataFrame({'pred': model.transform(x_test)})

# accuracy_score expects (y_true, y_pred); the score is the same either way,
# but the conventional order keeps the call correct if the metric is swapped
# for an asymmetric one.
metrics.accuracy_score(y_test, out_frame['pred'])

0.9473324213406292