# Notebook para tratamento dos dados

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

#style.use('seaborn')
%matplotlib inline


In [2]:
# Silence warnings before the remaining libraries are imported, so that
# deprecation notices do not clutter the cell outputs.
# NOTE(review): the blanket 'ignore' filter below already ignores every warning
# category; the FutureWarning filter and the catch_warnings block look redundant
# — confirm before removing.
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter(action='ignore', category=FutureWarning)
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    from sklearn import preprocessing, metrics
# The `skfuzzy` module is provided by the scikit-fuzzy package; install with:
# !pip install -U scikit-fuzzy
import sys
import skfuzzy as fuzz
# NOTE(review): 1000 is presumably the interpreter's default recursion limit
# — confirm whether this call is still needed.
sys.setrecursionlimit(1000)

In [3]:
# Read the space-separated raw readings and keep one frame per mote of interest.
data_column_names = [
    "Date", "Time", "Epoch", "Moteid",
    "Temperature", "Humidity", "Light", "Voltage",
]
dplot = pd.read_csv("../data.txt", sep=' ', names=data_column_names)

# Constant later written into column '3' of the third sensor's frame.
s = 21.0

# One filtered frame per mote id (19 and 20).
S19df, S20df = (dplot[dplot['Moteid'] == mote_id] for mote_id in (19, 20))

In [4]:
# Load the frame stored in 'dfImputation.csv' and show the three frame shapes side by side.
S21df = pd.read_csv('dfImputation.csv')
tuple(frame.shape for frame in (S19df, S20df, S21df))

((39459, 8), (28835, 8), (65537, 8))

In [5]:
# Overwrite every value of column '3' with the constant `s`
# (presumably the mote-id column — TODO confirm), then preview the first rows.
S21df['3'] = s
S21df.head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,2004-03-31,03:38:15.757551,0,21.0,0.0,-3.91901,11.04,2.03397
1,2004-02-28,00:59:16.02785,1,21.0,0.0,37.0933,45.08,2.69964
2,2004-02-28,01:03:16.33393,2,21.0,0.0,38.4629,45.08,2.68742
3,2004-02-28,01:06:16.013453,3,21.0,0.0,38.8039,45.08,2.68742
4,2004-02-28,01:06:46.778088,4,21.0,0.0,38.8379,45.08,2.69964


In [6]:
# Flag the readings of S20df whose 'Temperature' is at or above the 26.2 cut-off.
# A vectorised boolean mask replaces the row-by-row iterrows() scan: it yields the
# same index labels and values without building a Series object per row.
s20_outlier_mask = S20df['Temperature'] >= 26.2
outliersIL = S20df.index[s20_outlier_mask].tolist()                      # index labels to drop
outliersILValues = S20df.loc[s20_outlier_mask, 'Temperature'].tolist()  # offending readings
print(len(outliersIL))


3363


In [7]:
# Remove the flagged outlier rows from the mote-20 readings and save the result.
print(S20df.shape)                      # size before removal
passo3 = S20df.drop(index=outliersIL)   # drop() returns a new frame; S20df is untouched
print(passo3.shape)                     # size after removal
passo3.to_csv('S20FINAL.csv')

(28835, 8)
(25472, 8)


In [8]:
# Flag the readings of S19df whose 'Temperature' is at or above the 26.2 cut-off.
# A vectorised boolean mask replaces the row-by-row iterrows() scan: it yields the
# same index labels and values without building a Series object per row.
s19_outlier_mask = S19df['Temperature'] >= 26.2
outliersIL2 = S19df.index[s19_outlier_mask].tolist()                  # index labels to drop
outliersILV2 = S19df.loc[s19_outlier_mask, 'Temperature'].tolist()   # offending readings
print(len(outliersIL2))


13505


In [9]:
# Remove the flagged outlier rows from the mote-19 readings and save the result.
print(S19df.shape)                         # size before removal
passo3_3 = S19df.drop(index=outliersIL2)   # drop() returns a new frame; S19df is untouched
print(passo3_3.shape)                      # size after removal
passo3_3.to_csv('S19FINAL.csv')

(39459, 8)
(25954, 8)


In [10]:
# Flag the rows of S21df whose column '4' is at or above the 26.2 cut-off
# (the same threshold applied to 'Temperature' for the other two frames).
# A vectorised boolean mask replaces the row-by-row iterrows() scan: it yields the
# same index labels and values without building a Series object per row.
s21_outlier_mask = S21df['4'] >= 26.2
outliersIL_3 = S21df.index[s21_outlier_mask].tolist()          # index labels to drop
outliersILV_3 = S21df.loc[s21_outlier_mask, '4'].tolist()     # offending readings
print(len(outliersIL_3))


0


In [11]:
# Remove the flagged outlier rows from S21df and save the result.
print(S21df.shape)                           # size before removal
passo3_2 = S21df.drop(index=outliersIL_3)    # drop() returns a new frame; S21df is untouched
print(passo3_2.shape)                        # size after removal
passo3_2.to_csv('S21FINAL.csv')

(65537, 8)
(65537, 8)


In [12]:
# Reload the three cleaned files; each frame gets a fresh 0..n-1 index on read.
S19df_, S20df_, S21df_ = map(
    pd.read_csv,
    ('S19FINAL.csv', 'S20FINAL.csv', 'S21FINAL.csv'),
)
# Re-apply the constant `s` to column '3' of the reloaded frame.
S21df_['3'] = s
tuple(frame.shape for frame in (S20df_, S21df_, S19df_))

((25472, 9), (65537, 9), (25954, 9))

In [13]:
# Preview the first rows of the frame reloaded from 'S21FINAL.csv'.
S21df_.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7
0,0,2004-03-31,03:38:15.757551,0,21.0,0.0,-3.91901,11.04,2.03397
1,1,2004-02-28,00:59:16.02785,1,21.0,0.0,37.0933,45.08,2.69964
2,2,2004-02-28,01:03:16.33393,2,21.0,0.0,38.4629,45.08,2.68742
3,3,2004-02-28,01:06:16.013453,3,21.0,0.0,38.8039,45.08,2.68742
4,4,2004-02-28,01:06:46.778088,4,21.0,0.0,38.8379,45.08,2.69964


# Sistema Fuzzy

In [14]:
# Fuzzy model
# Input universe (`temp`) and output universe (`tempF`): identical grids from
# -26.2 up to (but excluding) 26.2 in steps of 0.1, held in separate arrays.
temp = np.arange(-26.2, 26.2, 0.1)
tempF = temp.copy()

# Input membership functions: three triangles over `temp`.
temp_lo, temp_md, temp_hi = (
    fuzz.trimf(temp, abc)
    for abc in ([-26.2, 0.0, 5.0], [0.0, 5.0, 20.0], [17.20, 22.8, 26.2])
)

# Trapezoid spanning the output universe with its plateau between 5.0 and 17.20.
mfx = fuzz.trapmf(tempF, [-26.2, 5.0, 17.20, 26.2])

# Output membership functions over `tempF`: the "lo" set uses the same trapezoid
# parameters as `mfx`; "md" and "hi" are triangles.
tempFinal_lo = fuzz.trapmf(tempF, [-26.2, 5.0, 17.20, 26.2])
tempFinal_md = fuzz.trimf(tempF, [0.0, 5.0, 20.0])
tempFinal_hi = fuzz.trimf(tempF, [17.20, 22.8, 26.2])


In [16]:
def fuzification(i, data, data1, data2):
    """Build the aggregated output fuzzy set for row label ``i``.

    The 'Temperature' readings found at label ``i`` in ``data1`` and ``data2``
    are fuzzified against the module-level input sets (``temp_lo``, ``temp_md``,
    ``temp_hi``), combined through a 3x3 rule table and aggregated over the
    output universe ``tempF``.

    Parameters
    ----------
    i : index label
        Row label looked up in ``data``, ``data1`` and ``data2``.
    data : pandas.DataFrame
        Frame being imputed; its column '3' must equal the module-level
        constant ``s`` at label ``i`` for the inference to run.
    data1, data2 : pandas.DataFrame
        Frames providing a 'Temperature' column used as rule inputs.

    Returns
    -------
    numpy.ndarray or None
        Aggregated membership values sampled on ``tempF``, or ``None`` when
        ``data['3']`` at ``i`` differs from ``s``.

    Raises
    ------
    KeyError
        If label ``i`` is missing from any of the three frames.
    """
    # `.ix` was removed in pandas 1.0; `.loc` is the label-based equivalent.
    if data['3'].loc[i] != s:
        return None

    reading_a = data1['Temperature'].loc[i]
    reading_b = data2['Temperature'].loc[i]

    # Degree of membership of each reading in the lo / md / hi input sets.
    input_sets = (temp_lo, temp_md, temp_hi)
    levels_a = [fuzz.interp_membership(temp, mf, reading_a) for mf in input_sets]
    levels_b = [fuzz.interp_membership(temp, mf, reading_b) for mf in input_sets]

    # Rule table: consequents[a][b] is the output set fired when the first
    # reading is in input set `a` and the second in input set `b`
    # (order lo, md, hi on both axes).
    consequents = (
        (tempFinal_lo, tempFinal_lo, tempFinal_md),
        (tempFinal_lo, tempFinal_hi, tempFinal_hi),
        (tempFinal_md, tempFinal_hi, tempFinal_hi),
    )

    aggregated = np.zeros_like(tempFinal_lo)
    for level_a, consequent_row in zip(levels_a, consequents):
        for level_b, consequent in zip(levels_b, consequent_row):
            # AND of the two antecedents -> rule firing strength.
            strength = np.fmin(level_a, level_b)
            # Mamdani implication clips the consequent at the firing strength
            # (np.fmin). The previous np.fmax left every consequent at full
            # height, so the result hardly depended on the inputs.
            activation = np.fmin(strength, consequent)
            # Aggregate ALL nine rules with max; previously the md/md rule was
            # left out and the md/hi rule was counted twice.
            aggregated = np.fmax(aggregated, activation)
    return aggregated


In [17]:
def _maximum_by_magnitude(points, pick):
    """Return the entry of ``points`` whose absolute value is ``pick`` (np.min or np.max) of all magnitudes."""
    magnitudes = np.abs(points)
    # Select on magnitudes (not on the signed values) so negative points can be
    # returned; on a +v / -v tie the non-negative point wins.
    return points[magnitudes == pick(magnitudes)].max()


def defuzz(x, mfx, mode):
    """Collapse a sampled fuzzy membership function into one crisp value.

    Parameters
    ----------
    x : numpy.ndarray
        Sample points of the universe; flattened with ``ravel()``.
    mfx : numpy.ndarray
        Membership degree at each point of ``x``; must have the same length.
    mode : str
        Case-insensitive and matched by substring, tested in this order:
        ``'centroid'`` (membership-weighted mean of the samples),
        ``'bisector'`` (first sample where the running membership sum reaches
        half of the total), ``'mom'`` (mean of the maxima), ``'som'`` / ``'lom'``
        (the maximum with the smallest / largest absolute value).

    Returns
    -------
    numpy scalar
        The crisp value.

    Raises
    ------
    AssertionError
        If ``x`` and ``mfx`` differ in length, or the memberships sum to zero
        for ``'centroid'`` / ``'bisector'``.
    ValueError
        If ``mode`` contains none of the recognised keywords.
    """
    mode = mode.lower()
    x = x.ravel()
    mfx = mfx.ravel()
    n = len(x)
    assert n == len(mfx), 'Length of x and fuzzy membership function must be \
                          identical.'

    # `mode` is lower-cased above, so the keyword must be lower-case as well;
    # the previous 'Centroid' test could never match.
    if 'centroid' in mode or 'bisector' in mode:
        tot_area = mfx.sum()
        assert tot_area != 0, 'Total area is zero in defuzzification!'

        if 'centroid' in mode:
            # Discrete centroid, consistent with `mfx.sum()` standing in for the
            # area; the previously referenced `centroid` helper is not defined
            # in this notebook.
            return (x * mfx).sum() / tot_area

        elif 'bisector' in mode:
            tmp = 0
            for k in range(n):
                tmp += mfx[k]
                if tmp >= tot_area / 2.:
                    return x[k]

    elif 'mom' in mode:
        return np.mean(x[mfx == mfx.max()])

    elif 'som' in mode:
        # Previously raised IndexError when the selected maximum was negative.
        return _maximum_by_magnitude(x[mfx == mfx.max()], np.min)

    elif 'lom' in mode:
        # Previously raised IndexError when the selected maximum was negative.
        return _maximum_by_magnitude(x[mfx == mfx.max()], np.max)

    else:
        raise ValueError('The input for `mode`, %s, was incorrect.' % (mode))



In [None]:
# Replace every 0.0 in column '4' of S21df_ (presumably the placeholder for a
# missing temperature — TODO confirm) with the crisp output of the fuzzy system
# fed by the two neighbouring frames.
aux = 0.0  # value to write; keeps the last successful estimate as a fallback

# Labels are collected up front: each write only touches the row being processed,
# so the selection matches a row-by-row check while avoiding iterrows() and
# chained indexing.
missing_labels = S21df_.index[S21df_['4'] == 0.0]
for i in missing_labels:
    try:
        # Run the inference once per row (it was previously evaluated twice).
        aggregated = fuzification(i, S21df_, S19df_, S20df_)
        if aggregated is not None:
            aux = defuzz(tempF, aggregated, 'lom')
    except (KeyError, IndexError):
        # KeyError: a neighbouring frame has no row with label `i`.
        # IndexError: defuzz could not select a maximum.
        # In both cases fall back to the previous estimate held in `aux`.
        # Other exceptions (and KeyboardInterrupt) are no longer swallowed.
        pass
    S21df_.at[i, '4'] = aux
S21df_.tail()



In [None]:
# Report the final dimensions and write the frame to 'finalsensor.csv'
# without the index column.
print(S21df_.shape)
S21df_.to_csv('finalsensor.csv', index=False)

# Final