# Bibliotecas

In [1]:
import numpy as np
import pandas as pd
import matplotlib as plt
import seaborn as sns
from numpy import nan
import math
from datetime import datetime
import matplotlib.pyplot as plt

# Carregamento de Dados

In [4]:
# Read the raw FieldPRO sensor export into a DataFrame and display it.
df_sensor = pd.read_csv(filepath_or_buffer='../raw_data/Sensor_FieldPRO.csv')
df_sensor

Unnamed: 0,Datetime – utc,air_humidity_100,air_temperature_100,atm_pressure_main,num_of_resets,piezo_charge,piezo_temperature
0,2020-09-30T23:00:00Z,38.0,31.366,9412,0,45123,30
1,2020-10-01T00:00:00Z,,,9412,0,45025,31
2,2020-10-01T01:00:00Z,39.0,31.366,9419,0,44923,31
3,2020-10-01T02:00:00Z,39.0,31.322,9419,0,44825,31
4,2020-10-01T03:00:00Z,38.0,31.240,9416,0,44728,31
...,...,...,...,...,...,...,...
1706,2020-12-11T09:00:00Z,82.0,19.480,9422,3,20669,17
1707,2020-12-11T10:00:00Z,69.0,24.000,9429,3,20606,19
1708,2020-12-11T11:00:00Z,63.0,25.440,9436,3,20529,25
1709,2020-12-11T12:00:00Z,60.0,27.160,9439,3,20442,28


- piezo_charge: carga do acumulador (medida de hora em hora)

- piezo_temperature: temperatura da placa (também medida de hora em hora)

- num_of_resets: número total de resets da placa desde que foi ligada pela primeira vez

OBS.: Um evento de reset na placa pode afetar o comportamento do acumulador de carga.



In [5]:
# Read the conventional weather-station export into a DataFrame and display it.
df_estacao = pd.read_csv(filepath_or_buffer='../raw_data/Estacao_Convencional.csv')
df_estacao

Unnamed: 0,data,Hora (Brasília),chuva
0,2020-09-01,00:00:00,0.0
1,2020-09-01,01:00:00,0.0
2,2020-09-01,02:00:00,0.0
3,2020-09-01,03:00:00,0.0
4,2020-09-01,04:00:00,0.0
...,...,...,...
2251,2020-12-03,19:00:00,0.0
2252,2020-12-03,20:00:00,0.0
2253,2020-12-03,21:00:00,0.0
2254,2020-12-03,22:00:00,0.0


# Limpeza e arrumação de dados

In [9]:
# Dimensions of the raw sensor frame as a (rows, columns) tuple.
df_sensor.shape

(1711, 7)

In [8]:
# Preview the first five rows of the raw sensor frame.
df_sensor.head(n=5)

Unnamed: 0,Datetime – utc,air_humidity_100,air_temperature_100,atm_pressure_main,num_of_resets,piezo_charge,piezo_temperature
0,2020-09-30T23:00:00Z,38.0,31.366,9412,0,45123,30
1,2020-10-01T00:00:00Z,,,9412,0,45025,31
2,2020-10-01T01:00:00Z,39.0,31.366,9419,0,44923,31
3,2020-10-01T02:00:00Z,39.0,31.322,9419,0,44825,31
4,2020-10-01T03:00:00Z,38.0,31.24,9416,0,44728,31


In [7]:
# Print the per-column dtype and non-null count summary for the raw sensor frame.
df_sensor.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1711 entries, 0 to 1710
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Datetime – utc       1711 non-null   object 
 1   air_humidity_100     1705 non-null   float64
 2   air_temperature_100  1705 non-null   float64
 3   atm_pressure_main    1711 non-null   int64  
 4   num_of_resets        1711 non-null   int64  
 5   piezo_charge         1711 non-null   int64  
 6   piezo_temperature    1711 non-null   int64  
dtypes: float64(2), int64(4), object(1)
memory usage: 93.7+ KB


## Eliminar linhas com valores NaN

In [14]:
# Count the missing values in each column of the raw sensor frame.
df_sensor.isnull().sum()

Datetime – utc         0
air_humidity_100       6
air_temperature_100    6
atm_pressure_main      0
num_of_resets          0
piezo_charge           0
piezo_temperature      0
dtype: int64

In [15]:
# Preview the frame without rows that contain any NaN. The result is only
# displayed: nothing is assigned, so df_sensor itself keeps all of its rows.
df_sensor.dropna(axis=0, how='any')

Unnamed: 0,Datetime – utc,air_humidity_100,air_temperature_100,atm_pressure_main,num_of_resets,piezo_charge,piezo_temperature
0,2020-09-30T23:00:00Z,38.0,31.366,9412,0,45123,30
2,2020-10-01T01:00:00Z,39.0,31.366,9419,0,44923,31
3,2020-10-01T02:00:00Z,39.0,31.322,9419,0,44825,31
4,2020-10-01T03:00:00Z,38.0,31.240,9416,0,44728,31
5,2020-10-01T04:00:00Z,39.0,30.828,9411,0,44632,30
...,...,...,...,...,...,...,...
1706,2020-12-11T09:00:00Z,82.0,19.480,9422,3,20669,17
1707,2020-12-11T10:00:00Z,69.0,24.000,9429,3,20606,19
1708,2020-12-11T11:00:00Z,63.0,25.440,9436,3,20529,25
1709,2020-12-11T12:00:00Z,60.0,27.160,9439,3,20442,28


In [17]:
# Preview the cleaned frame: drop rows containing any NaN and renumber the
# index from 0. reset_index(drop=True) discards the old index directly instead
# of first materialising it as an 'index' column and dropping that afterwards
# (which would remove the wrong column if one named 'index' already existed).
df_sensor.dropna().reset_index(drop=True)

Unnamed: 0,Datetime – utc,air_humidity_100,air_temperature_100,atm_pressure_main,num_of_resets,piezo_charge,piezo_temperature
0,2020-09-30T23:00:00Z,38.0,31.366,9412,0,45123,30
1,2020-10-01T01:00:00Z,39.0,31.366,9419,0,44923,31
2,2020-10-01T02:00:00Z,39.0,31.322,9419,0,44825,31
3,2020-10-01T03:00:00Z,38.0,31.240,9416,0,44728,31
4,2020-10-01T04:00:00Z,39.0,30.828,9411,0,44632,30
...,...,...,...,...,...,...,...
1700,2020-12-11T09:00:00Z,82.0,19.480,9422,3,20669,17
1701,2020-12-11T10:00:00Z,69.0,24.000,9429,3,20606,19
1702,2020-12-11T11:00:00Z,63.0,25.440,9436,3,20529,25
1703,2020-12-11T12:00:00Z,60.0,27.160,9439,3,20442,28


In [18]:
# Build the cleaned sensor frame: drop rows containing any NaN and renumber the
# index from 0. reset_index(drop=True) discards the old index directly instead
# of first materialising it as an 'index' column and dropping that afterwards
# (which would remove the wrong column if one named 'index' already existed).
# df_sensor is left untouched, so this cell can be re-run safely.
df_sensor2 = df_sensor.dropna().reset_index(drop=True)
df_sensor2

Unnamed: 0,Datetime – utc,air_humidity_100,air_temperature_100,atm_pressure_main,num_of_resets,piezo_charge,piezo_temperature
0,2020-09-30T23:00:00Z,38.0,31.366,9412,0,45123,30
1,2020-10-01T01:00:00Z,39.0,31.366,9419,0,44923,31
2,2020-10-01T02:00:00Z,39.0,31.322,9419,0,44825,31
3,2020-10-01T03:00:00Z,38.0,31.240,9416,0,44728,31
4,2020-10-01T04:00:00Z,39.0,30.828,9411,0,44632,30
...,...,...,...,...,...,...,...
1700,2020-12-11T09:00:00Z,82.0,19.480,9422,3,20669,17
1701,2020-12-11T10:00:00Z,69.0,24.000,9429,3,20606,19
1702,2020-12-11T11:00:00Z,63.0,25.440,9436,3,20529,25
1703,2020-12-11T12:00:00Z,60.0,27.160,9439,3,20442,28


## Tratar da coluna Datetime – utc

In [19]:
# Print the per-column dtype and non-null count summary for the cleaned frame,
# to check the dtype of the timestamp column before it is processed.
df_sensor2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1705 entries, 0 to 1704
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Datetime – utc       1705 non-null   object 
 1   air_humidity_100     1705 non-null   float64
 2   air_temperature_100  1705 non-null   float64
 3   atm_pressure_main    1705 non-null   int64  
 4   num_of_resets        1705 non-null   int64  
 5   piezo_charge         1705 non-null   int64  
 6   piezo_temperature    1705 non-null   int64  
dtypes: float64(2), int64(4), object(1)
memory usage: 93.4+ KB
