# Datasets preparation

## Energy datasets

#### Import libraries

In [13]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

#### Read data

In [14]:
# Load the two raw energy CSV files, in order, into df_energy1 and df_energy2.
# na_filter=False switches off pandas' missing-value detection, so empty fields and
# marker strings are kept as literal text instead of being converted to NaN.
df_energy1, df_energy2 = (
    pd.read_csv(csv_path, na_filter=False)
    for csv_path in (
        "../../../datasets/parte2/treino/energia_202109-202112.csv",
        "../../../datasets/parte2/treino/energia_202201-202212.csv",
    )
)

#### Concatenate datasets

In [15]:
# Stack the two energy frames row-wise into one frame.
# ignore_index=True renumbers the rows 0..n-1; without it each input keeps its own
# row labels, so the combined index would contain duplicate labels.
df_energy = pd.concat([df_energy1, df_energy2], ignore_index=True)

#### Convert 'Hora' column to int

In [16]:
# Cast only the 'Hora' column to integer; every other column keeps its dtype.
df_energy = df_energy.astype({'Hora': int})

#### Types Visualization

In [17]:
# Display the dtype of every column in df_energy (rendered as the cell output).
df_energy.dtypes

Data                        object
Hora                         int64
Normal (kWh)               float64
Horário Económico (kWh)    float64
Autoconsumo (kWh)          float64
Injeção na rede (kWh)       object
dtype: object

## Meteo datasets

#### Read data

In [18]:
# Load the two raw meteo CSV files, in order, into df_meteo1 and df_meteo2.
# na_filter=False switches off pandas' missing-value detection, so empty fields are
# kept as literal text instead of being converted to NaN.
df_meteo1, df_meteo2 = (
    pd.read_csv(csv_path, na_filter=False)
    for csv_path in (
        "../../../datasets/parte2/treino/meteo_202109-202112.csv",
        "../../../datasets/parte2/treino/meteo_202201-202212.csv",
    )
)

#### Concatenate datasets

In [19]:
# Stack the two meteo frames row-wise into one frame.
# ignore_index=True renumbers the rows 0..n-1; without it each input keeps its own
# row labels, so the combined index would contain duplicate labels.
df_meteo = pd.concat([df_meteo1, df_meteo2], ignore_index=True)

#### Types Visualization

In [20]:
# Display the dtype of every column in df_meteo (rendered as the cell output).
df_meteo.dtypes

dt                       int64
dt_iso                  object
city_name               object
temp                   float64
feels_like             float64
temp_min               float64
temp_max               float64
pressure                 int64
sea_level               object
grnd_level              object
humidity                 int64
wind_speed             float64
rain_1h                 object
clouds_all               int64
weather_description     object
dtype: object

#### Transforming 'dt_iso' column to 'Data' and 'Hora' columns

In [21]:
# Parse the 'dt_iso' text into timestamps. The literal ' UTC' suffix is removed first
# (regex=False: plain substring match, independent of the pandas version's default)
# so that the remaining text fits the '%Y-%m-%d %H:%M:%S %z' layout.
df_meteo['dt_iso'] = pd.to_datetime(
    df_meteo['dt_iso'].str.replace(' UTC', '', regex=False),
    format='%Y-%m-%d %H:%M:%S %z',
)

# Derive the two merge keys from the parsed timestamps:
#   'Data' - calendar date as a 'YYYY-MM-DD' string
#   'Hora' - hour of day as an integer, read directly from the datetime accessor
#            rather than formatting to text and parsing it back.
# NOTE(review): both keys use the timestamp's own offset (presumably UTC); confirm that
# the energy data's 'Data'/'Hora' columns are on the same clock before merging.
df_meteo['Data'] = df_meteo['dt_iso'].dt.strftime('%Y-%m-%d')
df_meteo['Hora'] = df_meteo['dt_iso'].dt.hour.astype(int)

#### Drop 'dt_iso' column

In [22]:
# Remove the 'dt_iso' column from df_meteo.
df_meteo = df_meteo.drop(columns='dt_iso')

## Merge datasets by 'Data' and 'Hora' columns

In [23]:
# Inner-join meteo (left) with energy (right) on the shared date/hour keys;
# only ('Data', 'Hora') pairs present in both frames are kept.
df = df_meteo.merge(df_energy, how='inner', on=['Data', 'Hora'])

## Columns renaming

In [24]:
# Replace the Portuguese column headers with English names; columns not listed keep
# their current names.
df = df.rename(
    columns={
        'Data': 'date',
        'Hora': 'hour',
        'Normal (kWh)': 'normal',
        'Horário Económico (kWh)': 'economic_schedule',
        'Autoconsumo (kWh)': 'self-consumption',
        'Injeção na rede (kWh)': 'injection',
    }
)

## Write new csv


In [25]:
# Write the merged frame to CSV; index=False leaves the row index out of the file.
df.to_csv(path_or_buf='../../../datasets/parte2/treino/dataset.csv', index=False)