# INMET Data Analysis Tool

This notebook was developed to help researchers explore the data available on the INMET Data Portal (https://bdmep.inmet.gov.br/).

You can download any data as a CSV file and open it on this Tool to extract information graphically by doing an Exploratory Analysis.

Remember to change the correct variables in the code cells to configure this Tool for your computing environment.

**Premise**: Most data files on the INMET Data Portal have the same encoding and column names. We added some information in the text cells on how to solve errors if your data files follow a different standard. As future work, we look forward to developing a generic version of this Tool.

## Setup

In [1]:
!pip install matplotlib pandas numpy ipywidgets

Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets)
  Using cached jedi-0.19.1-py2.py3-none-any.whl.metadata (22 kB)
Using cached jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
Installing collected packages: jedi
Successfully installed jedi-0.19.1


In [2]:
%matplotlib inline

import ipywidgets as widgets
from ipywidgets import Layout
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import time
import seaborn as sns
from statistics import median

Mounting Google Drive

In [3]:
# Google Colab only: makes Google Drive available under /content/drive/.
# The `folder` path configured below points inside this mount point.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


Show all dataframe columns

In [4]:
# None removes the column limit, so wide frames are displayed without truncation.
pd.set_option('display.max_columns', None)

Setting folder and files names.

Change the "folder" variable value to your Google Drive folder and the "in_file" to your file name

In [5]:
# Directory that holds the data files. Paths are built as `folder + file name`,
# so the trailing slash is required.
folder = '/content/drive/MyDrive/DataINMET/'
# Raw CSV file read in the "Importing Data" section.
in_file = 'data_SP_2013_2022.csv'
# Name of the CSV file written at the end of the preprocessing section.
out_file = 'data_SP_2013_2022_processed.csv'

## Importing Data

Opening data on a dataframe

The first 8 rows of an INMET data file are always the file header, which is why we need to skip them.

You might need to add "encoding='latin-1'" in the Pandas read_csv() function to open the CSV file depending on the downloaded file

In [24]:
# The file is ';'-separated, uses ',' as the decimal mark, and its first 8 lines
# are skipped before the column header row is read.
df = pd.read_csv(folder+in_file, sep=";", decimal=",", skiprows=8)
df

Unnamed: 0,DATA (YYYY-MM-DD),HORA (UTC),"PRECIPITAÇÃO TOTAL, HORÁRIO (mm)","PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA (mB)",PRESSÃO ATMOSFERICA MAX.NA HORA ANT. (AUT) (mB),PRESSÃO ATMOSFERICA MIN. NA HORA ANT. (AUT) (mB),RADIACAO GLOBAL (KJ/m²),"TEMPERATURA DO AR - BULBO SECO, HORARIA (°C)",TEMPERATURA DO PONTO DE ORVALHO (°C),TEMPERATURA MÁXIMA NA HORA ANT. (AUT) (°C),TEMPERATURA MÍNIMA NA HORA ANT. (AUT) (°C),TEMPERATURA ORVALHO MAX. NA HORA ANT. (AUT) (°C),TEMPERATURA ORVALHO MIN. NA HORA ANT. (AUT) (°C),UMIDADE REL. MAX. NA HORA ANT. (AUT) (%),UMIDADE REL. MIN. NA HORA ANT. (AUT) (%),"UMIDADE RELATIVA DO AR, HORARIA (%)","VENTO, DIREÇÃO HORARIA (gr) (° (gr))","VENTO, RAJADA MAXIMA (m/s)","VENTO, VELOCIDADE HORARIA (m/s)",Unnamed: 19
0,2013-01-01,00:00,0.0,925.7,925.7,925.0,-9999.0,24.8,19.1,25.4,24.8,19.9,18.3,73.0,65.0,71.0,335.0,5.4,2.4,
1,2013-01-01,01:00,0.0,925.9,926.0,925.7,-9999.0,24.2,18.5,24.8,24.2,19.2,18.3,72.0,68.0,70.0,348.0,5.4,0.3,
2,2013-01-01,02:00,0.0,924.6,925.9,924.6,-9999.0,23.4,16.7,24.3,23.4,18.4,16.6,70.0,65.0,66.0,302.0,5.8,2.2,
3,2013-01-01,03:00,0.0,924.4,924.7,924.4,-9999.0,22.1,17.3,23.4,22.1,17.3,16.7,74.0,66.0,74.0,294.0,7.7,2.4,
4,2013-01-01,04:00,0.0,923.7,924.4,923.7,-9999.0,22.5,17.4,22.5,22.1,17.5,17.3,75.0,73.0,73.0,40.0,5.1,1.1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87643,2022/12/31,1900 UTC,0.8,926.2,926.8,926.0,534.1,20.9,17.9,21.5,20.6,18.2,16.6,85.0,74.0,83.0,183.0,4.2,1.3,
87644,2022/12/31,2000 UTC,0.0,925.8,926.3,925.8,1497.1,23.0,18.9,23.0,20.8,19.3,17.5,83.0,77.0,78.0,164.0,4.3,1.8,
87645,2022/12/31,2100 UTC,0.0,925.9,925.9,925.8,289.1,22.3,18.6,23.0,22.3,19.0,18.2,81.0,77.0,80.0,183.0,4.1,1.5,
87646,2022/12/31,2200 UTC,0.0,926.4,926.4,925.9,103.2,21.4,17.3,22.3,21.4,18.7,17.2,80.0,77.0,77.0,157.0,5.1,2.1,


In [25]:
# Column dtypes right after loading: the date and hour columns are still plain objects.
df.dtypes

Unnamed: 0,0
DATA (YYYY-MM-DD),object
HORA (UTC),object
"PRECIPITAÇÃO TOTAL, HORÁRIO (mm)",float64
"PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA (mB)",float64
PRESSÃO ATMOSFERICA MAX.NA HORA ANT. (AUT) (mB),float64
PRESSÃO ATMOSFERICA MIN. NA HORA ANT. (AUT) (mB),float64
RADIACAO GLOBAL (KJ/m²),float64
"TEMPERATURA DO AR - BULBO SECO, HORARIA (°C)",float64
TEMPERATURA DO PONTO DE ORVALHO (°C),float64
TEMPERATURA MÁXIMA NA HORA ANT. (AUT) (°C),float64


In [26]:
# Summary statistics of the raw data. The -9999.0 placeholders are still present
# at this stage, so the min/mean/std rows are not meaningful yet.
df.describe()

Unnamed: 0,"PRECIPITAÇÃO TOTAL, HORÁRIO (mm)","PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA (mB)",PRESSÃO ATMOSFERICA MAX.NA HORA ANT. (AUT) (mB),PRESSÃO ATMOSFERICA MIN. NA HORA ANT. (AUT) (mB),RADIACAO GLOBAL (KJ/m²),"TEMPERATURA DO AR - BULBO SECO, HORARIA (°C)",TEMPERATURA DO PONTO DE ORVALHO (°C),TEMPERATURA MÁXIMA NA HORA ANT. (AUT) (°C),TEMPERATURA MÍNIMA NA HORA ANT. (AUT) (°C),TEMPERATURA ORVALHO MAX. NA HORA ANT. (AUT) (°C),TEMPERATURA ORVALHO MIN. NA HORA ANT. (AUT) (°C),UMIDADE REL. MAX. NA HORA ANT. (AUT) (%),UMIDADE REL. MIN. NA HORA ANT. (AUT) (%),"UMIDADE RELATIVA DO AR, HORARIA (%)","VENTO, DIREÇÃO HORARIA (gr) (° (gr))","VENTO, RAJADA MAXIMA (m/s)","VENTO, VELOCIDADE HORARIA (m/s)",Unnamed: 19
count,87644.0,87646.0,87646.0,87646.0,70965.0,87646.0,87643.0,87646.0,87646.0,87644.0,87642.0,87646.0,87646.0,87645.0,87646.0,87646.0,87646.0,0.0
mean,-44.551406,878.74804,878.74457,878.252045,-2628.271775,-24.093143,-30.653385,-23.880654,-24.947702,-30.499963,-31.461402,26.133377,21.112384,24.00753,109.929694,-39.522093,-42.666906,
std,667.230544,726.308951,728.181634,728.148843,5475.728303,666.8928,666.460252,669.479965,669.405578,669.036077,668.980223,673.01787,672.726024,670.320252,682.078531,667.5638,665.636687,
min,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,
25%,0.0,924.7,925.0,924.5,-9999.0,17.3,11.6,17.6,16.9,12.1,11.1,61.0,53.0,57.0,87.0,3.4,0.9,
50%,0.0,927.0,927.3,926.8,215.2,20.2,14.4,20.7,19.8,14.8,13.9,76.0,71.0,74.0,133.0,5.1,1.8,
75%,0.0,929.5,929.8,929.3,1558.8,23.3,16.8,24.0,22.6,17.2,16.3,83.0,80.0,82.0,193.0,6.9,2.6,
max,77.8,940.3,940.4,940.2,3999.6,37.3,22.3,38.4,36.1,22.8,21.5,98.0,98.0,98.0,360.0,24.6,10.9,


## Data Preprocessing

Dropping the last column ('Unnamed: 19'), which is empty

In [27]:
# 'Unnamed: 19' holds no values (its count in describe() is 0).
# errors='ignore' keeps the cell safe to re-run once the column is already gone.
df = df.drop(columns='Unnamed: 19', errors='ignore')
df

Unnamed: 0,DATA (YYYY-MM-DD),HORA (UTC),"PRECIPITAÇÃO TOTAL, HORÁRIO (mm)","PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA (mB)",PRESSÃO ATMOSFERICA MAX.NA HORA ANT. (AUT) (mB),PRESSÃO ATMOSFERICA MIN. NA HORA ANT. (AUT) (mB),RADIACAO GLOBAL (KJ/m²),"TEMPERATURA DO AR - BULBO SECO, HORARIA (°C)",TEMPERATURA DO PONTO DE ORVALHO (°C),TEMPERATURA MÁXIMA NA HORA ANT. (AUT) (°C),TEMPERATURA MÍNIMA NA HORA ANT. (AUT) (°C),TEMPERATURA ORVALHO MAX. NA HORA ANT. (AUT) (°C),TEMPERATURA ORVALHO MIN. NA HORA ANT. (AUT) (°C),UMIDADE REL. MAX. NA HORA ANT. (AUT) (%),UMIDADE REL. MIN. NA HORA ANT. (AUT) (%),"UMIDADE RELATIVA DO AR, HORARIA (%)","VENTO, DIREÇÃO HORARIA (gr) (° (gr))","VENTO, RAJADA MAXIMA (m/s)","VENTO, VELOCIDADE HORARIA (m/s)"
0,2013-01-01,00:00,0.0,925.7,925.7,925.0,-9999.0,24.8,19.1,25.4,24.8,19.9,18.3,73.0,65.0,71.0,335.0,5.4,2.4
1,2013-01-01,01:00,0.0,925.9,926.0,925.7,-9999.0,24.2,18.5,24.8,24.2,19.2,18.3,72.0,68.0,70.0,348.0,5.4,0.3
2,2013-01-01,02:00,0.0,924.6,925.9,924.6,-9999.0,23.4,16.7,24.3,23.4,18.4,16.6,70.0,65.0,66.0,302.0,5.8,2.2
3,2013-01-01,03:00,0.0,924.4,924.7,924.4,-9999.0,22.1,17.3,23.4,22.1,17.3,16.7,74.0,66.0,74.0,294.0,7.7,2.4
4,2013-01-01,04:00,0.0,923.7,924.4,923.7,-9999.0,22.5,17.4,22.5,22.1,17.5,17.3,75.0,73.0,73.0,40.0,5.1,1.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87643,2022/12/31,1900 UTC,0.8,926.2,926.8,926.0,534.1,20.9,17.9,21.5,20.6,18.2,16.6,85.0,74.0,83.0,183.0,4.2,1.3
87644,2022/12/31,2000 UTC,0.0,925.8,926.3,925.8,1497.1,23.0,18.9,23.0,20.8,19.3,17.5,83.0,77.0,78.0,164.0,4.3,1.8
87645,2022/12/31,2100 UTC,0.0,925.9,925.9,925.8,289.1,22.3,18.6,23.0,22.3,19.0,18.2,81.0,77.0,80.0,183.0,4.1,1.5
87646,2022/12/31,2200 UTC,0.0,926.4,926.4,925.9,103.2,21.4,17.3,22.3,21.4,18.7,17.2,80.0,77.0,77.0,157.0,5.1,2.1


Changing the date column from "Object" into "Datetime"

In [28]:
# The file contains both 'YYYY-MM-DD' and 'YYYY/MM/DD' dates, so the format is
# inferred per value with format='mixed' (available in pandas >= 2.0).
df['DATA (YYYY-MM-DD)'] = pd.to_datetime(df['DATA (YYYY-MM-DD)'], format='mixed')
df

Unnamed: 0,DATA (YYYY-MM-DD),HORA (UTC),"PRECIPITAÇÃO TOTAL, HORÁRIO (mm)","PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA (mB)",PRESSÃO ATMOSFERICA MAX.NA HORA ANT. (AUT) (mB),PRESSÃO ATMOSFERICA MIN. NA HORA ANT. (AUT) (mB),RADIACAO GLOBAL (KJ/m²),"TEMPERATURA DO AR - BULBO SECO, HORARIA (°C)",TEMPERATURA DO PONTO DE ORVALHO (°C),TEMPERATURA MÁXIMA NA HORA ANT. (AUT) (°C),TEMPERATURA MÍNIMA NA HORA ANT. (AUT) (°C),TEMPERATURA ORVALHO MAX. NA HORA ANT. (AUT) (°C),TEMPERATURA ORVALHO MIN. NA HORA ANT. (AUT) (°C),UMIDADE REL. MAX. NA HORA ANT. (AUT) (%),UMIDADE REL. MIN. NA HORA ANT. (AUT) (%),"UMIDADE RELATIVA DO AR, HORARIA (%)","VENTO, DIREÇÃO HORARIA (gr) (° (gr))","VENTO, RAJADA MAXIMA (m/s)","VENTO, VELOCIDADE HORARIA (m/s)"
0,2013-01-01,00:00,0.0,925.7,925.7,925.0,-9999.0,24.8,19.1,25.4,24.8,19.9,18.3,73.0,65.0,71.0,335.0,5.4,2.4
1,2013-01-01,01:00,0.0,925.9,926.0,925.7,-9999.0,24.2,18.5,24.8,24.2,19.2,18.3,72.0,68.0,70.0,348.0,5.4,0.3
2,2013-01-01,02:00,0.0,924.6,925.9,924.6,-9999.0,23.4,16.7,24.3,23.4,18.4,16.6,70.0,65.0,66.0,302.0,5.8,2.2
3,2013-01-01,03:00,0.0,924.4,924.7,924.4,-9999.0,22.1,17.3,23.4,22.1,17.3,16.7,74.0,66.0,74.0,294.0,7.7,2.4
4,2013-01-01,04:00,0.0,923.7,924.4,923.7,-9999.0,22.5,17.4,22.5,22.1,17.5,17.3,75.0,73.0,73.0,40.0,5.1,1.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87643,2022-12-31,1900 UTC,0.8,926.2,926.8,926.0,534.1,20.9,17.9,21.5,20.6,18.2,16.6,85.0,74.0,83.0,183.0,4.2,1.3
87644,2022-12-31,2000 UTC,0.0,925.8,926.3,925.8,1497.1,23.0,18.9,23.0,20.8,19.3,17.5,83.0,77.0,78.0,164.0,4.3,1.8
87645,2022-12-31,2100 UTC,0.0,925.9,925.9,925.8,289.1,22.3,18.6,23.0,22.3,19.0,18.2,81.0,77.0,80.0,183.0,4.1,1.5
87646,2022-12-31,2200 UTC,0.0,926.4,926.4,925.9,103.2,21.4,17.3,22.3,21.4,18.7,17.2,80.0,77.0,77.0,157.0,5.1,2.1


Changing hour column from "Object" into "Integer"

In [29]:
# Hours appear either as 'HH:MM' or as 'HHMM UTC'; in both layouts the first two
# characters are the hour. A single slice (.str[:2]) also accepts one-character
# values, so re-running the cell on the already-converted integers does not fail.
df['HORA (UTC)'] = df['HORA (UTC)'].astype('str').str[:2].astype('int')
df

Unnamed: 0,DATA (YYYY-MM-DD),HORA (UTC),"PRECIPITAÇÃO TOTAL, HORÁRIO (mm)","PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA (mB)",PRESSÃO ATMOSFERICA MAX.NA HORA ANT. (AUT) (mB),PRESSÃO ATMOSFERICA MIN. NA HORA ANT. (AUT) (mB),RADIACAO GLOBAL (KJ/m²),"TEMPERATURA DO AR - BULBO SECO, HORARIA (°C)",TEMPERATURA DO PONTO DE ORVALHO (°C),TEMPERATURA MÁXIMA NA HORA ANT. (AUT) (°C),TEMPERATURA MÍNIMA NA HORA ANT. (AUT) (°C),TEMPERATURA ORVALHO MAX. NA HORA ANT. (AUT) (°C),TEMPERATURA ORVALHO MIN. NA HORA ANT. (AUT) (°C),UMIDADE REL. MAX. NA HORA ANT. (AUT) (%),UMIDADE REL. MIN. NA HORA ANT. (AUT) (%),"UMIDADE RELATIVA DO AR, HORARIA (%)","VENTO, DIREÇÃO HORARIA (gr) (° (gr))","VENTO, RAJADA MAXIMA (m/s)","VENTO, VELOCIDADE HORARIA (m/s)"
0,2013-01-01,0,0.0,925.7,925.7,925.0,-9999.0,24.8,19.1,25.4,24.8,19.9,18.3,73.0,65.0,71.0,335.0,5.4,2.4
1,2013-01-01,1,0.0,925.9,926.0,925.7,-9999.0,24.2,18.5,24.8,24.2,19.2,18.3,72.0,68.0,70.0,348.0,5.4,0.3
2,2013-01-01,2,0.0,924.6,925.9,924.6,-9999.0,23.4,16.7,24.3,23.4,18.4,16.6,70.0,65.0,66.0,302.0,5.8,2.2
3,2013-01-01,3,0.0,924.4,924.7,924.4,-9999.0,22.1,17.3,23.4,22.1,17.3,16.7,74.0,66.0,74.0,294.0,7.7,2.4
4,2013-01-01,4,0.0,923.7,924.4,923.7,-9999.0,22.5,17.4,22.5,22.1,17.5,17.3,75.0,73.0,73.0,40.0,5.1,1.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87643,2022-12-31,19,0.8,926.2,926.8,926.0,534.1,20.9,17.9,21.5,20.6,18.2,16.6,85.0,74.0,83.0,183.0,4.2,1.3
87644,2022-12-31,20,0.0,925.8,926.3,925.8,1497.1,23.0,18.9,23.0,20.8,19.3,17.5,83.0,77.0,78.0,164.0,4.3,1.8
87645,2022-12-31,21,0.0,925.9,925.9,925.8,289.1,22.3,18.6,23.0,22.3,19.0,18.2,81.0,77.0,80.0,183.0,4.1,1.5
87646,2022-12-31,22,0.0,926.4,926.4,925.9,103.2,21.4,17.3,22.3,21.4,18.7,17.2,80.0,77.0,77.0,157.0,5.1,2.1


In [30]:
# Confirm the conversions: the date column should be datetime64 and the hour column an integer.
df.dtypes

Unnamed: 0,0
DATA (YYYY-MM-DD),datetime64[ns]
HORA (UTC),int64
"PRECIPITAÇÃO TOTAL, HORÁRIO (mm)",float64
"PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA (mB)",float64
PRESSÃO ATMOSFERICA MAX.NA HORA ANT. (AUT) (mB),float64
PRESSÃO ATMOSFERICA MIN. NA HORA ANT. (AUT) (mB),float64
RADIACAO GLOBAL (KJ/m²),float64
"TEMPERATURA DO AR - BULBO SECO, HORARIA (°C)",float64
TEMPERATURA DO PONTO DE ORVALHO (°C),float64
TEMPERATURA MÁXIMA NA HORA ANT. (AUT) (°C),float64


Splitting the Date column into three new columns (year, month and day)

In [31]:
# Calendar components of the parsed date column.
dates = df['DATA (YYYY-MM-DD)']
year = dates.dt.year
month = dates.dt.month
day = dates.dt.day

# Remove copies left by a previous run of this cell: DataFrame.insert raises
# ValueError when the column already exists.
df = df.drop(columns=['year', 'month', 'day'], errors='ignore')

# Inserting at position 0 in reverse order gives the layout: year, month, day, ...
df.insert(0, 'day', day)
df.insert(0, 'month', month)
df.insert(0, 'year', year)

df

Unnamed: 0,year,month,day,DATA (YYYY-MM-DD),HORA (UTC),"PRECIPITAÇÃO TOTAL, HORÁRIO (mm)","PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA (mB)",PRESSÃO ATMOSFERICA MAX.NA HORA ANT. (AUT) (mB),PRESSÃO ATMOSFERICA MIN. NA HORA ANT. (AUT) (mB),RADIACAO GLOBAL (KJ/m²),"TEMPERATURA DO AR - BULBO SECO, HORARIA (°C)",TEMPERATURA DO PONTO DE ORVALHO (°C),TEMPERATURA MÁXIMA NA HORA ANT. (AUT) (°C),TEMPERATURA MÍNIMA NA HORA ANT. (AUT) (°C),TEMPERATURA ORVALHO MAX. NA HORA ANT. (AUT) (°C),TEMPERATURA ORVALHO MIN. NA HORA ANT. (AUT) (°C),UMIDADE REL. MAX. NA HORA ANT. (AUT) (%),UMIDADE REL. MIN. NA HORA ANT. (AUT) (%),"UMIDADE RELATIVA DO AR, HORARIA (%)","VENTO, DIREÇÃO HORARIA (gr) (° (gr))","VENTO, RAJADA MAXIMA (m/s)","VENTO, VELOCIDADE HORARIA (m/s)"
0,2013,1,1,2013-01-01,0,0.0,925.7,925.7,925.0,-9999.0,24.8,19.1,25.4,24.8,19.9,18.3,73.0,65.0,71.0,335.0,5.4,2.4
1,2013,1,1,2013-01-01,1,0.0,925.9,926.0,925.7,-9999.0,24.2,18.5,24.8,24.2,19.2,18.3,72.0,68.0,70.0,348.0,5.4,0.3
2,2013,1,1,2013-01-01,2,0.0,924.6,925.9,924.6,-9999.0,23.4,16.7,24.3,23.4,18.4,16.6,70.0,65.0,66.0,302.0,5.8,2.2
3,2013,1,1,2013-01-01,3,0.0,924.4,924.7,924.4,-9999.0,22.1,17.3,23.4,22.1,17.3,16.7,74.0,66.0,74.0,294.0,7.7,2.4
4,2013,1,1,2013-01-01,4,0.0,923.7,924.4,923.7,-9999.0,22.5,17.4,22.5,22.1,17.5,17.3,75.0,73.0,73.0,40.0,5.1,1.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87643,2022,12,31,2022-12-31,19,0.8,926.2,926.8,926.0,534.1,20.9,17.9,21.5,20.6,18.2,16.6,85.0,74.0,83.0,183.0,4.2,1.3
87644,2022,12,31,2022-12-31,20,0.0,925.8,926.3,925.8,1497.1,23.0,18.9,23.0,20.8,19.3,17.5,83.0,77.0,78.0,164.0,4.3,1.8
87645,2022,12,31,2022-12-31,21,0.0,925.9,925.9,925.8,289.1,22.3,18.6,23.0,22.3,19.0,18.2,81.0,77.0,80.0,183.0,4.1,1.5
87646,2022,12,31,2022-12-31,22,0.0,926.4,926.4,925.9,103.2,21.4,17.3,22.3,21.4,18.7,17.2,80.0,77.0,77.0,157.0,5.1,2.1


Renaming Columns

Check if your Dataframe columns have the same name. In some cases, the name might have lowercase letters instead of uppercase letters

In [32]:
# INMET header -> short name used by the rest of the notebook.
column_names = {
  'DATA (YYYY-MM-DD)' : 'date',
  'HORA (UTC)' : 'hour',
  "PRECIPITAÇÃO TOTAL, HORÁRIO (mm)" : "Precip",
  'PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA (mB)' : 'Pstn',
  'PRESSÃO ATMOSFERICA MAX.NA HORA ANT. (AUT) (mB)' : 'Pmax',
  'PRESSÃO ATMOSFERICA MIN. NA HORA ANT. (AUT) (mB)' : 'Pmin',
  'RADIACAO GLOBAL (KJ/m²)' : 'Rad',
  'TEMPERATURA DO AR - BULBO SECO, HORARIA (°C)' : 'Temp',
  'TEMPERATURA DO PONTO DE ORVALHO (°C)' : 'Dew',
  'TEMPERATURA MÁXIMA NA HORA ANT. (AUT) (°C)' : 'Tmax',
  'TEMPERATURA MÍNIMA NA HORA ANT. (AUT) (°C)' : 'Tmin',
  'TEMPERATURA ORVALHO MAX. NA HORA ANT. (AUT) (°C)' : 'Dewmax',
  'TEMPERATURA ORVALHO MIN. NA HORA ANT. (AUT) (°C)' : 'Dewmin',
  'UMIDADE REL. MAX. NA HORA ANT. (AUT) (%)' : 'RHmax',
  'UMIDADE REL. MIN. NA HORA ANT. (AUT) (%)' : 'RHmin',
  'UMIDADE RELATIVA DO AR, HORARIA (%)' : 'RH',
  'VENTO, DIREÇÃO HORARIA (gr) (° (gr))' : 'WSdir',
  'VENTO, RAJADA MAXIMA (m/s)' : 'WSmax',
  'VENTO, VELOCIDADE HORARIA (m/s)' : 'WS',
}

# rename() silently skips keys that are absent. Report headers that were neither
# found nor already renamed, instead of failing later with a KeyError on the short name.
missing = [old for old, new in column_names.items()
           if old not in df.columns and new not in df.columns]
if missing:
  print('Columns not found in the file (check spelling / letter case):', missing)

# One rename call with the full mapping produces a single new frame.
df = df.rename(columns=column_names)

df

Unnamed: 0,year,month,day,date,hour,Precip,Pstn,Pmax,Pmin,Rad,Temp,Dew,Tmax,Tmin,Dewmax,Dewmin,RHmax,RHmin,RH,WSdir,WSmax,WS
0,2013,1,1,2013-01-01,0,0.0,925.7,925.7,925.0,-9999.0,24.8,19.1,25.4,24.8,19.9,18.3,73.0,65.0,71.0,335.0,5.4,2.4
1,2013,1,1,2013-01-01,1,0.0,925.9,926.0,925.7,-9999.0,24.2,18.5,24.8,24.2,19.2,18.3,72.0,68.0,70.0,348.0,5.4,0.3
2,2013,1,1,2013-01-01,2,0.0,924.6,925.9,924.6,-9999.0,23.4,16.7,24.3,23.4,18.4,16.6,70.0,65.0,66.0,302.0,5.8,2.2
3,2013,1,1,2013-01-01,3,0.0,924.4,924.7,924.4,-9999.0,22.1,17.3,23.4,22.1,17.3,16.7,74.0,66.0,74.0,294.0,7.7,2.4
4,2013,1,1,2013-01-01,4,0.0,923.7,924.4,923.7,-9999.0,22.5,17.4,22.5,22.1,17.5,17.3,75.0,73.0,73.0,40.0,5.1,1.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87643,2022,12,31,2022-12-31,19,0.8,926.2,926.8,926.0,534.1,20.9,17.9,21.5,20.6,18.2,16.6,85.0,74.0,83.0,183.0,4.2,1.3
87644,2022,12,31,2022-12-31,20,0.0,925.8,926.3,925.8,1497.1,23.0,18.9,23.0,20.8,19.3,17.5,83.0,77.0,78.0,164.0,4.3,1.8
87645,2022,12,31,2022-12-31,21,0.0,925.9,925.9,925.8,289.1,22.3,18.6,23.0,22.3,19.0,18.2,81.0,77.0,80.0,183.0,4.1,1.5
87646,2022,12,31,2022-12-31,22,0.0,926.4,926.4,925.9,103.2,21.4,17.3,22.3,21.4,18.7,17.2,80.0,77.0,77.0,157.0,5.1,2.1


Changing all -9999.0 values to NaN

In [33]:
# -9999 is the placeholder used in the file for a missing reading; blank out every
# cell that holds it and keep all other values as they are.
df = df.mask(df == -9999, np.nan)
df

Unnamed: 0,year,month,day,date,hour,Precip,Pstn,Pmax,Pmin,Rad,Temp,Dew,Tmax,Tmin,Dewmax,Dewmin,RHmax,RHmin,RH,WSdir,WSmax,WS
0,2013,1,1,2013-01-01,0,0.0,925.7,925.7,925.0,,24.8,19.1,25.4,24.8,19.9,18.3,73.0,65.0,71.0,335.0,5.4,2.4
1,2013,1,1,2013-01-01,1,0.0,925.9,926.0,925.7,,24.2,18.5,24.8,24.2,19.2,18.3,72.0,68.0,70.0,348.0,5.4,0.3
2,2013,1,1,2013-01-01,2,0.0,924.6,925.9,924.6,,23.4,16.7,24.3,23.4,18.4,16.6,70.0,65.0,66.0,302.0,5.8,2.2
3,2013,1,1,2013-01-01,3,0.0,924.4,924.7,924.4,,22.1,17.3,23.4,22.1,17.3,16.7,74.0,66.0,74.0,294.0,7.7,2.4
4,2013,1,1,2013-01-01,4,0.0,923.7,924.4,923.7,,22.5,17.4,22.5,22.1,17.5,17.3,75.0,73.0,73.0,40.0,5.1,1.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87643,2022,12,31,2022-12-31,19,0.8,926.2,926.8,926.0,534.1,20.9,17.9,21.5,20.6,18.2,16.6,85.0,74.0,83.0,183.0,4.2,1.3
87644,2022,12,31,2022-12-31,20,0.0,925.8,926.3,925.8,1497.1,23.0,18.9,23.0,20.8,19.3,17.5,83.0,77.0,78.0,164.0,4.3,1.8
87645,2022,12,31,2022-12-31,21,0.0,925.9,925.9,925.8,289.1,22.3,18.6,23.0,22.3,19.0,18.2,81.0,77.0,80.0,183.0,4.1,1.5
87646,2022,12,31,2022-12-31,22,0.0,926.4,926.4,925.9,103.2,21.4,17.3,22.3,21.4,18.7,17.2,80.0,77.0,77.0,157.0,5.1,2.1


Dropping rows with multiple NaN values

If the "Precip" value is NaN we know that all other values will be too (measurement error)

In [34]:
# Index labels of the rows that have no precipitation reading ...
indexNames = df.index[df['Precip'].isna()]
# ... which are removed as a whole (treated as failed measurements).
df = df.drop(index=indexNames)

df

Unnamed: 0,year,month,day,date,hour,Precip,Pstn,Pmax,Pmin,Rad,Temp,Dew,Tmax,Tmin,Dewmax,Dewmin,RHmax,RHmin,RH,WSdir,WSmax,WS
0,2013,1,1,2013-01-01,0,0.0,925.7,925.7,925.0,,24.8,19.1,25.4,24.8,19.9,18.3,73.0,65.0,71.0,335.0,5.4,2.4
1,2013,1,1,2013-01-01,1,0.0,925.9,926.0,925.7,,24.2,18.5,24.8,24.2,19.2,18.3,72.0,68.0,70.0,348.0,5.4,0.3
2,2013,1,1,2013-01-01,2,0.0,924.6,925.9,924.6,,23.4,16.7,24.3,23.4,18.4,16.6,70.0,65.0,66.0,302.0,5.8,2.2
3,2013,1,1,2013-01-01,3,0.0,924.4,924.7,924.4,,22.1,17.3,23.4,22.1,17.3,16.7,74.0,66.0,74.0,294.0,7.7,2.4
4,2013,1,1,2013-01-01,4,0.0,923.7,924.4,923.7,,22.5,17.4,22.5,22.1,17.5,17.3,75.0,73.0,73.0,40.0,5.1,1.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87643,2022,12,31,2022-12-31,19,0.8,926.2,926.8,926.0,534.1,20.9,17.9,21.5,20.6,18.2,16.6,85.0,74.0,83.0,183.0,4.2,1.3
87644,2022,12,31,2022-12-31,20,0.0,925.8,926.3,925.8,1497.1,23.0,18.9,23.0,20.8,19.3,17.5,83.0,77.0,78.0,164.0,4.3,1.8
87645,2022,12,31,2022-12-31,21,0.0,925.9,925.9,925.8,289.1,22.3,18.6,23.0,22.3,19.0,18.2,81.0,77.0,80.0,183.0,4.1,1.5
87646,2022,12,31,2022-12-31,22,0.0,926.4,926.4,925.9,103.2,21.4,17.3,22.3,21.4,18.7,17.2,80.0,77.0,77.0,157.0,5.1,2.1


Some of the remaining NaN values on the Dataframe are from the Solar Radiation measurement in the evening.

We change them to zero

In [35]:
# Missing radiation readings become 0.
# NOTE(review): this zeroes every NaN in 'Rad', including any daytime gaps.
df['Rad'] = df['Rad'].fillna(0)
df

Unnamed: 0,year,month,day,date,hour,Precip,Pstn,Pmax,Pmin,Rad,Temp,Dew,Tmax,Tmin,Dewmax,Dewmin,RHmax,RHmin,RH,WSdir,WSmax,WS
0,2013,1,1,2013-01-01,0,0.0,925.7,925.7,925.0,0.0,24.8,19.1,25.4,24.8,19.9,18.3,73.0,65.0,71.0,335.0,5.4,2.4
1,2013,1,1,2013-01-01,1,0.0,925.9,926.0,925.7,0.0,24.2,18.5,24.8,24.2,19.2,18.3,72.0,68.0,70.0,348.0,5.4,0.3
2,2013,1,1,2013-01-01,2,0.0,924.6,925.9,924.6,0.0,23.4,16.7,24.3,23.4,18.4,16.6,70.0,65.0,66.0,302.0,5.8,2.2
3,2013,1,1,2013-01-01,3,0.0,924.4,924.7,924.4,0.0,22.1,17.3,23.4,22.1,17.3,16.7,74.0,66.0,74.0,294.0,7.7,2.4
4,2013,1,1,2013-01-01,4,0.0,923.7,924.4,923.7,0.0,22.5,17.4,22.5,22.1,17.5,17.3,75.0,73.0,73.0,40.0,5.1,1.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87643,2022,12,31,2022-12-31,19,0.8,926.2,926.8,926.0,534.1,20.9,17.9,21.5,20.6,18.2,16.6,85.0,74.0,83.0,183.0,4.2,1.3
87644,2022,12,31,2022-12-31,20,0.0,925.8,926.3,925.8,1497.1,23.0,18.9,23.0,20.8,19.3,17.5,83.0,77.0,78.0,164.0,4.3,1.8
87645,2022,12,31,2022-12-31,21,0.0,925.9,925.9,925.8,289.1,22.3,18.6,23.0,22.3,19.0,18.2,81.0,77.0,80.0,183.0,4.1,1.5
87646,2022,12,31,2022-12-31,22,0.0,926.4,926.4,925.9,103.2,21.4,17.3,22.3,21.4,18.7,17.2,80.0,77.0,77.0,157.0,5.1,2.1


In [36]:
# Number of missing values that remain in each column.
df.isnull().sum(axis = 0)

Unnamed: 0,0
year,0
month,0
day,0
date,0
hour,0
Precip,0
Pstn,0
Pmax,2
Pmin,2
Rad,0


Finally, each remaining NaN value is filled with the mean of the nearest valid values before and after it.


In [37]:
# Columns whose remaining gaps are filled from the neighbouring readings.
gap_columns = ['Pstn', 'Pmax', 'Pmin', 'Temp', 'Dew', 'Tmax', 'Tmin', 'Dewmax',
               'Dewmin', 'RHmax', 'RHmin', 'RH', 'WSdir', 'WSmax', 'WS']

for column in gap_columns:
  previous_valid = df[column].ffill()   # last valid reading at or before each row
  next_valid = df[column].bfill()       # first valid reading at or after each row
  # Mean of the two neighbours; rows that already hold a value keep it ((x + x) / 2).
  averaged = previous_valid.add(next_valid).div(2)
  # A gap at the very start or end of the series has only one neighbour, which
  # leaves the average NaN; fall back to the neighbour that exists.
  df[column] = averaged.fillna(previous_valid).fillna(next_valid)

# NOTE(review): WSdir is an angle, so an arithmetic mean across the 0/360 wrap
# (e.g. 350 and 10) yields 180; consider a circular mean for that column.

df.isnull().sum(axis = 0)

Unnamed: 0,0
year,0
month,0
day,0
date,0
hour,0
Precip,0
Pstn,0
Pmax,0
Pmin,0
Rad,0


Saving manipulated Dataframe as CSV file

In [38]:
# Write the cleaned frame with ',' as separator and '.' as decimal mark. The row
# index is written too, as the first unnamed column (pandas default index=True).
df.to_csv(folder+out_file, sep=',', decimal='.')

## Exploratory Analysis

In this section we use the ipywidgets Python library to generate the filters used in the Exploratory Analysis

In [39]:
# Work on a copy so the preprocessed frame is not modified by the analysis.
df2 = df.copy()

# Full timestamp = calendar date + hour offset. Datetime arithmetic avoids turning
# every row into text and parsing it back.
df2['datetime'] = df2['date'] + pd.to_timedelta(df2['hour'], unit='h')

df2

Unnamed: 0,year,month,day,date,hour,Precip,Pstn,Pmax,Pmin,Rad,Temp,Dew,Tmax,Tmin,Dewmax,Dewmin,RHmax,RHmin,RH,WSdir,WSmax,WS,datetime
0,2013,1,1,2013-01-01,0,0.0,925.7,925.7,925.0,0.0,24.8,19.1,25.4,24.8,19.9,18.3,73.0,65.0,71.0,335.0,5.4,2.4,2013-01-01 00:00:00
1,2013,1,1,2013-01-01,1,0.0,925.9,926.0,925.7,0.0,24.2,18.5,24.8,24.2,19.2,18.3,72.0,68.0,70.0,348.0,5.4,0.3,2013-01-01 01:00:00
2,2013,1,1,2013-01-01,2,0.0,924.6,925.9,924.6,0.0,23.4,16.7,24.3,23.4,18.4,16.6,70.0,65.0,66.0,302.0,5.8,2.2,2013-01-01 02:00:00
3,2013,1,1,2013-01-01,3,0.0,924.4,924.7,924.4,0.0,22.1,17.3,23.4,22.1,17.3,16.7,74.0,66.0,74.0,294.0,7.7,2.4,2013-01-01 03:00:00
4,2013,1,1,2013-01-01,4,0.0,923.7,924.4,923.7,0.0,22.5,17.4,22.5,22.1,17.5,17.3,75.0,73.0,73.0,40.0,5.1,1.1,2013-01-01 04:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87643,2022,12,31,2022-12-31,19,0.8,926.2,926.8,926.0,534.1,20.9,17.9,21.5,20.6,18.2,16.6,85.0,74.0,83.0,183.0,4.2,1.3,2022-12-31 19:00:00
87644,2022,12,31,2022-12-31,20,0.0,925.8,926.3,925.8,1497.1,23.0,18.9,23.0,20.8,19.3,17.5,83.0,77.0,78.0,164.0,4.3,1.8,2022-12-31 20:00:00
87645,2022,12,31,2022-12-31,21,0.0,925.9,925.9,925.8,289.1,22.3,18.6,23.0,22.3,19.0,18.2,81.0,77.0,80.0,183.0,4.1,1.5,2022-12-31 21:00:00
87646,2022,12,31,2022-12-31,22,0.0,926.4,926.4,925.9,103.2,21.4,17.3,22.3,21.4,18.7,17.2,80.0,77.0,77.0,157.0,5.1,2.1,2022-12-31 22:00:00


In [40]:
# Column dtypes of the working copy used by the exploratory analysis.
df2.dtypes

Unnamed: 0,0
year,int32
month,int32
day,int32
date,datetime64[ns]
hour,int64
Precip,float64
Pstn,float64
Pmax,float64
Pmin,float64
Rad,float64


Capturing minimum and maximum years to configure the filters.

In [41]:
# First and last year present in the data; they bound the "Years" slider.
minimumYear, maximumYear = df2['year'].min(), df2['year'].max()
# Keep the rows in chronological order.
df2 = df2.sort_values(by='datetime')

In [42]:
#####********Define variables**********#####
Precip = '(Precip) Total precipitation (mm)'
Pstn = '(Pstn) Atmospheric pressure at station level (mB)'
Pmax = '(Pmax) Maximum atmospheric pressure in the previous hour (mB)'
Pmin = '(Pmin) Minimum atmospheric pressure in the previous hour (mB)'
Rad = '(Rad) Global radiation (KJ/m²)'
Temp = '(Temp) Air temperature (°C)'
Dew = '(Dew) Dew point temperature (°C)'
Tmax = '(Tmax) Maximum temperature in the previous hour (°C)'
Tmin = '(Tmin) Minimum temperature in the previous hour (°C)'
Dewmax = '(Dewmax) Maximum dew point temperature in the previous hour (°C)'
Dewmin = '(Dewmin) Minimum dew point temperature in the previous hour (°C)'
RHmax = '(RHmax) Maximum relative humidity in the previous hour (%)'
RHmin = '(RHmin) Minimum relative humidity in the previous hour (%)'
RH = '(RH) Relative humidity (%)'
WSdir = '(WSdir) Wind Speed Direction (gr) (° (gr))'
WSmax = '(WSmax) Maximum Wind Speed (m/s)'
WS = '(WS) Wind Speed (m/s)'
param = [(Precip, 'Precip'), (Pstn, 'Pstn'), (Pmax, 'Pmax'), (Pmin, 'Pmin'), (Rad, 'Rad'), (Temp, 'Temp'), (Dew, 'Dew'), (Tmax, 'Tmax'),
          (Tmin, 'Tmin'), (Dewmax, 'Dewmax'), (Dewmin, 'Dewmin'), (RHmax, 'RHmax'), (RHmin, 'RHmin'), (RH, 'RH'), (WSdir, 'WSdir'), (WSmax, 'WSmax'), (WS, 'WS')]
descr = "Parameters: "

optionDropdown = [('Plot', 0),
          ('Multi-Plot', 1),
          ('Histogram', 2),
          ('Box-plot', 3),
          ('Heatmap', 4),
          ('Pairplot',5),
          ('Information Overview',6)
          ]
describeDropdown = 'Chart type:'

In [43]:
##***************************Period slider control****************************##
#Create Widget Selection Range Slider
def WidgetsSelectionRS(start, end, descrip, date):
  """Build a range slider over the integers start .. end-1.

  Args:
    start: first value offered by the slider (inclusive).
    end: upper bound (exclusive).
    descrip: caption displayed next to the slider.
    date: when True, start/end are read as month numbers and every option is an
      (abbreviated month name, datetime.date) pair anchored on the 1st of that
      month in 2014; otherwise the options are the plain integers.

  Returns:
    A widgets.SelectionRangeSlider whose initial selection spans every option.
  """
  values = range(start, end)
  if date == True:
    month_starts = [datetime.date(2014, number, 1) for number in values]
    set_range = [(first_day.strftime('%b'), first_day) for first_day in month_starts]
  else:
    set_range = list(values)

  # index is positional: (0, last position) preselects the whole range.
  last_position = end - start - 1
  return widgets.SelectionRangeSlider(
    options=set_range,
    index=(0, last_position),
    description=descrip,
    disabled=False,
    layout={'width': '500px'},
    style={'description_width': 'initial'},
  )

####**********************Parameter selector control*************************###
#Create Widget SelectMultiple
def WidgetsSelectMultiple(opt, descrip):
  """Build a ten-row multi-selection list.

  Args:
    opt: options passed to the widget; this notebook passes (label, value) pairs.
    descrip: caption displayed with the list.

  Returns:
    A widgets.SelectMultiple laid out as a flex column.
  """
  column_layout = Layout(display="flex", flex_flow='column')
  return widgets.SelectMultiple(
    options=opt,
    rows=10,
    description=descrip,
    disabled=False,
    layout=column_layout,
  )

####**********************Graphic Format Dropdown*************************###
def WidgetsDropdown(format, descrip):
  """Build the chart-type dropdown.

  Args:
    format: options passed to the widget; one of them must carry the value 0,
      which is the initial selection.
    descrip: caption displayed next to the dropdown.

  Returns:
    A widgets.Dropdown sized to its content.
  """
  dropdown_settings = dict(
    options=format,
    value=0,
    description=descrip,
    layout={'width': 'max-content'},
    disabled=False,
  )
  return widgets.Dropdown(**dropdown_settings)

In [44]:
#There are not tuples
def notTuples():
  """Print the help text shown when no parameter has been selected."""
  help_lines = (
    '\n-------------------------------------------------------------------',
    "Please! select one or more parameters.",
    '* Use click to select only one parameter.',
    '* Use Ctrl + click or Shift + click to select multiple parameters.',
    '-----------------------------------------------------------------\n\n',
  )
  # One print of the joined lines emits the same text as printing them one by one.
  print('\n'.join(help_lines))

#Create new plots large size
def createPlot(tuples,df2):
  """Draw one large time-series figure per selected parameter.

  Args:
    tuples: sequence of selected column names; when empty the help text is
      printed and nothing is drawn.
    df2: DataFrame with a 'datetime' column and one column per selected name.
      It is not modified.
  """
  if not tuples:
    notTuples()
    return

  # Sort once and on a new frame, so the caller's data is left untouched and
  # the work is not repeated for every parameter.
  ordered = df2.sort_values(by='datetime')
  for name in tuples:
    plt.figure(figsize=(20,8))
    plt.grid(linestyle='--', color='silver')
    plt.plot(ordered['datetime'], ordered[name], marker='o', linestyle='solid')
    # The axis label is stored in a module-level variable named after the column.
    plt.ylabel(globals()[name])
    plt.xlabel('Date time (UTC)')
    plt.title('Time Series Plot')
    plt.show()

# Multiplot
def createMultiPlot(tuples, df2):
  """Draw all selected parameters as dashed lines on a single shared figure.

  Args:
    tuples: sequence of selected column names; when empty the help text is
      printed and nothing is drawn.
    df2: DataFrame with a 'datetime' column and one column per selected name.
      It is not modified.
  """
  if not tuples:
    notTuples()
    return

  # Sort once and on a new frame, so the caller's data is left untouched and
  # the work is not repeated for every parameter.
  ordered = df2.sort_values(by='datetime')

  plt.figure(figsize=(18,7))
  plt.grid(linestyle='--', color='silver')
  for name in tuples:
    plt.plot(ordered['datetime'], ordered[name], marker='o', linestyle='dashed')
  # Legend entries are the column names, in plotting order.
  plt.legend(tuples, loc='upper left')
  plt.ylabel('y')
  plt.xlabel('Date time (UTC)')
  plt.title('Time Series Plot')
  plt.show()

#Create histogram
def createHistogram(tuples, df2):
  """Draw a 25-bin histogram for every selected parameter.

  Args:
    tuples: sequence of selected column names; when empty the help text is
      printed and nothing is drawn.
    df2: DataFrame with one column per selected name. It is not modified; a
      histogram does not depend on row order, so no sorting is done.
  """
  if not tuples:
    notTuples()
    return

  for name in tuples:
    plt.figure(figsize=(6,4))
    plt.grid(linestyle='--', color='silver')
    plt.hist(df2[name], bins=25, color='red', alpha=0.7, edgecolor='white', linewidth=1)
    plt.ylabel('Quantity')
    # The axis label is stored in a module-level variable named after the column.
    plt.xlabel(globals()[name])
    plt.show()

#Create Box-Plot
def createBoxPlot(tuples, df2):
  """Draw a box plot and print describe() for every selected parameter.

  Args:
    tuples: sequence of selected column names; when empty the help text is
      printed and nothing is drawn.
    df2: DataFrame with one column per selected name.
  """
  if not tuples:
    notTuples()
    return

  for column in tuples:
    series = df2[column]
    plt.figure(figsize =(5, 3))
    plt.grid(linestyle='--', color='silver')
    sns.boxplot(x=series)
    # The title text is stored in a module-level variable named after the column.
    plt.title(globals()[column])
    plt.show()
    print(df2[column].describe())

#Create Heatmap
def createHeatmap(tuples, df2):
  """Draw an annotated correlation heatmap of the selected parameters.

  Args:
    tuples: sequence of selected column names; when empty the help text is
      printed and nothing is drawn.
    df2: DataFrame with one column per selected name.
  """
  if not tuples:
    notTuples()
    return

  columns = list(tuples)
  n = len(columns)
  # The canvas grows with the number of parameters but never drops below 4 x 3
  # inches; scaling alone would give one or two parameters a figure only about
  # an inch wide.
  fig, ax = plt.subplots(figsize=(max(n*0.7, 4), max(n*0.5, 3)))
  # Pairwise correlations rounded to two decimals for the cell annotations.
  correlations = df2[columns].corr().round(2)
  sns.heatmap(correlations, annot=True, linewidths=.5, ax=ax)
  plt.show()

#Create Pairplot
def createPairPlot(tuples, df2):
  """Draw a seaborn pair plot of the selected parameters.

  Args:
    tuples: sequence of selected column names; when empty the help text is
      printed and nothing is drawn.
    df2: DataFrame with one column per selected name.
  """
  if tuples:
    selected = df2[list(tuples)]
    sns.pairplot(selected)
    plt.show()
  else:
    notTuples()

# Overview of descriptive graphics
def OverviewDescripGraphics(tuples, df2):
  """Show a three-panel overview plus summary statistics per selected parameter.

  Each parameter gets one figure: the time series across the top row, a box
  plot bottom-left and a 25-bin histogram bottom-right. After the figure the
  describe() table, mode, median and mean are printed.

  Args:
    tuples: sequence of selected column names; when empty the help text is
      printed and nothing is drawn.
    df2: DataFrame with a 'datetime' column and one column per selected name.
      It is not modified.
  """
  if not tuples:
    notTuples()
    return

  # Sort once and on a new frame, so the caller's data is left untouched and
  # the work is not repeated for every parameter.
  ordered = df2.sort_values(by='datetime')
  x = ordered['datetime']

  for name in tuples:
    y = ordered[name]
    # The label text is stored in a module-level variable named after the column.
    label = globals()[name]

    plt.figure(figsize=(20,8))

    # Panel 1 (top, full width): time series
    plt.subplot(2, 1, 1)
    plt.grid(linestyle='--', color='silver')
    plt.plot(x, y, marker='o', linestyle='solid')
    plt.ylabel(label)
    plt.xlabel('Date time (UTC)')
    plt.title('Time Series Plot')

    # Panel 2 (bottom-left): box plot
    plt.subplot(2, 2, 3)
    plt.grid(linestyle='--', color='silver')
    sns.boxplot(x=y, color='#54B331')
    plt.title(label)

    # Panel 3 (bottom-right): histogram. Exactly one edge colour is given;
    # `edgecolor` and `ec` are aliases and must not be passed together.
    plt.subplot(2, 2, 4)
    plt.grid(linestyle='--', color='silver')
    plt.hist(y, bins=25, color='#F8B62A', alpha=0.7, edgecolor='black', linewidth=1)
    plt.ylabel('Quantity')
    plt.xlabel(label)

    plt.show()

    print(y.describe())
    print('mode: ', y.mode())
    print('median: ', y.median())
    print('mean: ', y.mean())

In [45]:
#Generate graph distribution
def graph_distribution(df, start_year, end_year, start_month, end_month, start_day, end_day, start_hour, end_hour, parameter, p):
  """Filter df to the selected period and draw the requested chart type.

  Every bound is inclusive and applied to its own column independently: for
  example days 10-20 with months 1-3 keeps days 10 to 20 of each of those
  months, not one contiguous date range.

  Args:
    df: DataFrame with 'year', 'month', 'day' and 'hour' columns.
    start_year, end_year: inclusive bounds on 'year'.
    start_month, end_month: inclusive bounds on 'month'.
    start_day, end_day: inclusive bounds on 'day'.
    start_hour, end_hour: inclusive bounds on 'hour'.
    parameter: selected column names, handed to the chart function.
    p: chart code (0 plot, 1 multi-plot, 2 histogram, 3 box plot, 4 heatmap,
      5 pair plot, 6 overview); any other value prints a placeholder notice.
  """
  in_period = (
    df['year'].between(start_year, end_year)
    & df['month'].between(start_month, end_month)
    & df['day'].between(start_day, end_day)
    & df['hour'].between(start_hour, end_hour)
  )
  data = df.loc[in_period]

  # Chart code -> drawing call. Lambdas defer the lookup of each chart function
  # until that chart is actually requested.
  renderers = {
    0: lambda: createPlot(parameter,data),
    1: lambda: createMultiPlot(parameter,data),
    2: lambda: createHistogram(parameter,data),
    3: lambda: createBoxPlot(parameter, data),
    4: lambda: createHeatmap(parameter, data),
    5: lambda: createPairPlot(parameter, data),
    6: lambda: OverviewDescripGraphics(parameter, data),
  }
  render = renderers.get(p)
  if render is not None:
    render()
    return

  print('\n-------------------------------')
  print("Under build. Check back soon!")
  print('-------------------------------\n\n')

#Modify the graph parameters based on the selected options
def set_params_graph(year, month, day, hour, presentation, parameter):
  """Callback for the widgets: unpack the selections and draw the chart.

  Args:
    year: (first, last) selected years.
    month: (first, last) selected months as objects with a .month attribute.
    day: (first, last) selected days of the month.
    hour: (first, last) selected hours.
    presentation: chart code chosen in the dropdown.
    parameter: selected column names.
  """
  # Reads the module-level df2 at call time.
  graph_distribution(
    df2,
    year[0], year[1],
    month[0].month, month[1].month,
    day[0], day[1],
    hour[0], hour[1],
    parameter,
    presentation,
  )

In [46]:
####**********************Generate controls*************************###
# Period sliders. The second argument is an exclusive upper bound, hence
# maximumYear+1, 13, 32 and 24.
Years = WidgetsSelectionRS(minimumYear, maximumYear+1, 'Years', False)
# True switches the slider to month-name labels backed by date objects.
Months = WidgetsSelectionRS(1, 13, 'Months', True)
Days = WidgetsSelectionRS(1, 32, 'Days', False)
Hours = WidgetsSelectionRS(0, 24, 'Hours', False)

# Chart-type dropdown.
FormatGraphics = WidgetsDropdown(optionDropdown, describeDropdown)

# Multi-selection list of the parameters to draw.
Parameters = WidgetsSelectMultiple(param, descr)

In [47]:
# Bind each control to the set_params_graph argument of the same name; interact
# displays the controls and calls the function with their current values.
widgets.interact(
    set_params_graph,
    year = Years,
    month = Months,
    day = Days,
    hour = Hours,
    presentation = FormatGraphics,
    parameter = Parameters
)

interactive(children=(SelectionRangeSlider(description='Years', index=(0, 9), layout=Layout(width='500px'), op…