# Smooth data
Smoothing is applied principally to time series data.

Content
- Promedio móvil simple
- Promedio móvil ponderado
- Promedio móvil exponencial

Sources

- Source dataset: https://www.tensorflow.org/tutorials/structured_data/time_series?hl=es-419

- Source dataset v2: https://keras.io/examples/timeseries/timeseries_weather_forecasting/
  
- Source lines charts: https://plotly.com/python/line-charts/

### 0. Import packages and load data
**Dataset: Tensorflow climate**

In [1]:
import tensorflow as tf
import os
import numpy as np
import pandas as pd

# plotly
import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots

In [2]:
# Fetch the Jena climate archive through Keras' get_file helper, asking it to
# extract the zip after download.
zip_path = tf.keras.utils.get_file(
    fname='jena_climate_2009_2016.csv.zip',
    origin='https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip',
    extract=True,
)

# Drop the trailing ".zip" from the returned path.
# NOTE(review): this assumes the extracted CSV lives at exactly that location - confirm
# against the installed Keras version.
csv_path, _ = os.path.splitext(zip_path)

# Load the raw table from the extracted CSV.
data = pd.read_csv(csv_path)

# Remove the 'Date Time' text column from the frame and parse it
# (day.month.year hour:minute:second) into datetime values.
index_datetime = pd.to_datetime(
    data.pop('Date Time'),
    format='%d.%m.%Y %H:%M:%S',
)

# Use the parsed timestamps as the row index of the frame.
data = data.set_index(index_datetime)

In [3]:
# Preview the first rows to check the datetime index and the available columns.
data.head()

Unnamed: 0_level_0,p (mbar),T (degC),Tpot (K),Tdew (degC),rh (%),VPmax (mbar),VPact (mbar),VPdef (mbar),sh (g/kg),H2OC (mmol/mol),rho (g/m**3),wv (m/s),max. wv (m/s),wd (deg)
Date Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2009-01-01 00:10:00,996.52,-8.02,265.4,-8.9,93.3,3.33,3.11,0.22,1.94,3.12,1307.75,1.03,1.75,152.3
2009-01-01 00:20:00,996.57,-8.41,265.01,-9.28,93.4,3.23,3.02,0.21,1.89,3.03,1309.8,0.72,1.5,136.1
2009-01-01 00:30:00,996.53,-8.51,264.91,-9.31,93.9,3.21,3.01,0.2,1.88,3.02,1310.24,0.19,0.63,171.6
2009-01-01 00:40:00,996.51,-8.31,265.12,-9.07,94.2,3.26,3.07,0.19,1.92,3.08,1309.19,0.34,0.5,198.0
2009-01-01 00:50:00,996.51,-8.27,265.15,-9.04,94.1,3.27,3.08,0.19,1.92,3.09,1309.0,0.32,0.63,214.3


In [4]:
# Size of the loaded frame as (rows, columns).
data.shape

(420551, 14)

## Auxiliary function to compare trends in the data under different types of smoothing

In [None]:
def plot_compare_tendencias(df1, df2, number_columns=2):
    '''
    Plot every tag (column) found in two dataframes on a grid of subplots,
    one subplot per tag, and display the resulting figure.

    Meant for comparing original data against smoothed data: for each tag the
    series from df1 is drawn in gray and the series from df2 in orange, both in
    the same subplot. A tag that exists in only one dataframe is still plotted,
    with a single line.

    Subplots follow the column order of df1, followed by the columns that only
    exist in df2, so the layout is the same on every run.

    Inputs:
        - df1: first dataframe to plot (gray lines)
        - df2: second dataframe to plot (orange lines)
        - number_columns: number of subplot columns in the grid
          (expected to be a positive integer)

    Returns:
        None. The figure is shown with fig.show().

    Raises:
        ValueError: if neither dataframe has any column to plot.
    '''

    # Union of the tags of both dataframes in first-seen order. dict.fromkeys
    # removes duplicates while keeping insertion order; a set would not, and
    # because string hashes are randomized per interpreter session the subplot
    # order could change between kernel restarts.
    list_tags = list(dict.fromkeys([*df1.columns, *df2.columns]))

    if not list_tags:
        raise ValueError('plot_compare_tendencias: df1 and df2 have no columns to plot')

    # Ceiling division: enough rows to hold every tag with number_columns per row
    number_rows = -(-len(list_tags) // number_columns)

    # Create the figure; titles are converted with str() so non-string column
    # labels (e.g. integers) are accepted as well
    fig = make_subplots(
        rows=number_rows,
        cols=number_columns,
        subplot_titles=tuple(str(tag) for tag in list_tags)
    )

    # (dataframe, legend prefix, line color) for each side of the comparison
    series_specs = (
        (df1, 'df1', 'gray'),
        (df2, 'df2', 'orange'),
    )

    for index_tag, tag in enumerate(list_tags):
        # divmod yields the 0-based grid cell; Plotly subplot indices start at 1
        row, column = divmod(index_tag, number_columns)

        for frame, label, color in series_specs:
            # Skip the dataframe that does not contain this tag
            if tag not in frame.columns:
                continue

            fig.add_trace(
                go.Scatter(
                    x=frame.index,
                    y=frame[tag],
                    name=f'{label} - {tag}',
                    line=dict(color=color)
                ),
                row=row + 1,
                col=column + 1
            )

    # Scale the figure size with the grid so each subplot keeps the same size
    fig.update_layout(
        height=350 * number_rows,  # total height
        width=850 * number_columns,  # total width
        title_text="Gráficos de Tendencia",
        title_x=0.5
    )

    fig.show()

## Promedio Móvil Simple

## Promedio Móvil Ponderado

## Promedio Móvil Exponencial