In [189]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import datetime

## IMPORTACIÓN

In [190]:
# Importación de datos
datos = pd.read_csv('Datos_Estaciones_Noviembre_2019/combined.csv') 
datos.head(2)

Unnamed: 0,Fecha_Hora,codigoSerial,pm25,calidad_pm25,pm10,calidad_pm10,pm1,calidad_pm1,no,calidad_no,...,p_ssr,calidad_p_ssr,pliquida_ssr,calidad_pliquida_ssr,rglobal_ssr,calidad_rglobal_ssr,taire10_ssr,calidad_taire10_ssr,vviento_ssr,calidad_vviento_ssr
0,2019-11-01 00:00:00,11,-9999.0,1.0,30.0,1.0,-9999.0,1.0,-9999.0,1.0,...,-9999.0,1.0,-9999.0,1.0,-9999.0,1.0,-9999.0,1.0,-9999.0,1.0
1,2019-11-01 01:00:00,11,-9999.0,1.0,31.0,1.0,-9999.0,1.0,-9999.0,1.0,...,-9999.0,1.0,-9999.0,1.0,-9999.0,1.0,-9999.0,1.0,-9999.0,1.0


## LIMPIAR

In [191]:
pos_del = []
for i in range(datos.shape[0]):
    if datos.iloc[i,0] == 'Fecha_Hora':
        pos_del.append(i)

In [192]:
datos = datos.drop(pos_del)

In [193]:
datos['Fecha_Hora'] = pd.to_datetime(datos['Fecha_Hora'])

In [194]:
conservar = ['Fecha_Hora', 'codigoSerial', 'pm25','pm10']
datos = datos[conservar]

In [195]:
datos.iloc[:,1:] = datos.iloc[:,1:].astype(float)

In [196]:
index_pm25 = np.where(datos['pm25'] <0) 
index_pm10 = np.where(datos['pm10'] <0)

index_pm25 = index_pm25[0]
index_pm10 = index_pm10[0]

In [197]:
datos.iloc[index_pm25,2]  = None
datos.iloc[index_pm10,3] = None

In [198]:
np.mean(datos['pm25'])

546.1123107368115

## DATOS GRANDES ERRONEOS

In [199]:
datos['Semana'] = datos['Fecha_Hora'].dt.dayofweek

In [200]:
a = datos.loc[datos['codigoSerial'] == 12]

In [201]:
import plotly.graph_objects as go
import numpy as np

fig = go.Figure(data=go.Scatter(
    x= a['Fecha_Hora'],
    y = a['pm25'],
    mode='markers',
    marker=dict(
        size=16,
        #color=np.random.randn(500), #set color equal to a variable
        #colorscale='Viridis', # one of plotly colorscales
        #showscale=True
    )
))

fig.show()

In [202]:
index_pm25 = np.where(datos['pm25'] > 200) 
index_pm10 = np.where(datos['pm10'] > 200)

index_pm25 = index_pm25[0]
index_pm10 = index_pm10[0]

In [203]:
datos.iloc[index_pm25,2]  = None
datos.iloc[index_pm10,3] = None

In [204]:
datos

Unnamed: 0,Fecha_Hora,codigoSerial,pm25,pm10,Semana
0,2019-11-01 00:00:00,11.0,,30.0,4
1,2019-11-01 01:00:00,11.0,,31.0,4
2,2019-11-01 02:00:00,11.0,,52.0,4
3,2019-11-01 03:00:00,11.0,,32.0,4
4,2019-11-01 04:00:00,11.0,,41.0,4
...,...,...,...,...,...
20903,2019-11-30 19:00:00,94.0,5.43292,,5
20904,2019-11-30 20:00:00,94.0,8.13821,,5
20905,2019-11-30 21:00:00,94.0,8.12470,,5
20906,2019-11-30 22:00:00,94.0,4.05730,,5


## SEMANAL

In [205]:
#top n estaciones pm25

def get_top_estaciones(valor, n):
    
    #promedio total (mensual)
    d_total_mensual = datos.groupby(by= [datos['codigoSerial'], pd.DatetimeIndex(datos['Fecha_Hora']).month]).mean()
    d_total_mensual = d_total_mensual.reset_index()
    d_total_mensual['Total'] = d_total_mensual['pm25'] + d_total_mensual['pm10']
    
    #datos organizados
    total = d_total_mensual.sort_values(by= valor, ascending= False).head(n)
    
    #index del top
    max_est = np.array(list(set(total['codigoSerial'].values)))
    return max_est
    

max_est_25 = get_top_estaciones('pm25', 3)
max_est_10 = get_top_estaciones('pm10', 3)

In [206]:
def get_semanal(datos):
    d_semanal = datos.groupby(by= [datos['codigoSerial'], datos['Semana']]).mean()
    d_semanal = d_semanal.reset_index()
    return d_semanal
d_semanal = get_semanal(datos)


In [207]:
def get_estaciones(datos):
    return np.array(list(set(datos['codigoSerial'].values)))

In [208]:
colors = [
    'salmon',
    'mediumaquamarine',
    'slateblue',
    'grey',
    'lightpink',
    'lemonchiffon'
]

In [209]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

def fig_bar(max_est_1, max_est_2, datos):
    
    d_semanal = get_semanal(datos)
    
    
    #figura
    fig = make_subplots(rows=2, 
                        cols=1, 
                        shared_xaxes=True, 
                        vertical_spacing = 0.03,
                        specs=[[{"secondary_y": True}],
                           [{"secondary_y": True}]]
                       )

    #opacidad
    op = 0.75


    #Traces 
    
    #top estaciones
    
    for i in range(np.size(max_est_1)):
        
        #query for data of each station
        datos_temp = d_semanal.loc[d_semanal['codigoSerial'] == max_est_1[i]]


        # Add traces

        fig.add_trace(go.Bar(x=datos_temp['Semana']+1, y=datos_temp['pm25'],
                             name='Estación ' + str(int(max_est_1[i])),
                             legendgroup = 'Estación ' + str(int(max_est_1[i])),
                             opacity= op,
                             marker_color = colors[i]
                            ),
                      row=1, col=1)



    for i in range(np.size(max_est_2)):
        
        showlegend = False
        color = colors[i]
        
        if np.isin(max_est_2[i], max_est_1) != True:
            showlegend = True
            color = colors[-1]
        
        #query for data of each station
        datos_temp = d_semanal.loc[d_semanal['codigoSerial'] == max_est_2[i]]

        fig.add_trace(go.Bar(x=datos_temp['Semana']+1, y=datos_temp['pm10'],
                             name='Estación ' + str(int(max_est_2[i])),
                             legendgroup = 'Estación ' + str(int(max_est_2[i])),
                             opacity= op,
                             marker_color = color,
                             showlegend=showlegend
                            ),
                     row=2, col=1)   
        
    #Total
    
    d_temp = datos.groupby(by = datos['Semana']).mean()[['pm25', 'pm10']]
    d_temp = d_temp.reset_index()

    fig.add_trace(go.Bar(x=d_temp['Semana']+1, y=d_temp['pm25'],
                         name='Promedio total ',
                         opacity= op,
                         legendgroup = 'Promedio total ',
                         marker_color = colors[i+1]
                                ),
                  row=1, col=1)    

    fig.add_trace(go.Bar(x=d_temp['Semana']+1, y=d_temp['pm10'],
                         name='Promedio total ',
                         legendgroup = 'Promedio total ',
                         opacity= op,
                         marker_color = colors[i+1],
                         showlegend=False),
                  row=2, col=1)   
    
    # Porcentaje diario
    
    d_temp[['pm25','pm10']] = d_temp[['pm25','pm10']]/np.sum(d_temp[['pm25','pm10']])
    
    fig.add_trace(go.Scatter(x=d_temp['Semana']+1, y=d_temp['pm25'],
                             name = 'Porcentaje global',
                             legendgroup = 'Porcentaje global',
                             mode = 'lines+markers',
                             showlegend = True,
                             line = dict(color='black', width=1.5, dash = 'dash')),
                  row=1, col=1, secondary_y=True,)
    
    fig.add_trace(go.Scatter(x=d_temp['Semana']+1, y=d_temp['pm10'],
                             name = 'Porcentaje global',
                             legendgroup = 'Porcentaje global',
                             mode = 'lines+markers',
                             showlegend = False,
                             line = dict(color='black', width=1.5, dash = 'dash')),
                  row=2, col=1, secondary_y=True,)
       
    #Figure layout
    
    fig.update_yaxes(title_text= 'pm 2.5',  row=1, col=1, secondary_y=False )
    fig.update_yaxes(title_text= 'pm 10', row=2, col=1, secondary_y=False)
    
    fig.update_yaxes(tickformat= '%', row=1, col=1, secondary_y=True)
    fig.update_yaxes(tickformat= '%', row=2, col=1, secondary_y=True)


    dim = 1200

    fig.update_layout(height=9*dim/16, width=dim, title= 'Comparativo promedio diario por estación (Nov, 2019)',
                      title_font_size=20, 
                      font_size=12,
                      legend=dict(
                                    orientation="h",
                                    yanchor="bottom",
                                    y=1.0,
                                    xanchor="right",
                                    x=0.8
                                )
                     )
    fig.show()

    #Write HTML file
    fig.write_html("Datos_Estaciones_Noviembre_2019/comparativo_diario.html")

d_temp = fig_bar(max_est_25, max_est_10, datos)

## CONTINUO

In [210]:
def scatter_datos(max_est_1, max_est_2, datos):
    
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing = 0.01)

    op = 0.75

    s = 10

    w = 4

    
    #ADD TRACES 
    
    #TOP ESTACIONES

    ##1
    
    for i in range(np.size(max_est_1)):

        datos_temp = datos.loc[datos['codigoSerial'] == max_est_1[i]]


        # Add traces

        fig.add_trace(go.Scatter(x= datos_temp['Fecha_Hora'],y = datos_temp['pm25'],
                                 mode='markers',
                                 opacity= op,
                                 name = 'Estación ' + str(int(max_est_1[i])),
                                 legendgroup = 'Estación ' + str(int(max_est_1[i])),
                                 marker=dict(
                                     size=s,
                                     color = colors[i],)
                                ),
                      row=1, col=1)

    ##2
        
    for i in range(np.size(max_est_2)):
        
        showlegend = False
        color = colors[i]
        
        if np.isin(max_est_2[i], max_est_1) != True:
            showlegend = True
            color = colors[-1]
        
        #query for data of each station
        datos_temp = datos.loc[datos['codigoSerial'] == max_est_2[i]]

        fig.add_trace(go.Scatter(x= datos_temp['Fecha_Hora'], y = datos_temp['pm10'],
                                 mode='markers',
                                 opacity= op,
                                 name = 'Estación ' + str(int(max_est_2[i])),
                                 legendgroup = 'Estación ' + str(int(max_est_2[i])),
                                 showlegend = showlegend,
                                 marker=dict(
                                     size=s,
                                     color = color,)
                                ),
                      row=2, col=1)

    """
    # TOTAL DATOS
    
    datos_otras = datos.groupby(by=datos['Fecha_Hora']).mean()
    datos_otras = datos_otras.reset_index()



    fig.add_trace(go.Scatter(
                            x= datos_otras['Fecha_Hora'],
                            y = datos_otras['pm25'],
                            mode='markers',
                             opacity= op,
                            name = 'Promedio total',
                            legendgroup = 'Promedio total',
                            showlegend = True,
                            marker=dict(
                                size=s,
                                color = colors[i+1],
                            )),
                     row=1, col=1)



    fig.add_trace(go.Scatter(
                            x= datos_otras['Fecha_Hora'],
                            y = datos_otras['pm10'],
                            mode='markers',
                            opacity= op,
                            name = 'Promedio total',
                            legendgroup = 'Promedio total',
                            showlegend = False,
                            marker=dict(
                                size=s,
                                color = colors[i+1],
                            )),
                     row=2, col=1)

    """
    
    # MEDIA MÓVIL
    
    media = datos.groupby(by=datos['Fecha_Hora']).mean()
    media = media.reset_index()
    media['pm25']  = media['pm25'].ewm(span=30,adjust=False).mean()
    media['pm10']  = media['pm10'].ewm(span=30,adjust=False).mean()



    fig.add_trace(go.Scatter(x= media['Fecha_Hora'],y = media['pm25'],
                             mode='lines',
                             name = 'Media Móvil',
                             legendgroup = 'Media Móvil',
                             showlegend = True,
                             line = dict(color='black', width=w)),
                  row=1, col=1)



    fig.add_trace(go.Scatter(x= media['Fecha_Hora'],y = media['pm10'],
                             mode='lines',
                             name = 'Otras',
                             legendgroup = 'Media Móvil',
                             showlegend = False,
                             line = dict(color='black', width=w)),
                  row=2, col=1)

    
    
    # FIGURE LAYOUT
    
    
    dim = 1200
    
    fig.update_layout(height=9*dim/16, width=dim, title= 'Comparativo por estación (Nov, 2019)',
                      title_font_size=20, 
                      font_size=12,
                      legend=dict(
                          orientation="h",
                          yanchor="bottom",
                          y=1.0,
                          xanchor="right", 
                          x = 0.8)
                 
                 )
    
    fig.update_yaxes(title= 'pm 2.5',  row=1, col=1)
    fig.update_yaxes(title= 'pm 10', row=2, col=1)
    
    fig.show()
    
    
    #Write HTML file
    fig.write_html("Datos_Estaciones_Noviembre_2019/comparativo_estacion.html")
        
scatter_datos(max_est_25, max_est_10, datos)

## PORCENTUAL

In [212]:
def medias_moviles(max_est_1, max_est_2, datos):
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing = 0.01,
                        specs=[[{"secondary_y": True}],
                           [{"secondary_y": True}]]
                   )
    
    op = 0.85

    s = 10

    w = 4
    
    # TRACES
    
    # MEDIA MÓVIL
    
    media = datos.groupby(by=datos['Fecha_Hora']).mean()
    media = media.reset_index()
    media['pm25']  = media['pm25'].ewm(span=30,adjust=False).mean()
    media['pm10']  = media['pm10'].ewm(span=30,adjust=False).mean()


    fig.add_trace(go.Scatter(x= media['Fecha_Hora'],y = media['pm25'],
                             mode='lines',
                             name = 'Total',
                             legendgroup = 'Total',
                             showlegend = True,
                             line = dict(color='black', width=w)),
                  row=1, col=1)



    fig.add_trace(go.Scatter(x= media['Fecha_Hora'],y = media['pm10'],
                             mode='lines',
                             name = 'Total',
                             legendgroup = 'Total',
                             showlegend = False,
                             line = dict(color='black', width=w)),
                  row=2, col=1)

    
    #TOP ESTACIONES

    ##1
    
    for i in range(np.size(max_est_1)):
    
        datos_temp = datos.loc[datos['codigoSerial'] == max_est_1[i]]

        datos_temp.loc[:,'pm25']  = datos_temp.loc[:,'pm25'].ewm(span=40,adjust=False).mean()
        datos_temp.loc[:,'pm10']  = datos_temp.loc[:,'pm10'].ewm(span=40,adjust=False).mean()

        # Add traces

        fig.add_trace(go.Scatter(x= datos_temp['Fecha_Hora'],y = datos_temp['pm25'],
                                 opacity= op,
                                 name = 'Estación ' + str(int(max_est_1[i])),
                                 legendgroup = 'Estación ' + str(int(max_est_1[i])),
                                 line = dict(color=colors[i], width=w)),
                      row=1, col=1)

    ##2
    
    for i in range(np.size(max_est_2)):
        
        showlegend = False
        color = colors[i]
        
        if np.isin(max_est_2[i], max_est_1) != True:
            showlegend = True
            color = colors[-1]
        

        datos_temp = datos.loc[datos['codigoSerial'] == max_est_2[i]]

        datos_temp.loc[:,'pm25']  = datos_temp.loc[:,'pm25'].ewm(span=40,adjust=False).mean()
        datos_temp.loc[:,'pm10']  = datos_temp.loc[:,'pm10'].ewm(span=40,adjust=False).mean()
        
        fig.add_trace(go.Scatter(x= datos_temp['Fecha_Hora'],y = datos_temp['pm10'],                     
                                 opacity= op,
                                 name = 'Estación ' + str(int(max_est_2[i])),
                                 legendgroup = 'Estación ' + str(int(max_est_2[i])),
                                 showlegend = showlegend,
                                 line = dict(color=color, width=w)),
                      row=2, col=1)

    
    # UPDATE LAYOUT
    fig.update_yaxes(title_text= 'pm 2.5',  row=1, col=1, secondary_y=False )
    fig.update_yaxes(title_text= 'pm 10', row=2, col=1, secondary_y=False)
    
    fig.update_yaxes(tickformat= '%', row=1, col=1, secondary_y=True)
    fig.update_yaxes(tickformat= '%', row=2, col=1, secondary_y=True)


    dim = 1200

    fig.update_layout(height=9*dim/16, width=dim, title= 'Comparativo media móvil (Nov, 2019)',
                      title_font_size=20, 
                      font_size=12,
                      legend=dict(orientation="h",
                                  yanchor="bottom",
                                  y=1.0,
                                  xanchor="right",
                                  x=0.75
                                )
                     )
    
    fig.show()
    fig.write_html("Datos_Estaciones_Noviembre_2019/comparativo_media_móvil.html")

medias_moviles(max_est_25, max_est_10, datos)

## DIARIO

In [437]:
np.shape(d_diario)

(8784, 1)

In [438]:
d_diario['Mes'] = d_diario.index.month
d_diario['Dia'] = d_diario.index.day



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [439]:
d_mean = d_diario.groupby(by=[d_diario.index.month, d_diario.index.day]).mean()

In [440]:
d_mean = d_mean.rename(columns={'calidad_pm25':'mean_pm25'})

In [441]:
new_df = pd.merge(d_diario, d_mean,  how='right', on=['Mes','Dia'])

In [442]:
new_df.index = d_diario.index

In [434]:
new_df = new_df.between_time('00:00:00','5:00:00')
#new_df = new_df.drop('mean_pm25', axis=1)

In [474]:
X = np.array(new_df.iloc[:,:-1].T)
Y = np.array(new_df.iloc[:,-1])

In [475]:
X

array([[ 1. ,  2.3,  2.1, ...,  1. ,  1. ,  1. ],
       [ 1. ,  1. ,  1. , ..., 12. , 12. , 12. ],
       [ 1. ,  1. ,  1. , ..., 31. , 31. , 31. ]])

In [456]:
Y = Y/np.max(Y)

In [466]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X.T, Y, test_size=0.33, random_state=42)

In [457]:
from tensorflow import keras

In [467]:
model = tf.keras.models.Sequential([tf.keras.layers.Flatten(), 
                                    tf.keras.layers.Dense(128, activation=tf.nn.relu), 
                                    tf.keras.layers.Dense(10, activation=tf.nn.sigmoid)])
model.compile(optimizer = tf.optimizers.Adam(),
              loss = 'sparse_categorical_crossentropy',
              metrics=['accuracy'])



In [471]:
# GRADED FUNCTION: train_happy_sad_model
def train_happy_sad_model():
    # Please write your code only where you are indicated.
    # please do not remove # model fitting inline comments.

    DESIRED_ACCURACY = 0.999

    class myCallback(tf.keras.callbacks.Callback):
         # Your Code
        def on_epoch_end(self, epoch, logs={}):
            if(logs.get('acc')>0.90):
                print("/nReached 99.9% accuracy so cancelling training!")
                self.model.stop_training = True
        
    callbacks = myCallback()
    
    # This Code Block should Define and Compile the Model. Please assume the images are 150 X 150 in your implementation.
    model = tf.keras.models.Sequential([tf.keras.layers.Flatten(), 
                                        tf.keras.layers.Dense(128, activation=tf.nn.relu), 
                                        tf.keras.layers.Dense(128, activation= tf.nn.relu),
                                        tf.keras.layers.Dropout(0.2),
                                        tf.keras.layers.Dense(10, activation=tf.nn.sigmoid)
                                       ])


    model.compile(optimizer = tf.optimizers.Adam(),
              loss = 'sparse_categorical_crossentropy',
              metrics=['acc'])
        


    # This code block should call model.fit_generator and train for
    # a number of epochs.
    # model fitting
    history = model.fit(X_train, y_train,
        steps_per_epoch=8,  
        epochs=10,
        verbose=1,
        callbacks = [callbacks]
    )
    
    # model fitting
    return history.history['acc'][-1]

In [472]:
train_happy_sad_model()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


0.0