In [1]:
import pandas as pd
import numpy as np
from graph_traffic.config import data_path

import matplotlib as mpl
import os

# Notebook-wide matplotlib defaults: figure size (8, 6) and grid lines off.
mpl.rcParams.update({
    'figure.figsize': (8, 6),
    'axes.grid': False,
})

## Localizaciones

In [3]:
# Station catalogue: short station code plus coordinates. Rows without a
# short code are discarded.
locs = (
    pd.read_csv(
        f'{data_path}/01-raw/meteo/Estaciones_control_datos_meteorologicos.csv',
        sep=';',
        encoding="cp1252",
        usecols=['CÓDIGO_CORTO', 'LONGITUD', 'LATITUD'],
    )
    .dropna(subset=['CÓDIGO_CORTO'])
    # Rename by name rather than by position: read_csv ignores the order of
    # `usecols` and returns the columns in file order, so a positional
    # `locs.columns = [...]` could silently swap the labels.
    .rename(columns={
        'CÓDIGO_CORTO': 'codigo_corto',
        'LONGITUD': 'longitud',
        'LATITUD': 'latitud',
    })
)
locs.head()

Unnamed: 0,codigo_corto,longitud,latitud
0,4.0,-3.712257,40.423882
1,8.0,-3.682316,40.421553
2,16.0,-3.639242,40.440046
3,18.0,-3.731836,40.394782
4,24.0,-3.747345,40.419358


In [49]:
# Persist the station catalogue (codigo_corto, longitud, latitud) without the index.
# NOTE(review): a later cell writes 'files/estaciones_proc.csv' again, with the
# station code as index column 'estacion' plus per-magnitude columns — confirm
# which of the two layouts downstream code expects.
locs.to_csv('files/estaciones_proc.csv', index=False)

In [3]:
# Station short codes in ascending order; the monthly conversion loop iterates over them.
cods_cortos = sorted(locs['codigo_corto'].to_numpy())

## Datos meteo

In [4]:
# Column names of the raw files, one per hour slot 01..24:
# H01..H24 carry the hourly values, V01..V24 the flag that is later compared
# against 'V' to decide whether the matching H value is kept.
hcols, vcols = (
    [f"{prefix}{hour:02d}" for hour in range(1, 25)] for prefix in ("H", "V")
)

In [5]:
# Convert every raw monthly file (one row per station, magnitude and day, with
# H01..H24 value columns and V01..V24 flag columns) into an hourly table with
# one column per magnitude, written to files/procesados/por_mes/<same file name>.

# Raw magnitude code -> name used for the output column.
codigos_magnitud = {
    80: 'radiacion_ultravioleta',
    81: 'velocidad_viento',
    82: 'dir_viento',
    83: 'temperatura',
    86: 'humedad_relativa',
    87: 'presion_barometrica',
    88: 'radiacion_solar',
    89: 'precipitacion'
}
# Magnitudes that are flag-filtered and renamed below. Columns of any other
# magnitude present in a file (e.g. 'radiacion_ultravioleta') pass through
# with their merge suffixes untouched.
magnitudes = ['velocidad_viento', 'dir_viento', 'temperatura', 'humedad_relativa', 'presion_barometrica', 'radiacion_solar', 'precipitacion']
magnitudes_x = [f'{m}_x' for m in magnitudes]  # hourly values (left side of the merge)
magnitudes_y = [f'{m}_y' for m in magnitudes]  # flags (right side of the merge)
columnas_base = ['estacion', 'fecha', 'hora'] + magnitudes_x + magnitudes_y

files20 = [f'files/2020-meteo/{file}' for file in os.listdir('files/2020-meteo')]
files21 = [f'files/2021-meteo/{file}' for file in os.listdir('files/2021-meteo')]
for file in files20 + files21:
    df = pd.read_csv(file, sep=';')
    df['fecha'] = pd.to_datetime(dict(year=df.ANO, month=df.MES, day=df.DIA))
    # An unknown magnitude code raises KeyError instead of being silently dropped.
    df['magnitud'] = df.MAGNITUD.apply(lambda x: codigos_magnitud[x])
    df = df.drop(columns=['PROVINCIA', 'MUNICIPIO', 'PUNTO_MUESTREO', 'ANO', 'MES', 'DIA', 'MAGNITUD'])
    df = df.rename(columns={'ESTACION': 'estacion'})
    fechas = sorted(df.fecha.unique())

    # Collect the per-station/per-day pieces and concatenate once:
    # DataFrame.append was removed in pandas 2.0 and copied the whole
    # accumulated frame on every call.
    partes = []
    for estacion in cods_cortos:
        df_estacion = df[df.estacion == estacion]
        for fecha in fechas:
            dff = df_estacion[df_estacion.fecha == fecha]

            # Rows become hour slots, columns become magnitudes (values).
            dfh = dff[['magnitud'] + hcols].set_index('magnitud').transpose()
            dfh['hora'] = dfh.index.to_series().apply(lambda x: int(x[1:]))

            # Same layout for the flag columns.
            dfv = dff[['magnitud'] + vcols].set_index('magnitud').transpose()
            dfv['hora'] = dfv.index.to_series().apply(lambda x: int(x[1:]))

            # Magnitude columns exist on both sides, so the merge suffixes
            # them: <magnitude>_x = value, <magnitude>_y = flag.
            dfm = dfh.merge(dfv, on='hora')
            dfm['estacion'] = estacion
            dfm['fecha'] = fecha

            partes.append(dfm)

    dfn = pd.concat(partes) if partes else pd.DataFrame(columns=columnas_base)
    # Guarantee that every expected column exists (magnitudes no station
    # reports become all-NaN) and keep the base columns first.
    extras = [c for c in dfn.columns if c not in columnas_base]
    dfn = dfn.reindex(columns=columnas_base + extras)

    # Keep a value only where its flag equals 'V'; everything else becomes NaN.
    dfn[magnitudes_x] = np.where(dfn[magnitudes_y] == 'V', dfn[magnitudes_x], np.nan)
    dfn = dfn.rename(columns=dict(zip(magnitudes_x, magnitudes))).drop(columns=magnitudes_y)

    # Hour slot h of a day maps to that day at h:00, so slot 24 lands on
    # 00:00 of the following day.
    dfn['fecha'] = pd.to_datetime(dfn['fecha']) + pd.to_timedelta(dfn['hora'], unit='h')
    dfn = dfn.drop(columns='hora')
    dfn.to_csv(f'files/procesados/por_mes/{file.split("/")[-1]}', index=False)

In [6]:
def transform_df(df):
    """Clean implausible readings and replace wind speed/direction by components.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'temperatura', 'humedad_relativa',
        'velocidad_viento' and 'dir_viento' (direction in degrees).

    Returns
    -------
    pandas.DataFrame
        A new frame (the argument is left untouched) in which

        * 'temperatura' values below -25 are replaced by NaN,
        * 'humedad_relativa' values below 0 are replaced by NaN,
        * 'velocidad_viento' and 'dir_viento' are removed and replaced by
          'vientox' = speed * cos(direction) and
          'vientoy' = speed * sin(direction).

    Only the offending measurement is blanked. Assigning NaN to the whole
    row would also erase the station id, the timestamp and every other valid
    measurement of that row.
    """
    df = df.copy()

    # Boolean masks are passed as plain arrays so that a non-unique index
    # (e.g. after concatenating several files) cannot affect the alignment.
    bad_temp = (df['temperatura'] < -25).to_numpy()
    bad_hum = (df['humedad_relativa'] < 0).to_numpy()
    df['temperatura'] = df['temperatura'].mask(bad_temp)
    df['humedad_relativa'] = df['humedad_relativa'].mask(bad_hum)

    wv = df.pop('velocidad_viento')
    wd_rad = df.pop('dir_viento') * np.pi / 180  # degrees -> radians
    df['vientox'] = wv * np.cos(wd_rad)
    df['vientoy'] = wv * np.sin(wd_rad)

    return df

In [21]:
# Gather the processed monthly files per year. File names ending in '20.csv'
# go to 2020 and those ending in '21.csv' to 2021; anything else is ignored.
# Frames are collected in lists and concatenated once because
# DataFrame.append was removed in pandas 2.0.
frames20, frames21 = [], []
for file in sorted(os.listdir('files/procesados/por_mes')):
    path = 'files/procesados/por_mes/' + file
    if file.endswith('20.csv'):
        frames20.append(pd.read_csv(path))
    elif file.endswith('21.csv'):
        frames21.append(pd.read_csv(path))
df20 = (pd.concat(frames20) if frames20 else pd.DataFrame([])).sort_values('fecha')
df21 = (pd.concat(frames21) if frames21 else pd.DataFrame([])).sort_values('fecha')

df20 = transform_df(df20)
df21 = transform_df(df21)

# Both years in one frame, ordered by timestamp; rows without a station are dropped.
df = pd.concat([df20, df21], ignore_index=True).sort_values('fecha').dropna(subset=['estacion'])

estaciones = set(df.estacion.unique())

# One CSV per station.
# NOTE(review): the next cell reads `est` after this loop finishes, so the
# loop variable is part of the notebook's state.
for est in estaciones:
    df[df.estacion==est].to_csv(f'files/procesados/anual/estacion-{est:.0f}.csv', index=False)

In [25]:
# Narrow `df` to a single station and renumber the rows from 0.
# NOTE(review): `est` is whatever value the per-station export loop left
# behind, and `df` (all stations) is overwritten here — this cell only works
# when run right after that loop. Consider an explicit station code and a
# separate variable name.
df = df[df.estacion==est].reset_index(drop=True)

In [26]:
# Display the single-station frame selected in the previous cell.
df

Unnamed: 0,estacion,fecha,temperatura,humedad_relativa,presion_barometrica,radiacion_solar,precipitacion,vientox,vientoy
0,115.0,2020-01-01 01:00:00,3.1,91.0,,,,,
1,115.0,2020-01-01 02:00:00,2.5,91.0,,,,,
2,115.0,2020-01-01 03:00:00,2.1,91.0,,,,,
3,115.0,2020-01-01 04:00:00,1.7,90.0,,,,,
4,115.0,2020-01-01 05:00:00,1.3,91.0,,,,,
...,...,...,...,...,...,...,...,...,...
17539,115.0,2021-12-31 20:00:00,11.4,79.0,,,,,
17540,115.0,2021-12-31 21:00:00,9.8,81.0,,,,,
17541,115.0,2021-12-31 22:00:00,8.7,83.0,,,,,
17542,115.0,2021-12-31 23:00:00,7.6,86.0,,,,,


In [43]:
# Reload every per-station file and stack them into a single frame.
station_frames = [pd.read_csv(f'files/procesados/anual/{e}') for e in os.listdir('files/procesados/anual/')]
df = pd.concat(station_frames)

meteo_magn = [
    'temperatura', 'humedad_relativa', 'presion_barometrica',
    'radiacion_solar', 'precipitacion', 'vientox', 'vientoy',
]

# For each magnitude, tag every row with the length of the run of consecutive
# identical values it belongs to.
# NOTE(review): runs are computed over the stacked frame, so a run can continue
# from the end of one station file into the start of the next; and since
# NaN != NaN, every missing value counts as its own run of length 1.
for m in meteo_magn:
    starts_new_run = df[m].ne(df[m].shift())
    df[f'period_no_change_{m}'] = df[m].groupby(starts_new_run.cumsum()).transform('size')

# Cumulative share of rows by run length, one column per magnitude
# (row-weighted: each row contributes the length of the run it sits in).
cumulative_shares = [
    df[f'period_no_change_{m}'].value_counts(normalize=True).sort_index().cumsum()
    for m in meteo_magn
]
pd.concat(cumulative_shares, axis=1).head(30)

Unnamed: 0,period_no_change_temperatura,period_no_change_humedad_relativa,period_no_change_presion_barometrica,period_no_change_radiacion_solar,period_no_change_precipitacion,period_no_change_vientox,period_no_change_vientoy
1,0.93979,0.82297,0.727812,0.863291,0.647531,0.995716,0.995008
2,0.990643,0.942131,0.767927,0.867141,0.650995,0.99747,0.997165
3,0.997878,0.975977,0.806902,0.869495,0.653034,0.998049,0.997862
4,0.999132,0.98657,0.839963,0.871714,0.654595,0.998557,0.99838
5,0.999406,0.990461,0.870063,0.873643,0.656184,0.998766,0.998599
6,0.999458,0.99225,0.895845,0.875787,0.657618,0.998937,0.99877
7,0.999474,0.993156,0.916654,0.877767,0.659168,0.999167,0.999
8,,0.993769,0.934368,0.88808,0.660764,0.999395,0.999228
9,,0.994243,0.947924,0.903529,0.662205,0.999494,0.999327
10,,0.994835,0.960858,0.924575,0.66363,,


In [2]:
import pandas as pd
import os

In [42]:
# For each per-station file, record which magnitudes have at least one
# non-null reading: {station code: {magnitude: bool}}.
path = "../processed/meteo/files_per_station/"
mmagns = [
    'temperatura', 'humedad_relativa', 'presion_barometrica',
    'radiacion_solar', 'precipitacion', 'vientox', 'vientoy',
]
mdict = {}
for file in os.listdir(path):
    # The station code is parsed from the file name: the text after the first
    # '-' up to the next '.', converted to int.
    estacion = int(file.split('-')[1].partition(".")[0])
    df = pd.read_csv(os.path.join(path, file))
    notnull = df[mmagns].notna().any()
    mdict[estacion] = notnull.to_dict()

In [51]:
# Station coordinates joined on the station code with the per-magnitude
# availability flags; saved with the station code as the 'estacion' column.
disponibilidad = pd.DataFrame(mdict).T
estaciones_proc = locs.set_index('codigo_corto').merge(
    disponibilidad, left_index=True, right_index=True
)
estaciones_proc.to_csv('files/estaciones_proc.csv', index_label='estacion')

In [50]:
# Preview the station coordinates joined with the per-magnitude availability
# flags. A dangling trailing '.' made this cell a SyntaxError; it is removed.
locs.set_index('codigo_corto').merge(pd.DataFrame(mdict).transpose(), left_index=True, right_index=True)

Unnamed: 0,longitud,latitud,temperatura,humedad_relativa,presion_barometrica,radiacion_solar,precipitacion,vientox,vientoy
4.0,-3.712257,40.423882,True,False,False,False,False,False,False
8.0,-3.682316,40.421553,True,True,False,False,False,False,False
16.0,-3.639242,40.440046,False,True,False,False,False,False,False
18.0,-3.731836,40.394782,True,False,False,False,False,False,False
24.0,-3.747345,40.419358,True,True,True,True,True,True,True
35.0,-3.703166,40.419209,True,True,False,False,False,False,False
36.0,-3.64531,40.407952,True,True,False,False,False,False,False
38.0,-3.70713,40.445544,True,True,False,False,False,False,False
39.0,-3.711536,40.478232,False,True,False,False,True,False,False
54.0,-3.612139,40.373012,True,True,False,True,True,True,True


In [33]:
# Availability flags as a table: one row per station, one column per magnitude.
pd.DataFrame(mdict).T

Unnamed: 0,temperatura,humedad_relativa,presion_barometrica,radiacion_solar,precipitacion,vientox,vientoy
114,True,True,False,False,False,False,False
112,True,True,False,False,False,False,False
107,True,True,True,True,True,True,True
8,True,True,False,False,False,False,False
58,True,True,False,False,False,False,False
103,True,True,True,True,True,True,True
4,True,False,False,False,False,False,False
36,True,True,False,False,False,False,False
38,True,True,False,False,False,False,False
104,False,False,False,False,False,True,True
