In [178]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [179]:
df = pd.read_csv("./UH_2023/DATOS_METEO.txt", sep = "|")

In [180]:
columns_to_drop = ["precip2Day", "precip3Day", "precip7Day", "precipMtd", "precipYtd", "pressureMeanSeaLevel", "snow2Day", "snow3Day", "snow7Day", "snowMtd", "snowSeason", "snowYtd", "windDirection", "windGust", "precip24Hour", "precip6Hour", "snow24Hour", "snow6Hour", "temperatureChange24Hour", "temperatureMax24Hour", "temperatureMin24Hour"]

In [181]:
df2 = df.drop(columns_to_drop, axis=1)

In [182]:
cols_to_modify = list(df2.isna().sum()[df2.isna().sum() != 0].index)

In [183]:
df2[cols_to_modify] = (df2[cols_to_modify].ffill()+df2[cols_to_modify].bfill())/2

In [184]:
df2.validTimeUtc = pd.to_datetime(df2.validTimeUtc)

### GroupBy amb day

In [185]:
df2_mean = df2.groupby([pd.Grouper(key="validTimeUtc", freq="1D"), pd.Grouper(key = 'ID_ESTACION')]).mean()
df2_max = df2.groupby([pd.Grouper(key="validTimeUtc", freq="1D"), pd.Grouper(key = 'ID_ESTACION')]).max()
df2_min = df2.groupby([pd.Grouper(key="validTimeUtc", freq="1D"), pd.Grouper(key = 'ID_ESTACION')]).min()

In [186]:
cols_mean = [i + "DayAvg" for i in df2_mean.columns]
df2_mean.columns = cols_mean
cols_max = [i + "DayMax" for i in df2_max.columns]
df2_max.columns = cols_max
cols_min = [i + "DayMin" for i in df2_min.columns]
df2_min.columns = cols_min

### GroupyBy amb daytime

In [187]:
df2_daytime = df2[(7 < df2.validTimeUtc.dt.hour) & (df2.validTimeUtc.dt.hour < 19)]

df2_mean_daytime = df2.groupby([pd.Grouper(key="validTimeUtc", freq="1D"), pd.Grouper(key = 'ID_ESTACION')]).mean()
df2_max_daytime = df2.groupby([pd.Grouper(key="validTimeUtc", freq="1D"), pd.Grouper(key = 'ID_ESTACION')]).max()
df2_min_daytime = df2.groupby([pd.Grouper(key="validTimeUtc", freq="1D"), pd.Grouper(key = 'ID_ESTACION')]).min()

In [188]:
# Periodes normals
dic_periodes = {"Day": (0, 24), "Daytime": (7, 19), "Morning": (7, 13), "Afternoon": (13, 19)}
llista_df = {}
for k,(i,j) in dic_periodes.items():
    df_aux = df2[(i < df2.validTimeUtc.dt.hour) & (df2.validTimeUtc.dt.hour < j)]
    df2_mean = df_aux.groupby([pd.Grouper(key="validTimeUtc", freq="1D"), pd.Grouper(key = 'ID_ESTACION')]).mean()
    df2_max = df_aux.groupby([pd.Grouper(key="validTimeUtc", freq="1D"), pd.Grouper(key = 'ID_ESTACION')]).max()
    df2_min = df_aux.groupby([pd.Grouper(key="validTimeUtc", freq="1D"), pd.Grouper(key = 'ID_ESTACION')]).min()
    
    cols_mean = [c + k + "Avg" for c in df2_mean.columns]
    df2_mean.columns = cols_mean
    cols_max = [c + k + "Max" for c in df2_max.columns]
    df2_max.columns = cols_max
    cols_min = [c + k + "Min" for c in df2_min.columns]
    df2_min.columns = cols_min
    llista_df[k] = [df2_mean, df2_max, df2_min]

In [189]:
# Periodes raros
dic_periodes = {"Nighttime": (19, 7), "Evening": (19, 1), "Overnight": (1, 7)}
for k,(i,j) in dic_periodes.items():
    df_aux = df2[(i < df2.validTimeUtc.dt.hour) | (df2.validTimeUtc.dt.hour < j)]
    df_aux.loc[:, "validTimeUtc"] = df_aux.loc[:, "validTimeUtc"] - pd.Timedelta(hours=10)
    df2_mean = df_aux.groupby([pd.Grouper(key="validTimeUtc", freq="1D"), pd.Grouper(key = 'ID_ESTACION')]).mean()
    df2_max = df_aux.groupby([pd.Grouper(key="validTimeUtc", freq="1D"), pd.Grouper(key = 'ID_ESTACION')]).max()
    df2_min = df_aux.groupby([pd.Grouper(key="validTimeUtc", freq="1D"), pd.Grouper(key = 'ID_ESTACION')]).min()
    
    cols_mean = [c + k + "Avg" for c in df2_mean.columns]
    df2_mean.columns = cols_mean
    cols_max = [c + k + "Max" for c in df2_max.columns]
    df2_max.columns = cols_max
    cols_min = [c + k + "Min" for c in df2_min.columns]
    df2_min.columns = cols_min
    llista_df[k] = [df2_mean, df2_max, df2_min]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_aux.loc[:, "validTimeUtc"] = df_aux.loc[:, "validTimeUtc"] - pd.Timedelta(hours=10)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_aux.loc[:, "validTimeUtc"] = df_aux.loc[:, "validTimeUtc"] - pd.Timedelta(hours=10)


In [190]:
first_df = llista_df["Day"][0]
llista = [j for j in [i for i in llista_df.values()]]
df_tractats = [item for sublist in llista for item in sublist]
df_tractats = df_tractats[1:]

In [191]:
for ds in df_tractats:
    first_df = pd.merge(first_df, ds, left_index=True, right_index=True)

In [192]:
first_df = first_df.reset_index()

In [193]:
first_df

Unnamed: 0,validTimeUtc,ID_ESTACION,precip1HourDayAvg,pressureChangeDayAvg,relativeHumidityDayAvg,snow1HourDayAvg,temperatureDayAvg,temperatureDewPointDayAvg,temperatureFeelsLikeDayAvg,uvIndexDayAvg,...,precip1HourOvernightMin,pressureChangeOvernightMin,relativeHumidityOvernightMin,snow1HourOvernightMin,temperatureOvernightMin,temperatureDewPointOvernightMin,temperatureFeelsLikeOvernightMin,uvIndexOvernightMin,visibilityOvernightMin,windSpeedOvernightMin
0,2015-06-30,0,0.0,-0.321739,44.700000,0.0,26.534783,12.095652,26.565217,2.521739,...,0.0,-1.5,21.6,0.0,18.3,10.4,18.3,0.0,16.09,3.6
1,2015-06-30,1,0.0,-0.308696,38.773913,0.0,26.834783,10.226087,26.834783,2.652174,...,0.0,-1.2,19.3,0.0,18.3,8.6,18.3,0.0,16.09,4.7
2,2015-06-30,2,0.0,-0.330435,44.865217,0.0,26.821739,12.252174,26.839130,2.565217,...,0.0,-1.0,21.6,0.0,17.5,10.5,17.5,0.0,16.09,3.6
3,2015-06-30,3,0.0,-0.321739,41.652174,0.0,27.213043,11.456522,27.213043,2.565217,...,0.0,-1.3,19.7,0.0,18.5,9.5,18.5,0.0,16.09,4.7
4,2015-06-30,4,0.0,-0.308696,40.960870,0.0,26.852174,10.904348,26.865217,2.565217,...,0.0,-1.2,20.0,0.0,18.1,9.5,18.1,0.0,16.09,4.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51135,2022-06-30,15,0.0,0.230435,53.113043,0.0,23.769565,12.200000,23.791304,2.565217,...,0.0,-1.4,25.7,0.0,18.7,8.3,18.7,0.0,9.06,3.6
51136,2022-06-30,16,0.0,0.239130,52.556522,0.0,24.182609,12.626087,24.200000,2.565217,...,0.0,-1.2,25.9,0.0,19.6,8.9,19.6,0.0,8.21,2.5
51137,2022-06-30,17,0.0,0.226087,48.439130,0.0,23.582609,10.091304,23.586957,2.652174,...,0.0,-1.7,21.3,0.0,17.9,5.3,17.9,0.0,11.90,4.7
51138,2022-06-30,18,0.0,0.304348,52.656522,0.0,24.195652,13.030435,24.239130,2.521739,...,0.0,-1.4,30.2,0.0,19.5,10.7,19.5,0.0,8.05,4.3
