In [1]:
import pandas as pd

In [33]:
# Load the hourly weather table; the path is relative to the current working directory.
# NOTE(review): the column listing further down includes 'Unnamed: 0', which looks like
# an index column written out by an earlier to_csv — consider index_col=0 if it is not needed.
weather_data = pd.read_csv("./hourlyWeather_final.csv")

In [34]:
# Turn the 'datetime' column into real timestamps, then derive a calendar-day
# 'date' column from it; 'date' is the key used for the daily aggregation.
weather_data = weather_data.assign(
    datetime=lambda frame: pd.to_datetime(frame['datetime']),
    date=lambda frame: frame['datetime'].dt.date,
)

In [35]:
# Inspect the column names of the hourly frame (now including the derived 'date').
weather_data.columns

Index(['Unnamed: 0', 'datetime', 'temperature_2m', 'relative_humidity_2m',
       'dew_point_2m', 'apparent_temperature', 'precipitation', 'rain',
       'weather_code', 'pressure_msl', 'surface_pressure', 'cloud_cover',
       'et0_fao_evapotranspiration', 'vapour_pressure_deficit',
       'wind_speed_10m', 'wind_speed_100m', 'wind_direction_10m',
       'wind_direction_100m', 'wind_gusts_10m', 'soil_temperature_0_to_7cm',
       'soil_temperature_7_to_28cm', 'soil_moisture_0_to_7cm',
       'soil_moisture_7_to_28cm', 'shortwave_radiation', 'direct_radiation',
       'diffuse_radiation', 'direct_normal_irradiance',
       'global_tilted_irradiance', 'terrestrial_radiation', 'date'],
      dtype='object')

In [36]:
# Build the daily frame: one row per 'date' holding the mean, min and max of the
# hourly temperature_2m readings. reset_index() turns 'date' back into a regular
# column so later steps can address it by name.
daily_weather = (
    weather_data
    .groupby('date')['temperature_2m']
    .agg(['mean', 'min', 'max'])
    .reset_index()
)
daily_weather

Unnamed: 0,date,mean,min,max
0,2020-01-01,1.000000,-2.3,7.5
1,2020-01-02,0.291667,-3.1,6.7
2,2020-01-03,1.316667,-2.2,7.7
3,2020-01-04,3.954167,-2.4,9.0
4,2020-01-05,5.483333,1.3,10.7
...,...,...,...,...
1456,2023-12-27,7.312500,4.5,10.9
1457,2023-12-28,7.525000,5.6,9.4
1458,2023-12-29,7.633333,6.0,9.9
1459,2023-12-30,6.329167,1.7,10.0


In [37]:
# Rename the generic aggregate labels so each column states which variable it
# summarises. Rebinding (instead of inplace=True) leaves the frame shown by the
# previous cell untouched and keeps this cell safe to re-run.
daily_weather = daily_weather.rename(
    columns={
        'mean': 'temperature_2m_average',
        'min': 'temperature_2m_min',
        'max': 'temperature_2m_max',
    }
)
daily_weather

Unnamed: 0,date,temperature_2m_average,temperature_2m_min,temperature_2m_max
0,2020-01-01,1.000000,-2.3,7.5
1,2020-01-02,0.291667,-3.1,6.7
2,2020-01-03,1.316667,-2.2,7.7
3,2020-01-04,3.954167,-2.4,9.0
4,2020-01-05,5.483333,1.3,10.7
...,...,...,...,...
1456,2023-12-27,7.312500,4.5,10.9
1457,2023-12-28,7.525000,5.6,9.4
1458,2023-12-29,7.633333,6.0,9.9
1459,2023-12-30,6.329167,1.7,10.0


In [38]:
# Aggregation to apply per day to every hourly column other than temperature_2m.
# Keys are hourly column names, values are aggregation names; the insertion
# order here is the order in which the daily columns get created.
agg_cols = {
    # felt temperature / humidity
    'apparent_temperature': 'mean',
    'relative_humidity_2m': 'mean',
    'dew_point_2m': 'mean',
    # precipitation is totalled over the day
    'precipitation': 'sum',
    'rain': 'sum',
    'weather_code': 'max',
    # pressure and cloud
    'pressure_msl': 'mean',
    'surface_pressure': 'mean',
    'cloud_cover': 'mean',
    'et0_fao_evapotranspiration': 'sum',
    'vapour_pressure_deficit': 'mean',
    # wind: daily peak for speeds and gusts
    'wind_speed_10m': 'max',
    'wind_speed_100m': 'max',
    # NOTE(review): presumably these are compass bearings in degrees — if so an
    # arithmetic mean is misleading around the 0/360 wrap; confirm and consider
    # a circular mean.
    'wind_direction_10m': 'mean',
    'wind_direction_100m': 'mean',
    'wind_gusts_10m': 'max',
    # soil
    'soil_temperature_0_to_7cm': 'mean',
    'soil_temperature_7_to_28cm': 'mean',
    'soil_moisture_0_to_7cm': 'mean',
    'soil_moisture_7_to_28cm': 'mean',
    # radiation
    'shortwave_radiation': 'mean',
    'direct_radiation': 'mean',
    'diffuse_radiation': 'mean',
    'direct_normal_irradiance': 'mean',
    'global_tilted_irradiance': 'mean',
    'terrestrial_radiation': 'mean',
}

In [39]:
#Define function to aggregate
def aggregate_cols(data,old_data,agg_cols):
    """Add one per-date aggregate column to ``data`` for each entry of ``agg_cols``.

    ``old_data`` is grouped by its 'date' column; every column named in
    ``agg_cols`` is reduced with the paired aggregation and stored in ``data``
    as ``"<column>_<aggregation>"``. ``data`` is modified in place.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame that receives the new columns. When it has a 'date' column, each
        row gets the aggregate belonging to its own date, whatever the row
        order or index labels are; dates that do not occur in ``old_data``
        yield NaN. Without a 'date' column the aggregates are assigned by
        index alignment against a 0..n-1 index in sorted-date order.
    old_data : pandas.DataFrame
        Source frame holding a 'date' column and every column named in
        ``agg_cols``.
    agg_cols : dict
        Maps column name to aggregation name. The name must be a string
        because it is concatenated into the new column's name.

    Returns
    -------
    None
    """
    # The grouping is identical for every column, so build it once.
    grouped = old_data.groupby('date')
    match_on_date = 'date' in data.columns
    for col, func in agg_cols.items():
        per_date = grouped[col].agg(func)  # Series indexed by date
        if match_on_date:
            # Look each row's value up by its date rather than trusting that
            # data's index happens to line up with the sorted group order.
            data[col+"_"+func] = data['date'].map(per_date)
        else:
            # No date key to match on: keep the positional assignment.
            data[col+"_"+func] = per_date.reset_index(drop=True)

In [40]:
# Adds one "<column>_<aggregation>" column per agg_cols entry to daily_weather.
# The function mutates daily_weather in place and returns None, so there is no output.
aggregate_cols(daily_weather,weather_data,agg_cols)

In [41]:
# Render the aggregated daily frame; pandas elides the middle rows and columns of the wide table.
display(daily_weather)

Unnamed: 0,date,temperature_2m_average,temperature_2m_min,temperature_2m_max,apparent_temperature_mean,relative_humidity_2m_mean,dew_point_2m_mean,precipitation_sum,rain_sum,weather_code_max,...,soil_temperature_0_to_7cm_mean,soil_temperature_7_to_28cm_mean,soil_moisture_0_to_7cm_mean,soil_moisture_7_to_28cm_mean,shortwave_radiation_mean,direct_radiation_mean,diffuse_radiation_mean,direct_normal_irradiance_mean,global_tilted_irradiance_mean,terrestrial_radiation_mean
0,2020-01-01,1.000000,-2.3,7.5,-2.447826,83.739130,-1.547826,0.0,0.0,0.0,...,0.769565,3.030435,0.366696,0.375130,80.347826,63.782609,16.565217,230.830435,80.347826,127.117391
1,2020-01-02,0.291667,-3.1,6.7,-3.150000,78.833333,-3.129167,0.0,0.0,0.0,...,-0.770833,1.425000,0.371792,0.378208,75.791667,59.625000,16.166667,214.454167,75.791667,122.454167
2,2020-01-03,1.316667,-2.2,7.7,-2.000000,82.291667,-1.491667,0.0,0.0,2.0,...,-0.237500,1.470833,0.376958,0.379708,59.750000,28.041667,31.708333,96.870833,59.750000,123.145833
3,2020-01-04,3.954167,-2.4,9.0,0.358333,73.833333,-0.620833,0.0,0.0,1.0,...,2.283333,2.570833,0.380708,0.383458,74.208333,56.291667,17.916667,202.579167,74.208333,123.887500
4,2020-01-05,5.483333,1.3,10.7,1.920833,64.708333,-1.025000,0.0,0.0,0.0,...,3.750000,3.775000,0.375708,0.381083,77.916667,61.541667,16.375000,218.579167,77.916667,124.687500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1456,2023-12-27,7.312500,4.5,10.9,5.520833,89.458333,5.625000,0.0,0.0,3.0,...,7.441667,7.820833,0.335750,0.346458,37.625000,11.166667,26.458333,44.158333,37.625000,119.450000
1457,2023-12-28,7.525000,5.6,9.4,5.800000,89.291667,5.879167,0.5,0.5,51.0,...,7.937500,8.179167,0.346208,0.346333,20.166667,2.958333,17.208333,10.320833,20.166667,119.804167
1458,2023-12-29,7.633333,6.0,9.9,5.841667,84.750000,5.191667,0.0,0.0,3.0,...,8.062500,8.250000,0.346625,0.347542,24.083333,3.333333,20.750000,9.720833,24.083333,120.237500
1459,2023-12-30,6.329167,1.7,10.0,4.241667,86.833333,4.200000,0.1,0.1,51.0,...,7.600000,8.195833,0.343208,0.347500,64.041667,43.500000,20.541667,154.312500,64.041667,120.716667


In [42]:
# List the final column names of the daily frame to check that every aggregate was added.
daily_weather.columns

Index(['date', 'temperature_2m_average', 'temperature_2m_min',
       'temperature_2m_max', 'apparent_temperature_mean',
       'relative_humidity_2m_mean', 'dew_point_2m_mean', 'precipitation_sum',
       'rain_sum', 'weather_code_max', 'pressure_msl_mean',
       'surface_pressure_mean', 'cloud_cover_mean',
       'et0_fao_evapotranspiration_sum', 'vapour_pressure_deficit_mean',
       'wind_speed_10m_max', 'wind_speed_100m_max', 'wind_direction_10m_mean',
       'wind_direction_100m_mean', 'wind_gusts_10m_max',
       'soil_temperature_0_to_7cm_mean', 'soil_temperature_7_to_28cm_mean',
       'soil_moisture_0_to_7cm_mean', 'soil_moisture_7_to_28cm_mean',
       'shortwave_radiation_mean', 'direct_radiation_mean',
       'diffuse_radiation_mean', 'direct_normal_irradiance_mean',
       'global_tilted_irradiance_mean', 'terrestrial_radiation_mean'],
      dtype='object')

All done

In [43]:
# Export is disabled; uncomment the next line to write the daily aggregates to dailyWeather.csv.
#daily_weather.to_csv("dailyWeather.csv", index=False)