In [None]:
import pandas as pd
import numpy as np

# Download daily weather information from Meteostat

NOTE: set the `station` variable in the next code cell to the identifier of the desired weather station. Identifiers can be found at https://meteostat.net/.

EXAMPLE: if we want to download historical daily data concerning the city of Paris, we can access the station with ID 07156,
         which corresponds to the Paris-Montsouris weather station.

In [None]:
# Meteostat identifier of the weather station whose history is downloaded.
station = '07156'

# Column names applied to the Meteostat daily bulk CSV
# (see https://dev.meteostat.net/bulk/daily.html#endpoints).
# Because `names=` is passed without `header=`, pandas treats the first CSV row
# as data rather than as a header line.
list_columns = [
    'date',
    'tavg', 'tmin', 'tmax',
    'prcp', 'snow',
    'wdir', 'wspd', 'wpgt',
    'pres', 'tsun',
]

# Download the station's daily file (compression is inferred from the .gz suffix)
# and load it into a DataFrame with the column names above.
url_daily = f"https://bulk.meteostat.net/v2/daily/{station}.csv.gz"
meteo = pd.read_csv(url_daily, names=list_columns)

### Remove weather records that occur before a given cutoff_date

In [None]:
# Earliest date (inclusive) to keep.
cutoff_date = '2020-01-01'

# Keep only the records dated on or after the cutoff.
# NOTE(review): this compares 'date' against a string, so it assumes the column
# holds ISO 'YYYY-MM-DD' text (whose alphabetical order is also chronological) —
# confirm if the loading step is ever changed to parse dates.
meteo = meteo.loc[meteo['date'] >= cutoff_date, :]

display(meteo)
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in display() would additionally render a stray "None".
meteo.info()

### Select the columns of interest

In [None]:
# Keep only the date, the average temperature and the precipitation.
# .copy() makes the selection an independent frame, so the column assignments
# below never act on a slice of the previous frame.
meteo = meteo.loc[:, ['date', 'tavg', 'prcp']].copy()

# Fill missing average temperatures by linear interpolation between neighbouring rows.
# limit_direction='both' also fills gaps at the very start of the series, which the
# default forward-only direction would leave as NaN.
meteo['tavg'] = meteo['tavg'].interpolate(limit_direction='both')

# Assume it didn't rain when the precipitation value is missing.
meteo['prcp'] = meteo['prcp'].fillna(0)

# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in display() would additionally render a stray "None".
meteo.info()
display(meteo)

### Determine the overall weather conditions based on the precipitation (in mm) that has fallen on a given day

In [None]:
# Label each day from its precipitation value. Labels are assigned from the wettest
# class downwards, so every later line overrides the previous one for drier days:
#   prcp == 0           -> 'sunny'
#   prcp < 2.5          -> 'light rain'     (any negative value would land here too)
#   2.5 <= prcp < 7.6   -> 'moderate rain'
#   7.6 <= prcp < 50    -> 'heavy rain'
#   prcp >= 50          -> 'violent rain'
# Rows whose prcp is NaN match none of the comparisons and keep the default label.
#
# The 'heavy rain' line previously had no condition and overwrote every row, which
# made 'violent rain' unreachable.
# NOTE(review): the 50 mm upper bound for 'heavy rain' is taken from the commonly
# cited rain-intensity scale that the 2.5 / 7.6 thresholds appear to follow —
# confirm it is the intended cutoff for daily totals.
meteo['conditions'] = 'violent rain'
meteo.loc[meteo['prcp'] < 50, 'conditions'] = 'heavy rain'
meteo.loc[meteo['prcp'] < 7.6, 'conditions'] = 'moderate rain'
meteo.loc[meteo['prcp'] < 2.5, 'conditions'] = 'light rain'
meteo.loc[meteo['prcp'] == 0, 'conditions'] = 'sunny'

display(meteo['conditions'].value_counts())
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in display() would additionally render a stray "None".
meteo.info()
display(meteo)

### Prepare the processed dataframe for storage

In [None]:
# Path of the Parquet file the processed data is written to.
name_file_output = "weather_paris.parquet"

# Shape the frame for storage: drop 'prcp', rename the remaining columns to their
# output names, and renumber the rows from 0.
# The frame is rebound instead of mutated in place, and errors='ignore' lets the
# cell be re-run without raising KeyError on the already-dropped 'prcp' column
# (rename simply skips source names that are no longer present).
meteo = (
    meteo
    .drop(columns='prcp', errors='ignore')
    .rename(columns={'date': 'DATE', 'tavg': 'TAVG_C', 'conditions': 'DESCRIPTION'})
    .reset_index(drop=True)
)

# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in display() would additionally render a stray "None".
meteo.info()
display(meteo)

# Persist the processed frame.
meteo.to_parquet(name_file_output)