In [10]:
import xarray as xr
import pandas as pd
from datetime import datetime

# Forecast files to load: one file per day in `days` of the given year/month.
year = 2020
month = 12
days = list(range(5,11))

df_list = []
for day in days:
    # Zero-pad the month as well as the day so the name stays YYYYMMDD for
    # months 1-9 (unchanged for month = 12).
    file = f'data/{year}{month:02}{day:02}_tmin_15days_forecast.nc'

    # The context manager closes the file once its contents have been
    # converted to an in-memory DataFrame.
    with xr.open_dataset(file) as ds:
        df = ds.to_dataframe()

    # Keep only the rows whose lat/lon index levels fall on the selected grid points.
    in_lat = df.index.get_level_values('lat').isin([45.0, 46.0, 47.0])
    in_lon = df.index.get_level_values('lon').isin([4.0, 5.0, 6.0])
    lyon_area_df = df[in_lat & in_lon]

    # Turn the index levels into ordinary columns and discard `stepType`.
    temp_df = lyon_area_df.reset_index().drop(columns='stepType')
    print(temp_df.head())

    # Tag every row with the date encoded in the file name.
    prediction_date = datetime(year, month, day)
    temp_df['prediction_date'] = prediction_date
    df_list.append(temp_df)

# Stack the per-file frames; row labels restart at 0 for each file.
aggregated_df = pd.concat(df_list, axis=0)

# types conversion
numeric_colums = ['tmin', 'startStep', 'endStep']
aggregated_df[numeric_colums] = aggregated_df[numeric_colums].apply(pd.to_numeric)

# convert to datetime (one vectorised call on the whole column)
aggregated_df['time'] = pd.to_datetime(aggregated_df['time'], format='%Y-%m-%d')

# .size is the number of cells (rows x columns), not the number of rows.
print(aggregated_df.size)

# save intermediate results
aggregated_df.to_csv('stage_data/all_days.csv', sep=',', encoding='utf-8',index=False)

    lat  lon       time        tmin  startStep  endStep
0  45.0  4.0 2020-12-05  267.979614        0.0     24.0
1  45.0  4.0 2020-12-06  269.063660       24.0     48.0
2  45.0  4.0 2020-12-07  269.149109       48.0     72.0
3  45.0  4.0 2020-12-08  268.851990       72.0     96.0
4  45.0  4.0 2020-12-09  266.737610       96.0    120.0
    lat  lon       time        tmin  startStep  endStep
0  45.0  4.0 2020-12-06  268.945221        0.0     24.0
1  45.0  4.0 2020-12-07  268.537292       24.0     48.0
2  45.0  4.0 2020-12-08  269.277618       48.0     72.0
3  45.0  4.0 2020-12-09  268.295929       72.0     96.0
4  45.0  4.0 2020-12-10  268.642853       96.0    120.0
    lat  lon       time        tmin  startStep  endStep
0  45.0  4.0 2020-12-07  269.501678        0.0     24.0
1  45.0  4.0 2020-12-08  269.201233       24.0     48.0
2  45.0  4.0 2020-12-09  268.853973       48.0     72.0
3  45.0  4.0 2020-12-10  268.922058       72.0     96.0
4  45.0  4.0 2020-12-11  270.859314       96.0  

In [11]:
# Keep only the rows of the aggregated frame whose `time` value is 2020-12-10.
target_day = '2020-12-10'
on_target_day = aggregated_df['time'] == target_day
filtered_results = aggregated_df.loc[on_target_day]

# .size counts cells (rows x columns), not rows.
print(filtered_results.size)
filtered_results.head()

378


Unnamed: 0,lat,lon,time,tmin,startStep,endStep,prediction_date
5,45.0,4.0,2020-12-10,267.317566,120.0,144.0,2020-12-05
20,45.0,5.0,2020-12-10,272.136536,120.0,144.0,2020-12-05
35,45.0,6.0,2020-12-10,262.137573,120.0,144.0,2020-12-05
50,46.0,4.0,2020-12-10,270.147583,120.0,144.0,2020-12-05
65,46.0,5.0,2020-12-10,271.356537,120.0,144.0,2020-12-05


In [12]:
# Lowest `tmin` among the filtered rows, reported separately for each
# `prediction_date`. groupby makes a single pass over the frame instead of
# re-filtering it once per date; sort=False keeps the dates in order of first
# appearance.
min_by_prediction_date = (
    filtered_results.groupby('prediction_date', sort=False)['tmin'].min()
)
for prediction_date, min_temperature_predicted in min_by_prediction_date.items():
    # `prediction_date` is the date taken from the forecast file name, so the
    # message reports it as the day the prediction was made. Only the calendar
    # date is printed, without the time-of-day part.
    print(f'Minimum temperature predicted on {pd.Timestamp(prediction_date).date()} is {min_temperature_predicted}')

Miminum temperature expected for day 2020-12-05T00:00:00.000000000 is 262.1375732421875
Miminum temperature expected for day 2020-12-06T00:00:00.000000000 is 263.452880859375
Miminum temperature expected for day 2020-12-07T00:00:00.000000000 is 264.1320495605469
Miminum temperature expected for day 2020-12-08T00:00:00.000000000 is 263.95367431640625
Miminum temperature expected for day 2020-12-09T00:00:00.000000000 is 263.8420715332031
Miminum temperature expected for day 2020-12-10T00:00:00.000000000 is 263.6176452636719


In [20]:
# Show the first five rows of the filtered frame.
filtered_results.head(5)

Unnamed: 0,lat,lon,time,tmin,startStep,endStep,prediction_date
5,45.0,4.0,2020-12-10,267.317566,120.0,144.0,2020-12-05
20,45.0,5.0,2020-12-10,272.136536,120.0,144.0,2020-12-05
35,45.0,6.0,2020-12-10,262.137573,120.0,144.0,2020-12-05
50,46.0,4.0,2020-12-10,270.147583,120.0,144.0,2020-12-05
65,46.0,5.0,2020-12-10,271.356537,120.0,144.0,2020-12-05


In [24]:
# Print the whole `tmin` column of the filtered frame, one line per row label.
print(filtered_results.loc[:, 'tmin'])

5      267.317566
20     272.136536
35     262.137573
50     270.147583
65     271.356537
80     268.266541
95     270.236511
110    270.516541
125    267.907593
4      268.642853
19     272.632721
34     263.452881
49     271.332855
64     272.432861
79     269.612854
94     272.272858
109    272.232727
124    269.432861
3      268.922058
18     272.482056
33     264.132050
48     271.822052
63     272.402039
78     269.332062
93     272.499695
108    272.282043
123    269.262054
2      268.623657
17     272.023682
32     263.953674
47     271.683655
62     271.477356
77     269.003662
92     270.817352
107    271.933655
122    268.813660
1      268.602081
16     272.162079
31     263.842072
46     272.952087
61     272.142090
76     268.222076
91     273.483063
106    272.103058
121    269.192078
0      268.397644
15     271.967651
30     263.617645
45     272.447662
60     271.803833
75     267.947662
90     271.653809
105    271.263824
120    268.927643
Name: tmin, dtype: float32


In [26]:
import plotly.express as px
import plotly.graph_objects as go


# Scatter of tmin against prediction_date; the marker colour also encodes tmin.
fig = px.scatter(
    data_frame=filtered_results,
    x='prediction_date',
    y='tmin',
    color='tmin',
)
fig.show()

In [35]:
import numpy as np

# Per-prediction_date summary of tmin: mean, standard deviation and row count,
# plus a confidence interval around the mean.
# `ci` is the half-width 1.96 * std / sqrt(count).
# NOTE(review): 1.96 is the normal-approximation multiplier; for small groups
# a Student-t quantile would give a wider interval - confirm which is intended.
date_grouped = (
    filtered_results
    .groupby('prediction_date')['tmin']
    .agg(['mean', 'std', 'count'])
    .assign(
        ci=lambda stats: 1.96 * stats['std'] / np.sqrt(stats['count']),
        ci_lower=lambda stats: stats['mean'] - stats['ci'],
        ci_upper=lambda stats: stats['mean'] + stats['ci'],
    )
)
date_grouped.head()

                       mean       std  count
prediction_date                             
2020-12-05       268.891449  3.001784      9
2020-12-06       270.227264  2.947260      9
2020-12-07       270.348450  2.788350      9
2020-12-08       269.814484  2.599576      9
2020-12-09       270.300079  3.110670      9


Unnamed: 0_level_0,mean,std,count,ci,ci_lower,ci_upper
prediction_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-12-05,268.891449,3.001784,9,1.961166,266.930283,270.852615
2020-12-06,270.227264,2.94726,9,1.925543,268.301721,272.152808
2020-12-07,270.34845,2.78835,9,1.821722,268.526728,272.170172
2020-12-08,269.814484,2.599576,9,1.69839,268.116094,271.512873
2020-12-09,270.300079,3.11067,9,2.032304,268.267775,272.332384


In [None]:
# Line plot of the mean tmin per prediction_date with the
# ci_lower..ci_upper band shaded around it.
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
# date_grouped is indexed by prediction_date (the groupby key), so the index
# supplies the x values.
x = date_grouped.index
ax.plot(x, date_grouped['mean'])
ax.fill_between(
    x, date_grouped['ci_lower'], date_grouped['ci_upper'], color='b', alpha=.15)
ax.set_xlabel('prediction_date')
ax.set_ylabel('tmin')
# NOTE(review): anchoring the y axis at 0 compresses the band when tmin values
# are far from zero - confirm this limit is wanted for this data.
ax.set_ylim(ymin=0)