In [102]:
%matplotlib widget

In [103]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import matplotlib.dates as mdates

df =  pd.read_csv('dataset.csv')
df.sort_values(by = 'Date', inplace=True)
df.reset_index(inplace=True)
df.drop(labels = 'index',axis =1, inplace=True)
df['Data_Value']= df['Data_Value'].apply(lambda x:  x/10)

In [104]:
df_min = df[df['Element'] == 'TMIN']
df_min.drop(labels = 'Element', inplace=True, axis= 1)
df_min = df_min.groupby('Date').agg(min)

In [105]:
df_max = df[df['Element'] == 'TMAX']
df_max.drop(labels = 'Element', inplace=True, axis=1)
df_max = df_max.groupby('Date').agg(max)

In [106]:
df_max = df_max.merge(df_min, how = 'inner', left_index=True, right_index=True)

In [107]:
df_max.drop(labels=['ID_x','ID_y'], inplace=True, axis=1)
labels = ['T_max','T_min']
df_max.columns = labels

In [108]:
# make the index as datetime format
df_max.index = pd.to_datetime(df_max.index) 

In [109]:
df_lineplot=df_max.loc['2005':'2014'].copy(deep =True) #filters out everything after 2014
df_lineplot = df_lineplot.loc[~((df_lineplot.index.month == 2) & (df_lineplot.index.day == 29))] ##remove leap

In [110]:
df_lineplot.index = df_lineplot.index.map(lambda t: t.replace(year=1999))
df = df_lineplot
df['date']=pd.to_datetime(df.index)
df['year']=df['date'].dt.year
df['month']=df['date'].dt.month
df['day']=df['date'].dt.day
df_grouped=df.pivot_table(index=('month','day'),values=['T_max','T_min'],aggfunc=['max','min'])
df_grouped2 = pd.concat([df_grouped['max','T_max'], df_grouped['min','T_min']], axis=1)

In [111]:
series1 = df_grouped['max','T_max']
series1.reset_index(inplace=True,drop=True)
series1.index = pd.date_range(start='1999-01-01',end='1999-12-31',freq='D')
series2 = df_grouped['min','T_min']
series2.reset_index(inplace=True,drop=True)
series2.index = pd.date_range(start='1999-01-01',end='1999-12-31',freq='D')

In [112]:
plt.figure(figsize=(10,5))
plt.plot(series1,'-',linewidth=.5,color='red')
plt.plot(series2,'-', linewidth=.5, color='blue')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x1daa5f30880>]

In [113]:
plt.gca().fill_between(series1.index,series2,series1,alpha =.2,color = '#A7F158')
#specify x-axis (index in this case), and the two curves

<matplotlib.collections.PolyCollection at 0x1daa5f599d0>

In [114]:
plt.title('Record High and Low Temperature Observations')
plt.xticks(ticks = (pd.date_range(start='1999-01-01',end='1999-12-31',freq='3M')), labels=['31-jan','30-apr','31-jul','31-oct'])

([<matplotlib.axis.XTick at 0x1daa56263d0>,
  <matplotlib.axis.XTick at 0x1daa5626040>,
  <matplotlib.axis.XTick at 0x1daa606d850>,
  <matplotlib.axis.XTick at 0x1daa5f30af0>],
 [Text(10622.0, 0, '31-jan'),
  Text(10711.0, 0, '30-apr'),
  Text(10803.0, 0, '31-jul'),
  Text(10895.0, 0, '31-oct')])

In [115]:
plt.gca().margins(0.0) #stretch to the ends
plt.ylabel('Temperature $[°C]$')
plt.xlabel('Measurement Date')

Text(0.5, 31.652777777777764, 'Measurement Date')

In [116]:
df_2015 = df_max.loc['2015'].copy(deep = True)
df_2015.head()

Unnamed: 0_level_0,T_max,T_min
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-01,1.1,-13.3
2015-01-02,3.9,-12.2
2015-01-03,3.9,-6.7
2015-01-04,4.4,-8.8
2015-01-05,2.8,-15.5


In [117]:
df_2015['date']=pd.to_datetime(df_2015.index)
df_2015['year']=df_2015['date'].dt.year
df_2015['month']=df_2015['date'].dt.month
df_2015['day']=df_2015['date'].dt.day
df_2015=df_2015.pivot_table(index=('month','day'),values=['T_max','T_min'],aggfunc=['max','min'])
df_2015 = pd.concat([df_2015['max','T_max'], df_2015['min','T_min']], axis=1)

In [118]:
df_2015 =df_2015.merge(df_grouped2, right_index=True,left_index=True)
df_2015.columns = ['max 15','min 15','max 05-14', 'min 05-14']

In [119]:
df_final = df_2015[(df_2015['max 15'] > df_2015['max 05-14']) | (df_2015['min 15'] < df_2015['min 05-14'])]
df_final.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,max 15,min 15,max 05-14,min 05-14
month,day,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,5,2.8,-15.5,12.8,-15.0
1,11,0.6,-20.0,15.6,-18.3
2,3,-2.1,-23.8,8.9,-23.2
2,9,8.3,-11.6,7.8,-21.0
2,14,-2.7,-23.9,10.6,-21.7


In [120]:
rec_high_15 = df_final[df_final['max 15'] > df_final['max 05-14']]
rec_high_15 =  rec_high_15['max 15']
rec_high_15 = pd.concat({'1999': rec_high_15}, names=['Year'])
rec_high_15 =rec_high_15.to_frame()

In [121]:
rec_high_15['date']=(rec_high_15.index)
rec_high_15.reset_index(inplace=True)
rec_high_15.index = pd.to_datetime(rec_high_15[['Year','month','day']]) 

In [122]:
plt.plot(rec_high_15['max 15'],'1',color ='black',markersize =7)

[<matplotlib.lines.Line2D at 0x1daa5f41fa0>]

In [123]:
rec_low_15 = df_final[df_final['min 15'] < df_final['min 05-14']]
rec_low_15 = rec_low_15['min 15']
rec_low_15 = pd.concat({'1999': rec_high_15}, names=['year'])
rec_low_15.reset_index(inplace=True)

In [124]:
rec_low_15.index = pd.to_datetime(rec_low_15[['Year','month','day']]) 
plt.plot(rec_low_15['min 15'],'1',color ='black',markersize =7)

KeyError: 'min 15'