In [35]:
import pandas as pd
import datetime
import matplotlib.pyplot as plt
%matplotlib inline

In [36]:
# Load demand data
load_df = pd.read_csv('ieso_ga_master_dataset_allWeather_updated2020.csv')
load_df.rename(columns={'timestamp_eastern': 'ts'}, inplace=True)
load_df['ts'] = pd.to_datetime(load_df['ts'])
# load_df.head(48)

In [37]:
# Load forecasts
forecast2_df = pd.read_csv('ga_forecasts_top_2.csv', index_col=0)
forecast2_df['ts'] = pd.to_datetime(forecast2_df['ts'])
forecast2_df['ts_future'] = pd.to_datetime(forecast2_df['ts_future'])
# forecast2_df.head(48)

In [38]:
# Look at the forecasts at 10:00 am on each day
mask = forecast2_df.ts.dt.time == datetime.time(10, 0)
forecast2_df = forecast2_df[mask]
forecast2_df

Unnamed: 0,forecast,ts,ts_future,horizon_hr
216,0.0,2016-05-08 10:00:00,2016-05-08 11:00:00,1.0
217,0.0,2016-05-08 10:00:00,2016-05-08 12:00:00,2.0
218,0.0,2016-05-08 10:00:00,2016-05-08 13:00:00,3.0
219,0.0,2016-05-08 10:00:00,2016-05-08 14:00:00,4.0
220,0.0,2016-05-08 10:00:00,2016-05-08 15:00:00,5.0
...,...,...,...,...
171331,0.0,2020-02-29 10:00:00,2020-03-01 06:00:00,20.0
171332,0.0,2020-02-29 10:00:00,2020-03-01 07:00:00,21.0
171333,0.0,2020-02-29 10:00:00,2020-03-01 08:00:00,22.0
171334,0.0,2020-02-29 10:00:00,2020-03-01 09:00:00,23.0


In [39]:
# Find out the highest peak for each day in a season
load_df['rankings_per_day'] = load_df.groupby(['season', load_df.ts.dt.date]).adjusted_demand_MW.rank(ascending=False)
# load_df.head(48)


In [40]:
# Filter out the max value in each day and store as peaks_df
mask = load_df['rankings_per_day']==1.0
peaks_df = load_df[mask]
# peaks_df

In [41]:
# Filter out the top 1, 5, 10, 20 peaks in each season
# Reset index
peaks_df.reset_index(drop=True, inplace=True)

# Group by season and rank by adjusted_demand_MW
peaks_df['rankings_per_season'] = peaks_df.groupby(['season']).adjusted_demand_MW.rank(ascending=False)
peaks_df.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,adjusted_demand_MW,demand_MW,hour_ending_eastern,season,ts,timestamp_utc,altitude_Toronto,apparentTemperature_Toronto,azimuth_Toronto,cloudCover_Toronto,...,summary_Milton,temperature_Milton,uvIndex_Milton,visibility_Milton,windBearing_Milton,windGust_Milton,windSpeed_Milton,population_Milton,rankings_per_day,rankings_per_season
0,16100.0,16206.0,20,2013-2014,2013-05-01 20:00:00,2013-05-02 01:00:00,0.0,18.33,0.0,0.07,...,Clear,14.35,0.0,16.093,89.0,1.9,1.84,101715.0,1.0,303.0
1,16032.0,16460.0,21,2013-2014,2013-05-02 21:00:00,2013-05-03 02:00:00,0.0,16.82,0.0,0.02,...,Clear,13.06,0.0,16.093,72.0,2.47,2.04,101715.0,1.0,308.0
2,15428.0,15780.0,20,2013-2014,2013-05-03 20:00:00,2013-05-04 01:00:00,0.0,17.8,0.0,0.02,...,Clear,13.94,0.0,16.093,58.0,4.95,2.46,101715.0,1.0,335.0
3,14191.0,14473.0,21,2013-2014,2013-05-04 21:00:00,2013-05-05 02:00:00,0.0,16.43,0.0,0.01,...,Clear,11.99,0.0,16.093,54.0,2.02,1.22,101715.0,1.0,362.0
4,14713.0,14944.0,21,2013-2014,2013-05-05 21:00:00,2013-05-06 02:00:00,0.0,18.52,0.0,0.3,...,Clear,13.08,0.0,16.093,68.0,1.39,1.39,101715.0,1.0,352.0
5,16157.0,16481.0,20,2013-2014,2013-05-06 20:00:00,2013-05-07 01:00:00,0.0,15.81,0.0,0.05,...,Partly Cloudy,13.43,0.0,16.093,65.0,1.17,1.01,101715.0,1.0,300.0
6,16275.0,16648.0,21,2013-2014,2013-05-07 21:00:00,2013-05-08 02:00:00,0.0,18.49,0.0,0.47,...,Partly Cloudy,14.89,0.0,16.093,52.0,0.93,0.9,101715.0,1.0,295.0
7,16324.0,16694.0,20,2013-2014,2013-05-08 20:00:00,2013-05-09 01:00:00,0.0,16.84,0.0,0.98,...,Mostly Cloudy,14.06,0.0,15.875,81.0,1.1,0.88,101715.0,1.0,291.0
8,16239.0,16611.0,21,2013-2014,2013-05-09 21:00:00,2013-05-10 02:00:00,0.0,17.04,0.0,0.02,...,Clear,16.97,0.0,16.093,261.0,1.6,1.6,101715.0,1.0,297.0
9,15681.0,16032.0,20,2013-2014,2013-05-10 20:00:00,2013-05-11 01:00:00,0.0,13.28,0.0,1.0,...,Overcast,13.74,0.0,14.903,214.0,3.91,1.79,101715.0,1.0,325.0


In [42]:
# Filter the top 1 peak in each season
mask_top1 = peaks_df['rankings_per_season']==1.0
top1_df = peaks_df[mask_top1]
top1_df

Unnamed: 0,adjusted_demand_MW,demand_MW,hour_ending_eastern,season,ts,timestamp_utc,altitude_Toronto,apparentTemperature_Toronto,azimuth_Toronto,cloudCover_Toronto,...,summary_Milton,temperature_Milton,uvIndex_Milton,visibility_Milton,windBearing_Milton,windGust_Milton,windSpeed_Milton,population_Milton,rankings_per_day,rankings_per_season
77,24141.0,24794.0,17,2013-2014,2013-07-17 17:00:00,2013-07-17 22:00:00,29.0,37.42,272.0,0.43,...,Partly Cloudy,32.5,2.0,16.093,283.0,6.14,3.26,101715.0,1.0,1.0
616,21119.0,21595.0,19,2014-2015,2015-01-07 19:00:00,2015-01-08 00:00:00,0.0,-20.87,0.0,0.01,...,Clear,-18.32,0.0,16.093,272.0,2.53,1.92,101715.0,1.0,1.0
819,22016.0,22516.0,17,2015-2016,2015-07-28 17:00:00,2015-07-28 22:00:00,28.0,30.39,270.0,0.05,...,Partly Cloudy,30.38,2.0,16.093,264.0,2.12,1.89,101715.0,1.0,1.0
1197,22637.0,23100.0,18,2016-2017,2016-08-10 18:00:00,2016-08-10 23:00:00,14.0,34.03,277.0,0.3,...,Clear,33.5,0.0,16.093,268.0,2.35,1.62,101715.0,1.0,1.0
1608,21170.0,21786.0,17,2017-2018,2017-09-25 17:00:00,2017-09-25 22:00:00,12.0,29.22,257.0,0.56,...,Partly Cloudy,27.6,0.0,16.093,141.0,2.48,2.0,101715.0,1.0,1.0
1953,22551.0,23039.0,17,2018-2019,2018-09-05 17:00:00,2018-09-05 22:00:00,18.0,32.57,261.0,0.03,...,Humid,30.19,1.0,16.093,231.0,11.05,5.97,101715.0,1.0,1.0
2074,21275.0,21716.0,17,2019-2020,2019-07-05 17:00:00,2019-07-05 22:00:00,30.0,31.51,274.0,0.25,...,Possible Light Rain and Humid,29.58,2.0,16.093,185.0,4.05,2.87,101715.0,1.0,1.0


In [43]:
# Filter the top 5 peak in each season
mask_top5 = peaks_df['rankings_per_season']<=5.0
top5_df = peaks_df[mask_top5]
top5_df.head(10)

Unnamed: 0,adjusted_demand_MW,demand_MW,hour_ending_eastern,season,ts,timestamp_utc,altitude_Toronto,apparentTemperature_Toronto,azimuth_Toronto,cloudCover_Toronto,...,summary_Milton,temperature_Milton,uvIndex_Milton,visibility_Milton,windBearing_Milton,windGust_Milton,windSpeed_Milton,population_Milton,rankings_per_day,rankings_per_season
75,23107.0,23557.0,17,2013-2014,2013-07-15 17:00:00,2013-07-15 22:00:00,29.0,31.08,272.0,0.01,...,Clear,31.12,2.0,16.093,310.0,2.32,1.42,101715.0,1.0,5.0
76,23551.0,24224.0,17,2013-2014,2013-07-16 17:00:00,2013-07-16 22:00:00,29.0,32.59,272.0,0.02,...,Clear,31.77,2.0,16.093,277.0,2.51,2.35,101715.0,1.0,2.0
77,24141.0,24794.0,17,2013-2014,2013-07-17 17:00:00,2013-07-17 22:00:00,29.0,37.42,272.0,0.43,...,Partly Cloudy,32.5,2.0,16.093,283.0,6.14,3.26,101715.0,1.0,1.0
78,23504.0,24099.0,17,2013-2014,2013-07-18 17:00:00,2013-07-18 22:00:00,29.0,36.15,272.0,0.74,...,Partly Cloudy,32.63,2.0,16.093,271.0,6.41,2.97,101715.0,1.0,3.0
79,23234.0,23875.0,14,2013-2014,2013-07-19 14:00:00,2013-07-19 19:00:00,60.0,37.62,229.0,0.75,...,Partly Cloudy,32.91,6.0,16.093,238.0,9.6,5.63,101715.0,1.0,4.0
482,20967.0,21363.0,17,2014-2015,2014-08-26 17:00:00,2014-08-26 22:00:00,21.0,30.96,264.0,0.75,...,Mostly Cloudy,28.33,1.0,16.093,230.0,6.09,3.01,101715.0,1.0,3.0
492,20831.0,21123.0,17,2014-2015,2014-09-05 17:00:00,2014-09-05 22:00:00,18.0,32.31,261.0,0.4,...,Humid,28.84,1.0,16.093,207.0,7.96,4.42,101715.0,1.0,5.0
616,21119.0,21595.0,19,2014-2015,2015-01-07 19:00:00,2015-01-08 00:00:00,0.0,-20.87,0.0,0.01,...,Clear,-18.32,0.0,16.093,272.0,2.53,1.92,101715.0,1.0,1.0
659,20976.0,21494.0,20,2014-2015,2015-02-19 20:00:00,2015-02-20 01:00:00,0.0,-26.12,0.0,0.01,...,Clear,-21.03,0.0,14.705,291.0,5.16,2.84,101715.0,1.0,2.0
663,20862.0,21107.0,20,2014-2015,2015-02-23 20:00:00,2015-02-24 01:00:00,0.0,-24.46,0.0,0.01,...,Clear,-19.87,0.0,16.093,248.0,4.75,2.13,101715.0,1.0,4.0


In [44]:
# Filter the top 10 peak in each season
mask_top10 = peaks_df['rankings_per_season']<=10.0
top10_df = peaks_df[mask_top10]
top10_df.head(10)

Unnamed: 0,adjusted_demand_MW,demand_MW,hour_ending_eastern,season,ts,timestamp_utc,altitude_Toronto,apparentTemperature_Toronto,azimuth_Toronto,cloudCover_Toronto,...,summary_Milton,temperature_Milton,uvIndex_Milton,visibility_Milton,windBearing_Milton,windGust_Milton,windSpeed_Milton,population_Milton,rankings_per_day,rankings_per_season
54,22055.0,22662.0,17,2013-2014,2013-06-24 17:00:00,2013-06-24 22:00:00,30.0,32.1,275.0,0.56,...,Partly Cloudy,29.81,2.0,16.093,246.0,6.0,3.13,101715.0,1.0,10.0
75,23107.0,23557.0,17,2013-2014,2013-07-15 17:00:00,2013-07-15 22:00:00,29.0,31.08,272.0,0.01,...,Clear,31.12,2.0,16.093,310.0,2.32,1.42,101715.0,1.0,5.0
76,23551.0,24224.0,17,2013-2014,2013-07-16 17:00:00,2013-07-16 22:00:00,29.0,32.59,272.0,0.02,...,Clear,31.77,2.0,16.093,277.0,2.51,2.35,101715.0,1.0,2.0
77,24141.0,24794.0,17,2013-2014,2013-07-17 17:00:00,2013-07-17 22:00:00,29.0,37.42,272.0,0.43,...,Partly Cloudy,32.5,2.0,16.093,283.0,6.14,3.26,101715.0,1.0,1.0
78,23504.0,24099.0,17,2013-2014,2013-07-18 17:00:00,2013-07-18 22:00:00,29.0,36.15,272.0,0.74,...,Partly Cloudy,32.63,2.0,16.093,271.0,6.41,2.97,101715.0,1.0,3.0
79,23234.0,23875.0,14,2013-2014,2013-07-19 14:00:00,2013-07-19 19:00:00,60.0,37.62,229.0,0.75,...,Partly Cloudy,32.91,6.0,16.093,238.0,9.6,5.63,101715.0,1.0,4.0
120,22279.0,22833.0,16,2013-2014,2013-08-29 16:00:00,2013-08-29 21:00:00,31.0,30.02,251.0,0.27,...,Clear,28.85,2.0,16.093,87.0,2.24,1.33,101715.0,1.0,6.0
133,22092.0,22669.0,15,2013-2014,2013-09-11 15:00:00,2013-09-11 20:00:00,36.0,33.54,235.0,0.6,...,Mostly Cloudy,29.06,3.0,16.093,248.0,6.45,2.95,101715.0,1.0,9.0
251,22182.0,22774.0,19,2013-2014,2014-01-07 19:00:00,2014-01-08 00:00:00,0.0,-25.55,0.0,0.7,...,Mostly Cloudy,-17.97,0.0,16.093,238.0,10.13,5.46,101715.0,1.0,7.0
265,22127.0,22737.0,19,2013-2014,2014-01-22 19:00:00,2014-01-23 00:00:00,0.0,-13.22,0.0,0.46,...,Mostly Cloudy,-13.73,0.0,12.072,218.0,0.9,0.9,101715.0,1.0,8.0


In [45]:
# Filter the top 20 peak in each season
mask_top20 = peaks_df['rankings_per_season']<=20.0
top20_df = peaks_df[mask_top20]
top20_df.head(10)

Unnamed: 0,adjusted_demand_MW,demand_MW,hour_ending_eastern,season,ts,timestamp_utc,altitude_Toronto,apparentTemperature_Toronto,azimuth_Toronto,cloudCover_Toronto,...,summary_Milton,temperature_Milton,uvIndex_Milton,visibility_Milton,windBearing_Milton,windGust_Milton,windSpeed_Milton,population_Milton,rankings_per_day,rankings_per_season
54,22055.0,22662.0,17,2013-2014,2013-06-24 17:00:00,2013-06-24 22:00:00,30.0,32.1,275.0,0.56,...,Partly Cloudy,29.81,2.0,16.093,246.0,6.0,3.13,101715.0,1.0,10.0
56,21660.0,22275.0,17,2013-2014,2013-06-26 17:00:00,2013-06-26 22:00:00,30.0,26.41,275.0,0.67,...,Partly Cloudy,26.93,2.0,16.093,219.0,3.58,2.15,101715.0,1.0,19.0
75,23107.0,23557.0,17,2013-2014,2013-07-15 17:00:00,2013-07-15 22:00:00,29.0,31.08,272.0,0.01,...,Clear,31.12,2.0,16.093,310.0,2.32,1.42,101715.0,1.0,5.0
76,23551.0,24224.0,17,2013-2014,2013-07-16 17:00:00,2013-07-16 22:00:00,29.0,32.59,272.0,0.02,...,Clear,31.77,2.0,16.093,277.0,2.51,2.35,101715.0,1.0,2.0
77,24141.0,24794.0,17,2013-2014,2013-07-17 17:00:00,2013-07-17 22:00:00,29.0,37.42,272.0,0.43,...,Partly Cloudy,32.5,2.0,16.093,283.0,6.14,3.26,101715.0,1.0,1.0
78,23504.0,24099.0,17,2013-2014,2013-07-18 17:00:00,2013-07-18 22:00:00,29.0,36.15,272.0,0.74,...,Partly Cloudy,32.63,2.0,16.093,271.0,6.41,2.97,101715.0,1.0,3.0
79,23234.0,23875.0,14,2013-2014,2013-07-19 14:00:00,2013-07-19 19:00:00,60.0,37.62,229.0,0.75,...,Partly Cloudy,32.91,6.0,16.093,238.0,9.6,5.63,101715.0,1.0,4.0
112,21696.0,22179.0,17,2013-2014,2013-08-21 17:00:00,2013-08-21 22:00:00,23.0,27.97,265.0,0.6,...,Mostly Cloudy,27.84,1.0,16.093,209.0,5.97,3.63,101715.0,1.0,18.0
119,21814.0,22563.0,17,2013-2014,2013-08-28 17:00:00,2013-08-28 22:00:00,21.0,26.16,263.0,0.03,...,Clear,25.04,1.0,16.093,98.0,2.33,2.01,101715.0,1.0,16.0
120,22279.0,22833.0,16,2013-2014,2013-08-29 16:00:00,2013-08-29 21:00:00,31.0,30.02,251.0,0.27,...,Clear,28.85,2.0,16.093,87.0,2.24,1.33,101715.0,1.0,6.0


In [46]:
# Summarize all results in a dataframe
top1_results = top1_df[['adjusted_demand_MW', 'demand_MW', 'season','ts','rankings_per_day','rankings_per_season']]
top1_results
# Append forecasts for each of the peak
forecast2_results = forecast2_df[['ts_future','forecast']]
forecast2_results = forecast2_results.rename(columns={'ts_future':'ts'})
forecast2_results
top1_results = pd.merge(top1_results, forecast2_results, on='ts')
top1_results

Unnamed: 0,adjusted_demand_MW,demand_MW,season,ts,rankings_per_day,rankings_per_season,forecast
0,22637.0,23100.0,2016-2017,2016-08-10 18:00:00,1.0,1.0,0.278592
1,21170.0,21786.0,2017-2018,2017-09-25 17:00:00,1.0,1.0,0.770833
2,22551.0,23039.0,2018-2019,2018-09-05 17:00:00,1.0,1.0,0.686901
3,21275.0,21716.0,2019-2020,2019-07-05 17:00:00,1.0,1.0,0.662037


In [47]:
# Summarize all results in a dataframe
top5_results = top5_df[['adjusted_demand_MW', 'demand_MW', 'season','ts','rankings_per_day','rankings_per_season']]
# Append forecasts for each of the peak
forecast2_results = forecast2_df[['ts_future','forecast']]
forecast2_results = forecast2_results.rename(columns={'ts_future':'ts'})
top5_results = pd.merge(top5_results, forecast2_results, on='ts')
top5_results.head(10)

Unnamed: 0,adjusted_demand_MW,demand_MW,season,ts,rankings_per_day,rankings_per_season,forecast
0,22188.0,22659.0,2016-2017,2016-07-13 18:00:00,1.0,4.0,0.253472
1,22637.0,23100.0,2016-2017,2016-08-10 18:00:00,1.0,1.0,0.278592
2,22318.0,22812.0,2016-2017,2016-08-11 17:00:00,1.0,3.0,0.767677
3,21904.0,22402.0,2016-2017,2016-08-12 17:00:00,1.0,5.0,0.838235
4,22527.0,23213.0,2016-2017,2016-09-07 17:00:00,1.0,2.0,0.738095
5,20702.0,21168.0,2017-2018,2017-06-12 17:00:00,1.0,3.0,0.536
6,20122.0,20536.0,2017-2018,2017-07-19 18:00:00,1.0,5.0,0.258278
7,21170.0,21786.0,2017-2018,2017-09-25 17:00:00,1.0,1.0,0.770833
8,21039.0,21542.0,2017-2018,2017-09-26 17:00:00,1.0,2.0,0.872832
9,20238.0,20906.0,2017-2018,2018-01-05 18:00:00,1.0,4.0,0.415094


In [48]:
# Summarize all results in a dataframe
top10_results = top10_df[['adjusted_demand_MW', 'demand_MW', 'season','ts','rankings_per_day','rankings_per_season']]
# Append forecasts for each of the peak
forecast2_results = forecast2_df[['ts_future','forecast']]
forecast2_results = forecast2_results.rename(columns={'ts_future':'ts'})
top10_results = pd.merge(top10_results, forecast2_results, on='ts')
top10_results.head(10)

Unnamed: 0,adjusted_demand_MW,demand_MW,season,ts,rankings_per_day,rankings_per_season,forecast
0,22188.0,22659.0,2016-2017,2016-07-13 18:00:00,1.0,4.0,0.253472
1,21665.0,22024.0,2016-2017,2016-07-22 17:00:00,1.0,7.0,0.775641
2,21868.0,22312.0,2016-2017,2016-08-04 17:00:00,1.0,6.0,0.753247
3,21427.0,21900.0,2016-2017,2016-08-05 12:00:00,1.0,9.0,0.0
4,22637.0,23100.0,2016-2017,2016-08-10 18:00:00,1.0,1.0,0.278592
5,22318.0,22812.0,2016-2017,2016-08-11 17:00:00,1.0,3.0,0.767677
6,21904.0,22402.0,2016-2017,2016-08-12 17:00:00,1.0,5.0,0.838235
7,21641.0,22150.0,2016-2017,2016-09-06 17:00:00,1.0,8.0,0.640288
8,22527.0,23213.0,2016-2017,2016-09-07 17:00:00,1.0,2.0,0.738095
9,21324.0,21817.0,2016-2017,2016-09-08 17:00:00,1.0,10.0,0.774074


In [49]:
# Summarize all results in a dataframe
top20_results = top20_df[['adjusted_demand_MW', 'demand_MW', 'season','ts','rankings_per_day','rankings_per_season']]
# Append forecasts for each of the peak
forecast2_results = forecast2_df[['ts_future','forecast']]
forecast2_results = forecast2_results.rename(columns={'ts_future':'ts'})
top20_results = pd.merge(top20_results, forecast2_results, on='ts')
top20_results

Unnamed: 0,adjusted_demand_MW,demand_MW,season,ts,rankings_per_day,rankings_per_season,forecast
0,21290.0,21505.0,2016-2017,2016-06-20 17:00:00,1.0,12.5,0.766447
1,21232.0,21582.0,2016-2017,2016-06-27 17:00:00,1.0,14.0,0.776119
2,21055.0,21382.0,2016-2017,2016-07-12 18:00:00,1.0,18.0,0.216667
3,22188.0,22659.0,2016-2017,2016-07-13 18:00:00,1.0,4.0,0.253472
4,21665.0,22024.0,2016-2017,2016-07-22 17:00:00,1.0,7.0,0.775641
...,...,...,...,...,...,...,...
65,21147.0,21480.0,2019-2020,2019-07-20 17:00:00,1.0,2.0,0.655022
66,21068.0,21791.0,2019-2020,2019-07-29 17:00:00,1.0,3.0,0.809701
67,20536.0,21000.0,2019-2020,2019-08-13 17:00:00,1.0,9.0,0.827160
68,20533.0,21145.0,2019-2020,2019-08-20 17:00:00,1.0,10.0,0.773414


In [50]:
# Calculate hit rate for each season
top1_successes = top1_results.groupby('season').forecast.apply(lambda x: (x > 0).sum()).rename('successes')
top1_successes

season
2016-2017    1
2017-2018    1
2018-2019    1
2019-2020    1
Name: successes, dtype: int64

In [51]:
top1_hit_rate = top1_successes.apply(lambda x : x/1.).rename('hit rate')
top1_hit_rate

season
2016-2017    1.0
2017-2018    1.0
2018-2019    1.0
2019-2020    1.0
Name: hit rate, dtype: float64

In [52]:
top5_successes = top5_results.groupby('season').forecast.apply(lambda x: (x > 0).sum()).rename('successes')
top5_successes

season
2016-2017    5
2017-2018    5
2018-2019    4
2019-2020    4
Name: successes, dtype: int64

In [53]:
top5_hit_rate = top5_successes.apply(lambda x : x/5.).rename('hit rate')
top5_hit_rate

season
2016-2017    1.0
2017-2018    1.0
2018-2019    0.8
2019-2020    0.8
Name: hit rate, dtype: float64

In [54]:
top10_successes = top10_results.groupby('season').forecast.apply(lambda x: (x > 0).sum()).rename('successes')
top10_successes

season
2016-2017     9
2017-2018    10
2018-2019     7
2019-2020     9
Name: successes, dtype: int64

In [55]:
top10_hit_rate = top10_successes.apply(lambda x : x/10.).rename('hit rate')
top10_hit_rate

season
2016-2017    0.9
2017-2018    1.0
2018-2019    0.7
2019-2020    0.9
Name: hit rate, dtype: float64

In [56]:
top20_successes = top20_results.groupby('season').forecast.apply(lambda x: (x > 0).sum()).rename('successes')
top20_successes

season
2016-2017    17
2017-2018    19
2018-2019    14
2019-2020     9
Name: successes, dtype: int64

In [57]:
top20_hit_rate = top20_successes.apply(lambda x : x/20.).rename('hit rate')
top20_hit_rate

season
2016-2017    0.85
2017-2018    0.95
2018-2019    0.70
2019-2020    0.45
Name: hit rate, dtype: float64