In [9]:
#Facebook's Prophet library already included in Colab
#!pip install fbprophet
#Install World Weather Online library and import other dependencies:
#!pip install wwo-hist
import pandas as pd
import numpy as np
from fbprophet import Prophet
from fbprophet.diagnostics import cross_validation
from datetime import datetime, timedelta
import requests
from wwo_hist import retrieve_hist_data
from datetime import date
import pytz
import pickle
%matplotlib inline

In [10]:
pkl_path = "jgh-prophet-19-12-22.pkl"

# read the Prophet model object
with open(pkl_path, 'rb') as f:
    m = pickle.load(f)

In [11]:
m

<fbprophet.forecaster.Prophet at 0x120e06090>

In [21]:
import wwofuture

In [24]:
frequency = 24
api_key = '3d51d04f983a478e90f164916191012'
location_list = ['Montreal']
retrieve_future_data(api_key,location_list,frequency,location_label = False, export_csv = True, store_df = False)



Retrieving weather data for Montreal


Time elapsed (hh:mm:ss.ms) 0:00:00.577126


export Montreal completed!




[]

In [37]:
import urllib
import urllib.parse
import json
import pandas as pd
from datetime import datetime

##################################
# function to unnest json for each month
def extract_json_weather_data(data):
    num_days = len(data)
    print(num_days)
    # initialize df_month to store return data
    df_forecast = pd.DataFrame()
    for i in range(num_days):
        # extract this day
        d = data[i]
        # astronomy data is the same for the whole day
        astr_df = pd.DataFrame(d['astronomy'])
        # hourly data; temperature for each hour of the day
        hourly_df = pd.DataFrame(d['hourly'])
        # this wanted_key will be duplicated and use 'ffill' to fill up the NAs
        wanted_keys = ['date', 'maxtempC', 'mintempC', 'totalSnow_cm', 'sunHour', 'uvIndex'] # The keys you want
        subset_d = dict((k, d[k]) for k in wanted_keys if k in d)
        this_df = pd.DataFrame(subset_d,index=[0])        
        df = pd.concat([this_df.reset_index(drop=True), astr_df], axis=1)
        # concat selected astonomy columns with hourly data
        df = pd.concat([df,hourly_df], axis=1)
        df = df.fillna(method='ffill')
        # make date_time columm to proper format
        # fill leading zero for hours to 4 digits (0000-2400 hr)
        df['time'] = df['time'].apply(lambda x: x.zfill(4))
        # keep only first 2 digit (00-24 hr) 
        df['time'] = df['time'].str[:2]
        # convert to pandas datetime
        df['ds'] = pd.to_datetime(df['date'] + ' ' + df['time'])
        # keep only interested columns
        col_to_keep = ['ds', 'maxtempC', 'mintempC', 'totalSnow_cm', 'sunHour', 'uvIndex', 
               'moon_illumination', 
               'DewPointC',  'FeelsLikeC', 'HeatIndexC', 'WindChillC', 'WindGustKmph',
               'cloudcover', 'humidity', 'precipMM', 'pressure', 'tempC', 'visibility',
               'winddirDegree', 'windspeedKmph']
        df = df[col_to_keep]
        df_forecast = pd.concat([df_forecast,df])
    #return(df_month)
    return(df_forecast)

##################################
#function to retrive data by date range and location
#default frequency = 1 hr
#each month costs 1 request (free trial 500 requests/key, as of 30-May-2019)
def retrieve_weather_data(api_key,location,frequency):
    
    #start_time = datetime.now()
    
    # create list of months, convert to month begins (first day of each month)
    #list_mon_begin= pd.date_range(start_date,end_date, freq='1M')-pd.offsets.MonthBegin(1)
    # convert to Series and append first day of the last month
    #list_mon_begin = pd.concat([pd.Series(list_mon_begin), pd.Series(pd.to_datetime(end_date,infer_datetime_format=True).replace(day=1))], ignore_index=True)
    # change the begin date to start_date
    #list_mon_begin[0] = pd.to_datetime(start_date,infer_datetime_format=True)
    
    # create list of months, convert to month ends (last day of each month)
    #list_mon_end = pd.date_range(start_date,end_date, freq='1M')-pd.offsets.MonthEnd(0)
    # convert to Series and append the end_date
    #list_mon_end = pd.concat([pd.Series(list_mon_end), pd.Series(pd.to_datetime(end_date,infer_datetime_format=True))], ignore_index=True)
    
    # count number of months to be retrieved
    #total_months = len(list_mon_begin)

    # initialize df_hist to store return data
    weather_df = pd.DataFrame()
    #for m in range(total_months):
        
        #start_d =str(list_mon_begin[m])[:10]
        #end_d =str(list_mon_end[m])[:10]
        #print('Currently retrieving data for '+location+': from '+start_d+' to '+end_d)
        
    url_page = 'http://api.worldweatheronline.com/premium/v1/weather.ashx?key='+api_key+'&q='+location+'&format=json&num_of_days=15'+'&tp='+str(frequency)
    json_page = urllib.request.urlopen(url_page)
    json_data = json.loads(json_page.read().decode())
    data= json_data['data']['weather']
       # call function to extract json object
    weather_df = extract_json_weather_data(data)
    #df_hist = pd.concat([df_hist,df_this_month])
        
    #time_elapsed = datetime.now() - start_time
    #print('Time elapsed (hh:mm:ss.ms) {}'.format(time_elapsed))
    return(weather_df)

##################################
#main function to retrive the data by location list
def retrieve_future_data(api_key,location_list,frequency,location_label = False, export_csv = True, store_df = False):
    result_list = []
    for location in location_list:
        #print('\n\nRetrieving weather data for '+location+'\n\n')
        df_this_city = retrieve_weather_data(api_key,location,frequency)
        
        if (location_label == True):
        # add city name as prefix to the colnames
            df_this_city = df_this_city.add_prefix(location+'_')
            df_this_city.columns.values[0] = 'date_time'    
        
        if (export_csv == True):
            df_this_city.to_csv('./'+location+'.csv', header=True, index=False) 
            #print('\n\nexport '+location+' completed!\n\n')
        
        if (store_df == True):
        # save result as object in the work space
            result_list.append(df_this_city)
    return(result_list)
##################################

In [38]:
frequency = 24
api_key = '3d51d04f983a478e90f164916191012'
location_list = ['Montreal']
retrieve_future_data(api_key,location_list,frequency)

14


[]

In [39]:
weather_forecast = pd.read_csv('Montreal.csv')
weather_forecast

Unnamed: 0,ds,maxtempC,mintempC,totalSnow_cm,sunHour,uvIndex,uvIndex.1,moon_illumination,DewPointC,FeelsLikeC,...,WindChillC,WindGustKmph,cloudcover,humidity,precipMM,pressure,tempC,visibility,winddirDegree,windspeedKmph
0,2019-12-23,5,-1,0.0,3.3,1,1,14,0,-2,...,-2,31,78,84,0.0,1013,5,10,249,19
1,2019-12-24,-2,-4,0.0,8.7,1,1,7,-10,-6,...,-6,8,32,59,0.0,1024,-2,10,246,6
2,2019-12-25,-3,-5,0.4,3.3,1,1,0,-8,-8,...,-8,14,76,73,0.1,1021,-3,10,49,10
3,2019-12-26,-4,-6,0.0,0.0,1,1,0,-9,-10,...,-10,19,37,72,0.0,1028,-4,10,40,13
4,2019-12-27,-3,-10,0.0,0.0,1,1,0,-6,-8,...,-8,14,85,79,1.9,1025,-3,7,78,9
5,2019-12-28,3,-5,0.0,0.0,11,11,8,0,-1,...,-1,10,88,96,0.9,1017,3,2,216,6
6,2019-12-29,5,1,0.0,0.0,11,11,15,1,-1,...,-1,26,100,93,41.5,993,5,7,120,15
7,2019-12-30,-2,-7,0.0,0.0,11,11,22,-7,-9,...,-9,32,78,70,0.3,992,-2,9,269,25
8,2019-12-31,-5,-10,0.0,0.0,11,11,26,-11,-13,...,-13,19,60,75,0.8,1006,-5,8,237,14
9,2020-01-01,0,-4,0.0,0.0,11,11,37,-4,-5,...,-5,14,97,89,2.3,1006,0,4,154,9
