In [1]:
import urllib
import json
import pandas as pd
import numpy as np
import warnings
import pickle
from datetime import datetime
from datetime import timedelta
from keys import client_id, client_secret, app_id
warnings.filterwarnings('ignore')

In [148]:
DIAMOND_PRINCESS_COORD = (35.4437, 139.638)
BARBADOS_BELIZE_COORD = (13.1939, -59.5432)
CONGO_BRAZZAVILLE_KINSHASA_COORD = (-4.0383, 21.7587)
RADIUS = 350
LOG_PATH = 'weather_logs/'
AUGMENTED_PATH = '../augmented_datasets/'

###### Load datasets
Notes on data: 
1. Notice that some of the dates on the hopkins dataset appear in different format in ecxel, they are infect all in the same format: %-m/%-d/20
2. Column names are modifyed to %-m/%-d/20 to fit weather API queries
3. All three rows reffering to 'Diamnond princess' have been removed
4. The location (0,0) has been removed
5. Two places with the same coordinate have been slightly modified to accomedate indexing
4. source: https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series

In [168]:
confirmed_time_data = pd.read_csv('../original_datasets/hopkins_confirmed_time_series0327.csv')

confirmed_time_data = confirmed_time_data.rename({'Country/Region': 'Country_Region',\
                                                  'Province/State': 'Province_State'}, axis=1)

confirmed_time_data.drop(confirmed_time_data[confirmed_time_data['Lat']\
                                             == DIAMOND_PRINCESS_COORD[0]].index, inplace=True)

confirmed_time_data.loc[confirmed_time_data['Country_Region']=='Barbados', 'Lat']\
                        = BARBADOS_BELIZE_COORD[0] + 0.00001

confirmed_time_data.loc[confirmed_time_data['Country_Region']=='Congo (Brazzaville)', 'Lat']\
                        = CONGO_BRAZZAVILLE_KINSHASA_COORD[0] + 0.00001

confirmed_time_data.drop(confirmed_time_data.loc[confirmed_time_data['Lat'] == 0].index, inplace=True)

# death_time_data = pd.read_csv('../original_datasets/hopkins_death_time_series0323.csv')
# death_time_data = confirmed_time_data.rename({'Country/Region': 'Country_Region',\
#                                               'Province/State': 'Province_State'}, axis=1)
# line_data = pd.read_csv('COVID19_open_line_list.csv')
# line_data = line_data.rename({'latitude': 'Lat', 'longitude': 'Long',\
#                               'country': 'Country_Region', 'province': 'Province_State'}, axis=1)
# confirmed_time_data.drop(NOISE_COORD, level=0, inplace=True)

###### Setup multi-index

In [381]:
coords = [x for x in zip(confirmed_time_data.pop('Lat'), confirmed_time_data.pop('Long'))]
confirmed_time_data.index = coords

for param in ['avg_m_wind', 'avg_m_precip', 'avg_m_RH', 'avg_m_tmp',]:
    confirmed_time_data.insert(2, param, np.nan)
confirmed_time_data.insert(6, 'weather', '')

columns = len(confirmed_time_data.columns)

for index in confirmed_time_data.index:
    for param in ['avg_d_tmp', 'avg_d_RH', 'avg_d_wind', 'avg_d_precip']:
        confirmed_time_data = confirmed_time_data.append\
                (pd.Series([np.nan]*6+[param]+[np.nan]*(columns-7), index=confirmed_time_data.columns, name=index))

confirmed_time_data.set_index('weather', append=True, inplace=True)
confirmed_time_data = confirmed_time_data.sort_index()
dates = {date: date + '20' for date in confirmed_time_data.columns[6:]}
confirmed_time_data = confirmed_time_data.rename(columns=dates)

###### Query remote database

In [382]:
days = confirmed_time_data.columns[6:].tolist()
q = 0
log = open('{0}log{1}.txt'.format(LOG_PATH, datetime.now().strftime('%d%m%Y')), 'w')

while days:
    start_time = days[0]
    end_time = days[:30][-1]
    days = days[30:]
    for coord in coords:
        q += 1
        lat = coord[0]
        long = coord[1]
        country = confirmed_time_data.loc[coord]['Country_Region'].values[0]
        province = confirmed_time_data.loc[coord]['Province_State'].values[0]
        url = 'https://api.aerisapi.com/observations/summary/closest?p={0},{1}&from={2}&to={3}&radius={4}miles&plimit=31&limit=1&'\
              'client_id={5}&client_secret={6}'.format(lat, long, start_time, end_time, RADIUS, client_id, client_secret)
        msg = 'collecting for {0} location {1}, {2} from {3} to {4}\n{5}'\
        .format(q, province, country, start_time, end_time, url)
        _ = log.write('\n' + msg)
        print(msg)
        request = urllib.request.urlopen(url)
        response = request.read()
        json_ = json.loads(response)
        if json_['success']: 
            _ = log.write('\nsuccess')
            print('success')
            try:
                for day in json_['response'][0]['periods']:
                    date = datetime.strptime(str(day['summary']['ymd']), '%Y%m%d').strftime("%-m/%-d/%Y")
                    confirmed_time_data.loc[coord].at['avg_d_tmp', date] = day['summary']['temp']['avgC']
                    confirmed_time_data.loc[coord].at['avg_d_RH', date] = day['summary']['rh']['avg']
                    confirmed_time_data.loc[coord].at['avg_d_wind', date] = day['summary']['wind']['avgKPH']
                    confirmed_time_data.loc[coord].at['avg_d_precip', date] = day['summary']['precip']['totalMM']
            except IndexError:
                if json_['error']:
                    msg = 'No data found for {0}, {1} at {2}\n{3}\r\n'.format(country, province, date, json_['error'])
                    _ = log.write(msg)
                    print(msg)
            except Exception:
                msg = 'Unknowen error\n{0}\r\n'.format(json_['error'])
                _ = log.write(msg)
                print(msg)
            finally:
                request.close()
log.close()

collecting for 1 location nan, Afghanistan from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=33.0,65.0&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 2 location nan, Albania from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=41.1533,20.1683&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 3 location nan, Algeria from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=28.0339,1.6596&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 4 location nan, Andorra from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/cl

success
collecting for 28 location nan, Bosnia and Herzegovina from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=43.9159,17.6791&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 29 location nan, Brazil from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=-14.235,-51.9253&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 30 location nan, Brunei from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=4.5353,114.7277&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 31 location nan, Bulgaria from 1/22/2020 to 2/20/2020
https://api.aerisa

success
collecting for 55 location Guangdong, China from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=23.3417,113.4244&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 56 location Guangxi, China from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=23.8298,108.7881&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 57 location Guizhou, China from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=26.8154,106.8748&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 58 location Hainan, China from 1/22/2020 to 2/20/2020
https://api.aerisapi.

success
collecting for 82 location Zhejiang, China from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=29.1832,120.0934&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 83 location nan, Colombia from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=4.5709,-74.2973&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 84 location nan, Congo (Brazzaville) from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=-4.03829,21.7587&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 85 location nan, Congo (Kinshasa) from 1/22/2020 to 2/20/2020
https:

success
collecting for 109 location Guadeloupe, France from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=16.25,-61.5833&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 110 location Mayotte, France from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=-12.8275,45.1662&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 111 location New Caledonia, France from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=-20.9043,165.618&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 112 location Reunion, France from 1/22/2020 to 2/20/2020
https:/

success
collecting for 137 location nan, Italy from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=43.0,12.0&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 138 location nan, Jamaica from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=18.1096,-77.2975&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 139 location nan, Japan from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=36.0,138.0&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 140 location nan, Jordan from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summar

success
collecting for 165 location nan, Nepal from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=28.1667,84.25&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 166 location Aruba, Netherlands from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=12.5186,-70.0358&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 167 location Curacao, Netherlands from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=12.1696,-68.99&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 168 location Sint Maarten, Netherlands from 1/22/2020 to 2/20/2020
https:

success
collecting for 193 location nan, Senegal from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=14.4974,-14.4524&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 194 location nan, Serbia from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=44.0165,21.0059&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 195 location nan, Seychelles from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=-4.6796,55.492&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 196 location nan, Singapore from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/

success
collecting for 221 location Isle of Man, United Kingdom from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=54.2361,-4.5481&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 222 location Montserrat, United Kingdom from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=16.7425,-62.1874&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 223 location nan, United Kingdom from 1/22/2020 to 2/20/2020
https://api.aerisapi.com/observations/summary/closest?p=55.3781,-3.4360000000000004&from=1/22/2020&to=2/20/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 224 location nan, Uruguay from 1/2

success
collecting for 248 location nan, Albania from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=41.1533,20.1683&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 249 location nan, Algeria from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=28.0339,1.6596&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 250 location nan, Andorra from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=42.5063,1.5218&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 251 location nan, Angola from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observa

success
collecting for 275 location nan, Brazil from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=-14.235,-51.9253&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 276 location nan, Brunei from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=4.5353,114.7277&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 277 location nan, Bulgaria from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=42.7339,25.4858&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 278 location nan, Burkina Faso from 2/21/2020 to 3/21/2020
https://api.aerisapi.com

success
collecting for 302 location Guangxi, China from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=23.8298,108.7881&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 303 location Guizhou, China from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=26.8154,106.8748&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 304 location Hainan, China from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=19.1959,109.7453&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 305 location Hebei, China from 2/21/2020 to 3/21/2020
https://api.aerisapi.

success
collecting for 329 location nan, Colombia from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=4.5709,-74.2973&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 330 location nan, Congo (Brazzaville) from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=-4.03829,21.7587&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 331 location nan, Congo (Kinshasa) from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=-4.0383,21.7587&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 332 location nan, Costa Rica from 2/21/2020 to 3/21/2020
htt

success
collecting for 356 location Mayotte, France from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=-12.8275,45.1662&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 357 location New Caledonia, France from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=-20.9043,165.618&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 358 location Reunion, France from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=-21.1351,55.2471&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 359 location Saint Barthelemy, France from 2/21/2020 to 3/21/2020

success
collecting for 384 location nan, Jamaica from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=18.1096,-77.2975&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 385 location nan, Japan from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=36.0,138.0&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 386 location nan, Jordan from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=31.24,36.51&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 387 location nan, Kazakhstan from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations

success
collecting for 412 location Aruba, Netherlands from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=12.5186,-70.0358&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 413 location Curacao, Netherlands from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=12.1696,-68.99&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 414 location Sint Maarten, Netherlands from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=18.0425,-63.0548&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 415 location nan, Netherlands from 2/21/2020 to 3/21/20

success
collecting for 440 location nan, Serbia from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=44.0165,21.0059&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 441 location nan, Seychelles from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=-4.6796,55.492&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 442 location nan, Singapore from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=1.2833,103.8333&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 443 location nan, Slovakia from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/

success
collecting for 468 location Montserrat, United Kingdom from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=16.7425,-62.1874&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 469 location nan, United Kingdom from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=55.3781,-3.4360000000000004&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 470 location nan, Uruguay from 2/21/2020 to 3/21/2020
https://api.aerisapi.com/observations/summary/closest?p=-32.5228,-55.7658&from=2/21/2020&to=3/21/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 471 location nan, US from 2/21/2020 to 3/21/202

success
collecting for 495 location nan, Algeria from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=28.0339,1.6596&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 496 location nan, Andorra from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=42.5063,1.5218&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 497 location nan, Angola from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=-11.2027,17.8739&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 498 location nan, Antigua and Barbuda from 3/22/2020 to 3/27/2020
https://api.aerisap

success
collecting for 522 location nan, Brunei from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=4.5353,114.7277&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 523 location nan, Bulgaria from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=42.7339,25.4858&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 524 location nan, Burkina Faso from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=12.2383,-1.5616&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 525 location nan, Cabo Verde from 3/22/2020 to 3/27/2020
https://api.aerisapi.

success
collecting for 549 location Guizhou, China from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=26.8154,106.8748&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 550 location Hainan, China from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=19.1959,109.7453&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 551 location Hebei, China from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=39.549,116.1306&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 552 location Heilongjiang, China from 3/22/2020 to 3/27/2020
https://api.aeris

success
collecting for 576 location nan, Congo (Brazzaville) from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=-4.03829,21.7587&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 577 location nan, Congo (Kinshasa) from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=-4.0383,21.7587&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 578 location nan, Costa Rica from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=9.7489,-83.7534&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 579 location nan, Cote d'Ivoire from 3/22/2020 to 3/27/202

success
collecting for 604 location Reunion, France from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=-21.1351,55.2471&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 605 location Saint Barthelemy, France from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=17.9,-62.8333&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 606 location St Martin, France from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=18.0708,-63.0501&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 607 location Martinique, France from 3/22/2020 to 3/27/2020
htt

success
collecting for 632 location nan, Jordan from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=31.24,36.51&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 633 location nan, Kazakhstan from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=48.0196,66.9237&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 634 location nan, Kenya from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=-0.0236,37.9062&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 635 location nan, Korea, South from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/obs

success
collecting for 660 location Sint Maarten, Netherlands from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=18.0425,-63.0548&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 661 location nan, Netherlands from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=52.1326,5.2913&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 662 location nan, New Zealand from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=-40.9006,174.886&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 663 location nan, Nicaragua from 3/22/2020 to 3/27/2020
https

success
collecting for 688 location nan, Singapore from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=1.2833,103.8333&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 689 location nan, Slovakia from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=48.669,19.699&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 690 location nan, Slovenia from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=46.1512,14.9955&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 691 location nan, Somalia from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/ob

success
collecting for 715 location nan, United Kingdom from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=55.3781,-3.4360000000000004&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 716 location nan, Uruguay from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=-32.5228,-55.7658&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 717 location nan, US from 3/22/2020 to 3/27/2020
https://api.aerisapi.com/observations/summary/closest?p=37.0902,-95.7129&from=3/22/2020&to=3/27/2020&radius=350miles&plimit=31&limit=1&client_id=tXyXXyb3Yf492X1asICz5&client_secret=WJi1enKSDGlhO8Nt4Ze00C1F6LEdMpl2O54JIwaM
success
collecting for 718 location nan, Uzbekistan from 3/22/2020 to 3/27/2020
https://a

###### Verify integrity, handle NaN and backup dataframe
1. Some coordinates are more then 350 miles away from any weather station, resulting in NaN values
2. Some stations don't save data as far back, resulting in NaN values
3. NaNs are not removed, rather when applying aggragate functions we discard then in the calculations

In [383]:
backup = confirmed_time_data.copy()
# data[['Province_State']] = data[['Province_State']].fillna(0)
# confirmed_time_data = confirmed_time_data.dropna()
# confirmed_time_data.isna().sum()
# confirmed_time_data['1/22/2020']
confirmed_time_data.isnull().sum()
# confirmed_time_data['2/1/2020'][confirmed_time_data['2/1/2020'].isnull()]

Province_State    1156
Country_Region     984
avg_m_tmp         1230
avg_m_RH          1230
avg_m_precip      1230
avg_m_wind        1230
1/22/2020           62
1/23/2020           62
1/24/2020           58
1/25/2020           58
1/26/2020           54
1/27/2020           50
1/28/2020           42
1/29/2020           46
1/30/2020           50
1/31/2020           58
2/1/2020            58
2/2/2020            62
2/3/2020            58
2/4/2020            54
2/5/2020            62
2/6/2020            66
2/7/2020            62
2/8/2020            78
2/9/2020            74
2/10/2020           86
2/11/2020           86
2/12/2020           90
2/13/2020           66
2/14/2020           70
                  ... 
2/27/2020           40
2/28/2020           44
2/29/2020           56
3/1/2020            52
3/2/2020            60
3/3/2020            60
3/4/2020            56
3/5/2020            64
3/6/2020            56
3/7/2020            52
3/8/2020            52
3/9/2020            48
3/10/2020  

###### COMPUTE AVG PARAMETERS FOR CONFIRMED CASES

In [384]:
for coord in coords:
    lat = coord[0]
    long = coord[1]
    avg_tmp = confirmed_time_data.loc[coord].loc['avg_d_tmp'].mean(skipna=True)
    avg_rh = confirmed_time_data.loc[coord].loc['avg_d_RH'].mean(skipna=True)
    avg_wind = confirmed_time_data.loc[coord].loc['avg_d_wind'].mean(skipna=True)
    avg_precip = confirmed_time_data.loc[coord].loc['avg_d_precip'].mean(skipna=True)

    confirmed_time_data.loc[coord]['avg_m_tmp'][0] = avg_tmp
    confirmed_time_data.loc[coord]['avg_m_RH'][0] = avg_rh
    confirmed_time_data.loc[coord]['avg_m_wind'][0] = avg_wind
    confirmed_time_data.loc[coord]['avg_m_precip'][0] = avg_precip

###### Data validation, NaN handling

In [593]:
confirmed_time_data['avg_m_tmp'][::5].isna().sum()
confirmed_time_data['avg_m_RH'][::5].isna().sum()
confirmed_time_data['avg_m_wind'][::5].isna().sum()
confirmed_time_data['avg_m_precip'][::5].isna().sum()

nulls = confirmed_time_data['avg_m_precip'][::5].isna().tolist()
for i in range(len(nulls)):
    if nulls[i]:
        nc = confirmed_time_data['avg_m_precip'][::5].index[i][0]
        print('Removing null at coor {0}, {1}'.format(nc, confirmed_time_data.loc[nc, 'Country_Region']))
        confirmed_time_data.drop(nc, level=0, inplace=True)
        for i in range(len(coords)):
            if coords[i] == nc:
                coords.pop(i)
                i += 1

0

0

0

0

###### Compute max cases and max date

In [516]:
backup = confirmed_time_data.copy()
confirmed_time_data.rename(index={'': 'data'}, inplace=True)
confirmed_time_data.rename_axis(['coordinate', 'information'],inplace=True)
for param in ['Max_Cases', 'GF_Q1', 'GF_Q2', 'GF_Q3'][::-1]:
    confirmed_time_data.insert(6, param, np.nan)
confirmed_time_data.insert(7, '5%_Date', '')
confirmed_time_data.insert(7, 'Max_Date', '')

ValueError: cannot insert GF_Q3, already exists

In [597]:
for coord in coords:
    try:
        max_cases = confirmed_time_data.iloc[:,12:].loc[coord].loc['data'].max()
        max_date = confirmed_time_data.iloc[:,12:].loc[coord].loc['data'].argmax()
        confirmed_time_data.loc[coord, 'Max_Cases'].loc['data'] = max_cases
        confirmed_time_data.loc[coord, 'Max_Date'].loc['data'] = max_date
    except Exception as e:
        print(e)
print('Lybia and malta were removed as they had NaN values')

'the label [35.9375] is not in the [index]'
'the label [26.3351] is not in the [index]'
Lybia and malta were removed as they had NaN values


###### Sanity checks
Manual heuristic comparison of selected samples from the dataset
We compared 5 randomly selected data entries as follows:
1. lat and lan on google map
2. Daily information with Aeris API
3. Daily information with a third party climate source - https://www.worldweatheronline.com/
4. monthly information with a third party data source (This will only be an approximation) https://www.timeanddate.com/weather/israel/tel-aviv/climate
Note that coordinates are in decimal representation

We conclude that the data is correct heuristcially, except for precipitation that shows 0 when it is infact more in many cases
Also we see that except for US states coordinates are the same between Tableau and Hopkins datasets

In [646]:
# Israel 31\35
# All 4 check except precipitation
confirmed_time_data.loc[(31,35)]
confirmed_time_data.loc[(31,35)]['3/1/2020']

# # Afghanistan
# # All check
confirmed_time_data.loc[(33,65)]
confirmed_time_data.loc[(33,65)]['3/21/2020']

# # Queensland Australia
# # All check except precipitation
confirmed_time_data.loc[(-28.0167,153.4)]
confirmed_time_data.loc[(-28.0167,153.4)]['3/2/2020']

# Fiji
# All check except precipitation
confirmed_time_data.loc[(-17.7134, 178.065)]
confirmed_time_data.loc[(-17.7134, 178.065)]['3/6/2020']

Unnamed: 0_level_0,Province_State,Country_Region,avg_m_tmp,avg_m_RH,avg_m_precip,avg_m_wind,Max_Cases,Max_Date,5%_Date,GF_Q1,...,3/18/2020,3/19/2020,3/20/2020,3/21/2020,3/22/2020,3/23/2020,3/24/2020,3/25/2020,3/26/2020,3/27/2020
information,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
data,,Israel,14.721569,73.941176,0.0,12.321569,3035.0,3/27/2020,,,...,304.0,427.0,529.0,712.0,883.0,1071.0,1238.0,2369.0,2693.0,3035.0
avg_d_RH,,,,,,,,,,,...,64.0,70.0,81.0,76.0,69.0,52.0,64.0,73.0,63.0,60.0
avg_d_precip,,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
avg_d_tmp,,,,,,,,,,,...,14.3,13.6,11.0,11.6,13.6,18.2,18.2,16.4,18.1,20.7
avg_d_wind,,,,,,,,,,,...,22.4,24.7,17.0,11.7,9.5,6.1,14.9,9.5,8.1,13.0


information
data            10.0
avg_d_RH        68.0
avg_d_precip     0.0
avg_d_tmp       15.3
avg_d_wind      23.0
Name: 3/1/2020, dtype: float64

Unnamed: 0_level_0,Province_State,Country_Region,avg_m_tmp,avg_m_RH,avg_m_precip,avg_m_wind,Max_Cases,Max_Date,5%_Date,GF_Q1,...,3/18/2020,3/19/2020,3/20/2020,3/21/2020,3/22/2020,3/23/2020,3/24/2020,3/25/2020,3/26/2020,3/27/2020
information,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
data,,Afghanistan,11.457576,51.924242,1.462424,8.6,110.0,3/27/2020,,,...,22.0,22.0,24.0,24.0,40.0,40.0,74.0,84.0,94.0,110.0
avg_d_RH,,,,,,,,,,,...,45.0,40.0,39.0,41.0,58.0,79.0,64.0,57.0,51.0,52.0
avg_d_precip,,,,,,,,,,,...,0.0,0.0,0.0,0.0,12.7,7.62,0.0,0.0,0.0,0.0
avg_d_tmp,,,,,,,,,,,...,18.0,19.3,19.6,18.7,17.8,16.1,14.4,15.2,16.7,16.1
avg_d_wind,,,,,,,,,,,...,1.9,2.7,4.8,2.4,19.2,8.5,8.0,3.0,1.2,11.3


information
data            24.0
avg_d_RH        41.0
avg_d_precip     0.0
avg_d_tmp       18.7
avg_d_wind       2.4
Name: 3/21/2020, dtype: float64

Unnamed: 0_level_0,Province_State,Country_Region,avg_m_tmp,avg_m_RH,avg_m_precip,avg_m_wind,Max_Cases,Max_Date,5%_Date,GF_Q1,...,3/18/2020,3/19/2020,3/20/2020,3/21/2020,3/22/2020,3/23/2020,3/24/2020,3/25/2020,3/26/2020,3/27/2020
information,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
data,Queensland,Australia,24.078788,78.863636,0.0,15.828788,555.0,3/27/2020,,,...,94.0,144.0,184.0,221.0,259.0,319.0,397.0,443.0,493.0,555.0
avg_d_RH,,,,,,,,,,,...,62.0,69.0,74.0,70.0,73.0,81.0,84.0,76.0,76.0,86.0
avg_d_precip,,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
avg_d_tmp,,,,,,,,,,,...,22.0,21.7,22.4,24.5,25.0,23.2,20.9,21.9,22.1,20.0
avg_d_wind,,,,,,,,,,,...,20.1,10.0,13.9,16.6,13.7,17.3,11.8,10.4,11.0,16.0


information
data             9.0
avg_d_RH        78.0
avg_d_precip     0.0
avg_d_tmp       25.6
avg_d_wind      12.8
Name: 3/2/2020, dtype: float64

Unnamed: 0_level_0,Province_State,Country_Region,avg_m_tmp,avg_m_RH,avg_m_precip,avg_m_wind,Max_Cases,Max_Date,5%_Date,GF_Q1,...,3/18/2020,3/19/2020,3/20/2020,3/21/2020,3/22/2020,3/23/2020,3/24/2020,3/25/2020,3/26/2020,3/27/2020
information,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
data,,Fiji,27.039394,84.954545,0.0,6.854545,5.0,3/25/2020,,,...,0.0,1.0,1.0,1.0,2.0,3.0,4.0,5.0,5.0,5.0
avg_d_RH,,,,,,,,,,,...,92.0,97.0,94.0,91.0,89.0,91.0,87.0,86.0,85.0,87.0
avg_d_precip,,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
avg_d_tmp,,,,,,,,,,,...,26.2,25.5,25.8,25.6,25.8,25.8,26.5,26.7,26.9,26.8
avg_d_wind,,,,,,,,,,,...,0.8,7.0,4.7,5.1,1.7,4.7,4.9,5.5,5.0,4.8


information
data             0.0
avg_d_RH        90.0
avg_d_precip     0.0
avg_d_tmp       26.6
avg_d_wind       3.2
Name: 3/6/2020, dtype: float64

In [None]:
data = pd.read_csv('../augmented_datasets/tableau_conf_data.csv')
RADIUS = 350

coords = [(31, 35), (33, 65), (-28.0167, 153.4), (-17.7134, 178.065), (47.5289, -99.784)] 
dates = ['03/01/2020', '03/21/2020', '03/02/2020', '03/06/2020', '03/12/2020']

for i in range(5):
    lat = coords[i][0]
    long = coords[i][1]
    start_time = dates[i]
    end_time = dates[i]
    location = data.loc[data['Lat'] == lat]\
                    .loc[data['Long'] == long]\
                     [['Country_Region','Province_State']]
    country = location.iloc[0][0]
    province = location.iloc[0][1]
    url = 'https://api.aerisapi.com/observations/summary/closest?p={0},{1}&from={2}&to={3}&radius={4}miles&plimit=31&limit=1&'\
          'client_id={5}&client_secret={6}'.format(lat, long, start_time, end_time, RADIUS, client_id, client_secret)
    print('collecting for {0} location {1}, {2}\n{3}'.format(i, province, country, url))
    request = urllib.request.urlopen(url)
    response = request.read()
    json_ = json.loads(response)
    if json_['success']: 
        print('success')
        try:
            for day in json_['response'][0]['periods']:
                print('Location: {0}, {1}'.format(json_['response'][0]['place']['name'], json_['response'][0]['place']['city']))
                print('avg tmp: {0}'.format(day['summary']['temp']['avgC']))
                print('avg RH: {0}'.format(day['summary']['rh']['avg']))
                print('avg wind: {0}'.format(day['summary']['wind']['avgKPH']))
                print('avg precip: {0}\n'.format(day['summary']['precip']['totalMM']))
        except IndexError:
            if json_['error']:
                msg = 'No data found for {0}, {1} at {2}\n{3}\r\n'.format(country, province, date, json_['error'])
                print(msg)
        except Exception as e:
            raise e
            msg = 'Unknowen error\n{0}\r\n'.format(json_['error'])
            print(msg)
        finally:
            request.close()


In [None]:

- Israel, 03/01/20, 31	35	15.3	68	23	0	14.99310345	74.5862069	13.11034483	0
    * Google maps
    * Check API
    * Third party daily
    * Third party monthly
- Afghanistan, 03/21/20 33	65	18.7	41	2.4	0	13.71315789	47.07894737	5.684210526	0.855526316
    * Google maps
    * Check API
    * Third party daily
    * Third party monthly

- Queensland Australia, 02/03/20 -28.0167	153.4	25.6	78	12.8	0	24.05	75.55263158	16.66842105	0
    * Google maps
    * Check API
    * Third party daily
    * Third party monthly

- Fiji, 03/06/20 -17.7134	178.065	26.6	90	3.2	0	26.84473684	85.47368421	5.871052632	0
    * Google maps
        - The the northen third of Fiji
    * Check API
        - Checks good for Nausori (60km south east)
    * Third party daily
        - Checks good exepct wind that seems week and precipitation that should be a little
    * Third party monthly
        - seems good exepct for precipitation
- North Dakota, US 03/12/20 47.5289	-99.784	-0.2	73	31.5	0	-4.947368421	80.52631579	18.72368421	0
    * Google maps
        - In the fields around the middle of the state
    * Check API
         - Checks good for harvey (10km north west)
    * Third party daily
        - 
    * Third party monthly
        - checks good except precipitation

After comparing the data we conclude it is relaibale except for precpitation that will be hence forth ignored.

###### Add 5% interval and averages + reloading a saved pickle

In [51]:
# DONT RUN THIS BOX IF YOU RAN THE SCRIPT FROM THE START
confirmed_time_data = pd.DataFrame()
PICKLE_PATH = '../augmented_datasets/hopkins_conf_augmented2903.pkl'
with open(PICKLE_PATH, 'rb') as file:
    confirmed_time_data = pickle.load(file)
coords = set(confirmed_time_data.index.get_level_values(0).tolist())

In [52]:
confirmed_time_data.insert(9, 'avg_interval_RH', np.nan)
confirmed_time_data.insert(9, 'avg_interval_tmp', np.nan)

In [113]:
# confirmed_time_data.loc[coord].loc['data'][14:].index
mask = confirmed_time_data.loc[coord].loc['data'][14:].index > '2/19/2020'
# confirmed_time_data[['3/18/2020','3/19/2020']]
mask = confirmed_time_data.columns
mask.get_loc('2/19/2020')
confirmed_time_data[confirmed_time_data.columns[42:]]

42

Unnamed: 0_level_0,Unnamed: 1_level_0,2/19/2020,2/20/2020,2/21/2020,2/22/2020,2/23/2020,2/24/2020,2/25/2020,2/26/2020,2/27/2020,2/28/2020,...,3/18/2020,3/19/2020,3/20/2020,3/21/2020,3/22/2020,3/23/2020,3/24/2020,3/25/2020,3/26/2020,3/27/2020
coordinate,information,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
"(-41.4545, 145.9707)",data,0.0,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.00,0.00,...,10.0,10.0,10.00,16.00,22.00,28.0,28.00,36.00,47.0,47.0
"(-41.4545, 145.9707)",avg_d_RH,78.0,60.0,53.0,62.00,70.0,78.00,81.0,65.0,63.00,51.00,...,82.0,93.0,81.00,73.00,68.00,64.0,74.00,74.00,68.0,74.0
"(-41.4545, 145.9707)",avg_d_precip,0.0,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.00,0.00,...,0.0,0.0,0.00,0.00,0.00,0.0,0.00,0.00,0.0,0.0
"(-41.4545, 145.9707)",avg_d_tmp,12.0,14.2,13.9,14.30,17.6,16.90,16.9,13.1,13.50,14.70,...,16.5,14.9,14.90,14.10,12.80,10.6,11.10,12.30,11.2,13.5
"(-41.4545, 145.9707)",avg_d_wind,31.1,14.8,13.4,15.00,14.2,11.90,16.0,22.2,30.90,18.10,...,18.6,8.4,21.20,13.50,14.40,9.8,10.10,8.50,15.2,6.8
"(-40.9006, 174.886)",data,0.0,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.00,1.00,...,20.0,28.0,39.00,52.00,102.00,102.0,155.00,205.00,283.0,368.0
"(-40.9006, 174.886)",avg_d_RH,84.0,84.0,83.0,78.00,75.0,75.00,76.0,75.0,73.00,76.00,...,60.0,72.0,70.00,77.00,76.00,75.0,56.00,71.00,70.0,86.0
"(-40.9006, 174.886)",avg_d_precip,0.0,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.00,0.00,...,0.0,0.0,0.00,0.00,0.00,0.0,0.00,0.00,0.0,0.0
"(-40.9006, 174.886)",avg_d_tmp,20.1,17.7,19.8,19.60,15.1,15.50,16.6,18.2,19.30,19.70,...,14.1,17.0,16.40,15.90,18.70,17.3,13.50,14.60,16.6,15.0
"(-40.9006, 174.886)",avg_d_wind,28.1,15.1,36.7,27.40,32.3,11.00,11.5,11.7,34.60,35.00,...,21.4,35.2,25.50,19.20,33.10,26.2,25.20,19.30,34.7,27.4


In [167]:
# confirmed_time_data[confirmed_time_data['Max_Date'] < '3/27/2020'].loc['data']
confirmed_time_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Province_State,Country_Region,avg_m_tmp,avg_m_RH,avg_m_precip,avg_m_wind,Max_Cases,Max_Date,5%_Date,avg_interval_tmp,...,3/18/2020,3/19/2020,3/20/2020,3/21/2020,3/22/2020,3/23/2020,3/24/2020,3/25/2020,3/26/2020,3/27/2020
coordinate,information,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
"(-41.4545, 145.9707)",data,Tasmania,Australia,15.466667,68.818182,0.000000,18.469697,47.0,3/26/2020,1/22/2020,15.564062,...,10.0,10.0,10.00,16.00,22.00,28.0,28.00,36.00,47.0,47.0
"(-41.4545, 145.9707)",avg_d_RH,,,,,,,,,,,...,82.0,93.0,81.00,73.00,68.00,64.0,74.00,74.00,68.0,74.0
"(-41.4545, 145.9707)",avg_d_precip,,,,,,,,,,,...,0.0,0.0,0.00,0.00,0.00,0.0,0.00,0.00,0.0,0.0
"(-41.4545, 145.9707)",avg_d_tmp,,,,,,,,,,,...,16.5,14.9,14.90,14.10,12.80,10.6,11.10,12.30,11.2,13.5
"(-41.4545, 145.9707)",avg_d_wind,,,,,,,,,,,...,18.6,8.4,21.20,13.50,14.40,9.8,10.10,8.50,15.2,6.8
"(-40.9006, 174.886)",data,,New Zealand,17.754545,74.772727,0.000000,26.777273,368.0,3/27/2020,1/22/2020,17.796923,...,20.0,28.0,39.00,52.00,102.00,102.0,155.00,205.00,283.0,368.0
"(-40.9006, 174.886)",avg_d_RH,,,,,,,,,,,...,60.0,72.0,70.00,77.00,76.00,75.0,56.00,71.00,70.0,86.0
"(-40.9006, 174.886)",avg_d_precip,,,,,,,,,,,...,0.0,0.0,0.00,0.00,0.00,0.0,0.00,0.00,0.0,0.0
"(-40.9006, 174.886)",avg_d_tmp,,,,,,,,,,,...,14.1,17.0,16.40,15.90,18.70,17.3,13.50,14.60,16.6,15.0
"(-40.9006, 174.886)",avg_d_wind,,,,,,,,,,,...,21.4,35.2,25.50,19.20,33.10,26.2,25.20,19.30,34.7,27.4


In [144]:
for coord in coords:
    max_cases = int(confirmed_time_data.loc[coord]['Max_Cases']['data'])
    max_date = confirmed_time_data.loc[coord]['Max_Date']
    five_prcnt = int(0.05 * max_cases)
    mask = confirmed_time_data.loc[coord].loc['data'][14:] >= five_prcnt
    five_prcnt_date = mask.index.min()
    confirmed_time_data.loc[coord, '5%_Date']['data'] = five_prcnt_date
    
    five_prct_column = confirmed_time_data.columns.get_loc(five_prcnt_date)
    max_column = confirmed_time_data.columns.get_loc(max_date['data'])
    interval = confirmed_time_data[confirmed_time_data.columns[five_prct_column:max_column]]
    
    confirmed_time_data.loc[coord, 'avg_interval_tmp']['data'] = interval.loc[coord].loc['avg_d_tmp'].mean()
    confirmed_time_data.loc[coord, 'avg_interval_RH']['data'] = interval.loc[coord].loc['avg_d_RH'].mean()

###### Save augmented data
1. Multi index does not save well in csv, so we also save it as a pickle

In [149]:
pickle_path = AUGMENTED_PATH + 'hopkins_conf_augmented{0}.pkl'.format(datetime.now().strftime('%d%m'))
with open(pickle_path, 'wb') as file:
    pickle.dump(confirmed_time_data, file)
confirmed_time_data.to_csv('../augmented_datasets/hopkins_conf_augmented{0}.csv'.format(datetime.now().strftime('%d%m')))