In [2]:
import json
import urllib
import urllib.error
import urllib.request
from datetime import datetime
from datetime import timedelta

import numpy as np
import pandas as pd

from keys import client_id, client_secret, app_id

In [194]:
# Load the raw time-series table; the cells below add columns to this frame.
data = pd.read_csv(filepath_or_buffer='../original_datasets/tableau_time_sereis0322.csv')

In [195]:
# Distinct provinces/states, dates and (Lat, Long) pairs present in the table.
states = pd.unique(data['Province_State']).tolist()
dates = pd.unique(data['Date']).tolist()
coords = data.groupby(by=['Lat', 'Long']).size().index.tolist()

# Create the weather columns as all-NaN placeholders, in this order: first the
# per-row values, then the per-location means (the avg_m_* columns).
for placeholder in (
    'avg_tmp', 'avg_RH', 'avg_wind', 'avg_precip',
    # 'avg_d_sun',
    'avg_m_tmp', 'avg_m_RH', 'avg_m_wind', 'avg_m_precip',
):
    data[placeholder] = np.nan

In [11]:
# Query the Aeris observation-summary endpoint once per (Lat, Long) location
# and copy each returned day's averages onto the rows of `data` that share the
# location and the calendar day. Problems are printed and appended to a log
# file named after today's date; a failing location is skipped, not fatal.
START_TIME = '02/14/2020'
END_TIME = '02/24/2020'
RADIUS = 350

# Destination column -> (section, field) inside each period's 'summary' object.
# The destination names are the NaN placeholder columns created when the table
# was prepared, which are also the columns the later averaging cells read.
# Solar radiation (summary['solrad']['avgWM2']) is currently not collected.
SUMMARY_FIELDS = {
    'avg_tmp': ('temp', 'avgC'),
    'avg_RH': ('rh', 'avg'),
    'avg_wind': ('wind', 'avgKPH'),
    'avg_precip': ('precip', 'totalMM'),
}

# Everything in the request URL except the credentials, so it is safe to print.
QUERY_TEMPLATE = ('https://api.aerisapi.com/observations/summary/closest?p={0},{1}&from={2}&to={3}'
                  '&radius={4}miles&plimit=31&limit=1')

# Parse the dates once so API days are matched to rows by calendar day rather
# than by how the date string happens to be padded. Unparseable dates become
# NaT and therefore never match.
row_dates = pd.to_datetime(data['Date'], format='%m/%d/%Y', errors='coerce')

# The context manager closes the log even when the run is interrupted.
with open('log{0}'.format(datetime.now().strftime('%d%m%Y')), 'w') as log:
    for q, (lat, long) in enumerate(coords, start=1):
        at_location = (data['Lat'] == lat) & (data['Long'] == long)
        first_row = data.loc[at_location, ['Country_Region', 'Province_State']].iloc[0]
        country = first_row['Country_Region']
        province = first_row['Province_State']

        query = QUERY_TEMPLATE.format(lat, long, START_TIME, END_TIME, RADIUS)
        url = '{0}&client_id={1}&client_secret={2}'.format(query, client_id, client_secret)
        # Print only the credential-free part: cell output is saved with the
        # notebook and must not contain the API keys.
        print('collecting for {0} location {1}, {2}\n{3}'.format(q, province, country, query))

        try:
            # `with` closes the connection on every path, not only on success.
            with urllib.request.urlopen(url) as request:
                response = request.read()
            json_ = json.loads(response)
        except (urllib.error.URLError, json.JSONDecodeError) as exc:
            msg = 'Request failed for {0}, {1}\n{2}\r\n'.format(country, province, exc)
            print(msg)
            log.write(msg)
            continue

        if not json_['success']:
            msg = 'Request not successful for {0}, {1}\n{2}\r\n'.format(
                country, province, json_.get('error'))
            print(msg)
            log.write(msg)
            continue

        print('success')
        try:
            for day in json_['response'][0]['periods']:
                summary = day['summary']
                day_date = pd.to_datetime(str(summary['ymd']), format='%Y%m%d')
                # Read every value before writing so a missing field cannot
                # leave a row half updated.
                values = {column: summary[section][field]
                          for column, (section, field) in SUMMARY_FIELDS.items()}
                targets = at_location & (row_dates == day_date)
                for column, value in values.items():
                    data.loc[targets, column] = value
        except IndexError:
            # An empty 'response' list: nothing was returned for this location.
            msg = 'No data found for {0}, {1} between {2} and {3}\n{4}\r\n'.format(
                country, province, START_TIME, END_TIME, json_.get('error'))
            print(msg)
            log.write(msg)
        except Exception as exc:
            # Report the exception itself; json_['error'] describes the API
            # status, not what went wrong while processing the payload.
            msg = 'Unknowen error\n{0!r}\r\n'.format(exc)
            print(msg)
            log.write(msg)

collecting for 1 location Tasmania, Australia
https://api.aerisapi.com/observations/summary/closest?p=-41.4545,145.9707&from=02/14/2020&to=02/24/2020&radius=350miles&plimit=31&limit=1&client_id=<REDACTED>&client_secret=<REDACTED>
success
collecting for 2 location nan, New Zealand
https://api.aerisapi.com/observations/summary/closest?p=-40.9006,174.886&from=02/14/2020&to=02/24/2020&radius=350miles&plimit=31&limit=1&client_id=<REDACTED>&client_secret=<REDACTED>
success
collecting for 3 location nan, Argentina
https://api.aerisapi.com/observations/summary/closest?p=-38.4161,-63.6167&from=02/14/2020&to=02/24/2020&radius=350miles&plimit=31&limit=1&client_id=<REDACTED>&client_secret=<REDACTED>
success
collecting for 4 location Victoria, Australia
https://api.aerisapi.com/observations/summary/closest?p=-37.8136,144.9631&from=02/14/2020&to=02/24/2020&radius=350miles&plimit

KeyboardInterrupt: 

In [961]:
############################
# HANDLE NaN AND SAVE DATA #
############################

# data = backup.copy()
# Some rows have no Province_State; store 0 there so that those rows are not
# removed by the NaN filter below.
data['Province_State'] = data['Province_State'].fillna(0)

# 'avg_sun' exists only if the (currently disabled) solar-radiation column was
# created, so removing it must not fail when it is absent.
data = data.drop(columns='avg_sun', errors='ignore')

# Discard rows that still have missing values, ignoring the avg_m_* columns:
# those are all-NaN placeholders until the per-location means are computed in
# the following cells, so including them here would discard every row.
filled_columns = [name for name in data.columns if not str(name).startswith('avg_m_')]
data = data.dropna(subset=filled_columns)

In [962]:
##############################################
# COMPUTE AVG PARAMETERS FOR CONFIRMED CASES #
##############################################

# Split the table by case type. `.copy()` makes each part an independent frame;
# writing into a bare filtered slice triggers pandas' SettingWithCopyWarning.
conf_data = data[data['Case_Type'] == 'Confirmed'].copy()
death_data = data[data['Case_Type'] == 'Deaths'].copy()

# For every confirmed-case row, store the mean of each weather column over all
# rows that share its (Lat, Long) location. `transform` returns one value per
# row, so no per-location filtering or row-by-row writes are needed.
conf_mean_columns = (
    ('avg_tmp', 'avg_m_tmp'),
    ('avg_RH', 'avg_m_RH'),
    ('avg_wind', 'avg_m_wind'),
    ('avg_precip', 'avg_m_precip'),
)
conf_location_means = (
    conf_data
    .groupby(['Lat', 'Long'])[[source for source, _ in conf_mean_columns]]
    .transform('mean')
)
for source, target in conf_mean_columns:
    conf_data[target] = conf_location_means[source]

In [963]:
##########################################
# COMPUTE AVG PARAMETERS FOR DEATH CASES #
##########################################

# Detach death_data from the frame it was filtered from, so the column writes
# below cannot trigger pandas' SettingWithCopyWarning.
death_data = death_data.copy()

# For every death-case row, store the mean of each weather column over all rows
# that share its (Lat, Long) location. Grouping death_data by its own
# coordinates also removes the earlier reliance on a mask built from `data`.
death_mean_columns = (
    ('avg_tmp', 'avg_m_tmp'),
    ('avg_RH', 'avg_m_RH'),
    ('avg_wind', 'avg_m_wind'),
    ('avg_precip', 'avg_m_precip'),
)
death_location_means = (
    death_data
    .groupby(['Lat', 'Long'])[[source for source, _ in death_mean_columns]]
    .transform('mean')
)
for source, target in death_mean_columns:
    death_data[target] = death_location_means[source]

In [151]:
#########################
# This was to fix a bug #
#########################

# conf_data = pd.read_csv('../augmented_datasets/tableau_conf_data.csv')
# death_data = pd.read_csv('../augmented_datasets/tableau_death_data.csv')
# conf_data = conf_data.drop('Unnamed: 0', axis=1)
# death_data = death_data.drop('Unnamed: 0', axis=1)
# conf_data['Date'] = conf_data['Date'].apply(lambda x: x.lstrip('0'))
# death_data['Date'] = death_data['Date'].apply(lambda x: x.lstrip('0'))

##############################
# This is for multi-indexing #
##############################

# conf_data['Date'] = pd.DatetimeIndex(conf_data['Date'])
# conf_data.set_index(conf_data['Date'], append=True, inplace=True, drop=True)
# conf_data.loc[2, '2020']

In [183]:
#####################################################################
# COMPUTE GROWTH-FACTOR AND MAX CASES, MAX DATE FOR CONFIRMED CASES #
#####################################################################

# For every (Lat, Long) location in conf_data this cell records on all of the
# location's rows:
#   max_cases     - the largest 'Cases' value at that location
#   max_date      - the 'Date' string of the first row holding that maximum
#   growth_factor - (max_cases / cases four days earlier) ** 0.25, the mean
#                   daily growth over those four days; 1 when there is no
#                   positive, smaller observation four days earlier.

# Work on an independent copy so the column writes below cannot trigger
# pandas' SettingWithCopyWarning.
conf_data = conf_data.copy()
conf_data['max_date'] = ''
conf_data['growth_factor'] = np.nan
conf_data['max_cases'] = np.nan

# Parse the dates once so the "four days earlier" row is found by calendar day
# instead of by string formatting (zero padding, platform-specific strftime
# flags). Unparseable dates become NaT and never match.
conf_dates = pd.to_datetime(conf_data['Date'], format='%m/%d/%Y', errors='coerce')

# Row labels per location, taken from conf_data itself (not from `coords`) and
# captured before the frame is modified.
conf_location_rows = conf_data.groupby(['Lat', 'Long']).groups

for (lat, long), labels in conf_location_rows.items():
    cases = conf_data.loc[labels, 'Cases']
    max_cases = cases.max()
    if pd.isna(max_cases):
        continue

    # idxmax returns the first row holding the maximum.
    max_date = conf_data.at[cases.idxmax(), 'Date']
    four_days_date = datetime.strptime(max_date, '%m/%d/%Y') - timedelta(days=4)
    # Textual form (unpadded month, two-digit day) kept under this name because
    # the inspection cell further down reads `four_days`.
    four_days = '{0}/{1:02d}/{2}'.format(
        four_days_date.month, four_days_date.day, four_days_date.year)

    earlier_cases = cases[conf_dates.loc[labels] == four_days_date]
    growth_factor = 1
    if not earlier_cases.empty:
        four_days_cases = earlier_cases.iloc[0]
        # Requiring a positive denominator avoids division by zero and a
        # fractional power of a negative ratio.
        if max_cases > four_days_cases and four_days_cases > 0:
            growth_factor = (max_cases / four_days_cases) ** 0.25

    conf_data.loc[labels, 'max_date'] = max_date
    conf_data.loc[labels, 'growth_factor'] = growth_factor
    conf_data.loc[labels, 'max_cases'] = max_cases

  from ipykernel import kernelapp as app


In [187]:
# Inspect the death counts at the last processed location on the `four_days`
# date; `lat`, `long` and `four_days` are left over from the loop above.
death_data.loc[
    (death_data['Lat'] == lat)
    & (death_data['Long'] == long)
    & (death_data['Date'] == four_days),
    'Cases',
]

Series([], Name: Cases, dtype: int64)

In [188]:
##################################################################
# COMPUTE GROWTH-FACTOR AND MAX CASES, MAX DATE FOR DEATH CASES #
##################################################################

# For every (Lat, Long) location in death_data this cell records on all of the
# location's rows:
#   max_cases     - the largest 'Cases' value at that location
#   max_date      - the 'Date' string of the first row holding that maximum
#   growth_factor - (max_cases / cases four days earlier) ** 0.25, the mean
#                   daily growth over those four days; 1 when there is no
#                   positive, smaller observation four days earlier.

# Work on an independent copy so the column writes below cannot trigger
# pandas' SettingWithCopyWarning.
death_data = death_data.copy()
death_data['max_date'] = ''
death_data['growth_factor'] = np.nan
death_data['max_cases'] = np.nan

# Parse the dates once so the "four days earlier" row is found by calendar day
# instead of by string formatting (zero padding, platform-specific strftime
# flags). Unparseable dates become NaT and never match.
death_dates = pd.to_datetime(death_data['Date'], format='%m/%d/%Y', errors='coerce')

# Row labels per location, taken from death_data itself rather than from
# `coords`, so every row with a coordinate is processed even when `coords` was
# computed from a different state of the table.
death_location_rows = death_data.groupby(['Lat', 'Long']).groups

for (lat, long), labels in death_location_rows.items():
    cases = death_data.loc[labels, 'Cases']
    max_cases = cases.max()
    if pd.isna(max_cases):
        continue

    # idxmax returns the first row holding the maximum.
    max_date = death_data.at[cases.idxmax(), 'Date']
    four_days_date = datetime.strptime(max_date, '%m/%d/%Y') - timedelta(days=4)

    earlier_cases = cases[death_dates.loc[labels] == four_days_date]
    growth_factor = 1
    if not earlier_cases.empty:
        four_days_cases = earlier_cases.iloc[0]
        # Requiring a positive denominator avoids division by zero and a
        # fractional power of a negative ratio.
        if max_cases > four_days_cases and four_days_cases > 0:
            growth_factor = (max_cases / four_days_cases) ** 0.25

    death_data.loc[labels, 'max_date'] = max_date
    death_data.loc[labels, 'growth_factor'] = growth_factor
    death_data.loc[labels, 'max_cases'] = max_cases


  app.launch_new_instance()


24


In [226]:
"""
Somehow there are 580 entries in death_cont that are not in coords
resulting in NaN values for growth factor, we will remove them for the meanwhile
"""
# Remove every row (axis 0) that has a missing value in any column.
death_data = death_data.dropna(axis=0, how='any')

'\nSomehow there are 580 entries in death_cont that are not in coords\nresulting in NaN values for growth factor, we will remove them for the meanwhile\n'

In [229]:
######################
# SAVE COMPUTED DATA #
######################

# Write each augmented table to its own CSV file (row index included).
for frame, destination in (
    (conf_data, '../augmented_datasets/tableau_conf_data.csv'),
    (death_data, '../augmented_datasets/tableau_death_data.csv'),
):
    frame.to_csv(destination)