In [None]:
import numpy as np
import pandas as pd
import pickle

import netCDF4
 
import datetime

import psycopg2

import os

from requests import get  # to make GET request


In [None]:
# Base URL of the Met Office global daily files; create_aggr_df appends
# "<folder>/<file prefix><YYYYMMDD>.nc" to it for each indicator and day.
URL = "https://metdatasa.blob.core.windows.net/covid19-response/metoffice_global_daily/"

In [None]:
# Scratch directory that receives the downloaded netCDF file.
# os.makedirs(..., exist_ok=True) is portable and, unlike `!mkdir`, does not
# complain when the notebook is re-run and the directory already exists.
os.makedirs("temp", exist_ok=True)

In [None]:
# Restore the pickled level-2 lookup. It is later walked as
# adm_2_to_grid[area_0][area_1][area_2] -> list of grid points.
with open('input/dicts/adm_2_to_grid.pkl', 'rb') as handle:
    adm_2_to_grid = pickle.loads(handle.read())

# Query the DB to find the most recent weather data

In [None]:
# Connect to covid19db.org
# Every connection setting can be overridden through an environment variable so
# that credentials never have to be edited into the notebook. The fallbacks are
# the values this notebook has always used (presumably the database's public
# read-only account -- confirm before reusing them elsewhere).
conn = psycopg2.connect(
    host=os.environ.get('COVID19DB_HOST', 'covid19db.org'),
    port=int(os.environ.get('COVID19DB_PORT', 5432)),
    dbname=os.environ.get('COVID19DB_NAME', 'covid19'),
    user=os.environ.get('COVID19DB_USER', 'covid19'),
    password=os.environ.get('COVID19DB_PASSWORD', 'covid19'),
)

cur = conn.cursor()

In [None]:
# Let the database compute the latest date. Selecting every row and taking the
# last one is both wasteful (the whole table is transferred) and wrong in
# general: without ORDER BY the row order of a SELECT is unspecified.
cur.execute("""SELECT MAX(date) FROM weather""")

date = pd.DataFrame(cur.fetchall(), columns=["date"])
most_recent = date.date.values[-1]

# MAX() over an empty table yields NULL; fail here with a clear message rather
# than with an obscure type error in the date arithmetic further down.
if pd.isnull(most_recent):
    raise ValueError("weather table is empty: cannot determine the most recent date")

In [None]:
# Days still missing from the database: from the day after the latest stored
# record up to yesterday, in one-day increments.
# NOTE(review): datetime.now() is local time -- confirm this matches the
# timezone the daily files are published in.
step = datetime.timedelta(days=1)
start = most_recent + step
stop = datetime.datetime.now() - step
DATERANGE = pd.date_range(start=start, end=stop, freq=step)
DATERANGE


## Create a DF for each variable and then merge

In [None]:
# Catalogue of weather indicators, keyed by the name used for output columns.
# Each entry records:
#   'folder'   - sub-folder of URL holding the indicator's daily files,
#   'file'     - file-name prefix placed before the YYYYMMDD date; it is always
#                'global_daily_<folder>_', so it is derived from the folder,
#   'variable' - name of the variable to read inside the netCDF file.
variables = {
    indicator: {'folder': folder,
                'file': 'global_daily_' + folder + '_',
                'variable': nc_variable}
    for indicator, folder, nc_variable in (
        ('precip_max', 'precip_max', 'precipitation_flux'),
        ('precip_mean', 'precip_mean', 'precipitation_flux'),
        ('specific_humidity_max', 'sh_max', 'specific_humidity'),
        ('specific_humidity_mean', 'sh_mean', 'specific_humidity'),
        ('specific_humidity_min', 'sh_min', 'specific_humidity'),
        ('short_wave_radiation_max', 'sw_max', 'm01s01i202'),
        ('short_wave_radiation_mean', 'sw_mean', 'm01s01i202'),
        ('air_temperature_max', 't1o5m_max', 'air_temperature'),
        ('air_temperature_mean', 't1o5m_mean', 'air_temperature'),
        ('air_temperature_min', 't1o5m_min', 'air_temperature'),
        ('windgust_max', 'windgust_max', 'wind_speed_of_gust'),
        ('windgust_mean', 'windgust_mean', 'wind_speed_of_gust'),
        ('windgust_min', 'windgust_min', 'wind_speed_of_gust'),
        ('windspeed_max', 'windspeed_max', 'wind_speed'),
        ('windspeed_mean', 'windspeed_mean', 'wind_speed'),
        ('windspeed_min', 'windspeed_min', 'wind_speed'),
    )
}

In [None]:
# Opening netCDF4 files directly from a URL is not reliable
# (it requires the package to be built with OPeNDAP support),
# so we download the file and write it to disk before opening it.
def download_MET_file(url, file_name):
    """Download `url` and store the response body at `file_name`.

    Any file already present at `file_name` is removed first, so that after a
    failed download no stale data from a previous call is left behind.

    Args:
        url: address of the file to fetch with an HTTP GET request.
        file_name: local path the response body is written to (binary mode).

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    try:
        os.remove(file_name)
    except OSError:
        # Best effort only (typically the file just does not exist yet);
        # opening with "wb" below truncates whatever is left anyway.
        pass

    # Fetch before opening the target so a failed request cannot leave an
    # empty file on disk.
    response = get(url)
    # Fail loudly instead of saving an HTML/XML error page as a ".nc" file.
    response.raise_for_status()

    # The `with` block closes the file; no explicit close() is needed.
    with open(file_name, "wb") as file:
        file.write(response.content)

In [None]:
def create_aggr_df(indicator, daterange):
    """Aggregate one weather indicator over every administrative area, per day.

    For each day in `daterange` the matching netCDF file is downloaded to
    "temp/netCDF4_file.nc", the indicator's values are flattened to 1-D, and
    for every area_0/area_1/area_2 entry of the module-level `adm_2_to_grid`
    the mean and standard deviation of the values at that entry's grid
    indices are recorded.

    Args:
        indicator: key of the module-level `variables` dict.
        daterange: sized iterable of objects providing `strftime`
            (e.g. a pandas DatetimeIndex). May be empty.

    Returns:
        pandas.DataFrame with columns 'day', 'country', 'region', 'city',
        '<indicator>_avg' and '<indicator>_std'; it has no rows when
        `daterange` is empty.
    """
    days = []
    country = []
    avg = []
    std = []
    region = []
    city = []

    # The dict holds references to the lists above, so rows appended in the
    # loop below end up in the returned frame.
    columns = {'day': days, 'country': country, 'region': region, 'city': city,
               indicator+'_avg': avg,
               indicator+'_std': std}

    # Nothing to load (e.g. the database is already up to date): return an empty
    # frame with the expected columns instead of failing on daterange[0].
    if len(daterange) == 0:
        print("no dates to load for {}".format(indicator))
        return pd.DataFrame(data=columns)

    print("loading data for {} from {} to {}".format(indicator,
                                                     daterange[0].strftime('%Y-%m-%d'),
                                                     daterange[-1].strftime('%Y-%m-%d')))

    spec = variables[indicator]

    for day in daterange:
        day_label = day.strftime('%Y-%m-%d')

        download_MET_file(URL+"{}/{}{}.nc".format(spec['folder'],
                                                  spec['file'],
                                                  day.strftime('%Y%m%d')), "temp/netCDF4_file.nc")

        # Close the dataset as soon as the values are in memory: the same path
        # is overwritten by the next download, and an open handle would leak
        # once per day otherwise.
        with netCDF4.Dataset("temp/netCDF4_file.nc") as nc:
            # NOTE(review): `.data` drops the mask of a masked array, so
            # fill values (if any) take part in the statistics -- confirm
            # the files contain no missing cells.
            data = nc.variables[spec['variable']][:].data.reshape(-1)

        for area_0 in adm_2_to_grid:
            for area_1 in adm_2_to_grid[area_0]:
                for area_2 in adm_2_to_grid[area_0][area_1]:
                    # The first element of each point is used as an index
                    # into the flattened grid.
                    idx_list = [point[0] for point in adm_2_to_grid[area_0][area_1][area_2]]

                    to_avg = [data[idx] for idx in idx_list]

                    days.append(day_label)
                    country.append(area_0)
                    region.append(area_1)
                    city.append(area_2)
                    avg.append(np.mean(to_avg))
                    std.append(np.std(to_avg))

    return pd.DataFrame(data=columns)
    

In [None]:
%%time
dfs = [create_aggr_df(indicator, DATERANGE) for indicator in variables]

In [None]:
# Sanity check: preview the frame of the last indicator rather than dumping it whole.
dfs[-1].head()

In [None]:
from functools import reduce

# Fold the per-indicator frames into one wide frame, joining successively on
# the shared key columns (pandas' default join for merge is used).
df_final = reduce(
    lambda merged, nxt: merged.merge(nxt, on=['day', 'country', 'region', 'city']),
    dfs,
)

In [None]:
# Preview the merged result rather than dumping the whole frame.
df_final.head()

In [None]:
# Create the output directory portably; exist_ok=True keeps the cell re-runnable
# (a plain `!mkdir` reports an error once the directory exists).
os.makedirs("to_upload", exist_ok=True)
# Pickle protocol 3 is kept explicit, as in the original export.
df_final.to_pickle("to_upload/NEW_weather.pkl", protocol=3)