In [1]:
# Library
import numpy as np
import pandas as pd

**<font color='blue'> Notebook description</font>**

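<font color='blue'>This notebook converts the daily meteorological records of the reference period (1983–2008) into warm-day thresholds: for each region (or country) and each calendar month, the threshold is the 90th percentile of the daily maximum temperature. </font>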

In [2]:
# Load the daily climate files for the three Belgian cities
ROW_SKIP = 13  # leading lines skipped in each city file
climate_brussels, climate_dinant, climate_antwerp = (
    pd.read_csv(city_path, skiprows = ROW_SKIP)
    for city_path in (
        '../data/Daily_19830101_20081231_brussels.csv',
        '../data/Daily_19830101_20081231_dinant.csv',
        '../data/Daily_19830101_20081231_antwerp.csv',
    )
)

In [3]:
# Import raw data - Multiple countries
# Leading lines skipped in the multi-country files (presumably a metadata
# header - confirm against the data files).
ROW_SKIP = 11
def read_file(filename, country, skiprows = ROW_SKIP):
    """Read one multi-country CSV file and tag every row with its country.

    Parameters
    ----------
    filename : str
        File stem (no directory, no '.csv' extension) inside
        '../data/multi_countries/'.
    country : str
        Value written to the new 'COUNTRY' column.
    skiprows : int, optional
        Number of leading lines to skip. The default is bound when this
        function is defined, so a later reassignment of the global ROW_SKIP
        (the city-loading cell also sets that name) cannot silently change
        how these files are parsed.

    Returns
    -------
    pandas.DataFrame
        The parsed file with an added 'COUNTRY' column.
    """
    data = pd.read_csv('../data/multi_countries/' + filename + '.csv', skiprows = skiprows)
    data['COUNTRY'] = country
    return data

In [4]:
filenames = ['Belgium', 'France', 'Greece', 'Russia', 'Spain', 'UK', 'Romania']

# One frame per country: the file stem is the country name plus '_ref',
# and the bare country name becomes the COUNTRY label.
df_countries = [read_file(filename + '_ref', filename) for filename in filenames]

In [5]:
# Tag each city frame with its numeric region code
region_name = 'REGION'
for city_frame, region_code in (
    (climate_brussels, 4000),
    (climate_antwerp, 2000),
    (climate_dinant, 3000),
):
    city_frame[region_name] = region_code

In [6]:
# Concatenate dataframes from cities
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0. The original row indexes are kept, as before.
df_concat = pd.concat([climate_brussels, climate_antwerp, climate_dinant])

# Concatenate dataframes from countries
# NOTE: df_countries is rebound from a list of frames to a single frame here,
# so re-run the loading cell before re-running this one.
df_countries = pd.concat(df_countries)

In [7]:
# Give the date columns their full names in both tables
date_dict = {'YEAR': 'YEAR', 'MO': 'MONTH', 'DY': 'DAY'}
for table in (df_concat, df_countries):
    table.rename(columns = date_dict, inplace = True)

In [8]:
# Monthly distribution for each region
class cleaner_climate():
    """Turn daily climate records into per-group temperature thresholds.

    Parameters
    ----------
    df_concat : pandas.DataFrame
        Daily records. Must contain the 'T2M_MAX' column and every column
        named in ``group_list``; a 'YEAR' column is needed only when a year
        is skipped.
    group_list : list of str
        Columns to group by, e.g. ['REGION', 'MONTH'].
    """

    def __init__(self, df_concat, group_list):
        self.climate_city = df_concat
        self.group_list = group_list

    def skip_year(self, year = None):
        """Return the records without the rows of ``year``.

        With the default ``year=None`` nothing is filtered and the stored
        frame itself is returned.
        """
        if year is not None:
            return self.climate_city[self.climate_city['YEAR'] != year]
        return self.climate_city

    def monthly_stat(self, year = None):
        """Compute the threshold for every group.

        Definition of threshold: 90th quantile of daily max temperature
        ('T2M_MAX') within each group of ``group_list``.

        Parameters
        ----------
        year : optional
            Year to leave out of the reference period. The default (None)
            uses every year, matching the previous behaviour.

        Returns
        -------
        pandas.DataFrame
            One column, 'TEMPMAX_90th', indexed by the grouping columns.
        """
        df_groupby = self.skip_year(year).groupby(self.group_list)
        # to_frame keeps the group index and its names even when there are
        # no groups, unlike assigning into an empty DataFrame.
        return df_groupby['T2M_MAX'].quantile(.90).to_frame('TEMPMAX_90th')

In [9]:
# Thresholds per Belgian region and month
Cleaner = cleaner_climate(df_concat = df_concat, group_list = ['REGION', 'MONTH'])
climate = Cleaner.monthly_stat()

# Thresholds per country and month (Cleaner is rebound to the country data)
Cleaner = cleaner_climate(df_concat = df_countries, group_list = ['COUNTRY', 'MONTH'])
climate_countries = Cleaner.monthly_stat()

In [10]:
# Write both threshold tables to CSV, with the group keys as ordinary columns
for thresholds, out_path in (
    (climate, '../data/reference_dist_1983_to_2008.csv'),
    (climate_countries, '../data/reference_europe_1983_to_2008.csv'),
):
    thresholds.reset_index().to_csv(out_path, index = False)