# Heatwave Analysis by Sub-region

Overall plots showing data by sub-region, as described in Eurodataset.

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [2]:
# Load the regions table and index it by its 'lon_25_75' / 'lat_25_75' columns
regions = pd.read_csv('regions_matched.csv').set_index(['lon_25_75', 'lat_25_75'])
regions

Unnamed: 0_level_0,Unnamed: 1_level_0,ID_HDC_G0,GCPNT_LAT,GCPNT_LON,XC_NM_LST,XC_ISO_LST,GRGN_L1,GRGN_L2,UC_NM_MN,UC_NM_LST,P15,urban_center_count
lon_25_75,lat_25_75,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
-122.75,42.25,69,42.337058,-122.868137,United States,USA,Northern America,Northern America,Medford,Medford,9.101015e+04,1
-119.75,34.25,5,34.427664,-119.743693,United States,USA,Northern America,Northern America,Santa Barbara,Santa Barbara,1.147532e+05,1
-118.25,34.75,32,34.584848,-118.131720,United States,USA,Northern America,Northern America,Palmdale,Palmdale; Lancaster,2.771812e+05,1
-109.75,31.25,87,31.323486,-109.541843,Mexico;United States,MEX; USA,Latin America and the Caribbean,Central America,Agua Prieta,Agua Prieta,9.791077e+04,1
-106.75,32.25,147,32.311465,-106.769657,United States,USA,Northern America,Northern America,Las Cruces,Las Cruces,6.407468e+04,1
...,...,...,...,...,...,...,...,...,...,...,...,...
140.75,40.75,12827,40.821641,140.745742,Japan,JPN,Asia,Eastern Asia,Aomori,Aomori,1.874362e+05,1
141.25,43.25,12736,43.070521,141.373814,Japan,JPN,Asia,Eastern Asia,Sapporo,Sapporo; Ebetsu,1.894557e+06,2
141.25,43.25,12734,43.193801,141.014573,Japan,JPN,Asia,Eastern Asia,Otaru,Otaru,7.577249e+04,2
142.75,-3.25,13086,-3.474474,142.689366,Papua New Guinea,PNG,Oceania,Melanesia,,-,7.321599e+04,1


In [4]:
# Build 'cells': the sorted, de-duplicated (lon, lat) index entries of the
# analysis table. This is the list to iterate over.
analysis = (
    pd.read_csv('analysis.csv', parse_dates=['year'])
    .set_index(['lon', 'lat'])
)
cells = np.unique(analysis.index)
cells

array([(-122.75, 42.25), (-119.75, 34.25), (-118.25, 34.75),
       (-109.75, 31.25), (-106.75, 32.25), (-106.25, 28.75),
       (-103.25, 25.75), (-102.75, 20.75), (-99.75, 16.75),
       (-99.75, 17.75), (-98.75, 20.25), (-98.25, 34.75), (-97.75, 25.75),
       (-97.25, 19.75), (-96.25, 42.25), (-91.25, 14.25), (-90.75, 42.75),
       (-88.25, 15.25), (-87.75, 42.25), (-85.75, 42.75), (-84.75, 42.75),
       (-84.25, 33.75), (-79.75, -1.25), (-79.25, 44.25), (-77.75, 21.25),
       (-76.25, 4.25), (-75.25, 7.75), (-73.75, 18.25), (-73.25, -40.75),
       (-73.25, 41.25), (-72.75, 10.25), (-72.75, 46.25),
       (-71.75, -35.75), (-71.25, 10.25), (-69.25, 9.75),
       (-65.75, -27.25), (-64.75, -21.75), (-62.25, 8.25),
       (-60.75, -32.75), (-58.75, -38.75), (-56.25, -25.75),
       (-54.25, -31.25), (-53.25, -26.25), (-51.25, -22.25),
       (-50.25, -29.75), (-50.25, -27.75), (-48.75, -2.75),
       (-47.75, -23.25), (-47.75, -18.25), (-47.25, -2.75),
       (-46.75, -23.75), (-

In [5]:
# Load the analysis table indexed by (lon, lat, year)
analysis = (
    pd.read_csv('analysis.csv', parse_dates=['year'])
    .set_index(['lon', 'lat', 'year'])
)
analysis

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,tasmax_num_days,tasmax_pct_days,tasmax_max_temp,tasmin_num_days,tasmin_pct_days,tasmin_max_temp
lon,lat,year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-122.75,42.25,2006-01-01,13,0.035592,310.828217,12,0.032854,294.072998
-122.75,42.25,2007-01-01,14,0.038330,310.177887,12,0.032854,289.881714
-122.75,42.25,2008-01-01,3,0.008214,307.915619,4,0.010951,288.854095
-122.75,42.25,2009-01-01,17,0.046543,316.693970,13,0.035592,295.634766
-122.75,42.25,2010-01-01,13,0.035592,312.144409,12,0.032854,292.761444
...,...,...,...,...,...,...,...,...
144.75,-6.25,2094-01-01,9,0.024641,316.924896,305,0.835044,296.912415
144.75,-6.25,2095-01-01,6,0.016427,316.329315,302,0.826831,296.972382
144.75,-6.25,2096-01-01,10,0.027379,318.168640,256,0.700890,296.531097
144.75,-6.25,2097-01-01,6,0.016427,315.393127,297,0.813142,295.959137


In [6]:
# Example: first GRGN_L2 value recorded in `regions` for the cell at position 10
regions.loc[cells[10]]['GRGN_L2'].values[0]

'Central America'

In [7]:
# Create the 'region' column and populate it with the GRGN_L2 value of each cell.
# A cell can contain more than one city, i.e. several rows in `regions` share the
# same (lon, lat) index entry; only the first such row's region is used.
first_region_per_cell = regions.loc[~regions.index.duplicated(keep='first'), 'GRGN_L2']

# (lon, lat) of every row in `analysis`, i.e. its index without the 'year' level
row_cells = analysis.index.droplevel('year')

# Fail loudly, before `analysis` is modified, if any cell has no entry in `regions`
unmatched = row_cells[~row_cells.isin(first_region_per_cell.index)].unique()
if len(unmatched) > 0:
    raise KeyError(f'cells missing from regions: {list(unmatched)}')

# One vectorised lookup for all rows instead of a Python loop doing a
# label-based assignment per cell
analysis['region'] = first_region_per_cell.reindex(row_cells).to_numpy()

In [8]:
# Sorted array of the distinct values in the 'region' column
regions_of_cities = np.unique(analysis['region'])
regions_of_cities

array(['Caribbean', 'Central America', 'Eastern Africa', 'Eastern Asia',
       'Eastern Europe', 'Melanesia', 'Middle Africa', 'Northern Africa',
       'Northern America', 'Northern Europe', 'South America',
       'South-Central Asia', 'South-Eastern Asia', 'Southern Europe',
       'Western Africa', 'Western Asia', 'Western Europe'], dtype=object)

In [9]:
# Write the analysis table, including its 'region' column, to CSV
analysis.to_csv(path_or_buf='analysis_with_regions.csv')