# Location Coding

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

In [4]:
# Load the table of unique grid coordinates. The file carries an extra
# 'Unnamed: 0' column (presumably a row index written by an earlier to_csv),
# which is discarded right after reading.
locations = pd.read_csv('../data/extreme_temp/unique_coordinates.csv')
locations = locations.drop(columns=['Unnamed: 0'])

In [5]:
# Preview the coordinate table that was just loaded.
locations

Unnamed: 0,latitude,longitude
0,50.0,60.0
1,50.0,10.0
2,20.0,-100.0
3,-80.0,150.0
4,60.0,60.0
...,...,...
206,-70.0,30.0
207,60.0,-150.0
208,0.0,20.0
209,30.0,90.0


In [6]:
# Load the per-day extreme temperature records and discard the extra
# 'Unnamed: 0' column (presumably a row index written by an earlier to_csv).
extreme_temp = pd.read_csv('../data/extreme_temp/extreme_temp_2020_2025.csv')
extreme_temp = extreme_temp.drop(columns=['Unnamed: 0'])

In [7]:
# Preview the extreme-temperature records (one row per date and grid point).
extreme_temp

Unnamed: 0,date,temp_max,temp_min,latitude,longitude,is_above_max_extreme,is_below_min_extreme,is_heat_wave,is_cold_wave,month,lat_int,long_int
0,2021-02-21,-22.45165,-31.61204,50.0,60.0,False,True,False,True,2,50,60
1,2021-02-22,-7.08665,-32.61977,50.0,60.0,False,True,False,True,2,50,60
2,2021-02-25,-21.70254,-31.47707,50.0,60.0,False,True,False,True,2,50,60
3,2021-02-26,-17.15050,-30.80697,50.0,60.0,False,True,False,True,2,50,60
4,2021-03-13,-11.63486,-24.28841,50.0,60.0,False,True,False,True,3,50,60
...,...,...,...,...,...,...,...,...,...,...,...,...
2480,2022-04-30,24.60113,8.67733,-20.0,30.0,False,True,False,True,4,-20,30
2481,2022-12-01,23.88186,10.91958,-20.0,30.0,False,True,False,True,12,-20,30
2482,2022-12-02,27.61004,12.09255,-20.0,30.0,False,True,False,True,12,-20,30
2483,2024-03-05,34.28573,18.25308,-20.0,30.0,True,False,True,False,3,-20,30


In [8]:
# Attach the coordinate table to every extreme-temperature record by matching
# the grid keys (lat_int, long_int) against (latitude, longitude).
# Both inputs have 'latitude' and 'longitude' columns, so the result carries
# them twice: suffixed _x (from extreme_temp) and _y (from locations).
# NOTE(review): later cells use 'country', 'region', 'city', ... columns that
# the `locations` frame previewed earlier in this notebook does not contain;
# confirm which coordinates file was actually loaded before this merge ran.
full_data = extreme_temp.merge(
    locations,
    how='inner',
    left_on=['lat_int', 'long_int'],
    right_on=['latitude', 'longitude'],
)

In [9]:
# Inspect the merged frame, including the suffixed coordinate columns.
full_data

Unnamed: 0,date,temp_max,temp_min,latitude_x,longitude_x,is_above_max_extreme,is_below_min_extreme,is_heat_wave,is_cold_wave,month,lat_int,long_int,latitude_y,longitude_y
0,2021-02-21,-22.45165,-31.61204,50.0,60.0,False,True,False,True,2,50,60,50.0,60.0
1,2021-02-22,-7.08665,-32.61977,50.0,60.0,False,True,False,True,2,50,60,50.0,60.0
2,2021-02-25,-21.70254,-31.47707,50.0,60.0,False,True,False,True,2,50,60,50.0,60.0
3,2021-02-26,-17.15050,-30.80697,50.0,60.0,False,True,False,True,2,50,60,50.0,60.0
4,2021-03-13,-11.63486,-24.28841,50.0,60.0,False,True,False,True,3,50,60,50.0,60.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2480,2022-04-30,24.60113,8.67733,-20.0,30.0,False,True,False,True,4,-20,30,-20.0,30.0
2481,2022-12-01,23.88186,10.91958,-20.0,30.0,False,True,False,True,12,-20,30,-20.0,30.0
2482,2022-12-02,27.61004,12.09255,-20.0,30.0,False,True,False,True,12,-20,30,-20.0,30.0
2483,2024-03-05,34.28573,18.25308,-20.0,30.0,True,False,True,False,3,-20,30,-20.0,30.0


In [26]:
# Keep only the rows whose matched latitude ('latitude_y') lies above -65.
above_latitude_cutoff = full_data['latitude_y'] > -65
data = full_data[above_latitude_cutoff]

In [27]:
# List the columns available after the latitude filter.
data.columns

Index(['date', 'temp_max', 'temp_min', 'latitude_x', 'longitude_x',
       'is_above_max_extreme', 'is_below_min_extreme', 'is_heat_wave',
       'is_cold_wave', 'month', 'lat_int', 'long_int', 'latitude_y',
       'longitude_y', 'country', 'region', 'location', 'city', 'notes'],
      dtype='object')

In [28]:
# Give the '_y'-suffixed coordinate columns their plain names back.
coordinate_renames = {
    'latitude_y': 'latitude',
    'longitude_y': 'longitude',
}
data = data.rename(columns=coordinate_renames)

In [29]:
# Confirm that the rename took effect.
data.columns

Index(['date', 'temp_max', 'temp_min', 'latitude_x', 'longitude_x',
       'is_above_max_extreme', 'is_below_min_extreme', 'is_heat_wave',
       'is_cold_wave', 'month', 'lat_int', 'long_int', 'latitude', 'longitude',
       'country', 'region', 'location', 'city', 'notes'],
      dtype='object')

In [30]:
# Remove the remaining duplicate coordinate columns ('*_x' and the grid
# keys) together with 'location'; 'latitude' and 'longitude' stay.
unneeded_columns = ['latitude_x', 'longitude_x', 'lat_int', 'long_int', 'location']
data = data.drop(unneeded_columns, axis='columns')

In [31]:
# Inspect the trimmed frame.
data

Unnamed: 0,date,temp_max,temp_min,is_above_max_extreme,is_below_min_extreme,is_heat_wave,is_cold_wave,month,latitude,longitude,country,region,city,notes
0,2021-02-21,-22.45165,-31.61204,False,True,False,True,2,50,60,Kazakhstan,,Aktobe,
1,2021-02-22,-7.08665,-32.61977,False,True,False,True,2,50,60,Kazakhstan,,Aktobe,
2,2021-02-25,-21.70254,-31.47707,False,True,False,True,2,50,60,Kazakhstan,,Aktobe,
3,2021-02-26,-17.15050,-30.80697,False,True,False,True,2,50,60,Kazakhstan,,Aktobe,
4,2021-03-13,-11.63486,-24.28841,False,True,False,True,3,50,60,Kazakhstan,,Aktobe,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4946,2022-04-30,24.60113,8.67733,False,True,False,True,4,-20,30,Zimbabwe,,Harare,
4947,2022-12-01,23.88186,10.91958,False,True,False,True,12,-20,30,Zimbabwe,,Harare,
4948,2022-12-02,27.61004,12.09255,False,True,False,True,12,-20,30,Zimbabwe,,Harare,
4949,2024-03-05,34.28573,18.25308,True,False,True,False,3,-20,30,Zimbabwe,,Harare,


In [33]:
# Apply the same latitude cutoff (> -65) to the coordinate table itself, so
# that only those locations are iterated over further down.
location_above_cutoff = locations['latitude'] > -65
loc = locations[location_above_cutoff]

In [42]:
# Read one monthly-summary file (lat=70, long=-130) as a sample and discard
# its 'Unnamed: 0' column.
new_df = pd.read_csv('../data/temp_events/monthly_summary_lat=70_long=-130.csv')
new_df = new_df.drop(columns=["Unnamed: 0"])

In [43]:
# Tag every row of the sample with the coordinates encoded in its file name.
new_df = new_df.assign(latitude=70, longitude=-130)

In [45]:
# Check the column layout of a single monthly-summary file once the
# coordinate columns have been added.
new_df.columns

Index(['month', 'max_mean', 'max_std', 'max_extreme', 'min_mean', 'min_std',
       'min_extreme', 'latitude', 'longitude'],
      dtype='object')

In [54]:
# Empty placeholder with the monthly-summary column layout; the per-location
# summaries are collected into `extreme_data` by the loading loop.
summary_columns = [
    'month',
    'max_mean', 'max_std', 'max_extreme',
    'min_mean', 'min_std', 'min_extreme',
    'latitude', 'longitude',
]
extreme_data = pd.DataFrame(columns=summary_columns)

In [55]:
# Load the monthly summary of every retained location and stack them into a
# single frame, tagging each row with the coordinates of its source file.
#
# The per-file frames are gathered in a list and concatenated once at the end:
# concatenating inside the loop re-copies all accumulated rows on every pass,
# concatenating onto an empty placeholder frame is something newer pandas
# versions warn about, and rebuilding from a fresh list keeps the cell safe to
# re-run (the result no longer grows each time the cell executes).
#
# NOTE(review): the file name embeds lat/long exactly as they print, so float
# coordinates (e.g. 50.0) would look for '...lat=50.0_...'. The sample file
# read earlier uses integer-style names; confirm the dtype of `loc`.
dir_path = '../data/temp_events/'
monthly_frames = []

for lat, long in zip(loc['latitude'], loc['longitude']):
    file_name = f"monthly_summary_lat={lat}_long={long}.csv"

    df = pd.read_csv(dir_path + file_name).drop(columns=['Unnamed: 0'])
    df['latitude'] = lat
    df['longitude'] = long
    monthly_frames.append(df)

# With no locations there is nothing to concatenate (pd.concat rejects an
# empty list), so the existing placeholder frame is left as it is.
if monthly_frames:
    extreme_data = pd.concat(monthly_frames, ignore_index=True)

  extreme_data = pd.concat([extreme_data, df], ignore_index=True)


In [56]:
# Inspect the stacked monthly summaries for all loaded locations.
extreme_data

Unnamed: 0,month,max_mean,max_std,max_extreme,min_mean,min_std,min_extreme,latitude,longitude
0,1,-10.589673,8.048381,9.531280,-15.676431,8.840515,-37.777719,60,-160
1,2,-7.678298,5.838886,6.918918,-12.897107,7.651830,-32.026683,60,-160
2,3,-6.501635,5.269826,6.672930,-12.802537,6.987687,-30.271754,60,-160
3,4,-1.734576,4.993063,10.748080,-7.282061,6.409844,-23.306671,60,-160
4,5,4.696106,3.745009,14.058627,-0.909495,2.993975,-8.394434,60,-160
...,...,...,...,...,...,...,...,...,...
1723,8,10.417553,4.946011,22.782581,4.422825,3.054746,-3.214039,70,150
1724,9,4.172173,3.751886,13.551889,-0.736435,3.241838,-8.841029,70,150
1725,10,-8.313387,5.895060,6.424264,-13.388035,6.687279,-30.106233,70,150
1726,11,-20.964123,6.545296,-4.600882,-25.701555,6.246190,-41.317030,70,150


In [58]:
# Re-check the event table's layout before joining it with the summaries.
data.head()

Unnamed: 0,date,temp_max,temp_min,is_above_max_extreme,is_below_min_extreme,is_heat_wave,is_cold_wave,month,latitude,longitude,country,region,city,notes
0,2021-02-21,-22.45165,-31.61204,False,True,False,True,2,50,60,Kazakhstan,,Aktobe,
1,2021-02-22,-7.08665,-32.61977,False,True,False,True,2,50,60,Kazakhstan,,Aktobe,
2,2021-02-25,-21.70254,-31.47707,False,True,False,True,2,50,60,Kazakhstan,,Aktobe,
3,2021-02-26,-17.1505,-30.80697,False,True,False,True,2,50,60,Kazakhstan,,Aktobe,
4,2021-03-13,-11.63486,-24.28841,False,True,False,True,3,50,60,Kazakhstan,,Aktobe,


In [62]:
# Join each event row with the monthly summary of the same month and grid
# point, then discard the two per-day extreme flags.
summary_keys = ['month', 'latitude', 'longitude']
big_data = (
    data
    .merge(extreme_data, how='inner', on=summary_keys)
    .drop(columns=['is_above_max_extreme', 'is_below_min_extreme'])
)

In [63]:
# Spot-check that the summary columns were attached to the event rows.
big_data.head()

Unnamed: 0,date,temp_max,temp_min,is_heat_wave,is_cold_wave,month,latitude,longitude,country,region,city,notes,max_mean,max_std,max_extreme,min_mean,min_std,min_extreme
0,2021-02-21,-22.45165,-31.61204,False,True,2,50,60,Kazakhstan,,Aktobe,,-5.858933,5.901358,8.894463,-13.182437,6.962362,-30.588342
1,2021-02-22,-7.08665,-32.61977,False,True,2,50,60,Kazakhstan,,Aktobe,,-5.858933,5.901358,8.894463,-13.182437,6.962362,-30.588342
2,2021-02-25,-21.70254,-31.47707,False,True,2,50,60,Kazakhstan,,Aktobe,,-5.858933,5.901358,8.894463,-13.182437,6.962362,-30.588342
3,2021-02-26,-17.1505,-30.80697,False,True,2,50,60,Kazakhstan,,Aktobe,,-5.858933,5.901358,8.894463,-13.182437,6.962362,-30.588342
4,2021-03-13,-11.63486,-24.28841,False,True,3,50,60,Kazakhstan,,Aktobe,,1.137817,6.954127,18.523133,-7.996859,6.382087,-23.952077


In [64]:
# Select the heat-wave rows and keep only the identifying columns plus the
# summary statistics for the daily maximum.
heat_wave_columns = [
    'date', 'temp_max', 'temp_min', 'latitude', 'longitude',
    'country', 'region', 'city', 'notes',
    'max_mean', 'max_std', 'max_extreme',
]
is_heat_wave_row = big_data['is_heat_wave'] == True
# .copy() makes the result an independent frame, so the column assignments
# performed on it later do not relate back to big_data.
heat_waves = big_data.loc[is_heat_wave_row, heat_wave_columns].copy()

In [65]:
# Inspect the heat-wave subset (row labels are still those of big_data).
heat_waves

Unnamed: 0,date,temp_max,temp_min,latitude,longitude,country,region,city,notes,max_mean,max_std,max_extreme
8,2022-09-01,34.90664,18.79586,50,60,Kazakhstan,,Aktobe,,19.870005,5.656088,34.010225
9,2022-09-02,36.77734,21.30377,50,60,Kazakhstan,,Aktobe,,19.870005,5.656088,34.010225
10,2022-09-03,35.11940,18.95266,50,60,Kazakhstan,,Aktobe,,19.870005,5.656088,34.010225
13,2023-03-27,18.99990,0.00570,50,60,Kazakhstan,,Aktobe,,1.137817,6.954127,18.523133
14,2023-03-28,19.69106,1.41110,50,60,Kazakhstan,,Aktobe,,1.137817,6.954127,18.523133
...,...,...,...,...,...,...,...,...,...,...,...,...
3310,2020-01-04,34.72660,19.35720,-20,30,Zimbabwe,,Harare,,26.274844,3.124867,34.087012
3311,2020-01-05,35.32192,20.07010,-20,30,Zimbabwe,,Harare,,26.274844,3.124867,34.087012
3312,2020-01-06,35.53590,21.57280,-20,30,Zimbabwe,,Harare,,26.274844,3.124867,34.087012
3319,2024-03-05,34.28573,18.25308,-20,30,Zimbabwe,,Harare,,26.121787,3.037911,33.716565


In [66]:
# How far each day's maximum lies above the 'max_extreme' value of its
# month and location, expressed in multiples of 'max_std'.
# Plain column arithmetic yields the same values as a row-by-row apply
# without a Python call per row, and still produces a Series when the frame
# has no rows.
heat_waves['std_above_extreme'] = (
    (heat_waves['temp_max'] - heat_waves['max_extreme']) / heat_waves['max_std']
)

In [67]:
# Inspect the heat-wave table with the new 'std_above_extreme' column.
heat_waves

Unnamed: 0,date,temp_max,temp_min,latitude,longitude,country,region,city,notes,max_mean,max_std,max_extreme,std_above_extreme
8,2022-09-01,34.90664,18.79586,50,60,Kazakhstan,,Aktobe,,19.870005,5.656088,34.010225,0.158487
9,2022-09-02,36.77734,21.30377,50,60,Kazakhstan,,Aktobe,,19.870005,5.656088,34.010225,0.489228
10,2022-09-03,35.11940,18.95266,50,60,Kazakhstan,,Aktobe,,19.870005,5.656088,34.010225,0.196103
13,2023-03-27,18.99990,0.00570,50,60,Kazakhstan,,Aktobe,,1.137817,6.954127,18.523133,0.068559
14,2023-03-28,19.69106,1.41110,50,60,Kazakhstan,,Aktobe,,1.137817,6.954127,18.523133,0.167947
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3310,2020-01-04,34.72660,19.35720,-20,30,Zimbabwe,,Harare,,26.274844,3.124867,34.087012,0.204677
3311,2020-01-05,35.32192,20.07010,-20,30,Zimbabwe,,Harare,,26.274844,3.124867,34.087012,0.395187
3312,2020-01-06,35.53590,21.57280,-20,30,Zimbabwe,,Harare,,26.274844,3.124867,34.087012,0.463664
3319,2024-03-05,34.28573,18.25308,-20,30,Zimbabwe,,Harare,,26.121787,3.037911,33.716565,0.187354


In [76]:
# Renumber the rows 0..n-1; the filtered frame still carried big_data's
# row labels. drop=True discards the old labels instead of keeping them
# as a column.
heat_waves = heat_waves.reset_index(drop=True)

In [84]:
# Turn the ISO-formatted date strings into plain datetime.date objects.
# pd.to_datetime converts the whole column at once and also accepts values
# that are already dates, so running this cell a second time is harmless
# (parsing row by row with datetime.fromisoformat fails on non-string input).
heat_waves['date'] = pd.to_datetime(heat_waves['date']).dt.date

In [85]:
# Final look at the heat-wave table before it is written to disk.
heat_waves

Unnamed: 0,date,temp_max,temp_min,latitude,longitude,country,region,city,notes,max_mean,max_std,max_extreme,std_above_extreme
0,2022-09-01,34.90664,18.79586,50,60,Kazakhstan,,Aktobe,,19.870005,5.656088,34.010225,0.158487
1,2022-09-02,36.77734,21.30377,50,60,Kazakhstan,,Aktobe,,19.870005,5.656088,34.010225,0.489228
2,2022-09-03,35.11940,18.95266,50,60,Kazakhstan,,Aktobe,,19.870005,5.656088,34.010225,0.196103
3,2023-03-27,18.99990,0.00570,50,60,Kazakhstan,,Aktobe,,1.137817,6.954127,18.523133,0.068559
4,2023-03-28,19.69106,1.41110,50,60,Kazakhstan,,Aktobe,,1.137817,6.954127,18.523133,0.167947
...,...,...,...,...,...,...,...,...,...,...,...,...,...
974,2020-01-04,34.72660,19.35720,-20,30,Zimbabwe,,Harare,,26.274844,3.124867,34.087012,0.204677
975,2020-01-05,35.32192,20.07010,-20,30,Zimbabwe,,Harare,,26.274844,3.124867,34.087012,0.395187
976,2020-01-06,35.53590,21.57280,-20,30,Zimbabwe,,Harare,,26.274844,3.124867,34.087012,0.463664
977,2024-03-05,34.28573,18.25308,-20,30,Zimbabwe,,Harare,,26.121787,3.037911,33.716565,0.187354


In [87]:
# Persist the heat-wave table. `index` is left at its default, so the row
# index is written as an unnamed first column of the CSV.
heat_waves.to_csv('../data/extreme_temp/heat_waves.csv')

In [93]:
# Select the cold-wave rows and keep only the identifying columns plus the
# summary statistics for the daily minimum.
cold_wave_columns = [
    'date', 'temp_max', 'temp_min', 'latitude', 'longitude',
    'country', 'region', 'city', 'notes',
    'min_mean', 'min_std', 'min_extreme',
]
is_cold_wave_row = big_data['is_cold_wave'] == True
# .copy() makes the result an independent frame, so the column assignments
# performed on it later do not relate back to big_data.
cold_waves = big_data.loc[is_cold_wave_row, cold_wave_columns].copy()

In [95]:
# How far each day's minimum lies below the 'min_extreme' value of its
# month and location, expressed in multiples of 'min_std'.
# Plain column arithmetic yields the same values as a row-by-row apply
# without a Python call per row, and still produces a Series when the frame
# has no rows.
cold_waves['std_below_extreme'] = (
    (cold_waves['min_extreme'] - cold_waves['temp_min']) / cold_waves['min_std']
)

In [96]:
# Renumber the rows 0..n-1 after the filter on big_data; drop=True discards
# the old row labels instead of keeping them as a column.
cold_waves = cold_waves.reset_index(drop=True)

In [97]:
# Final look at the cold-wave table before it is written to disk.
cold_waves

Unnamed: 0,date,temp_max,temp_min,latitude,longitude,country,region,city,notes,min_mean,min_std,min_extreme,std_below_extreme
0,2021-02-21,-22.45165,-31.61204,50,60,Kazakhstan,,Aktobe,,-13.182437,6.962362,-30.588342,0.147033
1,2021-02-22,-7.08665,-32.61977,50,60,Kazakhstan,,Aktobe,,-13.182437,6.962362,-30.588342,0.291773
2,2021-02-25,-21.70254,-31.47707,50,60,Kazakhstan,,Aktobe,,-13.182437,6.962362,-30.588342,0.127647
3,2021-02-26,-17.15050,-30.80697,50,60,Kazakhstan,,Aktobe,,-13.182437,6.962362,-30.588342,0.031401
4,2021-03-13,-11.63486,-24.28841,50,60,Kazakhstan,,Aktobe,,-7.996859,6.382087,-23.952077,0.052700
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2337,2020-02-27,27.36135,12.72924,-20,30,Zimbabwe,,Harare,,17.735753,1.777715,13.291464,0.316262
2338,2022-04-29,20.91152,8.61123,-20,30,Zimbabwe,,Harare,,14.816893,2.126911,9.499616,0.417688
2339,2022-04-30,24.60113,8.67733,-20,30,Zimbabwe,,Harare,,14.816893,2.126911,9.499616,0.386610
2340,2022-12-01,23.88186,10.91958,-20,30,Zimbabwe,,Harare,,18.296606,2.192378,12.815660,0.864851


In [98]:
# Persist the cold-wave table. `index` is left at its default, so the row
# index is written as an unnamed first column of the CSV.
cold_waves.to_csv('../data/extreme_temp/cold_waves.csv')