# Extreme Temperature Events -- Full Analysis

In [2]:
import pandas as pd
import numpy as np
import os
# import matplotlib.pyplot as plt
# import seaborn as sns
from datetime import datetime

In [10]:
# get file list
file_list = os.listdir('../data/temp')

In [11]:
file_list.remove('.ipynb_checkpoints')

file_list.remove('.DS_Store')

In [12]:
file_list

['temp_lat=30_long=-100.csv',
 'temp_lat=40.0_long=-90.0.csv',
 'temp_lat=-80_long=-180.csv',
 'temp_lat=-79_long=159.csv',
 'temp_lat=30.0_long=40.0.csv',
 'temp_lat=-70_long=80.csv',
 'temp_lat=0_long=10.csv',
 'temp_lat=-80_long=-10.csv',
 'temp_lat=80.0_long=-100.0.csv',
 'temp_lat=50_long=120.csv',
 'temp_lat=-84_long=-180.csv',
 'temp_lat=80_long=-30.csv',
 'temp_lat=70_long=-70.csv',
 'temp_lat=50_long=80.csv',
 'temp_lat=50_long=-90.csv',
 'temp_lat=20_long=80.csv',
 'temp_lat=60_long=130.csv',
 'temp_lat=40_long=50.csv',
 'temp_lat=70_long=-160.csv',
 'temp_lat=50.0_long=70.0.csv',
 'temp_lat=70_long=70.csv',
 'temp_lat=-30_long=120.csv',
 'temp_lat=-80.0_long=-100.0.csv',
 'temp_lat=-20_long=-60.csv',
 'temp_lat=40_long=-100.csv',
 'temp_lat=-80_long=-140.csv',
 'temp_lat=-70.0_long=70.0.csv',
 'temp_lat=70_long=100.csv',
 'temp_lat=-50_long=-70.csv',
 'temp_lat=-70_long=40.csv',
 'temp_lat=50_long=-110.csv',
 'temp_lat=50_long=40.csv',
 'temp_lat=40_long=110.csv',
 'temp_lat

In [13]:
len(file_list)

274

In [14]:
status_notes = []
data_directory = '../data/temp/'

In [61]:
data_test = pd.read_csv('../data/temp/'+file_list[31]).drop(columns = ['Unnamed: 0'])

In [47]:
type(data_test.loc[567,'date'])

str

In [62]:
data_test.columns

Index(['date', 'temp_mean', 'temp_max', 'temp_min', 'latitude', 'longitude'], dtype='object')

In [63]:
data_test

Unnamed: 0,date,temp_mean,temp_max,temp_min,latitude,longitude
0,2020-01-01 06:00:00,-4.712534,-2.45350,-7.26627,-80.0,-170.0
1,2020-01-02 06:00:00,-7.239511,-5.41675,-10.22324,-80.0,-170.0
2,2020-01-03 06:00:00,-6.816109,-6.14060,-7.45206,-80.0,-170.0
3,2020-01-04 06:00:00,-7.670342,-6.23050,-9.52927,-80.0,-170.0
4,2020-01-05 06:00:00,-8.614050,-7.03266,-9.70997,-80.0,-170.0
...,...,...,...,...,...,...
2022,2025-07-15 06:00:00,-40.519329,-35.29168,-42.88892,-80.0,-170.0
2023,2025-07-16 06:00:00,-42.481691,-35.17032,-49.37826,-80.0,-170.0
2024,2025-07-17 06:00:00,-49.638347,-46.81170,-51.75855,-80.0,-170.0
2025,2025-07-18 06:00:00,-50.609535,-48.85988,-52.49424,-80.0,-170.0


In [15]:
def find_extreme_temp_events(data):
    """
    takes a pandas dataframe and returns the extreme temperature entries

    Parameters
    ----------
    data : DataFrame
        daily entries of date, mean daily temperature, maximum daily temperature, and minimum daily temperature
        along with latitude and longitude

    Returns
    -------
    final_data : DataFrame
        entries of dates experiencing a heat or cold wave
    """

    status = {}

    lat = int(data.loc[0,'latitude'])
    long = int(data.loc[0,'longitude'])

    
    #try: 
        # date in a useful format
    df = pd.DataFrame(columns=['date', 'month'])
        
    for i in data.index:
        date = datetime.date(datetime.fromisoformat(str(data.loc[i,'date'])))
        df.loc[i, 'date'] = date
        df.loc[i, 'month'] = int(date.month)

    new_data = pd.merge(left=df, right=data.drop(columns=['date']), left_index=True, right_index=True)
        # calculate monthly maximum and minimum averages and standard deviation to 
        # calculate maximum and minimum extreme temperatures (avg +- 2.5 std)
    
    month_temp = pd.DataFrame(columns=['month', 'max_extreme', 'min_extreme'])
    
    for month in range (1,13):
            # heat wave
        max_mean = new_data[new_data['month'] == month]['temp_max'].mean()
        max_std = new_data[new_data['month'] == month]['temp_max'].std()
        max_extreme = max_mean + 2.5*max_std
            
            # cold wave
        min_mean = new_data[new_data['month'] == month]['temp_min'].mean()
        min_std = new_data[new_data['month'] == month]['temp_min'].std()
        min_extreme = min_mean - 2.5*min_std
    
        month_temp.loc[month] = {
            'month': month,
            'max_extreme': max_extreme,
            'min_extreme': min_extreme,
            'latitude': lat,
            'longitude': long
        }

    #return month_temp
        # export the monthly information
    month_temp.to_csv(f"../data/temp_events/monthly_summary_lat={lat}_long={long}.csv")
        
        # determine if day is above max extreme or below min extreme
    df2 = pd.DataFrame(columns = ['is_above_max_extreme', 'is_below_min_extreme'])
        
    
    for i in data.index:
        is_extreme_heat = False
        is_extreme_cold = False
        month = new_data.loc[i, 'date'].month

        if new_data.loc[i, 'temp_max'] >= month_temp.loc[month, 'max_extreme']:
            is_extreme_heat = True
        if new_data.loc[i, 'temp_min'] <= month_temp.loc[month, 'min_extreme']:
            is_extreme_cold = True

        df2.loc[i] = {
            'is_above_max_extreme': is_extreme_heat,
            'is_below_min_extreme': is_extreme_cold
        }

    extreme_data = pd.merge(left= new_data, right= df2, left_index=True, right_index=True)

        # temp waves
    temp_waves = pd.DataFrame(columns = ['is_heat_wave', 'is_cold_wave'])
        
    for i in extreme_data.index:
        is_heat_wave = False
        is_cold_wave = False

        if extreme_data.loc[i, 'is_above_max_extreme'] == True and (
            extreme_data.loc[i-1, 'is_above_max_extreme'] == True 
            or extreme_data.loc[i+1, 'is_above_max_extreme'] == True
        ):
            is_heat_wave = True

        if extreme_data.loc[i, 'is_below_min_extreme'] == True and (
            extreme_data.loc[i-1, 'is_below_min_extreme'] == True
            or extreme_data.loc[i+1, 'is_below_min_extreme'] == True
        ):
            is_cold_wave = True
    
        temp_waves.loc[i] = {'is_heat_wave': is_heat_wave, 'is_cold_wave': is_cold_wave}
    
        # merge into final data frame
    final_data = pd.merge(
        left = extreme_data[[
            'date', 'temp_max', 'temp_min', 'latitude', 'longitude', 
            'is_above_max_extreme', 'is_below_min_extreme'
        ]], 
        right= temp_waves, 
        left_index= True, 
        right_index= True
    )

    return final_data

#    except:
#        status = {'latitude': lat, 'longitude': long, 'status': 'error occurred'}

#        return status

In [80]:
find_extreme_temp_events(data_test)

Unnamed: 0,date,temp_max,temp_min,latitude,longitude,is_above_max_extreme,is_below_min_extreme,is_heat_wave,is_cold_wave
0,2020-01-01,-2.45350,-7.26627,-80.0,-170.0,False,False,False,False
1,2020-01-02,-5.41675,-10.22324,-80.0,-170.0,False,False,False,False
2,2020-01-03,-6.14060,-7.45206,-80.0,-170.0,False,False,False,False
3,2020-01-04,-6.23050,-9.52927,-80.0,-170.0,False,False,False,False
4,2020-01-05,-7.03266,-9.70997,-80.0,-170.0,False,False,False,False
...,...,...,...,...,...,...,...,...,...
2022,2025-07-15,-35.29168,-42.88892,-80.0,-170.0,False,False,False,False
2023,2025-07-16,-35.17032,-49.37826,-80.0,-170.0,False,False,False,False
2024,2025-07-17,-46.81170,-51.75855,-80.0,-170.0,False,False,False,False
2025,2025-07-18,-48.85988,-52.49424,-80.0,-170.0,False,False,False,False


In [16]:
status_notes = []

for file in file_list:
    data = pd.read_csv(data_directory + file).drop(columns=['Unnamed: 0'])
    
    status_entry = {}
    lat = int(data.loc[0,'latitude'])
    long = int(data.loc[0,'longitude'])

    try: 
        final_data = find_extreme_temp_events(data)

        # record final dataframe
        file_path = f"../data/temp_events/temp_waves_lat={lat}_long={long}.csv"
        final_data[(final_data['is_heat_wave'] == True) | (final_data['is_cold_wave']==True)].to_csv(file_path)

    except:
        status_entry = {'latitude': lat, 'longitude': long, 'status': 'error occurred', 'file': file}
        print(status_entry)
        
        status_notes.append(status_entry)

{'latitude': -79, 'longitude': 40, 'status': 'error occurred', 'file': 'temp_lat=+10_long=+10.csv'}
{'latitude': 80, 'longitude': -90, 'status': 'error occurred', 'file': 'temp_lat=80_long=-90.csv'}
{'latitude': 60, 'longitude': 149, 'status': 'error occurred', 'file': 'temp_lat=60_long=150.csv'}
{'latitude': 60, 'longitude': 139, 'status': 'error occurred', 'file': 'temp_lat=60_long=140.csv'}
{'latitude': 51, 'longitude': 0, 'status': 'error occurred', 'file': 'temp_lat=51_long=0.csv'}
{'latitude': 50, 'longitude': 99, 'status': 'error occurred', 'file': 'temp_lat=50_long=+100.csv'}
{'latitude': -79, 'longitude': 40, 'status': 'error occurred', 'file': 'temp_lat=-80_long=+40.csv'}
{'latitude': -40, 'longitude': 100, 'status': 'error occurred', 'file': 'temp_status_notes.csv'}
{'latitude': 80, 'longitude': -90, 'status': 'error occurred', 'file': 'temp_lat=80.0_long=-90.0.csv'}
{'latitude': 51, 'longitude': 0, 'status': 'error occurred', 'file': 'temp_lat=52.0_long=-0.0.csv'}


In [17]:
status_notes

[{'latitude': -79,
  'longitude': 40,
  'status': 'error occurred',
  'file': 'temp_lat=+10_long=+10.csv'},
 {'latitude': 80,
  'longitude': -90,
  'status': 'error occurred',
  'file': 'temp_lat=80_long=-90.csv'},
 {'latitude': 60,
  'longitude': 149,
  'status': 'error occurred',
  'file': 'temp_lat=60_long=150.csv'},
 {'latitude': 60,
  'longitude': 139,
  'status': 'error occurred',
  'file': 'temp_lat=60_long=140.csv'},
 {'latitude': 51,
  'longitude': 0,
  'status': 'error occurred',
  'file': 'temp_lat=51_long=0.csv'},
 {'latitude': 50,
  'longitude': 99,
  'status': 'error occurred',
  'file': 'temp_lat=50_long=+100.csv'},
 {'latitude': -79,
  'longitude': 40,
  'status': 'error occurred',
  'file': 'temp_lat=-80_long=+40.csv'},
 {'latitude': -40,
  'longitude': 100,
  'status': 'error occurred',
  'file': 'temp_status_notes.csv'},
 {'latitude': 80,
  'longitude': -90,
  'status': 'error occurred',
  'file': 'temp_lat=80.0_long=-90.0.csv'},
 {'latitude': 51,
  'longitude': 0,
 