# Extreme Temperature Events -- Full Analysis

In [1]:
import pandas as pd
import numpy as np
import os
# import matplotlib.pyplot as plt
# import seaborn as sns
from datetime import datetime

In [2]:
# get file list
file_list = os.listdir('../data/temp')

In [3]:
file_list.remove('.ipynb_checkpoints')

file_list.remove('.DS_Store')

In [5]:
len(file_list)

274

In [6]:
status_notes = []
data_directory = '../data/temp/'

## Functions

In [170]:
def find_cold_waves(df):
    """
    takes a pands data frame and returns the cold waves

    Parameters
    ----------
    df : DataFrame

    Returns
    -------
    cold_extreme : DataFrame 
        Pandas dataframe of all cold events with start date, start month, latitude, longitude, average max, min, changes, etc
        single row for each event
    """
    # find the cold waves: start by identifying the days when the minimum temperature is below the extreme minimum temperature
    cold_days = df[df['min'] <= df['min_extreme']]

    # calculating the change in temperature from the average minimum and the extreme minimum in both 
    cold_days.loc[:,'delta_min_mean_C'] = cold_days.loc[:,'min'] - cold_days.loc[:,'min_mean']
    cold_days.loc[:,'delta_min_mean_std'] = cold_days.loc[:,'delta_min_mean_C'] / cold_days.loc[:,'min_std']
    cold_days.loc[:,'delta_min_extreme_C'] = cold_days.loc[:,'min'] - cold_days.loc[:,'min_extreme']
    cold_days.loc[:,'delta_min_extreme_std'] = cold_days.loc[:,'delta_min_extreme_C'] / cold_days.loc[:,'min_std']

    # identify events as consecutive days below extreme minimum and label them
    cold_days.loc[:,'event_label'] = (cold_days.groupby(by=['latitude', 'longitude'])['date'].diff() != pd.Timedelta(days = 1)).cumsum()
    cold_days.loc[:,'duration'] = cold_days.groupby(by='event_label')['date'].transform('count')
    cold_days = cold_days.reset_index(drop=True)

    # average temperatures for each event
    event_means = cold_days.groupby(by='event_label')[[
        'max', 'min', 'min_mean', 'min_std', 'min_extreme', 
        'delta_min_mean_C', 'delta_min_mean_std', 
        'delta_min_extreme_C', 'delta_min_extreme_std'
    ]].mean()

    # only want a single entry (or row) per event, not one for each day
    indices_keep = []
    cold_waves = cold_days[cold_days['duration'] > 1].reset_index(drop=True)

    for event in cold_waves['event_label'].unique():
        indices_keep.append(cold_waves[cold_waves['event_label']==event].index[0])

    cold_extreme = pd.merge(
        left=cold_waves[['date', 'month', 'latitude', 'longitude', 'duration', 'event_label']], 
        right=event_means, 
        on='event_label'
    )

    cold_extreme = cold_extreme.rename(columns = {'date': 'start_date', 'month': 'start_month'}).drop(columns=['event_label'])

    return cold_extreme

In [173]:
def find_heat_waves(df):
    """
    takes a pands data frame and returns the heat waves

    Parameters
    ----------
    df : DataFrame

    Returns
    -------
    heat_extreme : DataFrame 
        Pandas dataframe of all heat events with start date, start month, latitude, longitude, average max, min, changes, etc
        single row for each event
    """
    # find the cold waves: start by identifying the days when the minimum temperature is below the extreme minimum temperature
    hot_days = df[df['max'] >= df['max_extreme']]

    # calculating the change in temperature from the average minimum and the extreme minimum in both 
    hot_days.loc[:,'delta_max_mean_C'] = hot_days.loc[:,'max'] + hot_days.loc[:,'max_mean']
    hot_days.loc[:,'delta_max_mean_std'] = hot_days.loc[:,'delta_max_mean_C'] / hot_days.loc[:,'max_std']
    hot_days.loc[:,'delta_max_extreme_C'] = hot_days.loc[:,'max'] + hot_days.loc[:,'max_extreme']
    hot_days.loc[:,'delta_max_extreme_std'] = hot_days.loc[:,'delta_max_extreme_C'] / hot_days.loc[:,'max_std']

    # identify events as consecutive days below extreme minimum and label them
    hot_days.loc[:,'event_label'] = (hot_days.groupby(by=['latitude', 'longitude'])['date'].diff() != pd.Timedelta(days = 1)).cumsum()
    hot_days.loc[:,'duration'] = hot_days.groupby(by='event_label')['date'].transform('count')
    hot_days = hot_days.reset_index(drop=True)

    # average temperatures for each event
    event_means = hot_days.groupby(by='event_label')[[
        'max', 'min', 'max_mean', 'max_std', 'max_extreme', 
        'delta_max_mean_C', 'delta_max_mean_std', 
        'delta_max_extreme_C', 'delta_max_extreme_std'
    ]].mean()

    # only want a single entry (or row) per event, not one for each day
    indices_keep = []
    heat_waves = hot_days[hot_days['duration'] > 1].reset_index(drop=True)

    for event in heat_waves['event_label'].unique():
        indices_keep.append(heat_waves[heat_waves['event_label']==event].index[0])

    heat_extreme = pd.merge(
        left=heat_waves[['date', 'month', 'latitude', 'longitude', 'duration', 'event_label']], 
        right=event_means, 
        on='event_label'
    )

    heat_extreme = heat_extreme.rename(columns = {'date': 'start_date', 'month': 'start_month'}).drop(columns=['event_label'])

    return heat_extreme

In [175]:
def find_extreme_temp_events(df):
    """
    takes a pandas dataframe and returns the extreme temperature entries

    Parameters
    ----------
    df : DataFrame
        daily entries of date, mean daily temperature, maximum daily temperature, and minimum daily temperature
        along with latitude and longitude

    Returns
    -------
    final_data : DataFrame
        entries of dates experiencing a heat or cold wave
    """

    status = {}

    lat = int(np.round(df.loc[0,'latitude']))
    long = int(np.round(df.loc[0,'longitude']))

    
    monthly_summary, data = monthly_temperatures(df)
        
    df_big = pd.merge(left=data.drop(columns=['latitude', 'longitude']), right=monthly_summary, on='month')
    
    return find_cold_waves(df_big), find_heat_waves(df_big)

## Processing Data

In [49]:
df_test.columns

MultiIndex([('temp_max', 'mean'),
            ('temp_max',  'std'),
            ('temp_min', 'mean'),
            ('temp_min',  'std')],
           )

In [53]:
df_test.columns = ['_'.join(a) for a in df_test.columns.to_flat_index()]

In [55]:
df_test.columns = ['max_mean', 'max_std', 'min_mean', 'min_std']

In [60]:
df_test['max_extreme'] = df_test['max_mean'] + 2.5*df_test['max_std']
df_test['min_extreme'] = df_test['min_mean'] + 2.5*df_test['min_std']

In [61]:
df_test

Unnamed: 0_level_0,max_mean,max_std,min_mean,min_std,max_extreme,min_extreme
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,-0.354406,4.617219,-5.68469,6.020766,11.188641,9.367224
2,-0.229309,4.807856,-6.573696,5.607346,11.790332,7.444668
3,6.975964,5.800056,-2.046706,5.204669,21.476104,10.964967
4,15.555335,4.851315,5.107265,3.931232,27.683622,14.935346
5,19.944071,4.707988,9.078702,3.951365,31.714041,18.957115
6,25.716075,4.222997,15.616066,3.043644,36.273568,23.225176
7,28.547912,3.952037,17.788191,2.800881,38.428004,24.790394
8,28.731866,3.390608,18.09695,2.902903,37.208387,25.354207
9,21.422744,4.752982,11.398288,3.676266,33.305198,20.588953
10,13.655065,4.889339,5.988719,3.780282,25.878412,15.439424


In [89]:
def monthly_temperatures(df):
    """
    finds the monthly mean, standard deviation, and extreme temperatures for a dataframe of daily temperatures

    parameters
    ----------
    df : DataFrame
        daily entries of date, mean, maximum, and minimum temperature along with latitude and longitude

    Returns
    -------
    month_temp : DataFrame
        the mean, standard deviation and extreme for minimum and maximum temperatures of each month
    df : DataFrame
        with date column in datetime and an added month column
    """

    lat = int(np.round(df.loc[0,'latitude']))
    long = int(np.round(df.loc[0,'longitude']))

    df = df.rename(columns = {'temp_max': 'max', 'temp_min': 'min'})
    df['date'] = pd.to_datetime(df['date'])
    df['month'] = df.apply(lambda row: row['date'].month, axis=1)

    month_temp = df.groupby(by='month')[['max','min']].agg(['mean', 'std'])
    month_temp['max', 'extreme'] = month_temp['max','mean'] + 2.5 * month_temp['max', 'std']
    month_temp['min', 'extreme'] = month_temp['min','mean'] - 2.5 * month_temp['min', 'std']
    month_temp.columns = ['_'.join(a) for a in month_temp.columns.to_flat_index()]
    month_temp['latitude'] = lat
    month_temp['longitude'] = long
    
    month_temp.to_csv(f"../data/monthly/monthly_summary_lat={lat}_long={long}.csv")

    return month_temp, df

In [90]:
test1 = pd.read_csv('../data/temp/'+file_list[31]).drop(columns = ['Unnamed: 0'])
mon_temp, new_df = monthly_temperatures(test1)

In [94]:
df_big = pd.merge(left=new_df.drop(columns=['latitude', 'longitude']), right=mon_temp, on='month')

In [107]:
hot_days = df_big[df_big['max'] >= df_big['max_extreme']]

In [108]:
hot_days

Unnamed: 0,date,temp_mean,max,min,month,max_mean,max_std,min_mean,min_std,max_extreme,min_extreme,latitude,longitude
57,2020-02-27 06:00:00,7.860128,12.6739,1.739,2,-0.229309,4.807856,-6.573696,5.607346,11.790332,-20.59206,50,40
1432,2023-12-03 06:00:00,4.719906,10.73507,-1.0877,12,-0.160229,3.880364,-4.12887,4.410327,9.540682,-15.154688,50,40


In [110]:
hot_days.loc[:,'delta_max_mean_C'] = hot_days.loc[:,'max'] - hot_days.loc[:,'max_mean']
hot_days.loc[:,'delta_max_mean_std'] = hot_days.loc[:,'delta_max_mean_C'] / hot_days.loc[:,'max_std']
hot_days.loc[:,'delta_max_extreme_C'] = hot_days.loc[:,'max'] - hot_days.loc[:,'max_extreme']
hot_days.loc[:,'delta_max_extreme_std'] = hot_days.loc[:,'delta_max_extreme_C'] / hot_days.loc[:,'max_std']

In [112]:
cold_days = df_big[df_big['min'] <= df_big['min_extreme']]

In [113]:
cold_days

Unnamed: 0,date,temp_mean,max,min,month,max_mean,max_std,min_mean,min_std,max_extreme,min_extreme,latitude,longitude
322,2020-11-18 06:00:00,-10.324884,-7.38645,-15.4385,11,5.008432,4.979768,0.032142,4.92265,17.457852,-12.274483,50,40
323,2020-11-19 06:00:00,-11.707749,-7.95862,-17.56862,11,5.008432,4.979768,0.032142,4.92265,17.457852,-12.274483,50,40
386,2021-01-21 06:00:00,-15.997502,-5.1038,-22.4233,1,-0.354406,4.617219,-5.68469,6.020766,11.188641,-20.736605,50,40
638,2021-09-30 06:00:00,7.130307,12.53704,1.22604,9,21.422744,4.752982,11.398288,3.676266,33.305198,2.207624,50,40
664,2021-10-26 06:00:00,0.705431,6.31637,-3.62028,10,13.655065,4.889339,5.988719,3.780282,25.878412,-3.461987,50,40
721,2021-12-22 06:00:00,-14.198993,-12.77344,-15.5751,12,-0.160229,3.880364,-4.12887,4.410327,9.540682,-15.154688,50,40
722,2021-12-23 06:00:00,-14.509271,-13.57215,-15.96195,12,-0.160229,3.880364,-4.12887,4.410327,9.540682,-15.154688,50,40
723,2021-12-24 06:00:00,-12.756594,-6.45554,-17.75724,12,-0.160229,3.880364,-4.12887,4.410327,9.540682,-15.154688,50,40
800,2022-03-11 06:00:00,-8.716831,-5.98273,-15.83674,3,6.975964,5.800056,-2.046706,5.204669,21.476104,-15.058379,50,40
801,2022-03-12 06:00:00,-13.01696,-7.57786,-20.0096,3,6.975964,5.800056,-2.046706,5.204669,21.476104,-15.058379,50,40


In [116]:
cold_days.loc[:,'delta_min_mean_C'] = cold_days.loc[:,'min'] - cold_days.loc[:,'min_mean']
cold_days.loc[:,'delta_min_mean_std'] = cold_days.loc[:,'delta_min_mean_C'] / cold_days.loc[:,'min_std']
cold_days.loc[:,'delta_min_extreme_C'] = cold_days.loc[:,'min'] - cold_days.loc[:,'min_extreme']
cold_days.loc[:,'delta_min_extreme_std'] = cold_days.loc[:,'delta_min_extreme_C'] / cold_days.loc[:,'min_std']

In [121]:
cold_days

Unnamed: 0,date,temp_mean,max,min,month,max_mean,max_std,min_mean,min_std,max_extreme,min_extreme,latitude,longitude,delta_min_mean_C,delta_min_mean_std,delta_min_extreme_C,delta_min_extreme_std,event_label
322,2020-11-18 06:00:00,-10.324884,-7.38645,-15.4385,11,5.008432,4.979768,0.032142,4.92265,17.457852,-12.274483,50,40,-15.470642,-3.142747,-3.164017,-0.642747,1
323,2020-11-19 06:00:00,-11.707749,-7.95862,-17.56862,11,5.008432,4.979768,0.032142,4.92265,17.457852,-12.274483,50,40,-17.600762,-3.575465,-5.294137,-1.075465,1
386,2021-01-21 06:00:00,-15.997502,-5.1038,-22.4233,1,-0.354406,4.617219,-5.68469,6.020766,11.188641,-20.736605,50,40,-16.73861,-2.780146,-1.686695,-0.280146,2
638,2021-09-30 06:00:00,7.130307,12.53704,1.22604,9,21.422744,4.752982,11.398288,3.676266,33.305198,2.207624,50,40,-10.172248,-2.767006,-0.981584,-0.267006,3
664,2021-10-26 06:00:00,0.705431,6.31637,-3.62028,10,13.655065,4.889339,5.988719,3.780282,25.878412,-3.461987,50,40,-9.608999,-2.541873,-0.158293,-0.041873,4
721,2021-12-22 06:00:00,-14.198993,-12.77344,-15.5751,12,-0.160229,3.880364,-4.12887,4.410327,9.540682,-15.154688,50,40,-11.44623,-2.595324,-0.420412,-0.095324,5
722,2021-12-23 06:00:00,-14.509271,-13.57215,-15.96195,12,-0.160229,3.880364,-4.12887,4.410327,9.540682,-15.154688,50,40,-11.83308,-2.683039,-0.807262,-0.183039,5
723,2021-12-24 06:00:00,-12.756594,-6.45554,-17.75724,12,-0.160229,3.880364,-4.12887,4.410327,9.540682,-15.154688,50,40,-13.62837,-3.090104,-2.602552,-0.590104,5
800,2022-03-11 06:00:00,-8.716831,-5.98273,-15.83674,3,6.975964,5.800056,-2.046706,5.204669,21.476104,-15.058379,50,40,-13.790034,-2.64955,-0.778361,-0.14955,6
801,2022-03-12 06:00:00,-13.01696,-7.57786,-20.0096,3,6.975964,5.800056,-2.046706,5.204669,21.476104,-15.058379,50,40,-17.962894,-3.451304,-4.951221,-0.951304,6


In [120]:
cold_days.loc[:,'event_label'] = (cold_days.groupby(by=['latitude', 'longitude'])['date'].diff() != pd.Timedelta(days = 1)).cumsum()

In [123]:
cold_days.loc[:,'duration'] = cold_days.groupby(by='event_label')['date'].transform('count')

In [129]:
cold_days = cold_days.reset_index(drop=True)

In [140]:
cold_days

Unnamed: 0,date,temp_mean,max,min,month,max_mean,max_std,min_mean,min_std,max_extreme,min_extreme,latitude,longitude,delta_min_mean_C,delta_min_mean_std,delta_min_extreme_C,delta_min_extreme_std,event_label,duration
0,2020-11-18 06:00:00,-10.324884,-7.38645,-15.4385,11,5.008432,4.979768,0.032142,4.92265,17.457852,-12.274483,50,40,-15.470642,-3.142747,-3.164017,-0.642747,1,2
1,2020-11-19 06:00:00,-11.707749,-7.95862,-17.56862,11,5.008432,4.979768,0.032142,4.92265,17.457852,-12.274483,50,40,-17.600762,-3.575465,-5.294137,-1.075465,1,2
2,2021-01-21 06:00:00,-15.997502,-5.1038,-22.4233,1,-0.354406,4.617219,-5.68469,6.020766,11.188641,-20.736605,50,40,-16.73861,-2.780146,-1.686695,-0.280146,2,1
3,2021-09-30 06:00:00,7.130307,12.53704,1.22604,9,21.422744,4.752982,11.398288,3.676266,33.305198,2.207624,50,40,-10.172248,-2.767006,-0.981584,-0.267006,3,1
4,2021-10-26 06:00:00,0.705431,6.31637,-3.62028,10,13.655065,4.889339,5.988719,3.780282,25.878412,-3.461987,50,40,-9.608999,-2.541873,-0.158293,-0.041873,4,1
5,2021-12-22 06:00:00,-14.198993,-12.77344,-15.5751,12,-0.160229,3.880364,-4.12887,4.410327,9.540682,-15.154688,50,40,-11.44623,-2.595324,-0.420412,-0.095324,5,3
6,2021-12-23 06:00:00,-14.509271,-13.57215,-15.96195,12,-0.160229,3.880364,-4.12887,4.410327,9.540682,-15.154688,50,40,-11.83308,-2.683039,-0.807262,-0.183039,5,3
7,2021-12-24 06:00:00,-12.756594,-6.45554,-17.75724,12,-0.160229,3.880364,-4.12887,4.410327,9.540682,-15.154688,50,40,-13.62837,-3.090104,-2.602552,-0.590104,5,3
8,2022-03-11 06:00:00,-8.716831,-5.98273,-15.83674,3,6.975964,5.800056,-2.046706,5.204669,21.476104,-15.058379,50,40,-13.790034,-2.64955,-0.778361,-0.14955,6,4
9,2022-03-12 06:00:00,-13.01696,-7.57786,-20.0096,3,6.975964,5.800056,-2.046706,5.204669,21.476104,-15.058379,50,40,-17.962894,-3.451304,-4.951221,-0.951304,6,4


In [164]:
event_means = cold_days.groupby(by='event_label')[['max', 'min', 'min_mean', 'min_std', 'min_extreme']].mean()

In [156]:
indices_keep = []
cold_waves = cold_days[cold_days['duration'] > 1].reset_index(drop=True)

for event in cold_waves['event_label'].unique():
    indices_keep.append(cold_waves[cold_waves['event_label']==event].index[0])

In [157]:
indices_keep

[0, 2, 5, 9, 11, 13]

In [158]:
cold_waves.iloc[indices_keep]

Unnamed: 0,date,temp_mean,max,min,month,max_mean,max_std,min_mean,min_std,max_extreme,min_extreme,latitude,longitude,delta_min_mean_C,delta_min_mean_std,delta_min_extreme_C,delta_min_extreme_std,event_label,duration
0,2020-11-18 06:00:00,-10.324884,-7.38645,-15.4385,11,5.008432,4.979768,0.032142,4.92265,17.457852,-12.274483,50,40,-15.470642,-3.142747,-3.164017,-0.642747,1,2
2,2021-12-22 06:00:00,-14.198993,-12.77344,-15.5751,12,-0.160229,3.880364,-4.12887,4.410327,9.540682,-15.154688,50,40,-11.44623,-2.595324,-0.420412,-0.095324,5,3
5,2022-03-11 06:00:00,-8.716831,-5.98273,-15.83674,3,6.975964,5.800056,-2.046706,5.204669,21.476104,-15.058379,50,40,-13.790034,-2.64955,-0.778361,-0.14955,6,4
9,2023-11-22 06:00:00,-10.333315,-7.77415,-14.0627,11,5.008432,4.979768,0.032142,4.92265,17.457852,-12.274483,50,40,-14.094842,-2.863263,-1.788217,-0.363263,7,2
11,2024-01-13 06:00:00,-21.829136,-17.63154,-26.24477,1,-0.354406,4.617219,-5.68469,6.020766,11.188641,-20.736605,50,40,-20.56008,-3.414861,-5.508165,-0.914861,9,2
13,2025-03-01 06:00:00,-11.761045,-5.0087,-18.53808,3,6.975964,5.800056,-2.046706,5.204669,21.476104,-15.058379,50,40,-16.491374,-3.168573,-3.479701,-0.668573,12,2


In [167]:
extreme_cold = pd.merge(
    left=cold_waves[[
        'date', 'month', 'latitude', 'longitude', 
        'delta_min_mean_C', 'delta_min_mean_std', 'delta_min_extreme_C', 'delta_min_extreme_std', 
        'duration', 'event_label'
    ]], 
    right=event_means, 
    on='event_label'
)

In [172]:
find_cold_waves(df_big)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cold_days.loc[:,'delta_min_mean_C'] = cold_days.loc[:,'min'] - cold_days.loc[:,'min_mean']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cold_days.loc[:,'delta_min_mean_std'] = cold_days.loc[:,'delta_min_mean_C'] / cold_days.loc[:,'min_std']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cold_days

Unnamed: 0,date,month,latitude,longitude,duration,event_label,max,min,min_mean,min_std,min_extreme,delta_min_mean_C,delta_min_mean_std,delta_min_extreme_C,delta_min_extreme_std
0,2020-11-18 06:00:00,11,50,40,2,1,-7.672535,-16.50356,0.032142,4.92265,-12.274483,-16.535702,-3.359106,-4.229077,-0.859106
1,2020-11-19 06:00:00,11,50,40,2,1,-7.672535,-16.50356,0.032142,4.92265,-12.274483,-16.535702,-3.359106,-4.229077,-0.859106
2,2021-12-22 06:00:00,12,50,40,3,5,-10.93371,-16.43143,-4.12887,4.410327,-15.154688,-12.30256,-2.789489,-1.276742,-0.289489
3,2021-12-23 06:00:00,12,50,40,3,5,-10.93371,-16.43143,-4.12887,4.410327,-15.154688,-12.30256,-2.789489,-1.276742,-0.289489
4,2021-12-24 06:00:00,12,50,40,3,5,-10.93371,-16.43143,-4.12887,4.410327,-15.154688,-12.30256,-2.789489,-1.276742,-0.289489
5,2022-03-11 06:00:00,3,50,40,4,6,-4.792442,-17.520235,-2.046706,5.204669,-15.058379,-15.473529,-2.973009,-2.461856,-0.473009
6,2022-03-12 06:00:00,3,50,40,4,6,-4.792442,-17.520235,-2.046706,5.204669,-15.058379,-15.473529,-2.973009,-2.461856,-0.473009
7,2022-03-13 06:00:00,3,50,40,4,6,-4.792442,-17.520235,-2.046706,5.204669,-15.058379,-15.473529,-2.973009,-2.461856,-0.473009
8,2022-03-14 06:00:00,3,50,40,4,6,-4.792442,-17.520235,-2.046706,5.204669,-15.058379,-15.473529,-2.973009,-2.461856,-0.473009
9,2023-11-22 06:00:00,11,50,40,2,7,-5.786995,-13.31306,0.032142,4.92265,-12.274483,-13.345202,-2.710979,-1.038577,-0.210979


In [169]:
extreme_cold.head()

Unnamed: 0,date,month,latitude,longitude,delta_min_mean_C,delta_min_mean_std,delta_min_extreme_C,delta_min_extreme_std,duration,event_label,max,min,min_mean,min_std,min_extreme
0,2020-11-18 06:00:00,11,50,40,-15.470642,-3.142747,-3.164017,-0.642747,2,1,-7.38645,-15.4385,0.032142,4.92265,-12.274483
1,2020-11-19 06:00:00,11,50,40,-17.600762,-3.575465,-5.294137,-1.075465,2,1,-7.38645,-15.4385,0.032142,4.92265,-12.274483
2,2021-12-22 06:00:00,12,50,40,-11.44623,-2.595324,-0.420412,-0.095324,3,5,-12.77344,-15.5751,-4.12887,4.410327,-15.154688
3,2021-12-23 06:00:00,12,50,40,-11.83308,-2.683039,-0.807262,-0.183039,3,5,-12.77344,-15.5751,-4.12887,4.410327,-15.154688
4,2021-12-24 06:00:00,12,50,40,-13.62837,-3.090104,-2.602552,-0.590104,3,5,-12.77344,-15.5751,-4.12887,4.410327,-15.154688


In [178]:
cold_waves, heat_waves = find_extreme_temp_events(data_test)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cold_days.loc[:,'delta_min_mean_C'] = cold_days.loc[:,'min'] - cold_days.loc[:,'min_mean']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cold_days.loc[:,'delta_min_mean_std'] = cold_days.loc[:,'delta_min_mean_C'] / cold_days.loc[:,'min_std']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cold_days

In [179]:
cold_waves

Unnamed: 0,date,month,latitude,longitude,duration,event_label,max,min,min_mean,min_std,min_extreme,delta_min_mean_C,delta_min_mean_std,delta_min_extreme_C,delta_min_extreme_std
0,2020-11-18 06:00:00,11,50,40,2,1,-7.672535,-16.50356,0.032142,4.92265,-12.274483,-16.535702,-3.359106,-4.229077,-0.859106
1,2020-11-19 06:00:00,11,50,40,2,1,-7.672535,-16.50356,0.032142,4.92265,-12.274483,-16.535702,-3.359106,-4.229077,-0.859106
2,2021-12-22 06:00:00,12,50,40,3,5,-10.93371,-16.43143,-4.12887,4.410327,-15.154688,-12.30256,-2.789489,-1.276742,-0.289489
3,2021-12-23 06:00:00,12,50,40,3,5,-10.93371,-16.43143,-4.12887,4.410327,-15.154688,-12.30256,-2.789489,-1.276742,-0.289489
4,2021-12-24 06:00:00,12,50,40,3,5,-10.93371,-16.43143,-4.12887,4.410327,-15.154688,-12.30256,-2.789489,-1.276742,-0.289489
5,2022-03-11 06:00:00,3,50,40,4,6,-4.792442,-17.520235,-2.046706,5.204669,-15.058379,-15.473529,-2.973009,-2.461856,-0.473009
6,2022-03-12 06:00:00,3,50,40,4,6,-4.792442,-17.520235,-2.046706,5.204669,-15.058379,-15.473529,-2.973009,-2.461856,-0.473009
7,2022-03-13 06:00:00,3,50,40,4,6,-4.792442,-17.520235,-2.046706,5.204669,-15.058379,-15.473529,-2.973009,-2.461856,-0.473009
8,2022-03-14 06:00:00,3,50,40,4,6,-4.792442,-17.520235,-2.046706,5.204669,-15.058379,-15.473529,-2.973009,-2.461856,-0.473009
9,2023-11-22 06:00:00,11,50,40,2,7,-5.786995,-13.31306,0.032142,4.92265,-12.274483,-13.345202,-2.710979,-1.038577,-0.210979


In [180]:
heat_waves

Unnamed: 0,start_date,start_month,latitude,longitude,duration,max,min,max_mean,max_std,max_extreme,delta_max_mean_C,delta_max_mean_std,delta_max_extreme_C,delta_max_extreme_std


In [21]:
status_notes = []
data_directory = '../data/temp/'


for file in file_list:
    data = pd.read_csv(data_directory + file).drop(columns=['Unnamed: 0'])
    
    status_entry = {}
    lat = int(data.loc[0,'latitude'])
    long = int(data.loc[0,'longitude'])

    try: 
        final_data = find_extreme_temp_events(data)

        # record final dataframe
        file_path = f"../data/temp_events/temp_waves_lat={lat}_long={long}.csv"
        final_data[(final_data['is_heat_wave'] == True) | (final_data['is_cold_wave']==True)].to_csv(file_path)

    except:
        status_entry = {'latitude': lat, 'longitude': long, 'status': 'error occurred', 'file': file}
        print(status_entry)
        
        status_notes.append(status_entry)

{'latitude': -79, 'longitude': 40, 'status': 'error occurred', 'file': 'temp_lat=+10_long=+10.csv'}
{'latitude': 80, 'longitude': -90, 'status': 'error occurred', 'file': 'temp_lat=80_long=-90.csv'}
{'latitude': 60, 'longitude': 149, 'status': 'error occurred', 'file': 'temp_lat=60_long=150.csv'}
{'latitude': 60, 'longitude': 139, 'status': 'error occurred', 'file': 'temp_lat=60_long=140.csv'}
{'latitude': 51, 'longitude': 0, 'status': 'error occurred', 'file': 'temp_lat=51_long=0.csv'}
{'latitude': 50, 'longitude': 99, 'status': 'error occurred', 'file': 'temp_lat=50_long=+100.csv'}
{'latitude': -79, 'longitude': 40, 'status': 'error occurred', 'file': 'temp_lat=-80_long=+40.csv'}
{'latitude': -40, 'longitude': 100, 'status': 'error occurred', 'file': 'temp_status_notes.csv'}
{'latitude': 80, 'longitude': -90, 'status': 'error occurred', 'file': 'temp_lat=80.0_long=-90.0.csv'}
{'latitude': 51, 'longitude': 0, 'status': 'error occurred', 'file': 'temp_lat=52.0_long=-0.0.csv'}


In [23]:
len(status_notes)

10