In [6]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import linregress
import hvplot.pandas
from helpers import get_dataset_range

In [7]:
# NOAA classifies 2015-16 and 2010-11 as strong or very strong events
# of el nino and la nina, respectively
nino_data = get_dataset_range(2015, 2016)
nina_data = get_dataset_range(2010, 2011)

# all non-strong classified data
interval1_data = get_dataset_range(2008, 2009)
interval2_data = get_dataset_range(2012, 2014)
interval3_data = get_dataset_range(2017, 2022)

In [11]:
# nino_data[2015].info()
# nino_data[2015]['EVENT_TYPE'].value_counts()

In [23]:
# split relevant states into climate regions as defined:
# https://www.ncei.noaa.gov/access/monitoring/reference-maps/us-climate-regions
regions = {
    'northwest': ['Washington', 'Oregon', 'Idaho'],
    'west': ['California', 'Nevada'],
    'southwest': ['Utah', 'New Mexico', 'Wyoming', 'Arizona'],
    'south': ['Texas', 'Oklahoma', 'Louisiana', 'Arkansas', 'Mississippi'],
    'southeast': ['Alabama', 'Georgia', 'Florida', 'North Carolina', 'South Carolina', 'Virginia'],
    'midwest': ['Minnesota', 'Wisconsin', 'Michigan', 'Iowa'],
    'ohio': ['Ohio', 'Missouri', 'Tennessee', 'Kentucky', 'West Virginia', 'Indiana', 'Illinois']
    # exclude northeast
}
# too lazy to caps manually
for region in regions.keys():
    regions[region] = [r.upper() for r in regions[region]]

def count_events(dataset_list):
    # to count events in each region
    region_counts = { 'northwest': 0, 'west': 0, 'southwest': 0, 'south': 0, 
                    'southeast': 0, 'midwest': 0, 'ohio': 0 }
    for set in dataset_list.values():
        for region in regions.keys():
            # relevant = set[set['STATE'] in region]
            relevant = set[np.isin(set['STATE'], regions[region])]
            region_counts[region] += len(relevant)
    return region_counts
        

In [25]:
nino_counts = count_events(nino_data)
print(nino_counts)
nina_counts = count_events(nina_data)
print(nina_counts)

{'northwest': 2209, 'west': 2914, 'southwest': 5685, 'south': 18073, 'southeast': 16543, 'midwest': 10732, 'ohio': 21179}
{'northwest': 1935, 'west': 2108, 'southwest': 5689, 'south': 22196, 'southeast': 22050, 'midwest': 13834, 'ohio': 28981}


In [28]:
interval1_counts = count_events(interval1_data)
interval2_counts = count_events(interval2_data)
interval3_counts = count_events(interval3_data)
interval_num_yrs = 2+3+6
interval_counts = { 'northwest': 0, 'west': 0, 'southwest': 0, 'south': 0, 
                    'southeast': 0, 'midwest': 0, 'ohio': 0 }
for region in interval_counts.keys():
    year_weight = 2 / interval_num_yrs
    interval_counts[region] = round((interval1_counts[region] + interval2_counts[region] + interval3_counts[region]) * year_weight, 2)

print(interval_counts)

{'northwest': 1975.64, 'west': 3592.0, 'southwest': 5738.73, 'south': 15367.27, 'southeast': 13256.18, 'midwest': 10032.0, 'ohio': 17278.91}
