In [1]:
import pandas as pd
import re

In [2]:
# Load the events table; the first line of the file is skipped before the header row.
source_csv = 'events-US-1980-2024.csv'
data = pd.read_csv(source_csv, skiprows=1)

In [3]:
# Display the freshly loaded frame to check its columns and a sample of rows.
data

Unnamed: 0,Name,Disaster,Begin Date,End Date,Total CPI-Adjusted Cost (Millions of Dollars),Deaths
0,Southern Severe Storms and Flooding (April 1980),Flooding,19800410,19800417,2707.0,7
1,Hurricane Allen (August 1980),Tropical Cyclone,19800807,19800811,2200.8,13
2,Central/Eastern Drought/Heat Wave (Summer-Fall...,Drought,19800601,19801130,39979.8,1260
3,Florida Freeze (January 1981),Freeze,19810112,19810114,2042.0,0
4,"Severe Storms, Flash Floods, Hail, Tornadoes (...",Severe Storm,19810505,19810510,1389.0,20
...,...,...,...,...,...,...
373,Southern Hail Storms (September 2023),Severe Storm,20230923,20230924,1677.9,0
374,Southern/Midwestern Drought and Heatwave (Spri...,Drought,20230401,20230930,14645.2,247
375,East Coast Storm and Flooding (December 2023),Flooding,20231216,20231218,1289.0,5
376,Southern Tornado Outbreak and East Coast Storm...,Severe Storm,20240108,20240110,1764.7,3


In [4]:
# Representative month for each season or multi-month label found in event names.
_SEASON_TO_MONTH = {
    'Winter': 'January', 'Spring': 'April', 'Summer': 'July', 'Fall': 'October',
    'Winter-Spring': 'March', 'Spring-Summer': 'June', 'Summer-Fall': 'August',
    'Fall-Winter': 'November', 'Spring-Fall': 'July', 'January-March': 'February',
    'Mid-January': 'January', 'April-May': 'April', 'May-June': 'May',
    'June-July': 'June',
}

# Matches "(<label> <yyyy>)" where <label> is one word or two words joined by a hyphen.
_MONTH_YEAR_PATTERN = re.compile(r"\((\w+(?:-\w+)?)\s(\d{4})\)")


def extract_month_year(name):
    """Extract the month and year from the parenthesised suffix of an event name.

    The name is searched for a "(<label> <yyyy>)" group, e.g. "(April 1980)" or
    "(Summer-Fall 1980)". Labels listed in the season table are translated to a
    single representative month; any other label is returned unchanged.

    Parameters
    ----------
    name : str
        Event name containing a parenthesised "<label> <year>" group.

    Returns
    -------
    tuple
        ``(month, year)`` with both items as strings, or ``(None, None)`` when
        the name contains no matching group.
    """
    match = _MONTH_YEAR_PATTERN.search(name)
    if match is None:
        return None, None
    label, year = match.groups()
    # Season / range labels map to one month; plain month names pass through.
    return _SEASON_TO_MONTH.get(label, label), year

In [5]:
# Parse every event name once, then split the (month, year) pairs into two columns.
months, years = zip(*(extract_month_year(event_name) for event_name in data['Name']))
data['Month'] = months
data['Year'] = years

In [6]:
# Convert the YYYYMMDD begin/end columns to datetimes.
for date_column in ('Begin Date', 'End Date'):
    data[date_column] = pd.to_datetime(data[date_column], format='%Y%m%d')

In [7]:
# Use the midpoint between the begin and end dates as the event's single date.
event_duration = data['End Date'] - data['Begin Date']
data['Date'] = data['Begin Date'] + event_duration / 2

In [8]:
# Replace the midpoint datetime with its 'YYYY-MM-DD' text form (time of day is dropped).
# NOTE(review): this overwrites 'Date' in place with strings, so re-running this cell
# without first recomputing 'Date' will presumably fail on `.dt` — confirm.
data['Date'] = data['Date'].dt.strftime('%Y-%m-%d')

In [9]:
# Add the CPI-adjusted cost expressed in billions (the source column is in millions).
cost_in_millions = data['Total CPI-Adjusted Cost (Millions of Dollars)']
data['Total CPI-Adjusted Cost (Billions of Dollars)'] = cost_in_millions / 1000

In [10]:
# Strip every parenthesised segment (and the whitespace before it) from the event
# names, e.g. "Hurricane Allen (August 1980)" becomes "Hurricane Allen".
parenthetical = r'\s*\([^)]*\)'
data['Name'] = data['Name'].str.replace(parenthetical, '', regex=True)

In [11]:
# Display the frame again to inspect the cleaned names and the derived
# Month, Year, Date and billions-of-dollars columns.
data

Unnamed: 0,Name,Disaster,Begin Date,End Date,Total CPI-Adjusted Cost (Millions of Dollars),Deaths,Month,Year,Date,Total CPI-Adjusted Cost (Billions of Dollars)
0,Southern Severe Storms and Flooding,Flooding,1980-04-10,1980-04-17,2707.0,7,April,1980,1980-04-13,2.7070
1,Hurricane Allen,Tropical Cyclone,1980-08-07,1980-08-11,2200.8,13,August,1980,1980-08-09,2.2008
2,Central/Eastern Drought/Heat Wave,Drought,1980-06-01,1980-11-30,39979.8,1260,August,1980,1980-08-31,39.9798
3,Florida Freeze,Freeze,1981-01-12,1981-01-14,2042.0,0,January,1981,1981-01-13,2.0420
4,"Severe Storms, Flash Floods, Hail, Tornadoes",Severe Storm,1981-05-05,1981-05-10,1389.0,20,May,1981,1981-05-07,1.3890
...,...,...,...,...,...,...,...,...,...,...
373,Southern Hail Storms,Severe Storm,2023-09-23,2023-09-24,1677.9,0,September,2023,2023-09-23,1.6779
374,Southern/Midwestern Drought and Heatwave,Drought,2023-04-01,2023-09-30,14645.2,247,July,2023,2023-07-01,14.6452
375,East Coast Storm and Flooding,Flooding,2023-12-16,2023-12-18,1289.0,5,December,2023,2023-12-17,1.2890
376,Southern Tornado Outbreak and East Coast Storm,Severe Storm,2024-01-08,2024-01-10,1764.7,3,January,2024,2024-01-09,1.7647


In [14]:
# Pull the billions-of-dollars costs out as a NumPy array and print them.
column_values = data['Total CPI-Adjusted Cost (Billions of Dollars)'].to_numpy()

print(column_values)

[  2.707    2.2008  39.9798   2.042    1.389    2.178    1.5794   1.5549
   4.8693   4.7537   9.3      9.396    1.232    6.3592   1.8215   1.3341
   2.4604   3.528    1.728    3.7236   2.457    4.2656   3.99     1.5046
   5.1044   1.128   53.6103   1.3842   1.4625  22.3198   7.6882   5.12
   1.6732   2.4166   1.9517   1.6874   8.296    1.4835   3.4116   6.9497
   7.458    1.7937   2.1188   1.6531  59.4      6.82     1.439    5.4379
  11.9936   1.3846  45.6948   2.7424   2.935    2.2047   6.3545   2.0786
   2.0869   2.08     1.5225   5.15     1.173   11.1924   1.7255   4.263
   1.9979   9.4718   6.036    2.0267   3.6009   9.8638   1.3318   5.8832
   1.8966   7.1325   2.6914   1.9289   3.1157   2.1742   1.6986   1.33
   1.862    6.7739  11.3707   1.7806   4.8      1.9518   1.6669   3.7623
   4.723   12.0083   1.299    1.2489   1.9459   1.602    9.1947   5.4161
   1.1744  14.8283   3.6223   1.8997   1.9665   1.2312   2.2621  15.7616
   3.3905   6.9771   1.4502   1.704    9.2132   8.5804  

In [15]:
# List the distinct disaster categories present in the data.
unique_disasters = pd.unique(data['Disaster'])
print(unique_disasters)

['Flooding' 'Tropical Cyclone' 'Drought' 'Freeze' 'Severe Storm'
 'Winter Storm' 'Wildfire']


In [12]:
# Write the transformed table to disk without the row index.
output_csv = 'events-us.csv'
data.to_csv(output_csv, index=False)