In [2]:
import pandas as pd
import os

# Define the directory path
folder_path = "data sets time lines"  # Inside "dean 690"

# List all CSV files in the folder
csv_files = [f for f in os.listdir(folder_path) if f.endswith(".csv")]

# List to store dataframes
df_list = []

# Read each CSV file
for file in csv_files:
    file_path = os.path.join(folder_path, file)
    try:
        df = pd.read_csv(file_path, on_bad_lines="skip")  # Skip problematic rows
        df_list.append(df)
        print(f" Successfully read: {file}")
    except Exception as e:
        print(f" Error reading {file}: {e}")

# Concatenate all valid DataFrames
if df_list:
    merged_df = pd.concat(df_list, ignore_index=True)
    merged_df.to_csv("merged_output.csv", index=False)  # Save merged file
    print("Merging complete. Output saved as 'merged_output.csv'.")
    print("Merged CSV shape:", merged_df.shape)
else:
    print("No valid CSV files found or all had errors.")


 Successfully read: storm_data_coastal_flood.csv
 Successfully read: storm_data_drought.csv
 Successfully read: storm_data_extreme_cold_wind_chill.csv
 Successfully read: storm_data_heavy rain.csv
 Successfully read: storm_data_heavy snow.csv
 Successfully read: storm_data_hurricane.csv
 Successfully read: storm_data_ice_strom.csv
 Successfully read: storm_data_lakeshoreflood.csv
 Successfully read: storm_data_strong_wind.csv
 Successfully read: storm_data_thunderstrom_wind.csv
 Successfully read: storm_data_toronado.csv
 Successfully read: storm_data_tropical strom.csv
 Successfully read: storm_data_tsunami.csv
 Successfully read: storm_data_wildfire.csv
Merging complete. Output saved as 'merged_output.csv'.
Merged CSV shape: (6438, 39)


In [3]:
merged_df

Unnamed: 0,EVENT_ID,CZ_NAME_STR,BEGIN_LOCATION,BEGIN_DATE,BEGIN_TIME,EVENT_TYPE,MAGNITUDE,TOR_F_SCALE,DEATHS_DIRECT,INJURIES_DIRECT,...,END_LOCATION,END_DATE,END_TIME,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EVENT_NARRATIVE,EPISODE_NARRATIVE,ABSOLUTE_ROWNUMBER
0,5482274,EASTERN OCEAN (ZONE),,12/16/2005,600,Coastal Flood,,,0,0,...,,12/16/2005,900,,,,,,The combination of spring tides associated wit...,1
1,5482273,EASTERN MONMOUTH (ZONE),,12/16/2005,600,Coastal Flood,,,0,0,...,,12/16/2005,900,,,,,,The combination of spring tides associated wit...,2
2,5483800,X SE LOS ANGELES (ZONE),,12/21/2005,800,Coastal Flood,,,0,1,...,,12/21/2005,1500,,,,,,A very large long-period westerly swell combin...,3
3,5483799,X S VENTURA (ZONE),,12/21/2005,900,Coastal Flood,,,0,0,...,,12/21/2005,1500,,,,,,A very large long-period westerly swell combin...,4
4,5482540,SAN DIEGO COUNTY COASTS (ZONE),,12/21/2005,1000,Coastal Flood,,,0,0,...,,12/22/2005,1000,,,,,,A powerful storm in the East Pacific generated...,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6433,5516783,KERN CTY MTNS (ZONE),,07/20/2006,1800,Wildfire,,,0,0,...,,07/20/2006,1845,,,,,,A small fire around Keyesville near Lake Isabe...,496
6434,5517624,TULARE CTY FOOTHILLS (ZONE),,07/20/2006,1900,Wildfire,,,0,0,...,,07/23/2006,800,,,,,The Coyote Fire 9 miles east of Porterville in...,,497
6435,5517628,S SIERRA MTNS (ZONE),,07/21/2006,1500,Wildfire,,,0,0,...,,07/29/2006,2359,,,,,The Frog Complex Fire in Yosemite National Par...,,498
6436,5525633,S SIERRA MTNS (ZONE),,07/21/2006,1538,Wildfire,,,0,0,...,,07/31/2006,2359,,,,,The Burnt Fire was lightning initiated late in...,,499


In [6]:
merged_df.columns

Index(['EVENT_ID', 'CZ_NAME_STR', 'BEGIN_LOCATION', 'BEGIN_DATE', 'BEGIN_TIME',
       'EVENT_TYPE', 'MAGNITUDE', 'TOR_F_SCALE', 'DEATHS_DIRECT',
       'INJURIES_DIRECT', 'DAMAGE_PROPERTY_NUM', 'DAMAGE_CROPS_NUM',
       'STATE_ABBR', 'CZ_TIMEZONE', 'MAGNITUDE_TYPE', 'EPISODE_ID', 'CZ_TYPE',
       'CZ_FIPS', 'WFO', 'INJURIES_INDIRECT', 'DEATHS_INDIRECT', 'SOURCE',
       'FLOOD_CAUSE', 'TOR_LENGTH', 'TOR_WIDTH', 'BEGIN_RANGE',
       'BEGIN_AZIMUTH', 'END_RANGE', 'END_AZIMUTH', 'END_LOCATION', 'END_DATE',
       'END_TIME', 'BEGIN_LAT', 'BEGIN_LON', 'END_LAT', 'END_LON',
       'EVENT_NARRATIVE', 'EPISODE_NARRATIVE', 'ABSOLUTE_ROWNUMBER'],
      dtype='object')

In [8]:

# Define the columns to check
damage_cols = [ 
    "DEATHS_DIRECT", "INJURIES_DIRECT", "DAMAGE_PROPERTY_NUM", 
    "DAMAGE_CROPS_NUM", "INJURIES_INDIRECT", "DEATHS_INDIRECT"
]

# Drop rows where *all* these columns are zero
filtered_df = merged_df.loc[~(merged_df[damage_cols] == 0).all(axis=1)]

# Save the cleaned dataset
filtered_df.to_csv("cleaned_data.csv", index=False)

print(f"Removed {len(merged_df) - len(filtered_df)} rows with all zeros in damage-related columns.")
print(f"New dataset shape: {filtered_df.shape}")


Removed 2793 rows with all zeros in damage-related columns.
New dataset shape: (3645, 39)


In [460]:
event_counts = filtered_df.groupby('EVENT_TYPE').size().reset_index(name='Count')

# Sort by 'Count' column in descending order
event_counts = event_counts.sort_values(by="Count", ascending=False)
event_counts

Unnamed: 0,EVENT_TYPE,Count
20,Thunderstorm Wind,496
19,Strong Wind,487
0,0.0,486
13,Drought,438
14,Extreme Cold/Wind Chill,229
21,Tornado,228
22,Tropical Storm,164
12,Coastal Flood,157
24,Wildfire,151
18,Lakeshore Flood,140


In [10]:
# Check for missing values first
print(filtered_df.isnull().sum())

# Function to convert the time format
def convert_time_format(time):
    # Ensure the time is treated as a string and strip any spaces
    time_str = str(time).strip()
    
    # If the time is empty, '0', or malformed like '0:', replace with '00:00'
    if len(time_str) == 0 or time_str == '0' or time_str == '0:':
        return '00:00'
    
    # If the length of the time string is 3 (for times like '200'), add a leading zero
    if len(time_str) == 3:
        time_str = '0' + time_str
        
    # Format the string as HH:MM
    formatted_time = time_str[:2] + ':' + time_str[2:]
    
    return formatted_time

# Apply the function to 'BEGIN_TIME' and 'END_TIME'
filtered_df['BEGIN_TIME'] = filtered_df['BEGIN_TIME'].apply(convert_time_format)
filtered_df['END_TIME'] = filtered_df['END_TIME'].apply(convert_time_format)

# Now convert the formatted time strings into datetime objects
filtered_df['BEGIN_TIME'] = pd.to_datetime(filtered_df['BEGIN_TIME'], format='%H:%M', errors='coerce')
filtered_df['END_TIME'] = pd.to_datetime(filtered_df['END_TIME'], format='%H:%M', errors='coerce')

# Check for missing values again to see if any NaT (Not a Time) values are introduced
print(filtered_df.isnull().sum())

# Display the result
print(filtered_df[['BEGIN_TIME', 'END_TIME']].head())

# Save to CSV if needed
filtered_df.to_csv("formatted_times.csv", index=False)


EVENT_ID                  1
CZ_NAME_STR             318
BEGIN_LOCATION          336
BEGIN_DATE              347
BEGIN_TIME              348
EVENT_TYPE              364
MAGNITUDE               372
TOR_F_SCALE             373
DEATHS_DIRECT           372
INJURIES_DIRECT         373
DAMAGE_PROPERTY_NUM     373
DAMAGE_CROPS_NUM        373
STATE_ABBR              373
CZ_TIMEZONE             373
MAGNITUDE_TYPE          373
EPISODE_ID              373
CZ_TYPE                 389
CZ_FIPS                 389
WFO                     389
INJURIES_INDIRECT       389
DEATHS_INDIRECT         389
SOURCE                  389
FLOOD_CAUSE             389
TOR_LENGTH              389
TOR_WIDTH               389
BEGIN_RANGE             389
BEGIN_AZIMUTH           389
END_RANGE               389
END_AZIMUTH             389
END_LOCATION            389
END_DATE                389
END_TIME                389
BEGIN_LAT               874
BEGIN_LON               404
END_LAT                 389
END_LON             

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['BEGIN_TIME'] = filtered_df['BEGIN_TIME'].apply(convert_time_format)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['END_TIME'] = filtered_df['END_TIME'].apply(convert_time_format)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['BEGIN_TIME'] = pd.to_datetime(filt

In [12]:
filtered_df.isnull().sum()

EVENT_ID                  1
CZ_NAME_STR             318
BEGIN_LOCATION          336
BEGIN_DATE              347
BEGIN_TIME              904
EVENT_TYPE              364
MAGNITUDE               372
TOR_F_SCALE             373
DEATHS_DIRECT           372
INJURIES_DIRECT         373
DAMAGE_PROPERTY_NUM     373
DAMAGE_CROPS_NUM        373
STATE_ABBR              373
CZ_TIMEZONE             373
MAGNITUDE_TYPE          373
EPISODE_ID              373
CZ_TYPE                 389
CZ_FIPS                 389
WFO                     389
INJURIES_INDIRECT       389
DEATHS_INDIRECT         389
SOURCE                  389
FLOOD_CAUSE             389
TOR_LENGTH              389
TOR_WIDTH               389
BEGIN_RANGE             389
BEGIN_AZIMUTH           389
END_RANGE               389
END_AZIMUTH             389
END_LOCATION            389
END_DATE                389
END_TIME               1641
BEGIN_LAT               874
BEGIN_LON               404
END_LAT                 389
END_LON             

In [14]:
filtered_df.head()

Unnamed: 0,EVENT_ID,CZ_NAME_STR,BEGIN_LOCATION,BEGIN_DATE,BEGIN_TIME,EVENT_TYPE,MAGNITUDE,TOR_F_SCALE,DEATHS_DIRECT,INJURIES_DIRECT,...,END_LOCATION,END_DATE,END_TIME,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EVENT_NARRATIVE,EPISODE_NARRATIVE,ABSOLUTE_ROWNUMBER
2,5483800,X SE LOS ANGELES (ZONE),,12/21/2005,1900-01-01 08:00:00,Coastal Flood,,,0,1,...,,12/21/2005,1900-01-01 15:00:00,,,,,,A very large long-period westerly swell combin...,3
4,5482540,SAN DIEGO COUNTY COASTS (ZONE),,12/21/2005,1900-01-01 10:00:00,Coastal Flood,,,0,0,...,,12/22/2005,1900-01-01 10:00:00,,,,,,A powerful storm in the East Pacific generated...,5
5,5482541,ORANGE COUNTY COASTAL PLAIN (ZONE),,12/21/2005,1900-01-01 10:00:00,Coastal Flood,,,0,0,...,,12/22/2005,1900-01-01 10:00:00,,,,,,A powerful storm in the East Pacific generated...,6
7,5482542,SAN DIEGO COUNTY COASTS (ZONE),,12/29/2005,1900-01-01 07:05:00,Coastal Flood,,,0,0,...,,12/29/2005,1900-01-01 07:05:00,,,,,Two street ends at Imperial Beach were briefly...,,8
8,5482544,X W SAN DIEGO (ZONE),,12/30/2005,1900-01-01 07:55:00,Coastal Flood,,,0,0,...,,12/30/2005,1900-01-01 07:55:00,,,,,,Waves over 8 ft high continued to cause minor ...,9


In [16]:
filtered_df.groupby('END_TIME').size().reset_index(name='begin_date Count')

Unnamed: 0,END_TIME,begin_date Count
0,1900-01-01 00:00:00,535
1,1900-01-01 01:00:00,20
2,1900-01-01 01:30:00,1
3,1900-01-01 01:42:00,2
4,1900-01-01 02:00:00,18
...,...,...
130,1900-01-01 23:15:00,1
131,1900-01-01 23:30:00,4
132,1900-01-01 23:45:00,5
133,1900-01-01 23:46:00,6


#### Data Imputation

In [19]:
filtered_df

Unnamed: 0,EVENT_ID,CZ_NAME_STR,BEGIN_LOCATION,BEGIN_DATE,BEGIN_TIME,EVENT_TYPE,MAGNITUDE,TOR_F_SCALE,DEATHS_DIRECT,INJURIES_DIRECT,...,END_LOCATION,END_DATE,END_TIME,BEGIN_LAT,BEGIN_LON,END_LAT,END_LON,EVENT_NARRATIVE,EPISODE_NARRATIVE,ABSOLUTE_ROWNUMBER
2,5483800,X SE LOS ANGELES (ZONE),,12/21/2005,1900-01-01 08:00:00,Coastal Flood,,,0,1,...,,12/21/2005,1900-01-01 15:00:00,,,,,,A very large long-period westerly swell combin...,3
4,5482540,SAN DIEGO COUNTY COASTS (ZONE),,12/21/2005,1900-01-01 10:00:00,Coastal Flood,,,0,0,...,,12/22/2005,1900-01-01 10:00:00,,,,,,A powerful storm in the East Pacific generated...,5
5,5482541,ORANGE COUNTY COASTAL PLAIN (ZONE),,12/21/2005,1900-01-01 10:00:00,Coastal Flood,,,0,0,...,,12/22/2005,1900-01-01 10:00:00,,,,,,A powerful storm in the East Pacific generated...,6
7,5482542,SAN DIEGO COUNTY COASTS (ZONE),,12/29/2005,1900-01-01 07:05:00,Coastal Flood,,,0,0,...,,12/29/2005,1900-01-01 07:05:00,,,,,Two street ends at Imperial Beach were briefly...,,8
8,5482544,X W SAN DIEGO (ZONE),,12/30/2005,1900-01-01 07:55:00,Coastal Flood,,,0,0,...,,12/30/2005,1900-01-01 07:55:00,,,,,,Waves over 8 ft high continued to cause minor ...,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6395,5523062,APPLE AND YUCCA VALLEYS (ZONE),,07/09/2006,1900-01-01 07:30:00,Wildfire,,,0,1,...,,07/31/2006,1900-01-01 18:00:00,,,,,,Lightning sparked two large wildfires in the r...,457
6396,5523061,P SW SAN BERNARDINO (ZONE),,07/09/2006,1900-01-01 07:33:00,Wildfire,,,1,17,...,,07/18/2006,1900-01-01 18:00:00,,,,,,Lightning sparked two large wildfires in the r...,458
6420,5526724,CENTRAL AND SOUTHERN VALLEY (ZONE),,07/15/2006,1900-01-01 17:00:00,Wildfire,,,0,0,...,,07/21/2006,1900-01-01 17:00:00,,,,,,"Dry thunderstorms moved through Petroleum, Gar...",483
6423,5520400,EASTERN CHERRY (ZONE),,07/16/2006,1900-01-01 15:40:00,Wildfire,,,0,0,...,,07/18/2006,1900-01-01 10:00:00,,,,,,,486


In [21]:
filtered_df.isnull().sum()

EVENT_ID                  1
CZ_NAME_STR             318
BEGIN_LOCATION          336
BEGIN_DATE              347
BEGIN_TIME              904
EVENT_TYPE              364
MAGNITUDE               372
TOR_F_SCALE             373
DEATHS_DIRECT           372
INJURIES_DIRECT         373
DAMAGE_PROPERTY_NUM     373
DAMAGE_CROPS_NUM        373
STATE_ABBR              373
CZ_TIMEZONE             373
MAGNITUDE_TYPE          373
EPISODE_ID              373
CZ_TYPE                 389
CZ_FIPS                 389
WFO                     389
INJURIES_INDIRECT       389
DEATHS_INDIRECT         389
SOURCE                  389
FLOOD_CAUSE             389
TOR_LENGTH              389
TOR_WIDTH               389
BEGIN_RANGE             389
BEGIN_AZIMUTH           389
END_RANGE               389
END_AZIMUTH             389
END_LOCATION            389
END_DATE                389
END_TIME               1641
BEGIN_LAT               874
BEGIN_LON               404
END_LAT                 389
END_LON             

In [23]:
filtered_df.groupby(['END_TIME']).size().reset_index(name='Count').sort_values(by="Count", ascending=False)

Unnamed: 0,END_TIME,Count
0,1900-01-01 00:00:00,535
104,1900-01-01 18:00:00,99
36,1900-01-01 10:00:00,99
46,1900-01-01 11:00:00,95
82,1900-01-01 15:00:00,84
...,...,...
58,1900-01-01 12:39:00,1
57,1900-01-01 12:37:00,1
55,1900-01-01 12:25:00,1
53,1900-01-01 11:59:00,1


In [473]:
filtered_df['EVENT_TYPE'].value_counts()

EVENT_TYPE
Thunderstorm Wind                                                                                                                                                                                                                                                                                                                                    496
Strong Wind                                                                                                                                                                                                                                                                                                                                          487
0.0                                                                                                                                                                                                                                                                                                        

In [25]:
event_counts = filtered_df['EVENT_TYPE'].value_counts()
filtered_df = filtered_df[filtered_df['EVENT_TYPE'].isin(event_counts[event_counts > 17].index)]


In [27]:
filtered_df_di = filtered_df[filtered_df['CZ_FIPS'].astype(str).str.isdigit()]
filtered_df_di = filtered_df_di[filtered_df_di['STATE_ABBR'].apply(lambda x: isinstance(x, str) and len(x) <= 2 and x.strip() != '')]
state_fips_mapping = {
    'AL': '01', 'AK': '02', 'AZ': '04', 'AR': '05', 'CA': '06', 'CO': '08',
    'CT': '09', 'DE': '10', 'FL': '12', 'GA': '13', 'HI': '15', 'ID': '16',
    'IL': '17', 'IN': '18', 'IA': '19', 'KS': '20', 'KY': '21', 'LA': '22',
    'ME': '23', 'MD': '24', 'MA': '25', 'MI': '26', 'MN': '27', 'MS': '28',
    'MO': '29', 'MT': '30', 'NE': '31', 'NV': '32', 'NH': '33', 'NJ': '34',
    'NM': '35', 'NY': '36', 'NC': '37', 'ND': '38', 'OH': '39', 'OK': '40',
    'OR': '41', 'PA': '42', 'RI': '44', 'SC': '45', 'SD': '46', 'TN': '47',
    'TX': '48', 'UT': '49', 'VT': '50', 'VA': '51', 'WA': '53', 'WV': '54',
    'WI': '55', 'WY': '56'
}
filtered_df_di['STATE_FIPS'] = filtered_df_di['STATE_ABBR'].map(state_fips_mapping)

filtered_df_di['CZ_FIPS'] = filtered_df_di['CZ_FIPS'].astype(str)
filtered_df_di['STATE_FIPS'] = filtered_df_di['STATE_FIPS'].astype(str)
filtered_df_di=filtered_df_di.rename(columns={'CZ_FIPS':'fipsCountyCode','STATE_FIPS':'fipsStateCode','EVENT_TYPE':'incidentType'})


In [29]:
#filtered_df_di = filtered_df_di.dropna()
filtered_df_di = filtered_df_di.dropna(subset=['fipsStateCode','fipsCountyCode'])


In [31]:
filtered_df_di = filtered_df_di[filtered_df_di['STATE_ABBR'] != 'nan']


In [33]:
filtered_df_di.isnull().sum()

EVENT_ID                 0
CZ_NAME_STR              0
BEGIN_LOCATION           0
BEGIN_DATE               0
BEGIN_TIME               5
incidentType             0
MAGNITUDE                0
TOR_F_SCALE              0
DEATHS_DIRECT            0
INJURIES_DIRECT          0
DAMAGE_PROPERTY_NUM      0
DAMAGE_CROPS_NUM         0
STATE_ABBR               0
CZ_TIMEZONE              0
MAGNITUDE_TYPE           0
EPISODE_ID               0
CZ_TYPE                  0
fipsCountyCode           0
WFO                      0
INJURIES_INDIRECT        0
DEATHS_INDIRECT          0
SOURCE                   0
FLOOD_CAUSE              0
TOR_LENGTH               0
TOR_WIDTH                0
BEGIN_RANGE              0
BEGIN_AZIMUTH            0
END_RANGE                0
END_AZIMUTH              0
END_LOCATION             0
END_DATE                 0
END_TIME                11
BEGIN_LAT                0
BEGIN_LON                0
END_LAT                  0
END_LON                  0
EVENT_NARRATIVE        645
E

In [35]:
event_type_mapping = {
    'Ice Storm': 'Severe Ice Storm',
    'Tropical Storm':'Severe Storm',
    'Coastal Flood': 'Coastal Storm',
    'Lakeshore Flood':'Flood',
    'Strong Wind':'Hurricane',
     'Wildfire':'Fire',
    'Heavy Rain':'Flood',
    'Extreme Cold/Wind Chill':'Severe Storm'
    
    # Add other event type mappings here
}

In [37]:
Hurricane           8156
Severe Storm        4763
Flood               3425
Severe Ice Storm    2461
Fire                1830
Snowstorm            495
Coastal Storm        407
Tornado              327
Mud/Landslide         54
Earthquake            44

SyntaxError: invalid syntax (3811015962.py, line 1)

In [39]:
filtered_df_di['incidentType'].value_counts()

incidentType
Strong Wind                487
Extreme Cold/Wind Chill    229
Tropical Storm             164
Coastal Flood              157
Wildfire                   151
Lakeshore Flood            140
Ice Storm                  131
Heavy Rain                  38
Tsunami                     18
Name: count, dtype: int64

In [73]:
filtered_df_di['incidentType'] = filtered_df_di['incidentType'].replace(event_type_mapping)
filtered_df_di['BEGIN_DATE'] = pd.to_datetime(filtered_df_di['BEGIN_DATE'])
filtered_df_di['Year'] = filtered_df_di['BEGIN_DATE'].dt.year


In [85]:
property_damages=filtered_df_di.groupby(['fipsStateCode','fipsCountyCode','incidentType','Year'])[['DAMAGE_PROPERTY_NUM']].mean().reset_index()

In [87]:
property_damages

Unnamed: 0,fipsStateCode,fipsCountyCode,incidentType,Year,DAMAGE_PROPERTY_NUM
0,01,1,Hurricane,2022,5000000.0
1,01,10,Hurricane,2007,1000000.0
2,01,10,Hurricane,2011,75000.0
3,01,11,Severe Storm,2005,1530000.0
4,01,12,Hurricane,2009,240000.0
...,...,...,...,...,...
1221,,3,Severe Storm,2004,1000000.0
1222,,6,Fire,2005,200000.0
1223,,7,Fire,2005,200000.0
1224,,8,Coastal Storm,2006,35000.0


In [89]:
property_damages = property_damages[property_damages['fipsStateCode'] != 'nan']
property_damages

Unnamed: 0,fipsStateCode,fipsCountyCode,incidentType,Year,DAMAGE_PROPERTY_NUM
0,01,1,Hurricane,2022,5000000.0
1,01,10,Hurricane,2007,1000000.0
2,01,10,Hurricane,2011,75000.0
3,01,11,Severe Storm,2005,1530000.0
4,01,12,Hurricane,2009,240000.0
...,...,...,...,...,...
1208,55,74,Flood,2020,100000.0
1209,55,8,Fire,2006,50000.0
1210,56,17,Fire,2005,500000.0
1211,56,17,Fire,2006,65000.0


In [91]:
property_damages['fipsStateCode'].unique()

array(['01', '02', '04', '05', '06', '08', '09', '10', '12', '13', '15',
       '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26',
       '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37',
       '38', '39', '40', '41', '42', '44', '45', '46', '47', '48', '50',
       '51', '53', '54', '55', '56'], dtype=object)

In [93]:
property_damages.to_csv('Property Damages.csv')

In [95]:
filtered_df.columns

Index(['EVENT_ID', 'CZ_NAME_STR', 'BEGIN_LOCATION', 'BEGIN_DATE', 'BEGIN_TIME',
       'EVENT_TYPE', 'MAGNITUDE', 'TOR_F_SCALE', 'DEATHS_DIRECT',
       'INJURIES_DIRECT', 'DAMAGE_PROPERTY_NUM', 'DAMAGE_CROPS_NUM',
       'STATE_ABBR', 'CZ_TIMEZONE', 'MAGNITUDE_TYPE', 'EPISODE_ID', 'CZ_TYPE',
       'CZ_FIPS', 'WFO', 'INJURIES_INDIRECT', 'DEATHS_INDIRECT', 'SOURCE',
       'FLOOD_CAUSE', 'TOR_LENGTH', 'TOR_WIDTH', 'BEGIN_RANGE',
       'BEGIN_AZIMUTH', 'END_RANGE', 'END_AZIMUTH', 'END_LOCATION', 'END_DATE',
       'END_TIME', 'BEGIN_LAT', 'BEGIN_LON', 'END_LAT', 'END_LON',
       'EVENT_NARRATIVE', 'EPISODE_NARRATIVE', 'ABSOLUTE_ROWNUMBER'],
      dtype='object')

In [69]:
filtered_df['STATE_ABBR'].unique

<bound method Series.unique of 2       CA
4       CA
5       CA
7       CA
8       CA
        ..
6395    CA
6396    CA
6420    MT
6423    NE
6437    CA
Name: STATE_ABBR, Length: 3242, dtype: object>

In [342]:
filtered_df.isnull().sum()


EVENT_ID                  0
CZ_NAME_STR               0
BEGIN_LOCATION            0
BEGIN_DATE                0
BEGIN_TIME              502
EVENT_TYPE                0
MAGNITUDE                 0
TOR_F_SCALE               0
DEATHS_DIRECT             0
INJURIES_DIRECT           0
DAMAGE_PROPERTY_NUM       0
DAMAGE_CROPS_NUM          0
STATE_ABBR                0
CZ_TIMEZONE               0
MAGNITUDE_TYPE            0
EPISODE_ID                0
CZ_TYPE                   0
CZ_FIPS                   0
WFO                       0
INJURIES_INDIRECT         0
DEATHS_INDIRECT           0
SOURCE                    0
FLOOD_CAUSE               0
TOR_LENGTH                0
TOR_WIDTH                 0
BEGIN_RANGE               0
BEGIN_AZIMUTH             0
END_RANGE                 0
END_AZIMUTH               0
END_LOCATION              0
END_DATE                  0
END_TIME               1252
BEGIN_LAT               471
BEGIN_LON                15
END_LAT                   0
END_LON             

In [56]:
filtered_df.to_csv('strom data set.csv')

In [400]:
property_damages

Unnamed: 0,fipsStateCode,fipsCountyCode,incidentType,DAMAGE_PROPERTY_NUM,DAMAGE_CROPS_NUM
0,01,1,Hurricane,5000000.0,0.0
1,01,10,Hurricane,537500.0,0.0
2,01,11,Severe Storm,1530000.0,0.0
3,01,12,Hurricane,240000.0,0.0
4,01,12,Severe Storm,930000.0,0.0
...,...,...,...,...,...
974,55,73,Flood,10000.0,0.0
975,55,74,Flood,33750.0,0.0
976,55,8,Fire,50000.0,0.0
977,56,17,Fire,282500.0,0.0
