In [15]:
import pandas as pd
import numpy as np
import json

In [1]:
# Load the data
df = pd.read_csv('data.csv')

# Group by Disaster Type and Start Year and sum the required columns
grouped_df = df.groupby(['Disaster Type', 'Start Year']).agg({
    "Total Damage, Adjusted ('000 US$)": 'sum',
    'Total Deaths': 'sum',
    'Total Affected': 'sum'
}).reset_index()

# Save the grouped data to a new CSV file
grouped_df.to_csv('data_grouped.csv', index=False)


In [2]:
# Load the CSV data
df = pd.read_csv('data_grouped.csv')

# Select relevant columns
selected_columns = ['Disaster Type', 'Start Year', "Total Damage, Adjusted ('000 US$)", 'Total Deaths', 'Total Affected']
df_selected = df[selected_columns]

# Convert to JSON
json_data = df_selected.to_json(orient='records')

# Save JSON to a file
with open('data.json', 'w') as f:
    f.write(json_data)


In [4]:
# Load the CSV data
df = pd.read_csv('data_grouped.csv')


df['Disaster Type'].unique()

array(['Air', 'Animal incident', 'Chemical spill',
       'Collapse (Industrial)', 'Collapse (Miscellaneous)', 'Drought',
       'Earthquake', 'Epidemic', 'Explosion (Industrial)',
       'Explosion (Miscellaneous)', 'Extreme temperature',
       'Fire (Industrial)', 'Fire (Miscellaneous)', 'Flood', 'Fog',
       'Gas leak', 'Glacial lake outburst flood', 'Impact',
       'Industrial accident (General)', 'Infestation',
       'Mass movement (dry)', 'Mass movement (wet)',
       'Miscellaneous accident (General)', 'Oil spill', 'Poisoning',
       'Radiation', 'Rail', 'Road', 'Storm', 'Volcanic activity', 'Water',
       'Wildfire'], dtype=object)

In [8]:
# Values to filter
filter_values = ['Earthquake', 'Extreme temperature', 'Flood', 'Wildfire', 'Storm', 'Volcanic activity']

# Filter the DataFrame
filtered_df = df[df['Disaster Type'].isin(filter_values)]

# Save the DataFrame as a CSV file
filtered_df.to_csv('filtered_disaster_types.csv', index=False)

In [9]:
# Accumulate values up to each year
accumulated_df = filtered_df.copy()
accumulated_df.sort_values(by=['Disaster Type', 'Start Year'], inplace=True)
accumulated_df['Total Damage, Adjusted (\'000 US$)'] = accumulated_df.groupby('Disaster Type')["Total Damage, Adjusted ('000 US$)"].cumsum()
accumulated_df['Total Deaths'] = accumulated_df.groupby('Disaster Type')['Total Deaths'].cumsum()
accumulated_df['Total Affected'] = accumulated_df.groupby('Disaster Type')['Total Affected'].cumsum()

# Save the accumulated DataFrame as a CSV file
accumulated_df.to_csv('accumulated_disaster_data.csv', index=False)

In [21]:
# Add missing years with the same values as the previous year
def add_missing_years(group):
    min_year = group['Start Year'].min()
    max_year = group['Start Year'].max()
    all_years = pd.DataFrame({'Start Year': np.arange(min_year, max_year + 1)})
    group = all_years.merge(group, on='Start Year', how='left')
    group.fillna(method='ffill', inplace=True)
    return group

# Apply the function to each disaster type group
filled_df = accumulated_df.groupby('Disaster Type').apply(add_missing_years).reset_index(drop=True)

# Save the filled DataFrame as a CSV file
filled_df.to_csv('filled_disaster_data.csv', index=False)

  group.fillna(method='ffill', inplace=True)
  group.fillna(method='ffill', inplace=True)
  group.fillna(method='ffill', inplace=True)
  group.fillna(method='ffill', inplace=True)
  group.fillna(method='ffill', inplace=True)
  group.fillna(method='ffill', inplace=True)
  filled_df = accumulated_df.groupby('Disaster Type').apply(add_missing_years).reset_index(drop=True)


In [22]:
# Convert to the specified JSON format
result = []
for disaster_type, group in filled_df.groupby('Disaster Type'):
    entry = {
        "name": disaster_type,
        "Total Damage, Adjusted ('000 US$)": group[['Start Year', "Total Damage, Adjusted ('000 US$)"]].values.tolist(),
        'Total Affected': group[['Start Year', 'Total Affected']].values.tolist(),
        'Total Deaths': group[['Start Year', 'Total Deaths']].values.tolist()
    }
    result.append(entry)

# Save the result to a JSON file
with open('accumulated_disaster_data_all_years.json', 'w') as json_file:
    json.dump(result, json_file)


In [30]:
# Filter to include only years after 1950
filtered_filled_df = filled_df[filled_df['Start Year'] >= 1950]

# Convert to the specified JSON format
result = []
for disaster_type, group in filtered_filled_df.groupby('Disaster Type'):
    entry = {
        "name": disaster_type,
        "Total Damage, Adjusted ('000 US$)": group[['Start Year', "Total Damage, Adjusted ('000 US$)"]].values.tolist(),
        'Total Affected': group[['Start Year', 'Total Affected']].values.tolist(),
        'Total Deaths': group[['Start Year', 'Total Deaths']].values.tolist()
    }
    result.append(entry)

# Save the result to a JSON file
with open('accumulated_disaster_data_1950.json', 'w') as json_file:
    json.dump(result, json_file, indent=4)