In [12]:
import pandas as pd
import numpy as np
import calendar
import scipy.stats
from scipy.stats import zscore

import requests
import urllib.parse

from geopy.geocoders import Nominatim
import time
from pprint import pprint

from geopy.geocoders import ArcGIS

In [7]:
df = pd.read_csv("disasters_clean_lat_lon.csv")

In [8]:
pd.set_option('display.max_columns', None)

In [13]:
# Convert the monetary values of the damages to the current day vaues using the CPI - adjusted for comparison
df["Total Damages ('000 US$)"] = (df["Total Damages ('000 US$)"].astype(float) / (df["CPI"].astype(float)/100)).round(2)
df["Insured Damages ('000 US$)"] = (df["Insured Damages ('000 US$)"].astype(float) / (df["CPI"].astype(float)/100)).round(2)
df["Aid Contribution"] = (df["Aid Contribution"].astype(float) / (df["CPI"].astype(float)/100)).round(2)

# # Select the columns of interest
df_selected = df[['Total Deaths', 'No Injured', 'No Affected', 'No Homeless', 'Total Affected', "Insured Damages ('000 US$)", "Total Damages ('000 US$)", 'Aid Contribution']]

df_selected = df_selected.fillna(0)

# Calculate the Z-scores for each column
df_z = df_selected.apply(zscore)

# Rename the columns to indicate that they are Z-scores
df_z.columns = [col + ' Z-Score' for col in df_selected.columns]

# Concatenate the Z-score dataframe with the original dataframe
df = pd.concat([df, df_z], axis=1)

# Group the data by 'Dis Mag Scale'
grouped = df.groupby('Dis Mag Scale')

# Define the function that will be applied to each group
def compute_rank_and_zscore(group):
    # Calculate the Z-Score for 'Dis Mag Value' for each group
    group['Dis Mag Value Z-Score'] = (group['Dis Mag Value'] - group['Dis Mag Value'].mean()) / group['Dis Mag Value'].std()
    
    # A ranking df holding all ranks
    rankings_df = group[['Total Deaths Z-Score', 'No Injured Z-Score', 'Dis Mag Value Z-Score', 'No Affected Z-Score', 'No Homeless Z-Score', 'Total Affected Z-Score', "Insured Damages ('000 US$) Z-Score", "Total Damages ('000 US$) Z-Score", 'Aid Contribution Z-Score']].rank(method='min')

    # A dictionary mapping of the ranks for their significance where 10 is most and 1 is least
    ranking_significance = {'Total Deaths Z-Score': 1, 'No Injured Z-Score': 2, 'Dis Mag Value Z-Score': 3, 'No Homeless Z-Score': 4, 'Total Affected Z-Score': 5, "Insured Damages ('000 US$) Z-Score": 6, "Total Damages ('000 US$) Z-Score": 7, 'Aid Contribution Z-Score': 8, 'No Affected Z-Score': 9}

    # multiply the ranks of each column by its corresponding weight
    rankings_df = rankings_df.div(ranking_significance, axis=1)

    # sum the weighted ranks of all columns
    rank_sum = rankings_df.sum(axis=1)

    # create a new ranking based on the sum of the weighted ranks
    group['Rank'] = rank_sum.rank(method='min')
    
    return group

# Apply the function to each group
df_ranked = grouped.apply(compute_rank_and_zscore)

df_ranked.to_excel("Analysis/Result/ranking-test.xlsx")

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  df_ranked = grouped.apply(compute_rank_and_zscore)


ModuleNotFoundError: No module named 'openpyxl'

In [None]:
#df["No Injured"] = df["No Injured"].astype(float)

In [None]:
#df["No Injured"] = df["No Injured"].interpolate(method="linear")

In [None]:
display(df.head(15))

In [None]:
#df.to_csv("disasters_clean.csv", index = True)