In [82]:
# Imports and dependencies
import pandas as pd
gender_deaths = "resources/causeofdeathgender.csv"
race_deaths = "resources/causeofdeathrace.csv"

In [83]:
# Create dataframe from csv breaking down suicides by year, state and gender
gender_data_df = pd.read_csv(gender_deaths)
gender_data_df = gender_data_df[["Year", "Census Region", "Gender", "Deaths", "Population", "Crude Rate"]]
gender_data_df = gender_data_df.replace("Census Region 1: Northeast", "Northeast")
gender_data_df = gender_data_df.replace("Census Region 2: Midwest", "Midwest")
gender_data_df = gender_data_df.replace("Census Region 3: South", "South")
gender_data_df = gender_data_df.replace("Census Region 4: West", "West")
gender_data_df = gender_data_df.replace("Unreliable", "")
gender_data_df = gender_data_df.dropna(how="any")
gender_data_df.to_csv("output/suicides_by_gender.csv", index=True)
gender_data_df

Unnamed: 0,Year,Census Region,Gender,Deaths,Population,Crude Rate
0,1999.0,Northeast,Female,861.0,27572096.0,3.1
1,1999.0,Northeast,Male,3360.0,25771679.0,13.0
3,1999.0,Midwest,Female,1172.0,32715025.0,3.6
4,1999.0,Midwest,Male,5243.0,31385036.0,16.7
6,1999.0,South,Female,2216.0,50680024.0,4.4
...,...,...,...,...,...,...
277,2020.0,Midwest,Male,7968.0,33735294.0,23.6
279,2020.0,South,Female,3709.0,64626584.0,5.7
280,2020.0,South,Male,14754.0,62036170.0,23.8
282,2020.0,West,Female,2554.0,39413537.0,6.5


In [84]:
# Create dataframe from csv breaking down suicides by year, state and race
race_data_df = pd.read_csv(race_deaths)
race_data_df = race_data_df[["Year", "Census Region", "Race", "Deaths", "Population", "Crude Rate"]]
race_data_df = race_data_df.replace("Census Region 1: Northeast", "Northeast")
race_data_df = race_data_df.replace("Census Region 2: Midwest", "Midwest")
race_data_df = race_data_df.replace("Census Region 3: South", "South")
race_data_df = race_data_df.replace("Census Region 4: West", "West")
race_data_df = race_data_df.replace("Unreliable", "")
race_data_df = race_data_df.dropna(how="any")
race_data_df.to_csv("output/suicides_by_race.csv", index=True)
race_data_df

Unnamed: 0,Year,Census Region,Race,Deaths,Population,Crude Rate
0,1999.0,Northeast,Asian or Pacific Islander,106.0,2236541.0,4.7
1,1999.0,Northeast,Black or African American,289.0,6764575.0,4.3
2,1999.0,Northeast,White,3820.0,44129786.0,8.7
3,1999.0,Midwest,American Indian or Alaska Native,47.0,443191.0,10.6
4,1999.0,Midwest,Asian or Pacific Islander,55.0,1255799.0,4.4
...,...,...,...,...,...,...
335,2020.0,South,White,16007.0,93516921.0,17.1
336,2020.0,West,American Indian or Alaska Native,390.0,2241606.0,17.4
337,2020.0,West,Asian or Pacific Islander,757.0,10134801.0,7.5
338,2020.0,West,Black or African American,477.0,4818968.0,9.9


In [85]:
# Cleanup step to drop rows where data (specifically "Crude Rate") is labeled as "Unreliable" to keep data numeric
gender_data_df.replace("Unreliable", "")
race_data_df.replace("Unreliable", "")

Unnamed: 0,Year,Census Region,Race,Deaths,Population,Crude Rate
0,1999.0,Northeast,Asian or Pacific Islander,106.0,2236541.0,4.7
1,1999.0,Northeast,Black or African American,289.0,6764575.0,4.3
2,1999.0,Northeast,White,3820.0,44129786.0,8.7
3,1999.0,Midwest,American Indian or Alaska Native,47.0,443191.0,10.6
4,1999.0,Midwest,Asian or Pacific Islander,55.0,1255799.0,4.4
...,...,...,...,...,...,...
335,2020.0,South,White,16007.0,93516921.0,17.1
336,2020.0,West,American Indian or Alaska Native,390.0,2241606.0,17.4
337,2020.0,West,Asian or Pacific Islander,757.0,10134801.0,7.5
338,2020.0,West,Black or African American,477.0,4818968.0,9.9


In [86]:
# Creating new dataframes to separate data by gender and race
male_data_df = gender_data_df[gender_data_df["Gender"] == "Male"]
female_data_df = gender_data_df[gender_data_df["Gender"] == "Female"]
white_data_df = race_data_df[race_data_df["Race"] == "White"]
black_data_df = race_data_df[race_data_df["Race"] == "Black or African American"]
asian_data_df = race_data_df[race_data_df["Race"] == "Asian or Pacific Islander"]
native_data_df = race_data_df[race_data_df["Race"] == "American Indian or Alaska Native"]

In [87]:
# Create dataframe to group annual sums of suicide rates for males
male_data_group = male_data_df.groupby(["Year", "Census Region"])
male_data_group_df = male_data_group.sum()
male_data_group_df = male_data_group_df
male_data_group_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Deaths,Population,Crude Rate
Year,Census Region,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1999.0,Midwest,5243.0,31385036.0,16.7
1999.0,Northeast,3360.0,25771679.0,13.0
1999.0,South,8962.0,48484436.0,18.5
1999.0,West,5882.0,31161722.0,18.9
2000.0,Midwest,5285.0,31555438.0,16.7
...,...,...,...,...
2019.0,West,9672.0,39081567.0,24.7
2020.0,Midwest,7968.0,33735294.0,23.6
2020.0,Northeast,4455.0,27243519.0,16.4
2020.0,South,14754.0,62036170.0,23.8


In [91]:
# Create dataframe to group annual sums of suicide rates for females
female_data_group = female_data_df.groupby(["Year", "Census Region"])
female_data_group_df = female_data_group.sum()
female_data_group_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Deaths,Population,Crude Rate
Year,Census Region,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1999.0,Midwest,1172.0,32715025.0,3.6
1999.0,Northeast,861.0,27572096.0,3.1
1999.0,South,2216.0,50680024.0,4.4
1999.0,West,1497.0,31270150.0,4.8
2000.0,Midwest,1161.0,32837338.0,3.5
...,...,...,...,...
2019.0,West,2721.0,39265701.0,6.9
2020.0,Midwest,1937.0,34581450.0,5.6
2020.0,Northeast,1220.0,28606350.0,4.3
2020.0,South,3709.0,64626584.0,5.7


In [93]:
# Create dataframe to group annual sums of suicide rates for white
white_data_group = white_data_df.groupby(["Year", "Census Region"])
white_data_group_df = white_data_group.sum()
white_data_group_df["Crude Rate"] = ((white_data_group_df["Deaths"] / white_data_group_df["Population"]) * 100000)
white_data_group_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Deaths,Population,Crude Rate
Year,Census Region,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1999.0,Midwest,5850.0,55699117.0,10.502859
1999.0,Northeast,3820.0,44129786.0,8.656285
1999.0,South,10062.0,77052138.0,13.058690
1999.0,West,6561.0,51806749.0,12.664373
2000.0,Midwest,5934.0,55815155.0,10.631521
...,...,...,...,...
2019.0,West,10755.0,61511549.0,17.484521
2020.0,Midwest,8822.0,56875361.0,15.511110
2020.0,Northeast,5026.0,43137993.0,11.650982
2020.0,South,16007.0,93516921.0,17.116688


In [94]:
# Create dataframe to group annual sums of suicide rates for black or African American
black_data_group = black_data_df.groupby(["Year", "Census Region"])
black_data_group_df = black_data_group.sum()
black_data_group_df["Crude Rate"] = ((black_data_group_df["Deaths"] / black_data_group_df["Population"]) * 100000)
black_data_group_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Deaths,Population,Crude Rate
Year,Census Region,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1999.0,Midwest,463.0,6701954.0,6.908433
1999.0,Northeast,289.0,6764575.0,4.272257
1999.0,South,971.0,19276054.0,5.037338
1999.0,West,228.0,3430538.0,6.646188
2000.0,Midwest,399.0,6779740.0,5.885181
...,...,...,...,...
2019.0,West,452.0,4732798.0,9.550376
2020.0,Midwest,746.0,8047183.0,9.270325
2020.0,Northeast,411.0,8028574.0,5.119215
2020.0,South,1899.0,26286520.0,7.224235


In [95]:
# Create dataframe to group annual sums of suicide rates for Asian or Pacific Islander
asian_data_group = asian_data_df.groupby(["Year", "Census Region"])
asian_data_group_df = asian_data_group.sum()
asian_data_group_df["Crude Rate"] = ((asian_data_group_df["Deaths"] / asian_data_group_df["Population"]) * 100000)
asian_data_group_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Deaths,Population,Crude Rate
Year,Census Region,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1999.0,Midwest,55.0,1255799.0,4.379682
1999.0,Northeast,106.0,2236541.0,4.739462
1999.0,South,103.0,2048707.0,5.027561
1999.0,West,394.0,5805449.0,6.786727
2000.0,Midwest,63.0,1323974.0,4.758402
...,...,...,...,...
2019.0,West,781.0,9904576.0,7.885244
2020.0,Midwest,183.0,2686643.0,6.811474
2020.0,Northeast,227.0,4217846.0,5.381894
2020.0,South,399.0,5374447.0,7.424020


In [96]:
# Create dataframe to group annual sums of suicide rates for American Indian or Alaska Native
native_data_group = native_data_df.groupby(["Year", "Census Region"])
native_data_group_df = native_data_group.sum()
native_data_group_df["Crude Rate"] = ((native_data_group_df["Deaths"] / native_data_group_df["Population"]) * 100000)
native_data_group_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Deaths,Population,Crude Rate
Year,Census Region,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1999.0,Midwest,47.0,443191.0,10.604908
1999.0,South,42.0,787561.0,5.332920
1999.0,West,196.0,1389136.0,14.109490
2000.0,Midwest,50.0,473907.0,10.550593
2000.0,South,56.0,848582.0,6.599244
...,...,...,...,...
2019.0,West,405.0,2198345.0,18.422950
2020.0,Midwest,154.0,707557.0,21.765031
2020.0,Northeast,11.0,465456.0,2.363274
2020.0,South,158.0,1484866.0,10.640691
