In [11]:
import os
import pandas as pd

#### Population data from: https://www150.statcan.gc.ca/t1/tbl1/en/cv.action?pid=1710007801

In [2]:
# Location of the 2015 mortality CSV.
mort_file_2015 = os.path.join("datas", "new_mortality", "2015 mortality.csv")

# Read the file and relabel its "2015" column as "Number of Mortality" in one chained step.
mort_2015 = pd.read_csv(mort_file_2015).rename(
    columns={"2015": "Number of Mortality"}
)


In [3]:
# Population file
pop_file = os.path.join("datas", "population", "population.csv")

# Keep only the 2015 column, label it "population", and order the rows by geography.
pop_df = (
    pd.read_csv(pop_file)[["Geography", "Age group", "2015"]]
    .rename(columns={"2015": "population"})
    .sort_values(by=["Geography"])
    .reset_index(drop=True)
)

# The values are handled as strings containing "," separators: strip the commas, then cast to int.
pop_df["population"] = pop_df["population"].str.replace(",", "").astype(int)
pop_df.dtypes

Geography     object
Age group     object
population     int32
dtype: object

In [4]:
# Join population and mortality on whatever columns the two frames share (pandas' default merge keys).
merged = pop_df.merge(mort_2015)

# Deaths as a percentage of population, rendered as text with two decimals and a "%" suffix.
rate = (
    merged["Number of Mortality"]
    .div(merged["population"])
    .mul(100)
    .map("{:,.2f}%".format)
)
merged["Mortality Rate"] = rate
merged


Unnamed: 0,Geography,Age group,population,Number of Mortality,Mortality Rate
0,"Abbotsford-Mission, British Columbia",All ages,184021,1252,0.68%
1,"Barrie, Ontario",All ages,202264,1426,0.71%
2,"Brantford, Ontario",All ages,144109,1339,0.93%
3,"Calgary, Alberta",All ages,1432169,6655,0.46%
4,"Edmonton, Alberta",All ages,1358345,8173,0.60%
5,"Greater Sudbury, Ontario",All ages,165261,1698,1.03%
6,"Guelph, Ontario",All ages,153290,1077,0.70%
7,"Halifax, Nova Scotia",All ages,416824,3136,0.75%
8,"Hamilton, Ontario",All ages,769853,6725,0.87%
9,"Kelowna, British Columbia",All ages,195305,1717,0.88%


In [5]:
# The "Mortality Rate" column is already computed and stored on `merged` by the
# previous cell; this cell used to repeat that exact computation (a copy-paste
# duplicate that produced the same values), so it now only displays the frame.
merged

Unnamed: 0,Geography,Age group,population,Number of Mortality,Mortality Rate
0,"Abbotsford-Mission, British Columbia",All ages,184021,1252,0.68%
1,"Barrie, Ontario",All ages,202264,1426,0.71%
2,"Brantford, Ontario",All ages,144109,1339,0.93%
3,"Calgary, Alberta",All ages,1432169,6655,0.46%
4,"Edmonton, Alberta",All ages,1358345,8173,0.60%
5,"Greater Sudbury, Ontario",All ages,165261,1698,1.03%
6,"Guelph, Ontario",All ages,153290,1077,0.70%
7,"Halifax, Nova Scotia",All ages,416824,3136,0.75%
8,"Hamilton, Ontario",All ages,769853,6725,0.87%
9,"Kelowna, British Columbia",All ages,195305,1717,0.88%


#### Median income: https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1110000801&pickMembers%5B0%5D=1.42

In [6]:
med_inc_file = os.path.join("datas", "income", "median_income.csv")
income_df = pd.read_csv(med_inc_file)
income_df = income_df.dropna()

# Drop every year except 2015
income_df = income_df.drop(columns=["2013", "2014", "2016"])

# Values are strings with "," separators: strip the commas and cast to integer
income_df["2015"] = income_df["2015"].str.replace(",", "").astype(int)

# Ottawa-Gatineau is split into a Quebec part and an Ontario part; use the mean of
# the two as the value for "Ottawa-Gatineau, Ontario/Quebec".
# NOTE(review): the two parts are picked by index label (9 and 10) - confirm these
# labels still point at the Ottawa-Gatineau rows if median_income.csv changes.
x = (income_df.loc[9, "2015"] + income_df.loc[10, "2015"]) / 2

# Add the combined row to the frame. DataFrame.append was removed in pandas 2.0,
# so build a one-row frame and concatenate it; ignore_index renumbers rows 0..n-1.
income_df = pd.concat(
    [
        income_df,
        pd.DataFrame([{"Geography": "Ottawa-Gatineau, Ontario/Quebec", "2015": x}]),
    ],
    ignore_index=True,
)

# Now get rid of the separate Quebec-part and Ontario-part rows (positions 9 and 10).
income_df = income_df.drop(income_df.index[9:11])

# Sort alphabetically by geography
income_df = income_df.sort_values(by=["Geography"]).reset_index(drop=True)

# Rename the year column to "Median Income"
income_df = income_df.rename(columns={"2015": "Median Income"})
income_df

Unnamed: 0,Geography,Median Income
0,"Abbotsford-Mission, British Columbia",30290.0
1,"Barrie, Ontario",35270.0
2,"Brantford, Ontario",33010.0
3,"Calgary, Alberta",43130.0
4,"Edmonton, Alberta",43190.0
5,"Greater Sudbury, Ontario",38560.0
6,"Guelph, Ontario",39260.0
7,"Halifax, Nova Scotia",36390.0
8,"Hamilton, Ontario",35990.0
9,"Kelowna, British Columbia",34610.0


In [7]:
# Attach the median income to the population/mortality frame, joining on the columns both frames share.
merge = merged.merge(income_df)
merge

Unnamed: 0,Geography,Age group,population,Number of Mortality,Mortality Rate,Median Income
0,"Abbotsford-Mission, British Columbia",All ages,184021,1252,0.68%,30290.0
1,"Barrie, Ontario",All ages,202264,1426,0.71%,35270.0
2,"Brantford, Ontario",All ages,144109,1339,0.93%,33010.0
3,"Calgary, Alberta",All ages,1432169,6655,0.46%,43130.0
4,"Edmonton, Alberta",All ages,1358345,8173,0.60%,43190.0
5,"Greater Sudbury, Ontario",All ages,165261,1698,1.03%,38560.0
6,"Guelph, Ontario",All ages,153290,1077,0.70%,39260.0
7,"Halifax, Nova Scotia",All ages,416824,3136,0.75%,36390.0
8,"Hamilton, Ontario",All ages,769853,6725,0.87%,35990.0
9,"Kelowna, British Columbia",All ages,195305,1717,0.88%,34610.0


### Merge Health indicators

In [23]:
health_file = os.path.join("datas", "Health", "Health_Indicators.csv")

# Load the health indicators, discarding any row that has a missing value.
health_df = pd.read_csv(health_file).dropna()

# Join onto the combined frame, then drop the age-group column.
merged_df = merge.merge(health_df).drop(columns=["Age group"])
merged_df

Unnamed: 0,Geography,population,Number of Mortality,Mortality Rate,Median Income,"Body mass index, adjusted self-reported, adult (18 years and over), obese","Body mass index, adjusted self-reported, adult (18 years and over), overweight","Current smoker, daily",Diabetes,Heavy drinking,High blood pressure,"Perceived health, fair or poor","Perceived life stress, most days quite a bit or extremely stressful","Perceived mental health, fair or poor"
0,"Abbotsford-Mission, British Columbia",184021,1252,0.68%,30290.0,23.75%,23.75%,8.37%,5.65%,13.42%,10.76%,7.93%,18.97%,5.87%
1,"Barrie, Ontario",202264,1426,0.71%,35270.0,21.46%,21.70%,9.79%,6.87%,16.22%,15.28%,11.03%,22.15%,6.63%
2,"Brantford, Ontario",144109,1339,0.93%,33010.0,17.97%,24.56%,13.12%,5.90%,17.07%,15.06%,9.99%,14.09%,5.55%
3,"Calgary, Alberta",1432169,6655,0.46%,43130.0,18.31%,28.86%,7.76%,3.57%,15.24%,11.19%,7.59%,18.52%,5.75%
4,"Edmonton, Alberta",1358345,8173,0.60%,43190.0,19.51%,29.00%,11.10%,4.64%,15.76%,12.67%,8.43%,17.76%,5.45%
5,"Greater Sudbury, Ontario",165261,1698,1.03%,38560.0,23.84%,25.35%,15.37%,7.87%,17.73%,19.85%,13.13%,17.49%,8.35%
6,"Guelph, Ontario",153290,1077,0.70%,39260.0,19.57%,26.55%,9.07%,4.44%,12.39%,14.48%,9.46%,21.53%,6.52%
7,"Halifax, Nova Scotia",416824,3136,0.75%,36390.0,22.84%,27.16%,10.58%,6.79%,20.56%,13.48%,9.16%,17.23%,6.62%
8,"Hamilton, Ontario",769853,6725,0.87%,35990.0,19.21%,28.20%,10.56%,5.95%,16.82%,16.50%,9.07%,18.32%,5.73%
9,"Kelowna, British Columbia",195305,1717,0.88%,34610.0,16.18%,27.65%,8.40%,4.56%,17.00%,13.88%,7.27%,15.21%,4.71%


### Converting percentages to numbers