# Question:

How has COVID-19 impacted unemployment in Canada? In which provinces is unemployment most impacted by COVID-19?

In [1]:
#Import dependencies

import os
import csv
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.font_manager import FontProperties
from scipy import stats

In [2]:
# Each Stats Canada data file only holds 2 months of unemployment data. Every CSV is reduced to the
# geography column (Canada and provinces) plus its two monthly unemployment-rate columns.


def load_unemployment(filepath, months):
    """Read one Stats Canada unemployment CSV and keep only the columns of interest.

    Parameters
    ----------
    filepath : str
        Path of the CSV file to read.
    months : list of str
        Names of the monthly unemployment columns to keep, e.g. ["Oct-19", "Nov-19"].

    Returns
    -------
    pandas.DataFrame
        The "Geography 4" column followed by the requested month columns, with every row
        that has a missing value in any of those columns removed.

    Raises
    ------
    FileNotFoundError
        If `filepath` does not exist (raised by `pd.read_csv`).
    KeyError
        If "Geography 4" or one of `months` is not a column of the file.
    """
    unemployment = pd.read_csv(filepath)
    return unemployment[["Geography 4"] + list(months)].dropna()


oct_nov_19_filepath = os.path.join("Resources", "Oct 19 Nov 19 StatsCan Unemployment.csv")
oct_nov_19 = load_unemployment(oct_nov_19_filepath, ["Oct-19", "Nov-19"])

dec_jan_20_filepath = os.path.join("Resources", "Dec 19 Jan 20 StatsCan Unemployment.csv")
dec_jan_20 = load_unemployment(dec_jan_20_filepath, ["Dec-19", "Jan-20"])

feb_mar_20_filepath = os.path.join("Resources", "Feb 20 Mar 20 StatsCan Unemployment.csv")
feb_mar_20 = load_unemployment(feb_mar_20_filepath, ["Feb-20", "Mar-20"])

apr_may_20_filepath = os.path.join("Resources", "Apr 20 May 20 StatsCan Unemployment.csv")
apr_may_20 = load_unemployment(apr_may_20_filepath, ["Apr-20", "May-20"])

jun_jul_20_filepath = os.path.join("Resources", "Jun 20 Jul 20 StatsCan Unemployment.csv")
jun_jul_20 = load_unemployment(jun_jul_20_filepath, ["Jun-20", "Jul-20"])

aug_sep_20_filepath = os.path.join("Resources", "Aug 20 Sep 20 StatsCan Unemployment.csv")
aug_sep_20 = load_unemployment(aug_sep_20_filepath, ["Aug-20", "Sep-20"])


FileNotFoundError: [Errno 2] File Resources\Oct 19 Nov 19 StatsCan Unemployment.csv does not exist: 'Resources\\Oct 19 Nov 19 StatsCan Unemployment.csv'

In [None]:
# Fold the six two-month Stats Canada frames into a single table holding 12 months of
# unemployment data, joined on the shared "Geography 4" column.

merged_can_unemp = oct_nov_19
for two_month_frame in (dec_jan_20, feb_mar_20, apr_may_20, jun_jul_20, aug_sep_20):
    merged_can_unemp = pd.merge(merged_can_unemp, two_month_frame, on="Geography 4")

# Remove the row labelled 0 and rename the geography column to "Province".
# NOTE(review): row 0 is presumably the national "Canada" total - confirm against the CSVs.
prov_can_unemp = merged_can_unemp.drop(index=[0]).rename(columns={"Geography 4": "Province"})
prov_can_unemp

In [None]:
# Flip the table so that each province becomes a column and each month becomes a row.

transposed_unemp = prov_can_unemp.T

# After the transpose the first row holds the province names; promote it to the header.
columns = transposed_unemp.iloc[0].values
transposed_unemp.columns = columns

# Discard the now-redundant "Province" row and expose the month labels (currently the index)
# as an ordinary "Month" column.
prov_can_unemp = (
    transposed_unemp
    .drop(index='Province')
    .reset_index()
    .rename(columns={"index": "Month"})
)
prov_can_unemp


In [None]:
# Line chart of the provincial unemployment data for the full Oct-19 to Sep-20 window.
month_labels = ["Oct-19", "Nov-19", "Dec-19", "Jan-20", "Feb-20", "Mar-20",
                "Apr-20", "May-20", "Jun-20", "Jul-20", "Aug-20", "Sep-20"]
x_ticks = np.arange(len(prov_can_unemp["Month"]))
fontP = FontProperties(size='small')

# With no x/y given, pandas plots against the row index, so the ticks below are
# relabelled with the month names.
ax = prov_can_unemp.plot()
ax.set_title("Provincial Unemployment by Month")
ax.set_xticks(x_ticks)
ax.set_xticklabels(month_labels, rotation=45)
ax.set_ylabel("Unemployment Rate")
ax.set_xlabel("Month")
# Anchor the legend outside the axes, to the right of the plot area.
ax.legend(title='Legend', bbox_to_anchor=(1, 1), loc='upper left', prop=fontP)
plt.tight_layout()


In [None]:
# October, November, and December unemployment values are dropped to isolate the COVID-19 impacts.
# The rows are selected by month name rather than by row position, so re-running this cell is a
# no-op; a positional drop followed by an index reset would remove three more months on every
# re-run.
pre_covid_months = ["Oct-19", "Nov-19", "Dec-19"]
is_pre_covid = prov_can_unemp["Month"].isin(pre_covid_months)
prov_can_unemp = prov_can_unemp.loc[~is_pre_covid].reset_index(drop=True)
prov_can_unemp

In [None]:
# Line chart of the revised provincial unemployment data (Jan-20 to Sep-20), one line per province.

provinces = ['Newfoundland and Labrador', 'Prince Edward Island', 'Nova Scotia', 'New Brunswick',
             'Quebec', 'Ontario', 'Manitoba', 'Saskatchewan', 'Alberta', 'British Columbia']
# NOTE(review): eleven colours are supplied for ten provinces; the last entry looks unused.
line_colors = ['aqua', 'r', 'coral', 'grey', 'fuchsia', 'goldenrod', 'green', 'teal', 'salmon',
               'b', 'orange']
month_labels = ["Jan-20", "Feb-20", "Mar-20", "Apr-20", "May-20", "Jun-20", "Jul-20", "Aug-20", "Sep-20"]
x_ticks = np.arange(len(prov_can_unemp["Month"]))
fontP = FontProperties(size='small')

ax = prov_can_unemp.plot(x='Month', y=provinces, color=line_colors)
ax.set_title("Provincial Unemployment by Month")
ax.set_xticks(x_ticks)
ax.set_xticklabels(month_labels, rotation=45)
ax.set_ylabel("Unemployment Rate")
ax.set_xlabel("Month")
# Anchor the legend outside the axes, to the right of the plot area.
ax.legend(title='Legend', bbox_to_anchor=(1, 1), loc='upper left', prop=fontP)
plt.tight_layout()


# Observations:

* Overall, all provinces experienced a spike in unemployment starting in March (impacts of COVID). 
* For the most part, provinces have been able to bring unemployment down from the levels seen in the March to May timeframe. 
* Newfoundland and Labrador experienced the highest unemployment of provinces in Canada during this time frame.
* It is important to note, however, that Newfoundland and Labrador already had the highest unemployment rate before COVID-19 impacts would have taken effect. 
* Quebec experienced the highest spike in unemployment around March and April but this has been dramatically reduced. 
* Quebec unemployment as of September is among the lowest in Canada. 

In [None]:
# Pull Johns Hopkins data on cumulative COVID-19 confirmed case counts. Isolate Canada only and
# exclude cruise ship cases. Only the counts on the last day of each month are kept, which
# captures the cumulative total as of month end.

can_cases_filepath = os.path.join("..", "Covid confirmed cases", "covid19_confirmed_global.csv")

# Cruise-ship rows are excluded by name instead of by hard-coded row labels, so the filter keeps
# working if rows are added to or reordered in the CSV.
# NOTE(review): this replaces a drop of row labels 41 and 42, presumably the "Diamond Princess"
# and "Grand Princess" rows - verify the names against the CSV.
cruise_ships = ["Diamond Princess", "Grand Princess"]
month_end_dates = ["1/31/20", "2/29/20", "3/31/20", "4/30/20", "5/31/20",
                   "6/30/20", "7/31/20", "8/31/20", "9/30/20"]

can_cases = pd.read_csv(can_cases_filepath)
is_canada = can_cases["Country/Region"] == "Canada"
is_cruise_ship = can_cases["Province/State"].isin(cruise_ships)
can_cases = (
    can_cases
    .loc[is_canada & ~is_cruise_ship, ["Province/State"] + month_end_dates]
    .rename(columns={"Province/State": "Province"})
)
can_cases

In [None]:
# Flip the case table so that each province becomes a column and each month-end date a row.
transposed_cases = can_cases.T

# After the transpose the first row holds the province names; promote it to the header.
columns = transposed_cases.iloc[0].values
transposed_cases.columns = columns

# Discard the now-redundant "Province" row and expose the date labels (currently the index)
# as an ordinary "Month" column.
can_cases = (
    transposed_cases
    .drop(index='Province')
    .reset_index()
    .rename(columns={"index": "Month"})
)
can_cases

In [None]:
# Line chart of the provincial COVID-19 case data (Jan-20 to Sep-20), one line per province.
provinces = ['Newfoundland and Labrador', 'Prince Edward Island', 'Nova Scotia', 'New Brunswick',
             'Quebec', 'Ontario', 'Manitoba', 'Saskatchewan', 'Alberta', 'British Columbia']
# NOTE(review): eleven colours are supplied for ten provinces; the last entry looks unused.
line_colors = ['aqua', 'r', 'coral', 'grey', 'fuchsia', 'goldenrod', 'green', 'teal', 'salmon',
               'b', 'orange']
month_labels = ["Jan-20", "Feb-20", "Mar-20", "Apr-20", "May-20", "Jun-20", "Jul-20", "Aug-20", "Sep-20"]
x_ticks = np.arange(len(can_cases["Month"]))
fontP = FontProperties(size='small')

ax = can_cases.plot(x='Month', y=provinces, color=line_colors)
ax.set_title("Provincial COVID-19 Cases by Month")
ax.set_xticks(x_ticks)
ax.set_xticklabels(month_labels, rotation=45)
ax.set_ylabel("Confirmed COVID-19 Cases")
ax.set_xlabel("Month")
# Anchor the legend outside the axes, to the right of the plot area.
ax.legend(title='Legend', bbox_to_anchor=(1, 1), loc='upper left', prop=fontP)
plt.tight_layout()

# Observations:

* Quebec, Ontario, Alberta, and British Columbia have the highest number of COVID-19 confirmed cases (in order of volume). 
* Quebec and Ontario experienced a sharp spike in cases in the March to May timeframe, which slightly leveled off until August. 
* Quebec and Ontario experienced a second sharp spike in cases between August and September. 

In [None]:
# Plot the case data and the unemployment data for all ten provinces, one figure each, and save
# both figures as PNG files under Output/.

provinces = ['Newfoundland and Labrador', 'Prince Edward Island', 'Nova Scotia', 'New Brunswick',
             'Quebec', 'Ontario', 'Manitoba', 'Saskatchewan', 'Alberta', 'British Columbia']
# One colour per province, in the same order as `provinces`.
province_colors = ['aqua', 'r', 'coral', 'grey', 'fuchsia', 'goldenrod', 'green', 'teal', 'salmon', 'b']
month_labels = ["Jan-20", "Feb-20", "Mar-20", "Apr-20", "May-20", "Jun-20", "Jul-20", "Aug-20", "Sep-20"]

# plt.savefig does not create missing directories, so make sure the target folder exists.
os.makedirs('Output', exist_ok=True)

# (data frame, figure title, y-axis label, output file) for each figure, in plotting order.
figure_specs = [
    (can_cases, "Provincial COVID-19 Cases by Month", "Confirmed COVID-19 Cases",
     'Output/Provincial_COVID_Cases.png'),
    (prov_can_unemp, "Provincial Unemployment by Month", "Unemployment Rate",
     'Output/Provincial_Unemployment.png'),
]

for frame, title, y_label, output_path in figure_specs:
    frame.plot('Month', provinces, color=province_colors)
    plt.title(title)
    x_ticks = np.arange(len(frame["Month"]))
    plt.xticks(x_ticks, month_labels, rotation=45)
    plt.ylabel(y_label)
    plt.xlabel("Month")
    fontP = FontProperties()
    fontP.set_size('small')
    # Anchor the legend outside the axes, to the right of the plot area.
    plt.legend(title='Legend', bbox_to_anchor=(1, 1), loc='upper left', prop=fontP)
    plt.tight_layout()
    plt.savefig(output_path)


# COVID-19 Cases Observations:

* Quebec, Ontario, Alberta, and British Columbia have the highest number of COVID-19 confirmed cases (in order of volume).
* Quebec and Ontario experienced a sharp spike in cases in the March to May timeframe, which slightly leveled off until August.
* Quebec and Ontario experienced a second sharp spike in cases between August and September.

# Unemployment Observations:

* Overall, all provinces experienced a spike in unemployment starting in March (impacts of COVID).
* For the most part, provinces have been able to bring unemployment down from the levels seen in the March to May timeframe.
* Newfoundland and Labrador experienced the highest unemployment of provinces in Canada during this time frame.
* It is important to note, however, that Newfoundland and Labrador already had the highest unemployment rate before COVID-19 impacts would have taken effect.
* Quebec experienced the highest spike in unemployment around March and April but this has been dramatically reduced.
* Quebec unemployment as of September is among the lowest in Canada.


In [None]:
# Isolate Quebec and Ontario: plot their case counts and their unemployment rates, one figure
# each, and save both figures as PNG files under Output/.

focus_provinces = ['Quebec', 'Ontario']
focus_colors = ['fuchsia', 'goldenrod']
month_labels = ["Jan-20", "Feb-20", "Mar-20", "Apr-20", "May-20", "Jun-20", "Jul-20", "Aug-20", "Sep-20"]

# plt.savefig does not create missing directories, so make sure the target folder exists.
os.makedirs('Output', exist_ok=True)

# (data frame, figure title, y-axis label, output file) for each figure, in plotting order.
figure_specs = [
    (can_cases, "COVID-19 Cases by Month - Quebec and Ontario", "Confirmed COVID-19 Cases",
     'Output/Quebec_Ontario_COVID_Cases.png'),
    (prov_can_unemp, "Unemployment by Month - Quebec and Ontario", "Unemployment Rate",
     'Output/Quebec_Ontario_Unemployment.png'),
]

for frame, title, y_label, output_path in figure_specs:
    frame.plot('Month', focus_provinces, color=focus_colors)
    plt.title(title)
    x_ticks = np.arange(len(frame["Month"]))
    plt.xticks(x_ticks, month_labels, rotation=45)
    plt.ylabel(y_label)
    plt.xlabel("Month")
    fontP = FontProperties()
    fontP.set_size('small')
    # Anchor the legend outside the axes, to the right of the plot area.
    plt.legend(title='Legend', bbox_to_anchor=(1, 1), loc='upper left', prop=fontP)
    plt.tight_layout()
    plt.savefig(output_path)

# Observations:

* Quebec's unemployment rate was at its peak in April when COVID-19 cases were sharply increasing. 
* Ontario's rate of COVID-19 cases was lower than Quebec's, and Ontario's peak unemployment rate came later than Quebec's, in May. 
* Quebec and Ontario were able to flatten their curves between May and August and unemployment rates declined during this time as well. 