# Executive Summary

# Introduction

### Load Libraries

In [67]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import numpy as np
%matplotlib inline
import warnings
from scipy import stats
warnings.filterwarnings('ignore')

## Data

A population Census is held across Canada every 5 years. It collects data about age and sex, families and households, language, immigration and internal migration, racial diversity, Aboriginal peoples, housing, education, income, and labor. 
The City of Toronto Neighborhood Profiles use this Census data to provide a portrait of the demographic, social and economic characteristics of the people and households in each City of Toronto neighborhood.

The purpose of this study is to build a linear regression model that will predict the number of covid-19 cases for any Toronto neighborhood based on its demographic data. Predicting covid-19 cases is useful in that an understanding of which socioeconomic factors that influence the growth of disease in a community will help with our understanding of the virus. This knowledge will enable resources to be better targeted in the future to help prevent transmission during this, or any future pandemic.

The data is made available through Toronto's Open Data portal (https://open.toronto.ca/).

Two datasets from this site were used for this study. One that shows the number of covid cases by Toronto neighborhood (https://open.toronto.ca/dataset/covid-19-cases-in-toronto/) and the other contains demographic features of each of these neighborhoods (https://open.toronto.ca/dataset/neighbourhood-profiles/). There are over 2,300 features show for each neighborhood including:

### Read in Files

In [97]:
covid_to = pd.read_csv('data/CityofToronto_COVID-19_NeighbourhoodData.csv')

In [104]:
#neigh_prof = pd.read_csv("data/neighbourhood-profiles-2016-readme.csv")

In [99]:
neigh = pd.read_csv("data/neighbourhood-profiles-2016-csv.csv")

### Data Cleaning

In [112]:
# The Neighbourhood file has Neighbourhoods as columns so we must 
# transpose it.
neighTO = neigh.transpose()

In [111]:
[x for x in neighTO.loc['Characteristic']]

['Neighbourhood Number',
 'TSNS2020 Designation',
 'Population, 2016',
 'Population, 2011',
 'Population Change 2011-2016',
 'Total private dwellings',
 'Private dwellings occupied by usual residents',
 'Population density per square kilometre',
 'Land area in square kilometres',
 'Children (0-14 years)',
 'Youth (15-24 years)',
 'Working Age (25-54 years)',
 'Pre-retirement (55-64 years)',
 'Seniors (65+ years)',
 'Older Seniors (85+ years)',
 'Male: 0 to 04 years',
 'Male: 05 to 09 years',
 'Male: 10 to 14 years',
 'Male: 15 to 19 years',
 'Male: 20 to 24 years',
 'Male: 25 to 29 years',
 'Male: 30 to 34 years',
 'Male: 35 to 39 years',
 'Male: 40 to 44 years',
 'Male: 45 to 49 years',
 'Male: 50 to 54 years',
 'Male: 55 to 59 years',
 'Male: 60 to 64 years',
 'Male: 65 to 69 years',
 'Male: 70 to 74 years',
 'Male: 75 to 79 years',
 'Female: 10 to 14 years',
 'Male: 80 to 84 years',
 'Male: 85 to 89 years',
 'Male: 90 to 94 years',
 'Male: 95 to 99 years',
 'Male: 100 years and over

In [65]:
[x for x in neigh['Characteristic'].unique()]

['Neighbourhood Number',
 'TSNS2020 Designation',
 'Population, 2016',
 'Population, 2011',
 'Population Change 2011-2016',
 'Total private dwellings',
 'Private dwellings occupied by usual residents',
 'Population density per square kilometre',
 'Land area in square kilometres',
 'Children (0-14 years)',
 'Youth (15-24 years)',
 'Working Age (25-54 years)',
 'Pre-retirement (55-64 years)',
 'Seniors (65+ years)',
 'Older Seniors (85+ years)',
 'Male: 0 to 04 years',
 'Male: 05 to 09 years',
 'Male: 10 to 14 years',
 'Male: 15 to 19 years',
 'Male: 20 to 24 years',
 'Male: 25 to 29 years',
 'Male: 30 to 34 years',
 'Male: 35 to 39 years',
 'Male: 40 to 44 years',
 'Male: 45 to 49 years',
 'Male: 50 to 54 years',
 'Male: 55 to 59 years',
 'Male: 60 to 64 years',
 'Male: 65 to 69 years',
 'Male: 70 to 74 years',
 'Male: 75 to 79 years',
 'Female: 10 to 14 years',
 'Male: 80 to 84 years',
 'Male: 85 to 89 years',
 'Male: 90 to 94 years',
 'Male: 95 to 99 years',
 'Male: 100 years and over

In [70]:
[x for x in neigh.columns]

['_id',
 'Category',
 'Topic',
 'Data Source',
 'Characteristic',
 'City of Toronto',
 'Agincourt North',
 'Agincourt South-Malvern West',
 'Alderwood',
 'Annex',
 'Banbury-Don Mills',
 'Bathurst Manor',
 'Bay Street Corridor',
 'Bayview Village',
 'Bayview Woods-Steeles',
 'Bedford Park-Nortown',
 'Beechborough-Greenbrook',
 'Bendale',
 'Birchcliffe-Cliffside',
 'Black Creek',
 'Blake-Jones',
 'Briar Hill-Belgravia',
 'Bridle Path-Sunnybrook-York Mills',
 'Broadview North',
 'Brookhaven-Amesbury',
 'Cabbagetown-South St. James Town',
 'Caledonia-Fairbank',
 'Casa Loma',
 'Centennial Scarborough',
 'Church-Yonge Corridor',
 'Clairlea-Birchmount',
 'Clanton Park',
 'Cliffcrest',
 'Corso Italia-Davenport',
 'Danforth',
 'Danforth East York',
 'Don Valley Village',
 'Dorset Park',
 'Dovercourt-Wallace Emerson-Junction',
 'Downsview-Roding-CFB',
 'Dufferin Grove',
 'East End-Danforth',
 'Edenbridge-Humber Valley',
 'Eglinton East',
 'Elms-Old Rexdale',
 'Englemount-Lawrence',
 'Eringate-Ce

In [71]:
neigh

Unnamed: 0,_id,Category,Topic,Data Source,Characteristic,City of Toronto,Agincourt North,Agincourt South-Malvern West,Alderwood,Annex,...,Willowdale West,Willowridge-Martingrove-Richview,Woburn,Woodbine Corridor,Woodbine-Lumsden,Wychwood,Yonge-Eglinton,Yonge-St.Clair,York University Heights,Yorkdale-Glen Park
0,1,Neighbourhood Information,Neighbourhood Information,City of Toronto,Neighbourhood Number,,129,128,20,95,...,37,7,137,64,60,94,100,97,27,31
1,2,Neighbourhood Information,Neighbourhood Information,City of Toronto,TSNS2020 Designation,,No Designation,No Designation,No Designation,No Designation,...,No Designation,No Designation,NIA,No Designation,No Designation,No Designation,No Designation,No Designation,NIA,Emerging Neighbourhood
2,3,Population,Population and dwellings,Census Profile 98-316-X2016001,"Population, 2016",2731571,29113,23757,12054,30526,...,16936,22156,53485,12541,7865,14349,11817,12528,27593,14804
3,4,Population,Population and dwellings,Census Profile 98-316-X2016001,"Population, 2011",2615060,30279,21988,11904,29177,...,15004,21343,53350,11703,7826,13986,10578,11652,27713,14687
4,5,Population,Population and dwellings,Census Profile 98-316-X2016001,Population Change 2011-2016,4.50%,-3.90%,8.00%,1.30%,4.60%,...,12.90%,3.80%,0.30%,7.20%,0.50%,2.60%,11.70%,7.50%,-0.40%,0.80%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2378,2379,Mobility,Mobility status - Place of residence 5 years ago,Census Profile 98-316-X2016001,Migrants,400950,3170,3145,925,6390,...,3765,2270,7260,985,620,1350,2425,2310,4965,1345
2379,2380,Mobility,Mobility status - Place of residence 5 years ago,Census Profile 98-316-X2016001,Internal migrants,184120,880,980,680,3930,...,1545,1110,1720,610,395,780,1260,1355,1700,580
2380,2381,Mobility,Mobility status - Place of residence 5 years ago,Census Profile 98-316-X2016001,Intraprovincial migrants,141135,735,760,615,2630,...,1070,960,1400,350,320,570,970,1025,1490,445
2381,2382,Mobility,Mobility status - Place of residence 5 years ago,Census Profile 98-316-X2016001,Interprovincial migrants,42985,135,220,70,1310,...,475,150,335,250,85,210,290,325,195,135


In [74]:
covid_to.sort_values('Neighbourhood Name')

Unnamed: 0,Neighbourhood ID,Neighbourhood Name,"Rate per 100,000 people",Case Count
128,129.0,Agincourt North,838.113558,244
127,128.0,Agincourt South-Malvern West,723.997138,172
19,20.0,Alderwood,945.744151,114
94,95.0,Annex,861.560637,263
41,42.0,Banbury-Don Mills,498.284889,138
...,...,...,...,...
93,94.0,Wychwood,968.708621,139
99,100.0,Yonge-Eglinton,423.119235,50
96,97.0,Yonge-St.Clair,462.962963,58
26,27.0,York University Heights,2866.669083,791


In [None]:
covid_to.sort_values('Ra')

In [82]:
covid_to[['Neighbourhood Name', "Rate per 100,000 people"]].sort_values("Rate per 100,000 people").tail(10)

Unnamed: 0,Neighbourhood Name,"Rate per 100,000 people"
5,Kingsview Village-The Westway,2559.090909
21,Humbermede,2566.741718
0,West Humber-Clairville,2719.740634
23,Black Creek,2829.277269
26,York University Heights,2866.669083
1,Mount Olive-Silverstone-Jamestown,2985.980458
24,Glenfield-Jane Heights,3004.165163
28,Maple Leaf,3451.686282
112,Weston,3612.716763
140,Missing Address/Postal Code,


In [89]:
sring = "k(a3(b(a2(c))))"


"b" + (2 * "c")

'bcc'

In [96]:
"k" + "a" + 3 * ("b" + "a" + 2 * "c")

'kabaccbaccbacc'