# Air Quality and Respiratory Disease
## Step1. Data Cleaning

### **Data:**

In this project, we will be using three different datasets. The datasets can be found here:

In [7]:
# Import libraries
import pandas as pd
import numpy as np
import plotly.express as px
from functools import partial
from time import sleep
from IPython.display import clear_output
from tqdm.notebook import tnrange 

In [9]:
# Column template used by import_data to create an empty starting frame.
d = dict(Province=[], Pollutant=[])

# Region codes and pollutant codes; import_data combines them as
# "<province>_<pollutant>.csv" to build each data file name.
provinces = [
    "AB",
    "BC",
    "ON",
    "MB",
    "NB",
    "NS",
    "YT",
    "QC",
    "SK",
    "NL",
    "PE",
]
common_pollutants = [
    "NH3",
    "CO",
    "SOX",
    "NOX",
    "VOC",
]

# Running count of provinces processed; import_data updates it via `global`.
provnum = 0

def import_data(province, pollutants):
    """Download the "GRAND TOTAL" emission rows for one province.

    For each pollutant code, the file ``<province>_<pollutant>.csv`` is read
    from the Callysto ``data-files`` repository and reduced to the rows whose
    ``SECTORS`` value is ``"GRAND TOTAL"``. The ``SECTORS`` column is renamed
    to ``Province`` and filled with the province code, and a ``Pollutant``
    column is inserted right after it.

    Args:
        province: Province code used in the file name and written into the
            ``Province`` column (e.g. ``"AB"``).
        pollutants: Iterable of pollutant codes (e.g. ``"NH3"``).

    Returns:
        A DataFrame with a fresh 0..n-1 index holding one row per
        "GRAND TOTAL" row found, with ``Province`` and ``Pollutant`` first,
        followed by the remaining CSV columns. When nothing was collected,
        an empty frame built from the module-level template ``d`` is returned.

    Side effects:
        Shows a notebook progress bar per pollutant, updates the module-level
        ``provnum`` counter, and prints a status line.
    """
    global provnum

    base_url = (
        "https://raw.githubusercontent.com/callysto/data-files/main/"
        "data-viz-of-the-week/air-quality/data/province-pollutants/"
    )

    # Collect the per-pollutant frames and concatenate once at the end;
    # growing a frame from an empty one relies on deprecated dtype handling
    # in pandas and re-copies the accumulated data on every iteration.
    frames = []
    for pollutant in pollutants:
        raw = pd.read_csv(base_url + province + "_" + pollutant + ".csv")
        totals = (
            raw.loc[raw["SECTORS"] == "GRAND TOTAL"]
            .drop(["Unnamed: 1", "Unnamed: 2"], axis=1)
            .rename(columns={"SECTORS": "Province"})
            # Overwrite the whole column instead of writing to index label 1:
            # a label-based write would append a phantom row whenever the
            # GRAND TOTAL row is not stored under that exact label.
            .assign(Province=province)
        )
        totals.insert(1, "Pollutant", pollutant, True)
        frames.append(totals)

        # Purely cosmetic progress bar; the file has already been read above.
        for _ in tnrange(100, desc='Downloading ' + pollutant + ' data for ' + province):
            sleep(0.01)

    # Count this province, restarting at 1 once a full pass over `provinces`
    # has completed so the "remaining" figure never goes negative.
    provnum = 1 if provnum >= len(provinces) else provnum + 1

    print("All provincial data for " + province + " has been downloaded." + 
         " Remaining provinces:" + str(len(provinces) - provnum) + "/" + str(len(provinces)) + ".") 

    if not frames:
        return pd.DataFrame.from_dict(d)
    return pd.concat(frames, ignore_index=True)

# Fetch every province in order, then stack the per-province frames into one
# table with a fresh 0..n-1 index.
province_frames = [
    import_data(code, pollutants=common_pollutants) for code in provinces
]
df = pd.concat(province_frames, ignore_index=True)

# Remove the progress output once the next output (the preview) is ready.
clear_output(wait=True)
df.head()

Unnamed: 0,Province,Pollutant,1990,1991,1992,1993,1994,1995,1996,1997,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,AB,NH3,95261.6403072018,96392.6120160835,100809.748597631,102888.757346287,108909.29619548,117873.246725767,123629.070648118,126274.333185743,...,132641.48219304642,137630.5644576695,139707.6493382899,141095.12881051924,139937.588499843,132619.48058146506,124020.85788901642,130492.08344071465,131440.5311861856,133161.78812024335
1,AB,CO,1810389.43111,1674240.47593966,1694347.04215079,1670384.18553975,1713860.13181427,1699241.95442997,1707362.43286568,1747506.53522988,...,1015160.0103461308,1020173.0616697504,1044806.4504109876,1083451.1163415606,985516.9393906804,950437.964740328,1009269.6788974762,1001193.5531940853,992062.7619323296,890235.0708604576
2,AB,SOX,512405.17695316,524144.18481018,564717.776802336,571473.902885045,594900.160427548,569212.579973766,556679.129617835,522989.264001654,...,343134.3155222252,333877.81221442844,314187.69199869514,291592.47385681805,259725.3729667968,239568.79130380912,241439.9996413067,225758.93672194,221287.8184768282,182536.32978225523
3,AB,NOX,613284.286308138,585705.325771787,609463.591567837,640884.40812074,692547.095770932,718124.92776215,749396.467537041,815873.317241737,...,691938.4815302572,649050.7227761747,643710.2914325446,656735.6508195029,630979.6246838769,603268.9203695664,627271.8514718515,625976.446078691,625636.7223761571,567697.196882858
4,AB,VOC,643752.225882095,624633.776990753,642477.562274331,654384.418252455,669557.452773095,679792.446419822,708522.408799455,678362.261827271,...,478680.3220269855,518824.5195309372,556411.0771793935,572615.3277760863,517421.8223597623,476740.1237000264,474301.4225637558,501074.8373153089,492636.6127706006,456932.64826396375
