# Air Quality Barcelona

In [1]:
import pandas as pd

In [2]:
def structure_dataset(csv_name):
    df = pd.read_csv('data/'+csv_name)
    df.drop(['CODI_PROVINCIA','PROVINCIA','CODI_MUNICIPI','MUNICIPI'], axis = 1, inplace = True)
    df.rename(columns = {'CODI_CONTAMINANT':'POLLUTANT_CODE','ESTACIO':'STATION','ANY':'YEAR','MES':'MONTH','DIA':'DAY'}, inplace = True)
    for num in ["%.2d" % i for i in range(1,25)]:
        df.drop(['V'+num], axis = 1, inplace = True)
        df['H'+num] = pd.to_numeric(df['H'+num],errors = 'coerce')
    new_df = pd.DataFrame({'STATION':[df['STATION'][int(i/24)] for i in range(df.shape[0]*24)],
                      'POLLUTANT_CODE':[df['POLLUTANT_CODE'][int(i/24)] for i in range(df.shape[0]*24)],
                      'YEAR':[df['YEAR'][int(i/24)] for i in range(df.shape[0]*24)],
                      'MONTH':[df['MONTH'][int(i/24)] for i in range(df.shape[0]*24)],
                      'DAY':[df['DAY'][int(i/24)] for i in range(df.shape[0]*24)],
                      'HOUR':[j for i in range(df.shape[0]) for j in range(1,25)],
                      'VALUE':[df['H'+num][row] for row in range(df.shape[0]) for num in ["%.2d" % i for i in range(1,25)]]
                      })
    return new_df

In [3]:
jan2020 = structure_dataset('2020_01_Gener_qualitat_aire_BCN.csv')
feb2020 = structure_dataset('2020_02_Febrer_qualitat_aire_BCN.csv')
march2020 = structure_dataset('2020_03_Marc_qualitat_aire_BCN.csv')
april2020 = structure_dataset('2020_04_Abril_qualitat_aire_BCN.csv')
may2020 = structure_dataset('2020_05_Maig_qualitat_aire_BCN.csv')
june2020 = structure_dataset('2020_06_Juny_qualitat_aire_BCN.csv')
july2020 = structure_dataset('2020_07_Juliol_qualitat_aire_BCN.csv')
aug2020 = structure_dataset('2020_08_Agost_qualitat_aire_BCN.csv')
sep2020 = structure_dataset('2020_09_Setembre_qualitat_aire_BCN.csv')

may2019 = structure_dataset('2019_05_Maig_qualitat_aire_BCN.csv')

In [4]:
stations = pd.read_csv('data/Qualitat_Aire_Estacions.csv')
pollutants = pd.read_csv('data/Qualitat_Aire_Contaminants.csv')

In [5]:
jan_sep_2020 = pd.concat([jan2020,feb2020,march2020,april2020,may2020,june2020,july2020,aug2020,sep2020],ignore_index = True)

In [6]:
stations.drop(['Codi_Contaminant'], axis = 1, inplace = True)
stations.drop_duplicates(inplace = True)
jan_sep_2020 = jan_sep_2020.merge(stations[['Estacio','Nom_districte']], how = 'left',left_on = 'STATION',right_on = 'Estacio')
jan_sep_2020.drop(['Estacio'], axis = 1, inplace = True)
jan_sep_2020.rename(columns = {'Nom_districte':'DISTRICT_NAME'}, inplace = True)

In [7]:
jan_sep_2020 = jan_sep_2020.merge(pollutants, how = 'left',left_on = 'POLLUTANT_CODE',right_on = 'Codi_Contaminant')
jan_sep_2020.drop(['Codi_Contaminant'], axis = 1, inplace = True)
jan_sep_2020.rename(columns = {'Desc_Contaminant':'POLLUTANT_DESCRIPTION','Unitats':'UNITS'}, inplace = True)

In [8]:
jan_sep_2020 = jan_sep_2020[['YEAR','MONTH','DAY','HOUR','STATION','DISTRICT_NAME','POLLUTANT_CODE','POLLUTANT_DESCRIPTION','UNITS','VALUE']]

## Air Quality COVID's situation

In [9]:
may = pd.concat([may2019,may2020],ignore_index = True)
may = may.merge(pollutants, how = 'left',left_on = 'POLLUTANT_CODE',right_on = 'Codi_Contaminant')
may.drop(['Codi_Contaminant'], axis = 1, inplace = True)
may.rename(columns = {'Desc_Contaminant':'POLLUTANT_DESCRIPTION','Unitats':'UNITS'}, inplace = True)

In [10]:
may.groupby(['POLLUTANT_DESCRIPTION','YEAR','HOUR']).max()['VALUE']

POLLUTANT_DESCRIPTION  YEAR  HOUR
CO                     2019  1       1.1
                             2       0.6
                             3       0.7
                             4       0.4
                             5       0.4
                                    ... 
SO2                    2020  20      5.0
                             21      5.0
                             22      5.0
                             23      5.0
                             24      4.0
Name: VALUE, Length: 336, dtype: float64

## Air Quality by neighbourhood

In [11]:
jan_sep_2020.groupby(['POLLUTANT_DESCRIPTION','DISTRICT_NAME']).mean()['VALUE']

POLLUTANT_DESCRIPTION  DISTRICT_NAME 
CO                     Eixample           0.259083
                       Gracia             0.318769
                       Horta-Guinardo     0.276345
                       Les Corts          0.284462
NO                     Ciutat Vella       7.949243
                       Eixample          17.275962
                       Gracia            11.305618
                       Horta-Guinardo     7.169296
                       Les Corts          5.004723
                       Sant Marti         9.522316
                       Sants-Montjuic     6.026637
NO2                    Ciutat Vella      22.168063
                       Eixample          33.317340
                       Gracia            30.399679
                       Horta-Guinardo    21.205815
                       Les Corts         17.066792
                       Sant Marti        26.692755
                       Sants-Montjuic    21.951220
NOx                    Ciutat Vella      33.

## Air Quality by day

In [12]:
date_jan_sep_2020 = jan_sep_2020.copy()
date_jan_sep_2020['DATE'] = [str(jan_sep_2020['DAY'][i])+'/'+str(jan_sep_2020['MONTH'][i])+'/'+str(jan_sep_2020['YEAR'][i]) for i in range(jan_sep_2020.shape[0])]
date_jan_sep_2020['DATE'] = pd.to_datetime(date_jan_sep_2020['DATE'])
date_jan_sep_2020['DAY_WEEK'] = [date_jan_sep_2020['DATE'][i].weekday() for i in range(date_jan_sep_2020.shape[0])]
date_jan_sep_2020.drop(['YEAR','MONTH','DAY'],axis=1,inplace=True)

In [13]:
date_jan_sep_2020.groupby(['POLLUTANT_DESCRIPTION','DAY_WEEK']).mean()['VALUE']

POLLUTANT_DESCRIPTION  DAY_WEEK
CO                     0            0.273906
                       1            0.290930
                       2            0.302486
                       3            0.300582
                       4            0.307892
                       5            0.259771
                       6            0.258017
NO                     0            7.470194
                       1            8.217404
                       2            9.321800
                       3           10.758750
                       4           10.467450
                       5            5.776205
                       6            5.724605
NO2                    0           21.992189
                       1           23.378157
                       2           24.143381
                       3           26.392222
                       4           25.648759
                       5           18.767533
                       6           18.227019
NOx                    