# Air Quality Barcelona

In [1]:
import pandas as pd

In [2]:
def structure_dataset(csv_name):
    df = pd.read_csv('data/'+csv_name)
    df.drop(['CODI_PROVINCIA','PROVINCIA','CODI_MUNICIPI','MUNICIPI'], axis = 1, inplace = True)
    df.rename(columns = {'CODI_CONTAMINANT':'POLLUTANT_CODE','ESTACIO':'STATION','ANY':'YEAR','MES':'MONTH','DIA':'DAY'}, inplace = True)
    for num in ["%.2d" % i for i in range(1,25)]:
        df.drop(['V'+num], axis = 1, inplace = True)
        df['H'+num] = pd.to_numeric(df['H'+num],errors = 'coerce')
    new_df = pd.DataFrame({'STATION':[df['STATION'][int(i/24)] for i in range(df.shape[0]*24)],
                      'POLLUTANT_CODE':[df['POLLUTANT_CODE'][int(i/24)] for i in range(df.shape[0]*24)],
                      'YEAR':[df['YEAR'][int(i/24)] for i in range(df.shape[0]*24)],
                      'MONTH':[df['MONTH'][int(i/24)] for i in range(df.shape[0]*24)],
                      'DAY':[df['DAY'][int(i/24)] for i in range(df.shape[0]*24)],
                      'HOUR':[j for i in range(df.shape[0]) for j in range(1,25)],
                      'VALUE':[df['H'+num][row] for row in range(df.shape[0]) for num in ["%.2d" % i for i in range(1,25)]]
                      })
    return new_df

In [3]:
jan2020 = structure_dataset('2020_01_Gener_qualitat_aire_BCN.csv')
feb2020 = structure_dataset('2020_02_Febrer_qualitat_aire_BCN.csv')
march2020 = structure_dataset('2020_03_Marc_qualitat_aire_BCN.csv')
april2020 = structure_dataset('2020_04_Abril_qualitat_aire_BCN.csv')
may2020 = structure_dataset('2020_05_Maig_qualitat_aire_BCN.csv')
june2020 = structure_dataset('2020_06_Juny_qualitat_aire_BCN.csv')
july2020 = structure_dataset('2020_07_Juliol_qualitat_aire_BCN.csv')
aug2020 = structure_dataset('2020_08_Agost_qualitat_aire_BCN.csv')
sep2020 = structure_dataset('2020_09_Setembre_qualitat_aire_BCN.csv')

In [4]:
stations = pd.read_csv('data/Qualitat_Aire_Estacions.csv')
pollutants = pd.read_csv('data/Qualitat_Aire_Contaminants.csv')

In [5]:
jan_sep_2020 = pd.concat([jan2020,feb2020,march2020,april2020,may2020,june2020,july2020,aug2020,sep2020],ignore_index = True)

In [7]:
stations.drop(['Codi_Contaminant'], axis = 1, inplace = True)
stations.drop_duplicates(inplace = True)
jan_sep_2020 = jan_sep_2020.merge(stations[['Estacio','Nom_districte']], how = 'left',left_on = 'STATION',right_on = 'Estacio')
jan_sep_2020.drop(['Estacio'], axis = 1, inplace = True)
jan_sep_2020.rename(columns = {'Nom_districte':'DISTRICT_NAME'}, inplace = True)

In [8]:
jan_sep_2020 = jan_sep_2020.merge(pollutants, how = 'left',left_on = 'POLLUTANT_CODE',right_on = 'Codi_Contaminant')
jan_sep_2020.drop(['Codi_Contaminant'], axis = 1, inplace = True)
jan_sep_2020.rename(columns = {'Desc_Contaminant':'POLLUTANT_DESCRIPTION','Unitats':'UNITS'}, inplace = True)

In [9]:
jan_sep_2020 = jan_sep_2020[['YEAR','MONTH','DAY','HOUR','STATION','DISTRICT_NAME','POLLUTANT_CODE','POLLUTANT_DESCRIPTION','UNITS','VALUE']]

## Air Quality by neighbourhood

In [24]:
jan_sep_2020.groupby(['DISTRICT_NAME','POLLUTANT_DESCRIPTION']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,YEAR,MONTH,DAY,HOUR,STATION,POLLUTANT_CODE,VALUE
DISTRICT_NAME,POLLUTANT_DESCRIPTION,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Ciutat Vella,NO,2020.0,5.003717,15.743494,12.5,50.0,7.0,7.949243
Ciutat Vella,NO2,2020.0,5.003717,15.743494,12.5,50.0,8.0,22.168063
Ciutat Vella,NOx,2020.0,5.003717,15.743494,12.5,50.0,12.0,33.849017
Ciutat Vella,O3,2020.0,5.003717,15.743494,12.5,50.0,14.0,49.019513
Eixample,CO,2020.0,5.003717,15.743494,12.5,43.0,6.0,0.259083
Eixample,NO,2020.0,5.003717,15.743494,12.5,43.0,7.0,17.275962
Eixample,NO2,2020.0,5.003717,15.743494,12.5,43.0,8.0,33.31734
Eixample,NOx,2020.0,5.003717,15.743494,12.5,43.0,12.0,59.505072
Eixample,O3,2020.0,5.003717,15.743494,12.5,43.0,14.0,44.617161
Eixample,PM10,2020.0,5.003717,15.743494,12.5,43.0,10.0,23.644434
