In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## AQI Basics

    Daily AQI color     Level of concern           Value of index     Description of air quality

    Green               Good                       0 to 50            Air quality is satisfactory, and air pollution poses
                                                                      little or no risk.

    Yellow              Moderate                   51 to 100          Air quality is acceptable. However, there may be a
                                                                      risk for some people, particularly those who are
                                                                      unusually sensitive to air pollution.

    Orange              Unhealthy for sensitive    101 to 150         Members of sensitive groups may experience health
                        groups                                        effects. The general public is less likely to be
                                                                      affected.

    Red                 Unhealthy                  151 to 200         Some members of the general public may experience
                                                                      health effects; members of sensitive groups may
                                                                      experience more serious health effects.

    Purple              Very unhealthy             201 to 300         Health alert: The risk of health effects is increased
                                                                      for everyone.

    Maroon              Hazardous                  301+               Health warning of emergency conditions: everyone
                                                                      is more likely to be affected.
        

In [3]:
# Load the air-quality readings from the project's data folder, then list the
# raw column labels exactly as they appear in the CSV header.
air_quality = pd.read_csv(filepath_or_buffer='data/aqi_data.csv')
air_quality.columns

Index(['Id', 'Mounths', 'PM10 in æg/m3', 'SO2 in æg/m3', 'NOx  in æg/m3',
       ' PM2.5  in æg/m3', 'Ammonia - NH3  in æg/m3', 'O3   in æg/m3',
       'CO  in mg/m3', ' Benzene  in æg/m3', 'AQI'],
      dtype='object')

In [4]:
# Swap the verbose CSV headers for short, attribute-friendly names.
# The assignment is positional: the order below must match the column order of
# the loaded file, and pandas raises ValueError if the lengths differ.
air_quality.columns = [
    'id', 'month',
    'PM10', 'SO2', 'NOx', 'PM25', 'NH3', 'O3', 'CO', 'benzene',
    'AQI',
]
air_quality

Unnamed: 0,id,month,PM10,SO2,NOx,PM25,NH3,O3,CO,benzene,AQI
0,1,Jan-17,174.0,26.4,35.0,79,25.0,107.6,0.90,0.70,149.0
1,2,Feb-17,143.0,35.1,40.3,75,31.0,103.0,0.90,0.90,129.0
2,3,Mar-17,142.0,32.1,30.9,59,26.0,80.7,0.80,0.50,128.0
3,4,Apr-17,117.0,50.9,36.3,75,36.0,79.5,0.90,0.70,111.0
4,5,May-17,,41.6,25.2,53,28.0,70.0,0.50,0.50,
...,...,...,...,...,...,...,...,...,...,...,...
67,68,Aug-22,89.0,12.0,35.0,22,26.0,15.2,0.64,0.09,67.0
68,69,Sep-22,92.0,13.0,37.3,30,31.0,10.0,0.70,0.06,76.0
69,70,Oct-22,93.0,12.0,30.7,46,28.0,9.7,0.67,0.07,101.0
70,71,Nov-22,87.0,10.4,28.2,78,30.0,12.1,1.52,0.03,166.0


In [5]:
# checking data types and the non-null count of every column
# (info() prints its summary directly, so nothing needs to be displayed after it)
air_quality.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 11 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   id       72 non-null     int64  
 1   month    72 non-null     object 
 2   PM10     66 non-null     float64
 3   SO2      71 non-null     float64
 4   NOx      70 non-null     float64
 5   PM25     72 non-null     int64  
 6   NH3      72 non-null     float64
 7   O3       72 non-null     float64
 8   CO       72 non-null     float64
 9   benzene  72 non-null     float64
 10  AQI      67 non-null     float64
dtypes: float64(8), int64(2), object(1)
memory usage: 6.3+ KB


In [6]:
# Preview the first five rows of the frame.
air_quality.head(n=5)

Unnamed: 0,id,month,PM10,SO2,NOx,PM25,NH3,O3,CO,benzene,AQI
0,1,Jan-17,174.0,26.4,35.0,79,25.0,107.6,0.9,0.7,149.0
1,2,Feb-17,143.0,35.1,40.3,75,31.0,103.0,0.9,0.9,129.0
2,3,Mar-17,142.0,32.1,30.9,59,26.0,80.7,0.8,0.5,128.0
3,4,Apr-17,117.0,50.9,36.3,75,36.0,79.5,0.9,0.7,111.0
4,5,May-17,,41.6,25.2,53,28.0,70.0,0.5,0.5,


In [7]:
# selecting only non-null aqi values
# .copy() makes `aqi` an independent frame rather than a slice of
# `air_quality`, so columns can be added to it later without pandas emitting
# SettingWithCopyWarning (and without any ambiguity about which frame changes).
aqi = air_quality.loc[air_quality['AQI'].notna()].copy()

In [9]:
# Show the readings ordered from the lowest to the highest AQI.
# sort_values returns a new frame; `aqi` itself keeps its original order.
aqi.sort_values('AQI', ascending=True)

Unnamed: 0,id,month,PM10,SO2,NOx,PM25,NH3,O3,CO,benzene,AQI
66,67,Jul-22,76.0,9.3,30.3,23,26.0,15.7,0.69,0.03,58.0
18,19,Jul-18,123.0,14.5,31.3,22,33.0,7.9,0.20,0.40,65.6
67,68,Aug-22,89.0,12.0,35.0,22,26.0,15.2,0.64,0.09,67.0
19,20,Aug-18,171.0,16.5,28.8,19,24.0,12.6,0.30,0.20,75.0
68,69,Sep-22,92.0,13.0,37.3,30,31.0,10.0,0.70,0.06,76.0
...,...,...,...,...,...,...,...,...,...,...,...
60,61,Jan-22,87.0,13.2,29.2,62,16.0,14.6,0.54,0.05,146.0
0,1,Jan-17,174.0,26.4,35.0,79,25.0,107.6,0.90,0.70,149.0
12,13,Jan-18,,,,75,26.0,71.7,0.80,0.40,156.0
71,72,Dec-22,88.0,13.0,34.5,72,15.0,16.5,1.39,0.04,162.0


In [None]:
# creating new column with quality classification
# (vectorised with pd.cut rather than a row-by-row for loop)
#
# Band edges follow the "AQI Basics" table at the top of the notebook. Every
# band is closed on the right, so 50 -> Good, 100 -> Moderate, 150 -> Unhealthy
# for sensitive groups, and so on; include_lowest=True keeps an AQI of exactly 0
# in the first band. Fractional readings such as 100.5 fall into the higher band.
aqi_bin_edges = [0, 50, 100, 150, 200, 300, float('inf')]
aqi_levels = [
    'Good',
    'Moderate',
    'Unhealthy for sensitive groups',
    'Unhealthy',
    'Very unhealthy',
    'Hazardous',
]

# assign() returns a new frame, so `aqi` is left untouched and this cell gives
# the same result however many times it is re-run.
aqi_classified = aqi.assign(
    quality=pd.cut(
        aqi['AQI'],
        bins=aqi_bin_edges,
        labels=aqi_levels,
        include_lowest=True,
    )
)
aqi_classified.head()



In [33]:
# Months whose AQI falls in the orange band (101 to 150 in the AQI table).
# The upper bound is inclusive: a reading of exactly 150 still belongs to this
# band, and a strict `<` comparison would silently leave it out.
aqi.loc[(aqi['AQI'] > 100) & (aqi['AQI'] <= 150)]

Unnamed: 0,id,month,PM10,SO2,NOx,PM25,NH3,O3,CO,benzene,AQI
0,1,Jan-17,174.0,26.4,35.0,79,25.0,107.6,0.9,0.7,149.0
1,2,Feb-17,143.0,35.1,40.3,75,31.0,103.0,0.9,0.9,129.0
2,3,Mar-17,142.0,32.1,30.9,59,26.0,80.7,0.8,0.5,128.0
3,4,Apr-17,117.0,50.9,36.3,75,36.0,79.5,0.9,0.7,111.0
6,7,Jul-17,111.0,38.9,21.5,29,30.0,24.4,0.3,0.7,106.7
9,10,Oct-17,146.0,37.1,32.6,63,23.0,52.8,0.6,0.5,130.0
13,14,Feb-18,178.0,17.0,37.0,53,23.0,54.1,0.7,0.3,118.0
14,15,Mar-18,144.0,17.2,29.7,63,22.0,48.3,0.7,0.3,129.0
15,16,Apr-18,159.0,16.5,30.0,44,20.0,38.7,0.5,0.2,109.4
16,17,May-18,130.0,16.6,32.3,39,16.9,39.5,0.5,0.2,106.9
