# Statistical performance of met data on severe, very poor and poor AQI

In [2]:
import sys
sys.path.append('..')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import datetime
from functions import dplot, stat_calc, aqi_calc, score_calc

# Use seaborn style defaults and set default figure size
sns.set_style("ticks")

In [3]:
aqi = pd.read_csv('./data/aqi_data_cleaned.csv')
met = pd.read_csv('./data/met_data_cleaned.csv')
pblh00 = pd.read_csv('./data/pblh00_data.csv')
pblh12 = pd.read_csv('./data/pblh12_data.csv')

In [4]:
aqi.set_index('datetime', inplace=True)
met.set_index('datetime', inplace=True)
pblh00.set_index('datetime', inplace=True)
pblh12.set_index('datetime', inplace=True)

In [5]:
aqi.head()

Unnamed: 0_level_0,mod_PM2.5,mod_PM10,mod_pm2,mod_pm2_stdev,mod_pm10,mod_pm10_stdev,mod_aqi_pm2,mod_aqi_pm10,quality_mod_pm25,quality_mod_pm10,obs_PM2.5,obs_PM10,obs_pm2,obs_pm2_stdev,obs_pm10,obs_pm10_stdev,obs_aqi_pm2,obs_aqi_pm10,quality_obs_pm25,quality_obs_pm10
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2020-11-01 23:00:00,90.07,181.35,127.504583,59.183333,219.6475,118.24375,305.771025,178.96735,4,2,159.32,270.6,179.44875,66.654167,335.294375,99.067708,345.716089,285.294375,4,3
2020-11-02 00:00:00,91.95,186.83,124.977083,59.065833,217.360833,118.30125,303.827377,177.45815,4,2,139.33,244.12,169.091667,63.607917,322.072708,97.627292,337.751492,272.072708,4,3
2020-11-02 01:00:00,102.19,208.51,122.575833,59.254167,215.445,118.876667,301.980816,176.1937,4,2,116.35,226.64,159.435,60.396667,311.264375,95.753542,330.325515,261.264375,4,3
2020-11-02 02:00:00,104.84,214.72,120.275833,59.52875,213.872083,119.80625,300.212116,175.155575,4,2,113.74,254.27,150.589583,58.083333,300.331042,95.091875,323.52339,250.331042,4,3
2020-11-02 03:00:00,99.08,200.06,117.87375,60.25125,212.123333,121.276667,292.819588,174.0014,3,2,129.55,304.04,143.366667,56.084167,291.565625,95.260208,317.968967,241.565625,4,3


In [6]:
met.head()

Unnamed: 0_level_0,mod_temp,obs_temp,mod_wd,obs_wd,mod_ws,obs_ws,mod_swdown,obs_swlower,obs_swupper,obs_lwupper,obs_lwlower
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-11-01 00:00:00,,,,,,,,,,,
2020-11-01 01:00:00,,,,,,,,,,,
2020-11-01 02:00:00,,,,,,,,,,,
2020-11-01 03:00:00,,,,,,,,,,,
2020-11-01 04:00:00,,,,,,,,,,,


In [7]:
pblh00.drop(['datetime.1'], inplace=True, axis=1)
pblh00.head()

Unnamed: 0_level_0,obs_pbl00,mod_pbl00
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-11-01,159.0,
2020-11-02,250.0,
2020-11-03,185.0,
2020-11-04,168.0,75.59265
2020-11-05,142.0,72.55289


In [8]:
pblh12.drop(['datetime.1'], inplace=True, axis=1)
pblh12.head()

Unnamed: 0_level_0,obs_pbl12,mod_pbl12
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-11-01 12:00:00,1823.0,
2020-11-02 12:00:00,1905.0,
2020-11-03 12:00:00,1764.0,1987.59082
2020-11-04 12:00:00,431.0,1609.97534
2020-11-05 12:00:00,1736.0,1637.60486


In [9]:
# # Add the missing values of datetime in PBLH00 and PBLH12

# idx = pd.date_range('2020-11-01 00:00:00','2021-01-31 23:00:00', freq='H')

# pblh00 = pblh00.reindex(idx, fill_value=np.nan)
# pblh12 = pblh12.reindex(idx, fill_value=np.nan)

In [10]:
met_aqi = pd.concat([aqi,met, pblh00, pblh12], axis=1)

In [11]:
met_aqi.head()

Unnamed: 0_level_0,mod_PM2.5,mod_PM10,mod_pm2,mod_pm2_stdev,mod_pm10,mod_pm10_stdev,mod_aqi_pm2,mod_aqi_pm10,quality_mod_pm25,quality_mod_pm10,...,obs_ws,mod_swdown,obs_swlower,obs_swupper,obs_lwupper,obs_lwlower,obs_pbl00,mod_pbl00,obs_pbl12,mod_pbl12
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-11-01 23:00:00,90.07,181.35,127.504583,59.183333,219.6475,118.24375,305.771025,178.96735,4.0,2.0,...,,,,,,,,,,
2020-11-02 00:00:00,91.95,186.83,124.977083,59.065833,217.360833,118.30125,303.827377,177.45815,4.0,2.0,...,,,,,,,,,,
2020-11-02 01:00:00,102.19,208.51,122.575833,59.254167,215.445,118.876667,301.980816,176.1937,4.0,2.0,...,,,,,,,,,,
2020-11-02 02:00:00,104.84,214.72,120.275833,59.52875,213.872083,119.80625,300.212116,175.155575,4.0,2.0,...,,,,,,,,,,
2020-11-02 03:00:00,99.08,200.06,117.87375,60.25125,212.123333,121.276667,292.819588,174.0014,3.0,2.0,...,,,,,,,,,,


In [12]:
met_aqi_severe_25 = met_aqi.loc[(met_aqi['quality_obs_pm25'] == 5)]
met_aqi_verypoor_25 = met_aqi.loc[(met_aqi['quality_obs_pm25'] == 4)]
met_aqi_poor_25 = met_aqi.loc[(met_aqi['quality_obs_pm25'] == 3)]

met_aqi_severe_10 = met_aqi.loc[(met_aqi['quality_obs_pm10'] == 5)]
met_aqi_verypoor_10 = met_aqi.loc[(met_aqi['quality_obs_pm10'] == 4)]
met_aqi_poor_10 = met_aqi.loc[(met_aqi['quality_obs_pm10'] == 3)]

In [24]:
met_aqi_poor_10.tail()

Unnamed: 0_level_0,mod_PM2.5,mod_PM10,mod_pm2,mod_pm2_stdev,mod_pm10,mod_pm10_stdev,mod_aqi_pm2,mod_aqi_pm10,quality_mod_pm25,quality_mod_pm10,...,obs_ws,mod_swdown,obs_swlower,obs_swupper,obs_lwupper,obs_lwlower,obs_pbl00,mod_pbl00,obs_pbl12,mod_pbl12
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-01-31 19:00:00,262.13,477.64,169.55625,94.367917,304.63125,181.144583,338.108756,254.63125,4.0,3.0,...,,,,,,,,,,
2021-01-31 20:00:00,280.5,515.65,171.562083,97.25125,308.490417,186.55875,339.651242,258.490417,4.0,3.0,...,,,,,,,,,,
2021-01-31 21:00:00,280.55,531.1,173.837083,98.862083,313.481667,190.334167,341.400717,263.481667,4.0,3.0,...,,,,,,,,,,
2021-01-31 22:00:00,317.31,602.42,178.011667,101.294583,322.102083,195.576667,344.610972,272.102083,4.0,3.0,...,,,,,,,,,,
2021-01-31 23:00:00,346.14,654.82,183.647083,105.1075,333.397917,202.8175,348.944607,283.397917,4.0,3.0,...,,,,,,,,,,


In [14]:
temp_aqi_severe_25 = met_aqi_severe_25[['mod_temp', 'obs_temp']]
swdown_aqi_severe_25 = met_aqi_severe_25[['mod_swdown', 'obs_swupper']]
wd_aqi_severe_25 = met_aqi_severe_25[['mod_wd', 'obs_wd']]
ws_aqi_severe_25 = met_aqi_severe_25[['mod_ws', 'obs_ws']]
pblh00_aqi_severe_25 = met_aqi_severe_25[['mod_pbl00', 'obs_pbl00']]
pblh12_aqi_severe_25 = met_aqi_severe_25[['mod_pbl12', 'obs_pbl12']]


temp_aqi_severe_10 = met_aqi_severe_10[['mod_temp', 'obs_temp']]
swdown_aqi_severe_10 = met_aqi_severe_10[['obs_PM10', 'mod_swdown', 'obs_swupper']]
wd_aqi_severe_10 = met_aqi_severe_10[['mod_wd', 'obs_wd']]
ws_aqi_severe_10 = met_aqi_severe_10[['mod_ws', 'obs_ws']]
pblh00_aqi_severe_10 = met_aqi_severe_10[['mod_pbl00', 'obs_pbl00']]
pblh12_aqi_severe_10 = met_aqi_severe_10[['mod_pbl12', 'obs_pbl12']]

In [15]:
temp_aqi_verypoor_25 = met_aqi_verypoor_25[['mod_temp', 'obs_temp']]
swdown_aqi_verypoor_25 = met_aqi_verypoor_25[['mod_swdown', 'obs_swupper']]
wd_aqi_verypoor_25 = met_aqi_verypoor_25[['mod_wd', 'obs_wd']]
ws_aqi_verypoor_25 = met_aqi_verypoor_25[['mod_ws', 'obs_ws']]
pblh00_aqi_verypoor_25 = met_aqi_verypoor_25[['mod_pbl00', 'obs_pbl00']]
pblh12_aqi_verypoor_25 = met_aqi_verypoor_25[['mod_pbl12', 'obs_pbl12']]


temp_aqi_verypoor_10 = met_aqi_verypoor_10[['mod_temp', 'obs_temp']]
swdown_aqi_verypoor_10 = met_aqi_verypoor_10[['obs_PM10', 'mod_swdown', 'obs_swupper']]
wd_aqi_verypoor_10 = met_aqi_verypoor_10[['mod_wd', 'obs_wd']]
ws_aqi_verypoor_10 = met_aqi_verypoor_10[['mod_ws', 'obs_ws']]
pblh00_aqi_verypoor_10 = met_aqi_verypoor_10[['mod_pbl00', 'obs_pbl00']]
pblh12_aqi_verypoor_10 = met_aqi_verypoor_10[['mod_pbl12', 'obs_pbl12']]

In [16]:
temp_aqi_poor_25 = met_aqi_poor_25[['mod_temp', 'obs_temp']]
swdown_aqi_poor_25 = met_aqi_poor_25[['mod_swdown', 'obs_swupper']]
wd_aqi_poor_25 = met_aqi_poor_25[['mod_wd', 'obs_wd']]
ws_aqi_poor_25 = met_aqi_poor_25[['mod_ws', 'obs_ws']]
pblh00_aqi_poor_25 = met_aqi_poor_25[['mod_pbl00', 'obs_pbl00']]
pblh12_aqi_poor_25 = met_aqi_poor_25[['mod_pbl12', 'obs_pbl12']]


temp_aqi_poor_10 = met_aqi_poor_10[['mod_temp', 'obs_temp']]
swdown_aqi_poor_10 = met_aqi_poor_10[['obs_PM10', 'mod_swdown', 'obs_swupper']]
wd_aqi_poor_10 = met_aqi_poor_10[['mod_wd', 'obs_wd']]
ws_aqi_poor_10 = met_aqi_poor_10[['mod_ws', 'obs_ws']]
pblh00_aqi_poor_10 = met_aqi_poor_10[['mod_pbl00', 'obs_pbl00']]
pblh12_aqi_poor_10 = met_aqi_poor_10[['mod_pbl12', 'obs_pbl12']]

## Severe

In [63]:
stat_calc.get_stat_performance(temp_aqi_severe_25.dropna())

Enter the column for model value: mod_temp
Enter the column for observation value: obs_temp
Mean bias :
mean bias 3.5235392878763743
fractional bias is :
fractional bias =  0.18127213441678344
Correlation coefficient is :
corr coeff =  0.9588720458247789  and p-value is =  4.490921850135477e-200
RMSE is :
RMSE  3.81244303011223
NMSE is :
NMSE  0.038787608620465444


In [64]:
stat_calc.get_stat_performance(swdown_aqi_severe_25.dropna())

Enter the column for model value: mod_swdown
Enter the column for observation value: obs_swupper
Mean bias :
mean bias 30.51268645856179
fractional bias is :
fractional bias =  0.25413841016424105
Correlation coefficient is :
corr coeff =  0.9016634277542039  and p-value is =  7.440219416144086e-134
RMSE is :
RMSE  105.74824762813535
NMSE is :
NMSE  0.7884890837752522


In [18]:
stat_calc.get_stat_performance(ws_aqi_severe_25.dropna())

Enter the column for model value: mod_ws
Enter the column for observation value: obs_ws
Mean bias :
mean bias 0.428503683163187
fractional bias is :
fractional bias =  0.24902708536238408
Correlation coefficient is :
corr coeff =  0.5907571008700996  and p-value is =  1.281055999242866e-35
RMSE is :
RMSE  1.0832391155715761
NMSE is :
NMSE  0.4025488571758101


In [19]:
stat_calc.get_stat_performance(wd_aqi_severe_25.dropna())

Enter the column for model value: mod_wd
Enter the column for observation value: obs_wd
Mean bias :
mean bias 21.65737314074175
fractional bias is :
fractional bias =  0.10502790806132109
Correlation coefficient is :
corr coeff =  0.5566648643681016  and p-value is =  5.246996982726047e-31
RMSE is :
RMSE  87.80883638089006
NMSE is :
NMSE  0.18183338825694326


In [27]:
stat_calc.get_stat_performance(pblh00_aqi_severe_25.dropna())

Enter the column for model value: mod_pbl00
Enter the column for observation value: obs_pbl00
Mean bias :
mean bias nan
fractional bias is :
fractional bias =  nan
Correlation coefficient is :
corr coeff =  nan  and p-value is =  NA
RMSE is :
RMSE  nan
NMSE is :
NMSE  nan


In [28]:
stat_calc.get_stat_performance(pblh12_aqi_severe_25.dropna())

Enter the column for model value: mod_pbl12
Enter the column for observation value: obs_pbl12
Mean bias :
mean bias 496.0730961111111
fractional bias is :
fractional bias =  0.6062133833547804
Correlation coefficient is :
corr coeff =  0.580966611864326  and p-value is =  0.011458724374619778
RMSE is :
RMSE  660.4346834438411
NMSE is :
NMSE  0.717254311421345


## Very Poor

In [65]:
stat_calc.get_stat_performance(temp_aqi_verypoor_25.dropna())

Enter the column for model value: mod_temp
Enter the column for observation value: obs_temp
Mean bias :
mean bias 2.9531417759216625
fractional bias is :
fractional bias =  0.1658820542329685
Correlation coefficient is :
corr coeff =  0.9518163650528425  and p-value is =  0.0
RMSE is :
RMSE  3.356581894462419
NMSE is :
NMSE  0.035795022415447394


In [66]:
stat_calc.get_stat_performance(swdown_aqi_verypoor_25.dropna())

Enter the column for model value: mod_swdown
Enter the column for observation value: obs_swupper
Mean bias :
mean bias 17.41011831161625
fractional bias is :
fractional bias =  0.1295127678970581
Correlation coefficient is :
corr coeff =  0.8852613823875675  and p-value is =  0.0
RMSE is :
RMSE  107.96989607801801
NMSE is :
NMSE  0.6478166217098711


In [68]:
stat_calc.get_stat_performance(ws_aqi_verypoor_25.dropna())

Enter the column for model value: mod_ws
Enter the column for observation value: obs_ws
Mean bias :
mean bias 0.3675479145606447
fractional bias is :
fractional bias =  0.17667312673669794
Correlation coefficient is :
corr coeff =  0.6495434683790441  and p-value is =  4.934531589012247e-131
RMSE is :
RMSE  1.1346945297305713
NMSE is :
NMSE  0.2998287193575404


In [69]:
stat_calc.get_stat_performance(wd_aqi_verypoor_25.dropna())

Enter the column for model value: mod_wd
Enter the column for observation value: obs_wd
Mean bias :
mean bias 9.536666853355312
fractional bias is :
fractional bias =  0.04149050556987179
Correlation coefficient is :
corr coeff =  0.5986605515807153  and p-value is =  3.6302985768826624e-104
RMSE is :
RMSE  76.64101267494802
NMSE is :
NMSE  0.11122787628339194


In [70]:
stat_calc.get_stat_performance(pblh00_aqi_verypoor_25.dropna())

Enter the column for model value: mod_pbl00
Enter the column for observation value: obs_pbl00
Mean bias :
mean bias nan
fractional bias is :
fractional bias =  nan
Correlation coefficient is :
corr coeff =  nan  and p-value is =  NA
RMSE is :
RMSE  nan
NMSE is :
NMSE  nan


In [71]:
stat_calc.get_stat_performance(pblh12_aqi_verypoor_25.dropna())

Enter the column for model value: mod_pbl12
Enter the column for observation value: obs_pbl12
Mean bias :
mean bias 198.35930846938777
fractional bias is :
fractional bias =  0.20135788464849552
Correlation coefficient is :
corr coeff =  0.6126640468694087  and p-value is =  2.8840054249689964e-06
RMSE is :
RMSE  553.8662429003031
NMSE is :
NMSE  0.3193496773760288


## Poor

In [73]:
stat_calc.get_stat_performance(temp_aqi_poor_25.dropna())

Enter the column for model value: mod_temp
Enter the column for observation value: obs_temp
Mean bias :
mean bias 3.859186979403162
fractional bias is :
fractional bias =  0.25004322258673173
Correlation coefficient is :
corr coeff =  0.8990352330213308  and p-value is =  5.329639059712011e-92
RMSE is :
RMSE  4.270902730831546
NMSE is :
NMSE  0.07778927122930838


In [17]:
stat_calc.get_stat_performance(swdown_aqi_poor_25.dropna())

Enter the column for model value: mod_swdown
Enter the column for observation value: obs_swupper
Mean bias :
mean bias 34.905761436877164
fractional bias is :
fractional bias =  0.2542039767604041
Correlation coefficient is :
corr coeff =  0.8704069683950468  and p-value is =  3.298981145204252e-79
RMSE is :
RMSE  121.63244320125659
NMSE is :
NMSE  0.7975218474085858


In [19]:
stat_calc.get_stat_performance(ws_aqi_poor_25.dropna())

Enter the column for model value: mod_ws
Enter the column for observation value: obs_ws
Mean bias :
mean bias 0.03420270848379442
fractional bias is :
fractional bias =  0.012918273008029103
Correlation coefficient is :
corr coeff =  0.6406825653808546  and p-value is =  1.2322652945292058e-30
RMSE is :
RMSE  1.32213471154406
NMSE is :
NMSE  0.24937760713761428


In [21]:
stat_calc.get_stat_performance(wd_aqi_poor_25.dropna())

Enter the column for model value: mod_wd
Enter the column for observation value: obs_wd
Mean bias :
mean bias 6.883327974150192
fractional bias is :
fractional bias =  0.025728627127556743
Correlation coefficient is :
corr coeff =  0.3856027174889004  and p-value is =  2.1490490423688932e-10
RMSE is :
RMSE  88.69330519708912
NMSE is :
NMSE  0.10992332486105659


In [22]:
stat_calc.get_stat_performance(pblh00_aqi_poor_25.dropna())

Enter the column for model value: mod_pbl00
Enter the column for observation value: obs_pbl00
Mean bias :
mean bias nan
fractional bias is :
fractional bias =  nan
Correlation coefficient is :
corr coeff =  nan  and p-value is =  NA
RMSE is :
RMSE  nan
NMSE is :
NMSE  nan


In [23]:
stat_calc.get_stat_performance(pblh12_aqi_poor_25.dropna())

Enter the column for model value: mod_pbl12
Enter the column for observation value: obs_pbl12
Mean bias :
mean bias 417.78380727272724
fractional bias is :
fractional bias =  0.5088825256705019
Correlation coefficient is :
corr coeff =  0.5756187983096224  and p-value is =  0.06388288823026537
RMSE is :
RMSE  530.0206871674706
NMSE is :
NMSE  0.4456411805770959
