In [1]:
import pandas as pd
import numpy as np

## Load new test data

In [2]:
# Read the new test table from disk and preview its first rows.
test_data_path = "tables/test_new.csv"
df = pd.read_csv(test_data_path)

df.head(5)

Unnamed: 0,aerosol_type,date,Single_Scattering_Albedo[675nm],Absorption_Angstrom_Exponent_440-870nm,Refractive_Index-Real_Part[440nm],Refractive_Index-Real_Part[1020nm],Asymmetry_Factor-Total[440nm],Asymmetry_Factor-Total[1020nm],Asymmetry_Factor-Fine[440nm],Asymmetry_Factor-Fine[1020nm],Asymmetry_Factor-Coarse[1020nm],Lidar_Ratio[440nm],Lidar_Ratio[1020nm],Depolarization_Ratio[440nm]
0,UID,2009-01-29,0.326197,-0.691044,-0.583627,-0.239521,0.114841,-0.276759,0.748385,0.269895,2.892512,1.085532,0.085721,0.493442
1,UID,2009-01-29,0.446421,-0.550457,-0.630753,-0.230968,-0.046461,-0.412813,0.603878,0.097671,2.922754,1.112423,0.065184,0.508634
2,PD,2009-02-02,-0.306409,1.103142,-1.057732,0.334848,0.274713,0.562224,-0.298487,-0.360308,-0.830384,0.643415,0.104435,-0.317556
3,PD,2009-02-02,-0.379049,1.712591,-0.537057,0.227926,-0.141602,0.588199,-0.685105,0.078547,-0.652569,0.149747,0.171862,-0.422341
4,PD,2009-02-03,-2.563406,-0.458946,0.492129,0.845122,0.178504,0.679166,0.074416,-0.198958,0.271337,1.041449,0.542799,-0.499935


## Temporal analysis on aerosol types

In [3]:
# Start of the COVID-19 period used to split the data: 11 March 2020.
# Reference: https://www.yalemedicine.org/news/covid-timeline
# Written as ISO 8601 (YYYY-MM-DD) so the value cannot be misread as
# 3 November; it parses to the same timestamp as the month-first
# "03/11/2020" form.
covid_period_start = "2020-03-11"

In [4]:
# Parse the date strings, then derive the calendar year and a 0/1 flag
# (`after_covid`) that is 1 for rows dated on or after the COVID-19 start
# date and 0 otherwise.
covid_start_ts = pd.Timestamp(covid_period_start)

df = df.assign(date=pd.to_datetime(df["date"]))
df = df.assign(
    year=df["date"].dt.year,
    after_covid=np.where(df["date"] >= covid_start_ts, 1, 0),
)

In [5]:
# Number of rows available per year, listed chronologically
rows_per_year = df["year"].value_counts()
rows_per_year.sort_index()

2009    156
2010    273
2011     82
2012     49
2013     98
2014    224
2015    232
2016    139
2017     23
2018    167
2019    107
2020     27
2021     26
Name: year, dtype: int64

In [6]:
# Share (in %) of each aerosol type before vs. during the COVID-19 period.
# Keys are the `after_covid` flag values; the mapping order fixes the column order.
period_labels = {
    0: "Pre-COVID-19 (%)",
    1: "COVID-19 Period (%)",
}

air_comp_covid = pd.DataFrame({
    label: (
        df.loc[df["after_covid"].eq(flag), "aerosol_type"]
        .value_counts(normalize=True)
        .mul(100)
    )
    for flag, label in period_labels.items()
}).fillna(0)  # a type missing from one period gets a 0 % share

air_comp_covid

Unnamed: 0,Pre-COVID-19 (%),COVID-19 Period (%)
BBD,3.377948,0.0
BBW,6.9471,5.882353
MD,7.07457,5.882353
PD,50.98789,23.529412
UI,13.384321,50.0
UID,18.228171,14.705882


In [7]:
# Write the comparison table to CSV, keeping the aerosol-type index as the first column
output_csv_path = "tables/air_comp_covid_table.csv"
air_comp_covid.to_csv(output_csv_path, index=True)