# Determining the Effect of RH on Particle Factor Results

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from pathlib import Path
import matplotlib.dates as mdates
from matplotlib.ticker import FuncFormatter
import numpy as np
import joblib
import os
from matplotlib.dates import date2num
from datetime import timedelta

mpl.rcParams['mathtext.default'] = 'regular'

sns.set("paper", "ticks", color_codes=True, palette='colorblind', font_scale=1.5)

%matplotlib inline

# Set the default paths
# A notebook has no real `__file__`, so anchor on the current working directory
# (symlinks resolved) explicitly instead of resolving the literal string
# '__file__' and taking its parent. The data folder is a sibling of that directory.
basepath = Path.cwd().resolve()
datapath = basepath.parent.joinpath('data')

## Load the Data

In [2]:
# Read the munged final dataset and use its stored "index" column as the frame index.
df = (
    pd.read_feather(datapath / "munged/tata2017015/final-data.feather")
    .set_index("index")
)

## Load the NMF Results

In [3]:
# Read the NMF time-series results and use the stored "index" column as the frame index.
nmf = (
    pd.read_feather(datapath / "output/tata2017015/nmf-tseries-results.feather")
    .set_index("index")
)

## Merge the two frames

In [4]:
# Outer-join the two frames on their indexes, so a row present in only one
# frame is kept and the columns from the other frame are NaN for it.
merged = pd.merge(
    df[["rh_i", "temp_i"]],
    nmf[["Factor 1", "Factor 2", "Factor 3", "PM1_EST"]],
    how='outer',
    left_index=True,
    right_index=True,
)


## Perform a bootstrap analysis

In [104]:
n_samples = 5000   # number of bootstrap replicates
boot_pct = .01     # fraction of rows drawn (with replacement) per replicate

# Seed one generator and share it across all draws so that Restart & Run All
# reproduces the same quantiles. It must be a generator object, not an int:
# an int seed passed to .sample() would make every replicate identical.
rng = np.random.RandomState(42)

# Collect one small frame per replicate and concatenate once after the loop.
# Growing a DataFrame with .append inside the loop re-copies it every
# iteration (quadratic), and DataFrame.append was removed in pandas 2.0.
replicates = []

for _ in range(n_samples):
    frame = merged.sample(frac=boot_pct, replace=True, random_state=rng)

    # squared correlation between every pair of columns
    corr = frame.corr()**2

    # keep only the columns of interest
    corr = corr.loc[["rh_i"], ["Factor 1", "Factor 2", "Factor 3"]]

    # long format: one row per factor, with columns index / variable / value;
    # replicates whose correlation is undefined (NaN) are dropped
    replicates.append(corr.reset_index().melt(id_vars=["index"]).dropna())

# pd.concat raises on an empty list, so keep the original empty-frame result
# when n_samples is 0.
results = pd.concat(replicates, ignore_index=True) if replicates else pd.DataFrame()

# .info() prints its summary itself and returns None, so it is not wrapped in print()
results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15000 entries, 0 to 14999
Data columns (total 3 columns):
index       15000 non-null object
variable    15000 non-null object
value       15000 non-null float64
dtypes: float64(1), object(2)
memory usage: 351.6+ KB
None


## Calculate the final values

In [105]:
# 5th / 50th / 95th percentiles of the bootstrapped squared correlation,
# computed separately for each (index, variable) pair.
(
    results
    .groupby(['index', 'variable'])
    .quantile([0.05, 0.5, 0.95])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value
index,variable,Unnamed: 2_level_1,Unnamed: 3_level_1
rh_i,Factor 1,0.05,0.038541
rh_i,Factor 1,0.5,0.062524
rh_i,Factor 1,0.95,0.091042
rh_i,Factor 2,0.05,0.328693
rh_i,Factor 2,0.5,0.379471
rh_i,Factor 2,0.95,0.430283
rh_i,Factor 3,0.05,0.418435
rh_i,Factor 3,0.5,0.452349
rh_i,Factor 3,0.95,0.488238
