In [None]:
import glob
import numpy as np
import hyss_util as hu
from datetime import datetime
from sklearn.decomposition import PCA, FactorAnalysis, FastICA
from sklearn.model_selection import train_test_split
from plotting import set_defaults
import pandas as pd
from pandas.plotting import scatter_matrix
from scipy import stats
from scipy.optimize import minimize
import statsmodels.formula.api as sm
import random
import emcee
import corner
from IPython.display import display, Math
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# -- set the types
# kind selects which spectra are loaded as the target sample; comp names the
# comparison sample and is substituted into the spectra file paths below.

kind = "veg"
comp = "bld"
# analysis switches: none of these is read in the visible part of this
# notebook -- presumably consumed elsewhere (TODO confirm or remove)
runpca = False
runfan = False
runica = False
getndvi = False

In [None]:
# -- wavelength grid, taken from the "waves" entry of the first scan's header
waves = hu.read_header(
    "../../../image_files/veg_00000.hdr"
)["waves"]

# -- indices of the good scans, each entry coerced to a plain int
good = np.array(
    list(map(int, np.load("../../../gdobler_output/output/good_scans.npy")))
)

In [None]:
def normalize_spectra(specs):
    """Scale every spectrum to the mean of the first one, then divide by the first.

    Parameters
    ----------
    specs : ndarray
        Spectra stacked along the first axis (one spectrum per row; the
        column-vector broadcasting below implies a 2-D array).

    Returns
    -------
    ndarray
        Array of the same shape as ``specs``: row ``k`` is multiplied by
        ``specs[0].mean() / specs[k].mean()`` and the result is divided
        element-wise by the (rescaled) first row.
    """
    # multiplicative factor that brings each row's mean onto the first row's
    scales = np.array([specs[0].mean() / specs[k].mean()
                       for k in range(specs.shape[0])])
    # additive offsets are fixed at zero; kept so the form stays m*x + b
    offsets = np.zeros(len(scales))

    scaled = specs * scales.reshape(-1, 1) + offsets.reshape(-1, 1)
    return scaled / scaled[0]

In [None]:
# -- load comparison set from bld (left box)
# Use the cached array when it can be read; otherwise rebuild it from the
# per-scan spectra files.
print("getting {0} spectra...".format(comp))
try:
    blds_left = np.load("../../../gdobler_output/output/blds_left.npy")
except (OSError, ValueError):
    # Only a missing/unreadable cache triggers the rebuild; anything else
    # (KeyboardInterrupt, typos, ...) now propagates instead of being hidden.
    # Each non-"_avg" file is averaged over its last two axes, using only
    # indices [:78] of the last axis.
    blds_left = np.array([np.load(i)[:,:,:78].mean(-1).mean(-1) for i in
                    sorted(glob.glob("../../../gdobler_output/output/{0}_specs/{0}_specs*.npy"\
                                    .format(comp))) if "_avg" not in i])
    blds_left = blds_left[good]
    # np.save("../../../gdobler_output/output/blds_left.npy",blds_left)

In [None]:
# -- normalize spectra
# The scaling that used to be written out inline in this cell is the same
# computation normalize_spectra performs: each scan is scaled to the mean of
# the first scan and then divided by the first scan.

lbrat = normalize_spectra(blds_left)

In [None]:
# -- get vegetation spectra
# "veg" comes from a single pre-built patch file; any other kind is stacked
# from its per-scan "_avg" files and restricted to the good scans.
print("getting {0} spectra...".format(kind))
if kind != "veg":
    vegs = np.array([
        np.load(i)
        for i in sorted(glob.glob(
            "../../../gdobler_output/output/" +
            "{0}_specs/{0}_specs_avg*.npy".format(kind)
        ))
    ])
    vegs = vegs[good]
else:
    vegs = np.load("../../../gdobler_output/output/veg_patch_specs.npy")

In [None]:
# -- normalize spectra
# Same scaling as for the building spectra: every scan is brought to the mean
# of the first scan and divided by the first scan (see normalize_spectra).

vrat = normalize_spectra(vegs)

In [None]:
# -- get comparison spectra from right building
# Use the cached array when it can be read; otherwise rebuild it from the
# per-scan spectra files.
print("getting {0} spectra...".format(comp))
try:
    blds_right = np.load("../../../gdobler_output/output/blds_right.npy")
except (OSError, ValueError):
    # Only a missing/unreadable cache triggers the rebuild; other errors
    # propagate. Each non-"_avg" file is averaged over its last two axes,
    # using only indices [78:] of the last axis.
    blds_right = np.array([np.load(i)[:,:,78:].mean(-1).mean(-1) for i in
                     sorted(glob.glob("../../../gdobler_output/output/{0}_specs/{0}_specs*.npy"\
                                          .format(comp))) if "_avg" not in i])
    blds_right = blds_right[good]
    # np.save("../../../gdobler_output/output/blds_right.npy",blds_right)

In [None]:
# -- normalize spectra
# Right-building spectra get the same treatment as the other two samples:
# scale each scan to the mean of the first scan, then divide by the first
# scan (see normalize_spectra).

rbrat = normalize_spectra(blds_right)

In [None]:
# -- take the ratio of ratios
# Both samples are divided by the left-building ratios, element by element.
# NOTE(review): this assumes vrat, rbrat and lbrat have matching shapes (same
# scans, same wavelength grid) -- confirm.
D_vrat = vrat/lbrat   # vegetation to left buildings
D_brat = rbrat/lbrat  # right buildings to left buildings

#### Normalize and plot air quality

In [None]:
# -- get some ancillary data
sc     = pd.read_csv("../../../gdobler_output/output/scan_conditions.csv")
# keep only the rows whose filename corresponds to a good scan number
# NOTE(review): rows keep the CSV order, not the order of `good` -- confirm
# the two agree before pairing these columns with the spectra.
sc_sub = sc[sc.filename.isin(["veg_{0:05}.raw".format(i) for i in good])]

temps = sc_sub.temperature.values
humid = sc_sub.humidity.values
pm25  = sc_sub.pm25.values
o3    = sc_sub.o3.values

# -- observation times as seconds since the epoch
secs  = []
for stime in sc_sub.time.values:
    # time strings split into year, month abbreviation, day and HH:MM
    yr, mo, dy, tm = stime.split()
    stime_fmt      = "{0} {1} {2:02} {3}:00".format(yr,mo,int(dy),tm)
    obs_dt         = datetime.strptime(stime_fmt,"%Y %b %d %H:%M:%S")
    # timestamp() is the portable way to get epoch seconds; strftime("%s")
    # is a platform extension and fails outside glibc-like C libraries.
    # The datetime is naive, so it is interpreted in local time.
    secs.append(obs_dt.timestamp())
secs = np.array(secs)

In [None]:
# create air quality arrays

#normalize air quality arrays
# Each series is overwritten with its z-score, so from here on o3, pm25,
# temps and humid hold standardized values rather than the raw readings.

o3 = stats.zscore(o3)
pm25 = stats.zscore(pm25)
temps = stats.zscore(temps)
humid = stats.zscore(humid)

# Design matrices: one column per predictor (named in the suffix: o, p, t, h)
# followed by a column of ones for the intercept.
aq_opth1  = np.vstack([o3,pm25,temps,humid,np.ones_like(o3)]).T
aq_pth1 = np.vstack([pm25,temps,humid,np.ones_like(o3)]).T
aq_op1 = np.vstack([o3,pm25,np.ones_like(o3)]).T

In [None]:
# collect the four standardized series in one frame and show summary statistics
opth = pd.DataFrame({'O3': o3, 'PM2.5': pm25, 'Temp': temps, 'Humidity':humid})
opth.describe()

In [None]:
# pairwise scatter plots of the standardized series, histograms on the diagonal
scatter_matrix(opth, alpha=0.6, figsize=(10,10), diagonal='hist')
plt.show()

In [None]:
# -- one panel per standardized series; only the bottom row gets an x label
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(9,12))

ax1.plot(o3,color="darkred",lw=1)
ax1.set_ylabel('normalized O3')
ax1.set_title('O3')

ax2.plot(pm25,color="darkred",lw=1)
ax2.set_ylabel('normalized PM2.5')
ax2.set_title('PM2.5')

ax3.plot(temps,color="darkred",lw=1)
# label the temperature panel itself: there is no bare `ax` in this cell, so
# the previous `ax.set_xlabel` raised NameError on a fresh kernel
ax3.set_xlabel("scan number")
ax3.set_ylabel('normalized Temps')
ax3.set_title('Temperature')

ax4.plot(humid,color="darkred",lw=1)
ax4.set_xlabel("scan number")
ax4.set_ylabel('normalized Humidity')
ax4.set_title('Humidity')

plt.show()

In [None]:
# -- overlay the four standardized series on a single axis
# plt.subplots returns (figure, axes); unpack both so `fig` really is the
# figure and all drawing goes through the explicit axes object.
fig, ax = plt.subplots(figsize=[10,3])
lino, = ax.plot(o3, color="yellow", lw=1)
linp, = ax.plot(pm25, color="black", lw=1)
lint, = ax.plot(temps, color="red", lw=1)
linh, = ax.plot(humid, color="blue", lw=1)
ax.set_title("Air quality variables")
ax.set_xlabel("scan number")
ax.set_ylabel("normalized AQ")
ax.legend([lino,linp,lint,linh], ["O3","PM2.5","Temp","Humid"], loc="upper left", fontsize=12)
plt.show()

## (1)  D(750) / D(1000)   --||vs||--   O3, PM2.5, Temps, Humid (ALL)

$\mathrm{brightness} = w_{O_3} \cdot O_3 + w_{PM2.5} \cdot PM_{2.5} + w_{T} \cdot \mathrm{Temp} + w_{H} \cdot \mathrm{Humid} + b + \epsilon$

$\epsilon$ is a Gaussian noise term such that:

observed = true + random.normal(0, $\epsilon$, number_of_observations)

In [None]:
# design matrix for the full model: columns are O3, PM2.5, temperature,
# humidity and a column of ones for the intercept b
aq_opth1  = np.vstack([o3,pm25,temps,humid,np.ones_like(o3)]).T

### Linear Least Squares - no errors

In [None]:
# print the wavelengths at a few fixed channel indices for reference
print(waves[500], waves[-150])
print(waves[430], waves[570])

# channels whose wavelength is closest to 860 and 670 (same units as waves);
# neither index is used in the visible part of this notebook
ind_ir  = np.argmin(np.abs(waves-860.))
ind_vis = np.argmin(np.abs(waves-670.))

In [None]:
# calculate differential ratios
# D(750)/D(1000)
# The ratio is taken between channel 500 and the last channel.
# NOTE(review): the 750/1000 labels assume waves[500] ~ 750 and
# waves[-1] ~ 1000 -- check against the wavelengths printed above.

D_veg = D_vrat[:,500] / D_vrat[:,-1]
D_bld = D_brat[:,500] / D_brat[:,-1]

In [None]:
# normalize differential ratios
# D_veg and D_bld are overwritten with their z-scores, so every later cell
# works with standardized ratios.

D_veg = stats.zscore(D_veg)
D_bld = stats.zscore(D_bld)

In [None]:
# least-squares fit of the vegetation ratio on the full design matrix
# ind selects every scan; solv is the lstsq result tuple and solv[0] holds
# the fitted coefficients in column order
ind  = np.arange(len(D_veg))
solv  = np.linalg.lstsq(aq_opth1[ind], D_veg[ind], rcond=None)
predv = np.dot(aq_opth1[ind], solv[0])

# coefficient of determination r2
# solv[1] is the residual entry returned by lstsq, used here as the residual
# sum of squares; len * var is the total sum of squares. r2 is an array
# because solv[1] is one.
r2   = 1 - solv[1] / (len(D_veg) * D_veg.var())
print("sol = ", solv)
print("r2  = ", r2)

In [None]:
# least-squares fit of the building ratio on the full design matrix
# ind selects every scan; solb is the lstsq result tuple and solb[0] holds
# the fitted coefficients in column order
ind  = np.arange(len(D_bld))
solb  = np.linalg.lstsq(aq_opth1[ind], D_bld[ind], rcond=None)
predb = np.dot(aq_opth1[ind], solb[0])

# coefficient of determination r2
# solb[1] is the residual entry returned by lstsq, used here as the residual
# sum of squares; len * var is the total sum of squares. r2 overwrites the
# value computed for vegetation.
r2   = 1 - solb[1] / (len(D_bld) * D_bld.var())
print("sol = ", solb)
print("r2  = ", r2)

In [None]:
# data (dark red) against the least-squares prediction (blue), one panel per sample
# NOTE(review): figsize=(4,12) makes two side-by-side panels 4 wide and 12
# tall -- possibly meant to be (12,4); confirm.
fig, ((ax1, ax2)) = plt.subplots(1, 2, figsize=(4,12))

ax1.plot(D_veg,color="darkred",lw=1)
ax1.plot(predv,color="dodgerblue",lw=2)
ax1.set_ylim(-4,4)
ax1.set_xlim(0,predv.size)
ax1.set_xlabel("scan number")
ax1.set_ylabel(r'$D_{750}/D_{1000}$')
ax1.set_title('Vegetation')

ax2.plot(D_bld,color="darkred",lw=1)
ax2.plot(predb,color="dodgerblue",lw=2)
ax2.set_ylim(-4,4)
ax2.set_xlim(0,predb.size)
ax2.set_xlabel("scan number")
ax2.set_ylabel(r'$D_{750}/D_{1000}$')
ax2.set_title('Buildings')

plt.show()

### Maximum Likelihood Estimation with brightness

In [None]:
def log_prior(theta):
    """Flat log-prior that only requires a positive noise scale.

    Parameters
    ----------
    theta : sequence of 6 floats
        Four slopes, the intercept and the noise scale ``eps`` (last entry).

    Returns
    -------
    float
        ``-np.inf`` when ``eps <= 0``, otherwise ``0.0``.
    """
    # only the last entry matters; the unpacking still enforces length 6
    _, _, _, _, _, eps = theta
    return -np.inf if eps <= 0 else 0.0

def log_likelihood(theta, o3, pm25, temps, humid, y):
    """Gaussian log-likelihood of ``y`` under the linear air-quality model.

    Parameters
    ----------
    theta : sequence of 6 floats
        ``(beta_o3, beta_pm25, beta_temps, beta_humid, alpha, eps)``: the four
        slopes, the intercept and the noise scale.
    o3, pm25, temps, humid : array-like
        Predictor values, one entry per observation.
    y : array-like
        Observed values, same length as the predictors.

    Returns
    -------
    float
        Sum over observations of the normal log-density with mean given by
        the linear model and variance ``eps**2``.
    """
    beta_o3, beta_pm25, beta_temps, beta_humid, alpha, eps = theta
    model = beta_o3 * o3 + beta_pm25 * pm25 + beta_temps * temps + beta_humid * humid + alpha
    # only eps**2 enters, so the sign of eps does not affect the likelihood
    var = eps ** 2
    resid = y - model
    # np.sum reduces the array in compiled code; the builtin sum iterates it
    # element by element in Python on every call of the sampler
    return -0.5 * np.sum(resid ** 2 / var + np.log(var) + np.log(2 * np.pi))

def log_probability(theta, o3, pm25, temps, humid, y):
    """Log-posterior: log-prior plus log-likelihood.

    Takes the same arguments as ``log_likelihood``. Returns ``-np.inf``
    without evaluating the likelihood when the prior is not finite.
    """
    prior = log_prior(theta)
    if np.isfinite(prior):
        return prior + log_likelihood(theta, o3, pm25, temps, humid, y)
    return -np.inf

In [None]:
# -- obtaining uncertainty for brightness
# Error estimate per sample: standard deviation of the scan-to-scan
# differences divided by sqrt(2).

diffv = np.diff(D_veg)
verr = np.std(diffv) / np.sqrt(2.0)
print("error in vegetation brightness = ", verr)

diffb = np.diff(D_bld)
berr = np.std(diffb) / np.sqrt(2.0)
print("error in building brightness = ", berr)

In [None]:
# -- obtaining uncertainty for air quality
# Same estimator as for the brightness: per-column standard deviation of the
# scan-to-scan differences divided by sqrt(2). The last column is the
# constant intercept column, so its entry is 0.

templates  = np.vstack([o3,pm25,temps,humid,np.ones_like(o3)]).T
diff_air = np.diff(templates, axis=0)
air_err = diff_air.std(0)/np.sqrt(2.0)
print("errors in air quality = ", air_err)

In [None]:
# -- numerical optimum of likelihood function
# For each sample the negative log-likelihood is minimized with scipy's
# default method. The starting point is the least-squares coefficient vector
# with the scan-to-scan error estimate appended as the initial eps.
# NOTE(review): eps is unconstrained here and the likelihood depends only on
# eps**2, so the optimizer may return a negative eps, which log_prior rejects
# -- check the sign before using the result to seed the walkers.

# seeding fixes the global NumPy RNG state that later np.random calls draw from
np.random.seed(314)
nll = lambda *args: -log_likelihood(*args)
initialv = np.append(solv[0], verr)
resultv = minimize(nll, initialv, args=(aq_opth1[:,0], aq_opth1[:,1], aq_opth1[:,2], 
                                    aq_opth1[:,3], D_veg))
beta_o3_v, beta_pm25_v, beta_temps_v, beta_humid_v, alpha_v, epsilon_v = resultv.x

print("Maximum likelihood estimates for Vegetation:")
print("w_o3    =  {0:.5f}".format(beta_o3_v))
print("w_pm25  = {0:.5f}".format(beta_pm25_v))
print("w_temps = {0:.5f}".format(beta_temps_v))
print("w_humid =  {0:.5f}".format(beta_humid_v))
print("b       =  {0:.5f}".format(alpha_v))
print("e       =  {0:.5f}".format(epsilon_v))

# same procedure for the building sample, with its own seed
np.random.seed(413)
nll = lambda *args: -log_likelihood(*args)
initialb = np.append(solb[0], berr)
resultb = minimize(nll, initialb, args=(aq_opth1[:,0], aq_opth1[:,1], aq_opth1[:,2], 
                                    aq_opth1[:,3], D_bld))
beta_o3_b, beta_pm25_b, beta_temps_b, beta_humid_b, alpha_b, epsilon_b = resultb.x
print("Maximum likelihood estimates for Buildings:")
print("w_o3    =  {0:.5f}".format(beta_o3_b))
print("w_pm25  = {0:.5f}".format(beta_pm25_b))
print("w_temps = {0:.5f}".format(beta_temps_b))
print("w_humid =  {0:.5f}".format(beta_humid_b))
print("b       =  {0:.5f}".format(alpha_b))
print("e       =  {0:.5f}".format(epsilon_b))

### MCMC

In [None]:
# 50 walkers in a 6-dimensional space (4 slopes, intercept, eps); each walker
# starts at the maximum-likelihood point plus Gaussian noise of scale 1e-4
nwalkers, ndim = 50, 6
pv0 = resultv.x + 1e-4 * np.random.randn(nwalkers, ndim)
pb0 = resultb.x + 1e-4 * np.random.randn(nwalkers, ndim)

In [None]:
# -- **Vegetation**
# -- Sample the distribution using emcee
# -- start by initializing the walkers in a small Gaussian ball around the maximum likelihood result
# -- then run 5000 steps of MCMC
# (emcee is imported once, in the notebook's import cell)

samplerv = emcee.EnsembleSampler(nwalkers, ndim, log_probability, 
                                args=(aq_opth1[:,0], aq_opth1[:,1],
                                      aq_opth1[:,2], aq_opth1[:,3], D_veg))
posv, probv, statev = samplerv.run_mcmc(pv0, 5000)

In [None]:
# -- **Buildings**
# -- Sample the distribution using emcee
# -- start by initializing the walkers in a small Gaussian ball around the maximum likelihood result
# -- then run 5000 steps of MCMC
# (emcee is imported once, in the notebook's import cell)

samplerb = emcee.EnsembleSampler(nwalkers, ndim, log_probability, 
                                args=(aq_opth1[:,0], aq_opth1[:,1],
                                      aq_opth1[:,2], aq_opth1[:,3], D_bld))
posb, probb, stateb = samplerb.run_mcmc(pb0, 5000)

In [None]:
# -- walker traces: one row per parameter, vegetation left, buildings right
fig, axes = plt.subplots(ndim, 2, figsize=(10, 13), sharex=True)
labels = [r'$w_{O_3}$', r'$w_{PM2.5}$', r'$w_{temps}$', r'$w_{humid}$', r'$b$', r'$\epsilon$']
samplesv = samplerv.get_chain()
samplesb = samplerb.get_chain()

# the two columns are drawn the same way, so loop over (column, chain) pairs
for col, chain in enumerate((samplesv, samplesb)):
    for i in range(ndim):
        ax = axes[i, col]
        ax.plot(chain[:, :, i], "k", alpha=0.3)
        ax.set_xlim(0, len(chain))
        ax.set_ylabel(labels[i])
        ax.yaxis.set_label_coords(-0.1, 0.5)

# x label on the bottom row, sample name on the top row
for col, title in enumerate(("Vegetation", "Buildings")):
    axes[-1, col].set_xlabel("step number")
    axes[0, col].set_title(title)
plt.show()

In [None]:
# integrated autocorrelation time estimates, vegetation first, then buildings
# (tau is reused, so only the buildings estimate remains afterwards)
tau = samplerv.get_autocorr_time()
print(tau)
tau = samplerb.get_autocorr_time()
print(tau)

In [None]:
# -- discard initial 200 steps (~3 x tau)
# -- thin by 30 (about half the autocorrelation time)
# -- flatten chain
# NOTE(review): 200 and 30 are hard-coded from the autocorrelation times
# printed above -- re-check them if those estimates change.

flat_samplesv = samplerv.get_chain(discard=200, thin=30, flat=True)
print(flat_samplesv.shape)
flat_samplesb = samplerb.get_chain(discard=200, thin=30, flat=True)
print(flat_samplesb.shape)

In [None]:
# corner plot of the vegetation posterior; reference lines mark the medians
fig = corner.corner(flat_samplesv, labels=labels, truths=np.median(flat_samplesv, axis=0))
# title the whole figure: plt.title would only label whichever panel happens
# to be the current axes
fig.suptitle("Vegetation")

In [None]:
# corner plot of the buildings posterior; reference lines mark the medians
fig = corner.corner(flat_samplesb, labels=labels, truths=np.median(flat_samplesb, axis=0))
# title the whole figure: plt.title would only label whichever panel happens
# to be the current axes
fig.suptitle("Buildings")

In [None]:
# -- report the median with the 16th/84th percentile offsets for each parameter
# (display and Math come from the notebook's import cell)

# raw strings keep the LaTeX backslashes literal ("\e" and "\m" are not valid
# Python escape sequences)
labels = [r"w_{O_3}", r"w_{PM2.5}", r"w_{temps}", r"w_{humid}", r"b", r"\epsilon"]
print("Vegetation")
for i in range(ndim):
    mcmc = np.percentile(flat_samplesv[:,i], [16, 50, 84])
    # q[0] = median - 16th percentile, q[1] = 84th percentile - median
    q = np.diff(mcmc)
    txt = r"\mathrm{{{3}}} = {0:.4f}_{{-{1:.4f}}}^{{{2:.4f}}}"
    txt = txt.format(mcmc[1], q[0], q[1], labels[i])
    display(Math(txt))

print("")
print("Buildings")
for i in range(ndim):
    mcmc = np.percentile(flat_samplesb[:,i], [16, 50, 84])
    q = np.diff(mcmc)
    txt = r"\mathrm{{{3}}} = {0:.4f}_{{-{1:.4f}}}^{{{2:.4f}}}"
    txt = txt.format(mcmc[1], q[0], q[1], labels[i])
    display(Math(txt))

In [None]:
# -- data (dark red) with the model evaluated for 800 random posterior draws (blue)
fig, ((ax1,ax2)) = plt.subplots(1, 2, figsize=[10,3])
linb, = ax1.plot(D_veg,color="darkred",lw=1)
inds = np.random.randint(len(flat_samplesv), size=800)
for ind in inds:
    sample = flat_samplesv[ind]
    # the first five entries are the linear coefficients; the sixth is eps
    linp, = ax1.plot(np.dot(aq_opth1, sample[:5]), color="dodgerblue", lw=0.3)
ax1.set_ylim(-4, 4)
ax1.set_xlim(0,predv.size)
ax1.set_title("Vegetation")
ax1.set_xlabel("scan number")
ax1.set_ylabel(r'$D_{750} / D_{1000}$')
# Axes has no set_legend method; legend() is the call that draws the legend
ax1.legend([linb,linp],["data","model"],loc="upper left",fontsize=12)

linb, = ax2.plot(D_bld,color="darkred",lw=1)
inds = np.random.randint(len(flat_samplesb), size=800)
for ind in inds:
    sample = flat_samplesb[ind]
    linp, = ax2.plot(np.dot(aq_opth1, sample[:5]), color="dodgerblue", lw=0.3)
ax2.set_ylim(-4, 4)
ax2.set_xlim(0,predb.size)
ax2.set_title("Buildings")
ax2.set_xlabel("scan number")
ax2.set_ylabel(r'$D_{750} / D_{1000}$')
ax2.legend([linb,linp],["data","model"],loc="upper left",fontsize=12)

plt.show()

In [None]:
# Getting 50th percentile values to use for prediction, and 16th and 84th differences for uncertainty
# For each parameter: mvals_* holds the median, err_vals_* the mean of the
# lower (median - 16th) and upper (84th - median) offsets.
mvals_v = np.zeros(ndim)
err_vals_v = np.zeros(ndim)
for i in range(ndim):
    mcmc = np.percentile(flat_samplesv[:,i], [16, 50, 84])
    q = np.diff(mcmc)
    mvals_v[i] = mcmc[1]
    err_vals_v[i] = (abs(q[0]) + abs(q[1])) / 2
print(mvals_v)
print(err_vals_v)

# same summary for the building chain
mvals_b = np.zeros(ndim)
err_vals_b = np.zeros(ndim)
for i in range(ndim):
    mcmc = np.percentile(flat_samplesb[:,i], [16, 50, 84])
    q = np.diff(mcmc)
    mvals_b[i] = mcmc[1]
    err_vals_b[i] = (abs(q[0]) + abs(q[1])) / 2
print(mvals_b)
print(err_vals_b)

In [None]:
# calculate prediction and uncertainty
def err_mult(m, merr, x, xerr):
    """Propagated uncertainty of the product ``m * x``.

    Parameters
    ----------
    m, x : float or array
        The two factors.
    merr, xerr : float or array
        Their uncertainties.

    Returns
    -------
    float or array
        ``sqrt((x * merr)**2 + (m * xerr)**2)``. This equals
        ``|m*x| * sqrt((merr/m)**2 + (xerr/x)**2)`` wherever that expression
        is defined, but stays finite when ``m`` or ``x`` is zero.
    """
    # written without dividing by m or x so that a zero factor does not
    # produce NaN (or ZeroDivisionError for plain Python floats)
    err = np.sqrt((x * merr)**2 + (m * xerr)**2)
    return err

# prediction from the median coefficients (first five entries; the sixth is eps)
pred_v = np.dot(aq_opth1, mvals_v[:5])
pred_v_err = np.zeros(pred_v.shape)
for i in range(aq_opth1.shape[0]):
    # per-scan error of each coefficient*predictor term
    mO_err = err_mult(mvals_v[0], err_vals_v[0], aq_opth1[i,0], air_err[0])
    mp_err = err_mult(mvals_v[1], err_vals_v[1], aq_opth1[i,1], air_err[1])
    mt_err = err_mult(mvals_v[2], err_vals_v[2], aq_opth1[i,2], air_err[2])
    mh_err = err_mult(mvals_v[3], err_vals_v[3], aq_opth1[i,3], air_err[3])
    # the four term errors and the intercept error are added in quadrature
    pred_v_err[i] = np.linalg.norm(np.array([mO_err, mp_err, mt_err, mh_err, err_vals_v[4]]))

# same calculation for the building sample
pred_b = np.dot(aq_opth1, mvals_b[:5])
pred_b_err = np.zeros(pred_b.shape)
for i in range(aq_opth1.shape[0]):
    mO_err = err_mult(mvals_b[0], err_vals_b[0], aq_opth1[i,0], air_err[0])
    mp_err = err_mult(mvals_b[1], err_vals_b[1], aq_opth1[i,1], air_err[1])
    mt_err = err_mult(mvals_b[2], err_vals_b[2], aq_opth1[i,2], air_err[2])
    mh_err = err_mult(mvals_b[3], err_vals_b[3], aq_opth1[i,3], air_err[3])
    pred_b_err[i] = np.linalg.norm(np.array([mO_err, mp_err, mt_err, mh_err, err_vals_b[4]]))

In [None]:
# measured vs predicted with error bars; the blue line is the 1:1 relation
# x errors: the constant scan-to-scan estimate; y errors: the propagated
# per-scan prediction error
fig, ((ax1, ax2)) = plt.subplots(1, 2, figsize=[5, 5])
linetr = ax1.errorbar(D_veg, pred_v, xerr=np.full(D_veg.shape, verr), 
                      yerr=pred_v_err, fmt='.', color="darkred", alpha=0.2)
linlin, = ax1.plot(D_veg, D_veg, color="dodgerblue", lw=0.5)
ax1.set_xlabel('measured')
ax1.set_ylabel('predicted')
ax1.set_title('Vegetation')

linetr = ax2.errorbar(D_bld, pred_b, xerr=np.full(D_bld.shape, berr), 
                      yerr=pred_b_err, fmt='.', color="darkred", alpha=0.2)
linlin, = ax2.plot(D_bld, D_bld, color="dodgerblue", lw=0.5)
ax2.set_xlabel('measured')
ax2.set_ylabel('predicted')
ax2.set_title('Buildings')

plt.show()

#### reduced chi-squared per degree of freedom

In [None]:
def redchisqg(ydata, ymod, deg=2, sd=None):
    """Chi-squared per degree of freedom of a model against data.

    Parameters
    ----------
    ydata : ndarray
        Observed values.
    ymod : ndarray
        Model values, same shape as ``ydata``.
    deg : int, optional
        Number of fitted parameters subtracted from the number of data
        points (default 2).
    sd : float or ndarray, optional
        Uncertainties used to weight the residuals. When omitted the
        residuals are left unweighted.

    Returns
    -------
    float
        ``chisq / (ydata.size - deg)``.
    """
    resid = ydata - ymod
    # identity test: comparing an array to None with == is done element-wise
    if sd is None:
        chisq = np.sum(resid**2)
    else:
        chisq = np.sum((resid / sd)**2)

    # degrees of freedom: data points minus the `deg` fitted parameters
    nu = ydata.size - deg

    return chisq/nu

In [None]:
# -- standard deviation of each scan from spread of samples
# one model curve per posterior draw (rows), then the sample standard
# deviation (ddof=1) across draws for every scan

veg_solns = np.zeros((len(flat_samplesv), len(D_veg)))
for i, sample in enumerate(flat_samplesv):
    veg_solns[i] = np.dot(aq_opth1, sample[:5])

stddev_veg = veg_solns.std(axis=0, ddof=1)

In [None]:
# one model curve per posterior draw (rows), then the sample standard
# deviation (ddof=1) across draws for every scan
bld_solns = np.zeros((len(flat_samplesb), len(D_bld)))
for i, sample in enumerate(flat_samplesb):
    bld_solns[i] = np.dot(aq_opth1, sample[:5])

stddev_bld = bld_solns.std(axis=0, ddof=1)

In [None]:
# reduced chi-squared of the median-coefficient prediction; deg = ndim-1
# counts the five linear coefficients (ndim also includes eps).
# The weights are the per-scan spread of the posterior model curves.
chisq_v_opth = redchisqg(D_veg, pred_v, deg=ndim-1, sd=stddev_veg)
chisq_b_opth = redchisqg(D_bld, pred_b, deg=ndim-1, sd=stddev_bld)

print("Vegetation chi^2/dof = ", chisq_v_opth)
print("Buildings chi^2/dof  = ", chisq_b_opth)

### OLS to obtain log-likelihood scores and p-values

In [None]:
# both standardized responses and the four standardized predictors in one
# frame, for the formula-based OLS fits
b_opth = pd.DataFrame({'D_veg': D_veg, 'D_bld': D_bld, 'o3': o3,
          'pm25': pm25, 'temps': temps, 'humid':humid})
b_opth.describe()

In [None]:
# build models
# one OLS model per response, both regressed on the same four predictors
# (sm is statsmodels.formula.api, as imported at the top of the notebook)
modelv = sm.ols('D_veg ~ o3 + pm25 + temps + humid', data=b_opth)
modelb = sm.ols('D_bld ~ o3 + pm25 + temps + humid', data=b_opth)

# fit models to the data
resultv_opth = modelv.fit()
resultb_opth = modelb.fit()

In [None]:
# summarize model result
# the summary is the cell's last expression, so the notebook renders it
print("Vegetation Model")
resultv_opth.summary()

In [None]:
# summarize model result
# the summary is the cell's last expression, so the notebook renders it
print("Buildings Model")
resultb_opth.summary()