# Importing Necessary Libraries

In [1]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import statsmodels.api as sm
import pandas as pd
import numpy as np
import pickle
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor

import mglearn

# Reading in the data

In [2]:
# Load the modelling table and prepare it in one pass:
#   1. drop the two columns found at zero-based positions 5 and 6,
#   2. one-hot encode 'NZPBldgType' into NZPBldgType_* indicator columns,
#   3. strip commas from 'Characterized BldgSF' and convert it to float,
#   4. discard every row that still contains a missing value.
cols = [5, 6]
data = (
    pd.read_csv('Modeling_Data/model_all_data.csv')
    .pipe(lambda frame: frame.drop(columns=frame.columns[cols]))
    .astype({'NZPBldgType': 'category'})
    .pipe(pd.get_dummies, columns=['NZPBldgType'])
    .assign(**{
        'Characterized BldgSF': lambda frame: (
            frame['Characterized BldgSF'].str.replace(',', '').astype(float)
        )
    })
    .dropna()
)
data

Unnamed: 0,Characterized BldgSF,Material type,Total Pounds/Day,LBS/DAY/SF,NZPBldgType_CDC,NZPBldgType_DFAC,NZPBldgType_GIB,NZPBldgType_Office-Large,NZPBldgType_Office-Medium,NZPBldgType_Office-Small,NZPBldgType_PFF,NZPBldgType_PX,NZPBldgType_School-Primary,NZPBldgType_Training Barracks,NZPBldgType_UEPH
0,50819.0,#1,3.11,0.0000612,0,0,0,0,0,0,0,0,0,0,1
1,50819.0,#2,3.35,0.0000659,0,0,0,0,0,0,0,0,0,0,1
2,50819.0,#3,2.60,0.0000512,0,0,0,0,0,0,0,0,0,0,1
3,50819.0,#4,1.91,0.0000376,0,0,0,0,0,0,0,0,0,0,1
4,50819.0,#5,0.48,0.0000094,0,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1710,110953.0,Yard Trimmings,0.00,0,0,0,0,0,0,0,0,1,0,0,0
1711,6679.0,Yard Trimmings,0.00,0,0,0,0,0,0,1,0,0,0,0,0
1712,20326.0,Yard Trimmings,0.00,0,0,1,0,0,0,0,0,0,0,0,0
1713,49600.0,Yard Trimmings,0.00,0,0,0,1,0,0,0,0,0,0,0,0


In [3]:
# Row count per material label. The cleaned frame has no 'Country' column
# (looking it up raises KeyError), so count the 'Material type' column that
# the per-material subsets are built from instead.
data['Material type'].value_counts()

In [4]:
# Split the cleaned table into one subset per material label. Every subset
# keeps all columns and only the rows whose 'Material type' equals the label.
# E-waste, Textiles and Wood are not split out here.
_material_label = data['Material type']

# Numbered material categories (#1 - #7).
mat_type_1 = data[_material_label.eq("#1")]
mat_type_2 = data[_material_label.eq("#2")]
mat_type_3 = data[_material_label.eq("#3")]
mat_type_4 = data[_material_label.eq("#4")]
mat_type_5 = data[_material_label.eq("#5")]
mat_type_6 = data[_material_label.eq("#6")]
mat_type_7 = data[_material_label.eq("#7")]

# Named material categories.
mat_type_aluminum = data[_material_label.eq("Aluminum")]
mat_type_corrcardboard = data[_material_label.eq("Corr. Cardboard")]
mat_type_food = data[_material_label.eq("Food")]
mat_type_glass = data[_material_label.eq("Glass")]
mat_type_mixedpaper = data[_material_label.eq("Mixed Paper")]
mat_type_newspaper = data[_material_label.eq("Newspaper")]
mat_type_nonrecyclmsw = data[_material_label.eq("Non-recyclable MSW")]
mat_type_paperboard = data[_material_label.eq("Paperboard")]
mat_type_soiledpaper = data[_material_label.eq("Soiled Paper")]
mat_type_steelferrous = data[_material_label.eq("Steel / Ferrous")]
mat_type_whitepaper = data[_material_label.eq("White Paper")]
mat_type_yardtrimmings = data[_material_label.eq("Yard Trimmings")]

# Material #1 Model

In [151]:
# OLS fit for material "#1": 'Total Pounds/Day' regressed on building square
# footage plus the listed building-type indicators, with an intercept column.
# NOTE(review): the saved summary under this cell also reports an
# NZPBldgType_CDC term (Df Model: 5) that is not in this predictor list, so
# that output appears to predate the current code - re-run to refresh it.
X_train = mat_type_1[[
    'Characterized BldgSF',
    'NZPBldgType_DFAC',
    'NZPBldgType_School-Primary',
    'NZPBldgType_UEPH',
]].astype('int')  # every predictor column is cast to integer
y_train = mat_type_1['Total Pounds/Day']
X_train_coef = sm.add_constant(X_train)  # adds the 'const' column

mat_type_1_model = sm.OLS(endog=y_train, exog=X_train_coef)
results1 = mat_type_1_model.fit()
results1.summary()

0,1,2,3
Dep. Variable:,Total Pounds/Day,R-squared:,0.246
Model:,OLS,Adj. R-squared:,0.201
Method:,Least Squares,F-statistic:,5.423
Date:,"Wed, 27 Apr 2022",Prob (F-statistic):,0.00023
Time:,17:58:39,Log-Likelihood:,-322.22
No. Observations:,89,AIC:,656.4
Df Residuals:,83,BIC:,671.4
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.8532,1.680,0.508,0.613,-2.489,4.196
Characterized BldgSF,4.964e-05,1.14e-05,4.349,0.000,2.69e-05,7.23e-05
NZPBldgType_CDC,-0.3476,3.482,-0.100,0.921,-7.273,6.578
NZPBldgType_DFAC,4.1585,3.337,1.246,0.216,-2.479,10.796
NZPBldgType_School-Primary,-5.7194,5.581,-1.025,0.308,-16.819,5.380
NZPBldgType_UEPH,5.6848,2.991,1.900,0.061,-0.265,11.634

0,1,2,3
Omnibus:,95.141,Durbin-Watson:,2.284
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1415.5
Skew:,3.294,Prob(JB):,4.25e-308
Kurtosis:,21.393,Cond. No.,718000.0


# Material #2 Model

In [152]:
# OLS fit for material "#2": 'Total Pounds/Day' regressed on building square
# footage plus the listed building-type indicators, with an intercept column.
# NOTE(review): the saved summary under this cell also reports an
# NZPBldgType_CDC term (Df Model: 8) that is not in this predictor list, so
# that output appears to predate the current code - re-run to refresh it.
X_train = mat_type_2[[
    'Characterized BldgSF',
    'NZPBldgType_DFAC',
    'NZPBldgType_GIB',
    'NZPBldgType_Office-Large',
    'NZPBldgType_School-Primary',
    'NZPBldgType_Training Barracks',
    'NZPBldgType_UEPH',
]].astype('int')  # every predictor column is cast to integer
y_train = mat_type_2['Total Pounds/Day']
X_train_coef = sm.add_constant(X_train)  # adds the 'const' column

mat_type_2_model = sm.OLS(endog=y_train, exog=X_train_coef)
results2 = mat_type_2_model.fit()
results2.summary()

0,1,2,3
Dep. Variable:,Total Pounds/Day,R-squared:,0.295
Model:,OLS,Adj. R-squared:,0.224
Method:,Least Squares,F-statistic:,4.176
Date:,"Wed, 27 Apr 2022",Prob (F-statistic):,0.000327
Time:,17:58:54,Log-Likelihood:,-279.35
No. Observations:,89,AIC:,576.7
Df Residuals:,80,BIC:,599.1
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.7347,1.266,0.581,0.563,-1.784,3.253
Characterized BldgSF,3.628e-05,8.18e-06,4.437,0.000,2e-05,5.26e-05
NZPBldgType_CDC,0.7348,2.306,0.319,0.751,-3.855,5.324
NZPBldgType_DFAC,5.7329,2.220,2.582,0.012,1.314,10.152
NZPBldgType_GIB,-2.5134,2.210,-1.138,0.259,-6.911,1.884
NZPBldgType_Office-Large,-3.9383,2.484,-1.586,0.117,-8.881,1.005
NZPBldgType_School-Primary,-5.6654,3.659,-1.548,0.125,-12.947,1.616
NZPBldgType_Training Barracks,-1.8425,2.279,-0.808,0.421,-6.378,2.693
NZPBldgType_UEPH,2.8731,2.103,1.366,0.176,-1.312,7.059

0,1,2,3
Omnibus:,68.517,Durbin-Watson:,2.447
Prob(Omnibus):,0.0,Jarque-Bera (JB):,499.533
Skew:,2.321,Prob(JB):,3.37e-109
Kurtosis:,13.638,Cond. No.,834000.0


# Material #3 Model

In [153]:
# OLS fit for material "#3": 'Total Pounds/Day' regressed on building square
# footage and the GIB building-type indicator, with an intercept column.
# NOTE(review): the saved summary under this cell also reports an
# NZPBldgType_CDC term (Df Model: 3) that is not in this predictor list, so
# that output appears to predate the current code - re-run to refresh it.
X_train = mat_type_3[[
    'Characterized BldgSF',
    'NZPBldgType_GIB',
]].astype('int')  # every predictor column is cast to integer
y_train = mat_type_3['Total Pounds/Day']
X_train_coef = sm.add_constant(X_train)  # adds the 'const' column

mat_type_3_model = sm.OLS(endog=y_train, exog=X_train_coef)
results3 = mat_type_3_model.fit()
results3.summary()

0,1,2,3
Dep. Variable:,Total Pounds/Day,R-squared:,0.094
Model:,OLS,Adj. R-squared:,0.057
Method:,Least Squares,F-statistic:,2.586
Date:,"Wed, 27 Apr 2022",Prob (F-statistic):,0.0594
Time:,17:59:10,Log-Likelihood:,-158.37
No. Observations:,79,AIC:,324.7
Df Residuals:,75,BIC:,334.2
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.2645,0.317,0.835,0.406,-0.367,0.896
Characterized BldgSF,6.918e-07,2.51e-06,0.275,0.784,-4.32e-06,5.7e-06
NZPBldgType_CDC,0.2371,0.713,0.333,0.740,-1.182,1.657
NZPBldgType_GIB,1.8215,0.658,2.770,0.007,0.512,3.132

0,1,2,3
Omnibus:,134.664,Durbin-Watson:,2.213
Prob(Omnibus):,0.0,Jarque-Bera (JB):,6290.85
Skew:,5.856,Prob(JB):,0.0
Kurtosis:,45.118,Cond. No.,417000.0


# Material #4 Model

In [154]:
# OLS fit for material "#4": 'Total Pounds/Day' regressed on building square
# footage plus the DFAC and PX building-type indicators, with an intercept.
# NOTE(review): the saved summary under this cell also reports an
# NZPBldgType_CDC term (Df Model: 4) that is not in this predictor list, so
# that output appears to predate the current code - re-run to refresh it.
X_train = mat_type_4[[
    'Characterized BldgSF',
    'NZPBldgType_DFAC',
    'NZPBldgType_PX',
]].astype('int')  # every predictor column is cast to integer
y_train = mat_type_4['Total Pounds/Day']
X_train_coef = sm.add_constant(X_train)  # adds the 'const' column

mat_type_4_model = sm.OLS(endog=y_train, exog=X_train_coef)
results4 = mat_type_4_model.fit()
results4.summary()

0,1,2,3
Dep. Variable:,Total Pounds/Day,R-squared:,0.296
Model:,OLS,Adj. R-squared:,0.262
Method:,Least Squares,F-statistic:,8.705
Date:,"Wed, 27 Apr 2022",Prob (F-statistic):,6.44e-06
Time:,17:59:21,Log-Likelihood:,-332.6
No. Observations:,88,AIC:,675.2
Df Residuals:,83,BIC:,687.6
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.4090,1.968,0.208,0.836,-3.506,4.324
Characterized BldgSF,3.166e-05,1.32e-05,2.390,0.019,5.31e-06,5.8e-05
NZPBldgType_CDC,1.4726,4.059,0.363,0.718,-6.601,9.546
NZPBldgType_DFAC,16.8646,3.891,4.335,0.000,9.127,24.603
NZPBldgType_PX,15.9000,3.901,4.076,0.000,8.142,23.658

0,1,2,3
Omnibus:,75.538,Durbin-Watson:,2.481
Prob(Omnibus):,0.0,Jarque-Bera (JB):,635.83
Skew:,2.614,Prob(JB):,8.53e-139
Kurtosis:,15.086,Cond. No.,515000.0


# Material #5 Model

In [156]:
# OLS fit for material "#5": 'Total Pounds/Day' regressed on building square
# footage plus the DFAC and PX building-type indicators, with an intercept.
X_train = mat_type_5[[
    'Characterized BldgSF',
    'NZPBldgType_DFAC',
    'NZPBldgType_PX',
]].astype('int')  # every predictor column is cast to integer
y_train = mat_type_5['Total Pounds/Day']
X_train_coef = sm.add_constant(X_train)  # adds the 'const' column

mat_type_5_model = sm.OLS(endog=y_train, exog=X_train_coef)
results5 = mat_type_5_model.fit()
results5.summary()

0,1,2,3
Dep. Variable:,Total Pounds/Day,R-squared:,0.297
Model:,OLS,Adj. R-squared:,0.273
Method:,Least Squares,F-statistic:,11.99
Date:,"Wed, 27 Apr 2022",Prob (F-statistic):,1.27e-06
Time:,17:59:36,Log-Likelihood:,-283.21
No. Observations:,89,AIC:,574.4
Df Residuals:,85,BIC:,584.4
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.4387,0.957,0.459,0.648,-1.463,2.341
Characterized BldgSF,1.474e-05,6.94e-06,2.123,0.037,9.33e-07,2.85e-05
NZPBldgType_DFAC,9.6429,2.079,4.639,0.000,5.510,13.776
NZPBldgType_PX,8.5424,2.115,4.039,0.000,4.337,12.748

0,1,2,3
Omnibus:,90.024,Durbin-Watson:,1.829
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1038.046
Skew:,3.165,Prob(JB):,3.9000000000000005e-226
Kurtosis:,18.488,Cond. No.,451000.0


# Material #6 Model

In [158]:
# OLS fit for material "#6": 'Total Pounds/Day' regressed on building square
# footage and the DFAC building-type indicator, with an intercept column.
X_train = mat_type_6[[
    'Characterized BldgSF',
    'NZPBldgType_DFAC',
]].astype('int')  # every predictor column is cast to integer
y_train = mat_type_6['Total Pounds/Day']
X_train_coef = sm.add_constant(X_train)  # adds the 'const' column

mat_type_6_model = sm.OLS(endog=y_train, exog=X_train_coef)
results6 = mat_type_6_model.fit()
results6.summary()

0,1,2,3
Dep. Variable:,Total Pounds/Day,R-squared:,0.204
Model:,OLS,Adj. R-squared:,0.186
Method:,Least Squares,F-statistic:,11.16
Date:,"Wed, 27 Apr 2022",Prob (F-statistic):,4.87e-05
Time:,17:59:46,Log-Likelihood:,-365.01
No. Observations:,90,AIC:,736.0
Df Residuals:,87,BIC:,743.5
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.9523,2.222,0.879,0.382,-2.464,6.368
Characterized BldgSF,1.191e-05,1.65e-05,0.721,0.473,-2.09e-05,4.47e-05
NZPBldgType_DFAC,22.2152,4.729,4.697,0.000,12.815,31.615

0,1,2,3
Omnibus:,147.474,Durbin-Watson:,1.961
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8542.957
Skew:,5.793,Prob(JB):,0.0
Kurtosis:,49.302,Cond. No.,408000.0


# Material #7 Model

In [160]:
# OLS fit for material "#7": 'Total Pounds/Day' regressed on building square
# footage plus the DFAC and School-Primary indicators, with an intercept.
X_train = mat_type_7[[
    'Characterized BldgSF',
    'NZPBldgType_DFAC',
    'NZPBldgType_School-Primary',
]].astype('int')  # every predictor column is cast to integer
y_train = mat_type_7['Total Pounds/Day']
X_train_coef = sm.add_constant(X_train)  # adds the 'const' column

mat_type_7_model = sm.OLS(endog=y_train, exog=X_train_coef)
results7 = mat_type_7_model.fit()
results7.summary()

0,1,2,3
Dep. Variable:,Total Pounds/Day,R-squared:,0.198
Model:,OLS,Adj. R-squared:,0.171
Method:,Least Squares,F-statistic:,7.099
Date:,"Wed, 27 Apr 2022",Prob (F-statistic):,0.000256
Time:,17:59:58,Log-Likelihood:,-305.52
No. Observations:,90,AIC:,619.0
Df Residuals:,86,BIC:,629.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.1394,1.156,-0.121,0.904,-2.438,2.159
Characterized BldgSF,2.704e-05,8.63e-06,3.132,0.002,9.88e-06,4.42e-05
NZPBldgType_DFAC,9.8201,2.458,3.995,0.000,4.934,14.706
NZPBldgType_School-Primary,-3.5556,4.372,-0.813,0.418,-12.246,5.135

0,1,2,3
Omnibus:,101.347,Durbin-Watson:,1.597
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1498.311
Skew:,3.613,Prob(JB):,0.0
Kurtosis:,21.637,Cond. No.,710000.0


# Material Aluminum Model

In [162]:
# OLS fit for Aluminum: 'Total Pounds/Day' regressed on building square
# footage plus the listed building-type indicators, with an intercept column.
X_train = mat_type_aluminum[[
    'Characterized BldgSF',
    'NZPBldgType_Office-Large',
    'NZPBldgType_Office-Small',
    'NZPBldgType_PFF',
    'NZPBldgType_PX',
    'NZPBldgType_School-Primary',
    'NZPBldgType_Training Barracks',
    'NZPBldgType_UEPH',
]].astype('int')  # every predictor column is cast to integer
y_train = mat_type_aluminum['Total Pounds/Day']
X_train_coef = sm.add_constant(X_train)  # adds the 'const' column

mat_type_aluminum_model = sm.OLS(endog=y_train, exog=X_train_coef)
results_aluminum = mat_type_aluminum_model.fit()
results_aluminum.summary()

0,1,2,3
Dep. Variable:,Total Pounds/Day,R-squared:,0.143
Model:,OLS,Adj. R-squared:,0.058
Method:,Least Squares,F-statistic:,1.672
Date:,"Wed, 27 Apr 2022",Prob (F-statistic):,0.118
Time:,18:00:12,Log-Likelihood:,-240.9
No. Observations:,89,AIC:,499.8
Df Residuals:,80,BIC:,522.2
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,2.6296,0.739,3.561,0.001,1.160,4.099
Characterized BldgSF,5.831e-06,5.25e-06,1.111,0.270,-4.62e-06,1.63e-05
NZPBldgType_Office-Large,-2.0474,1.630,-1.256,0.213,-5.292,1.197
NZPBldgType_Office-Small,-2.3713,1.609,-1.474,0.144,-5.574,0.831
NZPBldgType_PFF,-2.2068,1.454,-1.518,0.133,-5.100,0.687
NZPBldgType_PX,-1.5295,1.487,-1.029,0.307,-4.489,1.429
NZPBldgType_School-Primary,-1.9727,2.378,-0.830,0.409,-6.705,2.759
NZPBldgType_Training Barracks,-1.6071,1.460,-1.101,0.274,-4.513,1.299
NZPBldgType_UEPH,1.9549,1.360,1.438,0.154,-0.751,4.661

0,1,2,3
Omnibus:,77.078,Durbin-Watson:,2.343
Prob(Omnibus):,0.0,Jarque-Bera (JB):,509.816
Skew:,2.815,Prob(JB):,1.97e-111
Kurtosis:,13.284,Cond. No.,833000.0


# Material Corrugated Cardboard Model

In [164]:
# OLS fit for the "Corr. Cardboard" subset: 'Total Pounds/Day' regressed on
# building square footage plus the listed building-type indicators, with an
# intercept column.
X_train = mat_type_corrcardboard[[
    'Characterized BldgSF',
    'NZPBldgType_DFAC',
    'NZPBldgType_Office-Large',
    'NZPBldgType_PX',
]].astype('int')  # every predictor column is cast to integer
y_train = mat_type_corrcardboard['Total Pounds/Day']
X_train_coef = sm.add_constant(X_train)  # adds the 'const' column

mat_type_corrcardboard_model = sm.OLS(endog=y_train, exog=X_train_coef)
results_corrcardboard = mat_type_corrcardboard_model.fit()
results_corrcardboard.summary()

0,1,2,3
Dep. Variable:,Total Pounds/Day,R-squared:,0.195
Model:,OLS,Adj. R-squared:,0.154
Method:,Least Squares,F-statistic:,4.79
Date:,"Wed, 27 Apr 2022",Prob (F-statistic):,0.00164
Time:,18:00:27,Log-Likelihood:,-434.72
No. Observations:,84,AIC:,879.4
Df Residuals:,79,BIC:,891.6
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,2.1444,7.378,0.291,0.772,-12.541,16.829
Characterized BldgSF,0.0001,6.02e-05,1.791,0.077,-1.2e-05,0.000
NZPBldgType_DFAC,57.3916,15.489,3.705,0.000,26.562,88.221
NZPBldgType_Office-Large,-13.8734,17.366,-0.799,0.427,-48.439,20.692
NZPBldgType_PX,38.5549,16.672,2.313,0.023,5.370,71.740

0,1,2,3
Omnibus:,71.433,Durbin-Watson:,2.185
Prob(Omnibus):,0.0,Jarque-Bera (JB):,429.161
Skew:,2.722,Prob(JB):,6.44e-94
Kurtosis:,12.643,Cond. No.,470000.0


# Material Food Model

In [166]:
# OLS fit for Food: 'Total Pounds/Day' regressed on building square footage
# plus the DFAC and PX building-type indicators, with an intercept column.
X_train = mat_type_food[[
    'Characterized BldgSF',
    'NZPBldgType_DFAC',
    'NZPBldgType_PX',
]].astype('int')  # every predictor column is cast to integer
y_train = mat_type_food['Total Pounds/Day']
X_train_coef = sm.add_constant(X_train)  # adds the 'const' column

mat_type_food_model = sm.OLS(endog=y_train, exog=X_train_coef)
results_food = mat_type_food_model.fit()
results_food.summary()

0,1,2,3
Dep. Variable:,Total Pounds/Day,R-squared:,0.526
Model:,OLS,Adj. R-squared:,0.509
Method:,Least Squares,F-statistic:,31.79
Date:,"Wed, 27 Apr 2022",Prob (F-statistic):,6.34e-14
Time:,18:00:40,Log-Likelihood:,-606.75
No. Observations:,90,AIC:,1222.0
Df Residuals:,86,BIC:,1232.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,5.0695,33.617,0.151,0.880,-61.759,71.898
Characterized BldgSF,0.0005,0.000,1.861,0.066,-3.1e-05,0.001
NZPBldgType_DFAC,681.8709,70.215,9.711,0.000,542.289,821.453
NZPBldgType_PX,145.1589,74.325,1.953,0.054,-2.594,292.911

0,1,2,3
Omnibus:,19.928,Durbin-Watson:,1.435
Prob(Omnibus):,0.0,Jarque-Bera (JB):,111.041
Skew:,0.261,Prob(JB):,7.72e-25
Kurtosis:,8.417,Cond. No.,445000.0


# Material Glass Model

In [168]:
# OLS fit for Glass: 'Total Pounds/Day' regressed on building square footage
# plus the listed building-type indicators, with an intercept column.
X_train = mat_type_glass[[
    'Characterized BldgSF',
    'NZPBldgType_Office-Large',
    'NZPBldgType_School-Primary',
    'NZPBldgType_UEPH',
]].astype('int')  # every predictor column is cast to integer
y_train = mat_type_glass['Total Pounds/Day']
X_train_coef = sm.add_constant(X_train)  # adds the 'const' column

mat_type_glass_model = sm.OLS(endog=y_train, exog=X_train_coef)
results_glass = mat_type_glass_model.fit()
results_glass.summary()

0,1,2,3
Dep. Variable:,Total Pounds/Day,R-squared:,0.275
Model:,OLS,Adj. R-squared:,0.238
Method:,Least Squares,F-statistic:,7.475
Date:,"Wed, 27 Apr 2022",Prob (F-statistic):,3.69e-05
Time:,18:00:49,Log-Likelihood:,-397.34
No. Observations:,84,AIC:,804.7
Df Residuals:,79,BIC:,816.8
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-5.6102,4.300,-1.305,0.196,-14.170,2.950
Characterized BldgSF,0.0002,3.77e-05,4.144,0.000,8.12e-05,0.000
NZPBldgType_Office-Large,-22.2045,11.154,-1.991,0.050,-44.406,-0.003
NZPBldgType_School-Primary,-16.5834,17.078,-0.971,0.335,-50.577,17.410
NZPBldgType_UEPH,22.2042,9.227,2.406,0.018,3.838,40.570

0,1,2,3
Omnibus:,120.264,Durbin-Watson:,2.32
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3582.724
Skew:,4.685,Prob(JB):,0.0
Kurtosis:,33.592,Cond. No.,733000.0


# Material Mixed Paper Model

In [170]:
# OLS fit for Mixed Paper: 'Total Pounds/Day' regressed on building square
# footage plus the listed building-type indicators, with an intercept column.
X_train = mat_type_mixedpaper[[
    'Characterized BldgSF',
    'NZPBldgType_DFAC',
    'NZPBldgType_Office-Large',
    'NZPBldgType_PX',
    'NZPBldgType_School-Primary',
]].astype('int')  # every predictor column is cast to integer
y_train = mat_type_mixedpaper['Total Pounds/Day']
X_train_coef = sm.add_constant(X_train)  # adds the 'const' column

mat_type_mixedpaper_model = sm.OLS(endog=y_train, exog=X_train_coef)
results_mixedpaper = mat_type_mixedpaper_model.fit()
results_mixedpaper.summary()

0,1,2,3
Dep. Variable:,Total Pounds/Day,R-squared:,0.128
Model:,OLS,Adj. R-squared:,0.076
Method:,Least Squares,F-statistic:,2.442
Date:,"Wed, 27 Apr 2022",Prob (F-statistic):,0.0408
Time:,18:01:00,Log-Likelihood:,-388.01
No. Observations:,89,AIC:,788.0
Df Residuals:,83,BIC:,802.9
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.9410,3.187,0.295,0.769,-5.398,7.280
Characterized BldgSF,2.328e-05,2.61e-05,0.892,0.375,-2.86e-05,7.52e-05
NZPBldgType_DFAC,10.7686,6.592,1.634,0.106,-2.342,23.879
NZPBldgType_Office-Large,17.5217,7.615,2.301,0.024,2.375,32.668
NZPBldgType_PX,7.5754,7.103,1.067,0.289,-6.552,21.702
NZPBldgType_School-Primary,10.9623,11.800,0.929,0.356,-12.508,34.432

0,1,2,3
Omnibus:,96.215,Durbin-Watson:,2.316
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1004.142
Skew:,3.586,Prob(JB):,8.979999999999999e-219
Kurtosis:,17.811,Cond. No.,731000.0


# Material Newspaper Model

In [172]:
# OLS fit for Newspaper: 'Total Pounds/Day' regressed on building square
# footage plus the School-Primary and Training Barracks indicators, with an
# intercept column.
X_train = mat_type_newspaper[[
    'Characterized BldgSF',
    'NZPBldgType_School-Primary',
    'NZPBldgType_Training Barracks',
]].astype('int')  # every predictor column is cast to integer
y_train = mat_type_newspaper['Total Pounds/Day']
X_train_coef = sm.add_constant(X_train)  # adds the 'const' column

mat_type_newspaper_model = sm.OLS(endog=y_train, exog=X_train_coef)
results_newspaper = mat_type_newspaper_model.fit()
results_newspaper.summary()

0,1,2,3
Dep. Variable:,Total Pounds/Day,R-squared:,0.293
Model:,OLS,Adj. R-squared:,0.266
Method:,Least Squares,F-statistic:,10.65
Date:,"Wed, 27 Apr 2022",Prob (F-statistic):,6.18e-06
Time:,18:01:11,Log-Likelihood:,-174.18
No. Observations:,81,AIC:,356.4
Df Residuals:,77,BIC:,365.9
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.1480,0.335,-0.442,0.660,-0.815,0.519
Characterized BldgSF,1.475e-05,2.69e-06,5.485,0.000,9.4e-06,2.01e-05
NZPBldgType_School-Primary,-1.9552,1.270,-1.540,0.128,-4.484,0.574
NZPBldgType_Training Barracks,-0.8956,0.795,-1.126,0.264,-2.480,0.688

0,1,2,3
Omnibus:,72.55,Durbin-Watson:,2.195
Prob(Omnibus):,0.0,Jarque-Bera (JB):,503.609
Skew:,2.78,Prob(JB):,4.39e-110
Kurtosis:,13.877,Cond. No.,654000.0


# Material Non-recyclable MSW Model

In [174]:
# OLS fit for Non-recyclable MSW: 'Total Pounds/Day' regressed on building
# square footage plus the listed building-type indicators, with an intercept
# column.
X_train = mat_type_nonrecyclmsw[[
    'Characterized BldgSF',
    'NZPBldgType_GIB',
    'NZPBldgType_Office-Large',
    'NZPBldgType_PX',
    'NZPBldgType_School-Primary',
    'NZPBldgType_Training Barracks',
]].astype('int')  # every predictor column is cast to integer
y_train = mat_type_nonrecyclmsw['Total Pounds/Day']
X_train_coef = sm.add_constant(X_train)  # adds the 'const' column

mat_type_nonrecyclmsw_model = sm.OLS(endog=y_train, exog=X_train_coef)
results_nonrecyclmsw = mat_type_nonrecyclmsw_model.fit()
results_nonrecyclmsw.summary()

0,1,2,3
Dep. Variable:,Total Pounds/Day,R-squared:,0.229
Model:,OLS,Adj. R-squared:,0.173
Method:,Least Squares,F-statistic:,4.106
Date:,"Wed, 27 Apr 2022",Prob (F-statistic):,0.00117
Time:,18:01:23,Log-Likelihood:,-546.58
No. Observations:,90,AIC:,1107.0
Df Residuals:,83,BIC:,1125.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,13.1448,17.286,0.760,0.449,-21.237,47.527
Characterized BldgSF,0.0007,0.000,4.779,0.000,0.000,0.001
NZPBldgType_GIB,-68.7525,38.553,-1.783,0.078,-145.432,7.927
NZPBldgType_Office-Large,-132.0047,44.123,-2.992,0.004,-219.763,-44.246
NZPBldgType_PX,-52.7939,40.451,-1.305,0.195,-133.249,27.661
NZPBldgType_School-Primary,-104.9820,66.584,-1.577,0.119,-237.415,27.450
NZPBldgType_Training Barracks,-51.3072,39.792,-1.289,0.201,-130.452,27.838

0,1,2,3
Omnibus:,142.84,Durbin-Watson:,2.402
Prob(Omnibus):,0.0,Jarque-Bera (JB):,7520.324
Skew:,5.511,Prob(JB):,0.0
Kurtosis:,46.405,Cond. No.,756000.0


# Material Paperboard Model

In [176]:
# OLS fit for Paperboard: 'Total Pounds/Day' regressed on building square
# footage plus the listed building-type indicators, with an intercept column.
X_train = mat_type_paperboard[[
    'Characterized BldgSF',
    'NZPBldgType_GIB',
    'NZPBldgType_Office-Large',
    'NZPBldgType_School-Primary',
]].astype('int')  # every predictor column is cast to integer
y_train = mat_type_paperboard['Total Pounds/Day']
X_train_coef = sm.add_constant(X_train)  # adds the 'const' column

mat_type_paperboard_model = sm.OLS(endog=y_train, exog=X_train_coef)
results_paperboard = mat_type_paperboard_model.fit()
results_paperboard.summary()

0,1,2,3
Dep. Variable:,Total Pounds/Day,R-squared:,0.217
Model:,OLS,Adj. R-squared:,0.179
Method:,Least Squares,F-statistic:,5.753
Date:,"Wed, 27 Apr 2022",Prob (F-statistic):,0.000389
Time:,18:01:35,Log-Likelihood:,-316.07
No. Observations:,88,AIC:,642.1
Df Residuals:,83,BIC:,654.5
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,2.7226,1.342,2.028,0.046,0.052,5.393
Characterized BldgSF,5.412e-05,1.18e-05,4.596,0.000,3.07e-05,7.75e-05
NZPBldgType_GIB,-5.6062,3.100,-1.809,0.074,-11.772,0.559
NZPBldgType_Office-Large,-7.9944,3.507,-2.280,0.025,-14.969,-1.020
NZPBldgType_School-Primary,-8.6202,5.437,-1.586,0.117,-19.434,2.193

0,1,2,3
Omnibus:,73.875,Durbin-Watson:,2.24
Prob(Omnibus):,0.0,Jarque-Bera (JB):,508.053
Skew:,2.65,Prob(JB):,4.76e-111
Kurtosis:,13.51,Cond. No.,728000.0


# Material Soiled Paper Model

In [178]:
# OLS fit for Soiled Paper: 'Total Pounds/Day' regressed on building square
# footage plus the listed building-type indicators, with an intercept column.
X_train = mat_type_soiledpaper[[
    'Characterized BldgSF',
    'NZPBldgType_GIB',
    'NZPBldgType_Office-Small',
    'NZPBldgType_PFF',
    'NZPBldgType_School-Primary',
    'NZPBldgType_Training Barracks',
    'NZPBldgType_UEPH',
]].astype('int')  # every predictor column is cast to integer
y_train = mat_type_soiledpaper['Total Pounds/Day']
X_train_coef = sm.add_constant(X_train)  # adds the 'const' column

mat_type_soiledpaper_model = sm.OLS(endog=y_train, exog=X_train_coef)
results_soiledpaper = mat_type_soiledpaper_model.fit()
results_soiledpaper.summary()

0,1,2,3
Dep. Variable:,Total Pounds/Day,R-squared:,0.29
Model:,OLS,Adj. R-squared:,0.229
Method:,Least Squares,F-statistic:,4.733
Date:,"Wed, 27 Apr 2022",Prob (F-statistic):,0.000171
Time:,18:01:47,Log-Likelihood:,-441.28
No. Observations:,89,AIC:,898.6
Df Residuals:,81,BIC:,918.5
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,51.0531,6.870,7.432,0.000,37.385,64.722
Characterized BldgSF,6.176e-05,4.24e-05,1.457,0.149,-2.26e-05,0.000
NZPBldgType_GIB,-24.2969,12.804,-1.898,0.061,-49.773,1.179
NZPBldgType_Office-Small,-49.3432,15.063,-3.276,0.002,-79.315,-19.372
NZPBldgType_PFF,-27.4317,13.422,-2.044,0.044,-54.137,-0.726
NZPBldgType_School-Primary,-41.7264,21.787,-1.915,0.059,-85.076,1.623
NZPBldgType_Training Barracks,-49.7046,13.364,-3.719,0.000,-76.294,-23.115
NZPBldgType_UEPH,-46.3248,11.987,-3.865,0.000,-70.175,-22.475

0,1,2,3
Omnibus:,24.007,Durbin-Watson:,1.954
Prob(Omnibus):,0.0,Jarque-Bera (JB):,32.856
Skew:,1.272,Prob(JB):,7.34e-08
Kurtosis:,4.545,Cond. No.,747000.0


# Material Steel / Ferrous Model

In [180]:
# OLS fit for Steel / Ferrous: 'Total Pounds/Day' regressed on building square
# footage plus the listed building-type indicators, with an intercept column.
X_train = mat_type_steelferrous[[
    'Characterized BldgSF',
    'NZPBldgType_DFAC',
    'NZPBldgType_PFF',
    'NZPBldgType_Training Barracks',
]].astype('int')  # every predictor column is cast to integer
y_train = mat_type_steelferrous['Total Pounds/Day']
X_train_coef = sm.add_constant(X_train)  # adds the 'const' column

mat_type_steelferrous_model = sm.OLS(endog=y_train, exog=X_train_coef)
results_steelferrous = mat_type_steelferrous_model.fit()
results_steelferrous.summary()

0,1,2,3
Dep. Variable:,Total Pounds/Day,R-squared:,0.106
Model:,OLS,Adj. R-squared:,0.061
Method:,Least Squares,F-statistic:,2.391
Date:,"Wed, 27 Apr 2022",Prob (F-statistic):,0.0575
Time:,18:01:59,Log-Likelihood:,-308.76
No. Observations:,86,AIC:,627.5
Df Residuals:,81,BIC:,639.8
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,5.3743,1.610,3.338,0.001,2.171,8.578
Characterized BldgSF,-4.368e-06,1.07e-05,-0.407,0.685,-2.57e-05,1.7e-05
NZPBldgType_DFAC,6.2451,3.215,1.942,0.056,-0.152,12.642
NZPBldgType_PFF,-4.7933,3.440,-1.394,0.167,-11.637,2.050
NZPBldgType_Training Barracks,-4.6200,3.413,-1.354,0.180,-11.410,2.170

0,1,2,3
Omnibus:,98.389,Durbin-Watson:,2.284
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1285.702
Skew:,3.693,Prob(JB):,6.51e-280
Kurtosis:,20.443,Cond. No.,521000.0


# Material White Paper Model

In [182]:
# OLS fit for White Paper: 'Total Pounds/Day' regressed on building square
# footage plus the listed building-type indicators, with an intercept column.
X_train = mat_type_whitepaper[[
    'Characterized BldgSF',
    'NZPBldgType_GIB',
    'NZPBldgType_Office-Large',
    'NZPBldgType_PX',
    'NZPBldgType_School-Primary',
]].astype('int')  # every predictor column is cast to integer
y_train = mat_type_whitepaper['Total Pounds/Day']
X_train_coef = sm.add_constant(X_train)  # adds the 'const' column

mat_type_whitepaper_model = sm.OLS(endog=y_train, exog=X_train_coef)
results_whitepaper = mat_type_whitepaper_model.fit()
results_whitepaper.summary()

0,1,2,3
Dep. Variable:,Total Pounds/Day,R-squared:,0.288
Model:,OLS,Adj. R-squared:,0.242
Method:,Least Squares,F-statistic:,6.376
Date:,"Wed, 27 Apr 2022",Prob (F-statistic):,5.05e-05
Time:,18:02:10,Log-Likelihood:,-327.79
No. Observations:,85,AIC:,667.6
Df Residuals:,79,BIC:,682.2
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.5408,1.842,0.294,0.770,-3.125,4.207
Characterized BldgSF,3.071e-05,1.59e-05,1.933,0.057,-9.13e-07,6.23e-05
NZPBldgType_GIB,10.2511,4.141,2.476,0.015,2.009,18.493
NZPBldgType_Office-Large,10.6082,4.991,2.125,0.037,0.673,20.543
NZPBldgType_PX,9.0196,4.347,2.075,0.041,0.367,17.673
NZPBldgType_School-Primary,19.4360,7.196,2.701,0.008,5.113,33.759

0,1,2,3
Omnibus:,55.358,Durbin-Watson:,1.886
Prob(Omnibus):,0.0,Jarque-Bera (JB):,238.417
Skew:,2.069,Prob(JB):,1.69e-52
Kurtosis:,10.085,Cond. No.,741000.0


# Material Yard Trimmings Model

In [184]:
# OLS fit for Yard Trimmings: 'Total Pounds/Day' regressed on building square
# footage and the UEPH building-type indicator, with an intercept column.
X_train = mat_type_yardtrimmings[[
    'Characterized BldgSF',
    'NZPBldgType_UEPH',
]].astype('int')  # every predictor column is cast to integer
y_train = mat_type_yardtrimmings['Total Pounds/Day']
X_train_coef = sm.add_constant(X_train)  # adds the 'const' column

mat_type_yardtrimmings_model = sm.OLS(endog=y_train, exog=X_train_coef)
results_yardtrimmings = mat_type_yardtrimmings_model.fit()
results_yardtrimmings.summary()

0,1,2,3
Dep. Variable:,Total Pounds/Day,R-squared:,0.089
Model:,OLS,Adj. R-squared:,0.047
Method:,Least Squares,F-statistic:,2.11
Date:,"Wed, 27 Apr 2022",Prob (F-statistic):,0.134
Time:,18:02:21,Log-Likelihood:,-215.92
No. Observations:,46,AIC:,437.8
Df Residuals:,43,BIC:,443.3
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,3.0646,5.913,0.518,0.607,-8.861,14.990
Characterized BldgSF,-1.178e-06,6.71e-05,-0.018,0.986,-0.000,0.000
NZPBldgType_UEPH,23.0710,11.238,2.053,0.046,0.407,45.735

0,1,2,3
Omnibus:,80.036,Durbin-Watson:,1.13
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1253.564
Skew:,4.522,Prob(JB):,6.2000000000000006e-273
Kurtosis:,26.921,Cond. No.,240000.0
