## Mayberry GPP Regression using Westpond Data Cross Validation

### Train Features
1. land surface temp (wp_LST.day)
2. sensible heat flux (wp_h)
3. latent heat flux (wp_le)
4. net radiation (net_rad)
5. avg air temp (avg_air_temp)

In [1]:
import sys
sys.path.append('../')
import exp
import regression as r

In [2]:
# Load the dataset for this experiment through the project's `exp` helper
# module. The preview below shows both wp_* and mb_* columns
# (presumably the Westpond and Mayberry sites named in the title -- confirm).
df = exp.get_exp1_data()
# Show the first rows as a quick sanity check of the columns and values.
df.head()

Unnamed: 0,avg_air_temp,avg_soil_temp,doy,net_rad,year,wp_ch4_gf,wp_co2_gf,wp_er,wp_gpp,wp_h,...,mb_bnd2,mb_bnd3,mb_bnd7,mb_evi,mb_lswi,mb_ndvi,wp_LST.day,wp_LST.night,mb_LST.day,mb_LST.night
0,19.2,22.3,195,190.0,2012,4332.368657,-304.542172,145.072376,-449.614548,1447.549899,...,0.187575,0.025212,0.053137,0.298162,0.56237,0.6491,29.61,17.285,26.335,18.645
1,19.3,21.8,196,189.0,2012,5305.896768,-335.648791,150.278671,-485.927462,1921.833137,...,0.186562,0.024569,0.051306,0.296544,0.574074,0.6504,29.63,17.2325,26.4075,18.5925
2,20.3,21.9,197,187.0,2012,6215.371936,-313.150966,158.307017,-471.457982,1176.374322,...,0.18555,0.023925,0.049475,0.294925,0.585779,0.6517,29.65,17.18,26.48,18.54
3,16.8,22.4,198,186.0,2012,7129.353337,-339.900067,153.561669,-493.461736,2575.636175,...,0.184537,0.023281,0.047644,0.293306,0.597483,0.653,29.67,17.1275,26.5525,18.4875
4,17.0,21.5,199,151.0,2012,7070.768573,-319.771564,144.05348,-463.825044,1916.08126,...,0.183525,0.022638,0.045812,0.291687,0.609188,0.6543,29.69,17.075,26.625,18.435


In [3]:
# Predictor columns (the five features listed at the top of the notebook).
train_cols = [
    "wp_LST.day",
    "wp_h",
    "wp_le",
    "net_rad",
    "avg_air_temp",
]
# Regression target column; kept as a one-element list, as featurize receives it.
test_col = ["mb_gpp"]

# Extract the feature matrix and target from the frame, then run the shared
# preprocessing step, which also hands back the scaler it used.
X, Y = exp.featurize(df, train_cols, test_col)
X, Y, scaler = r.preprocess(X, Y)

# Display (n_samples, n_features) as a sanity check.
X.shape

(1028, 5)

In [4]:
# Cross-validate a random forest on the prepared features; feature_names is
# passed so the printed importances are labelled with the column names.
# NOTE(review): the estimator repr in the output shows random_state=None, so
# the scores and importances will differ between runs -- consider fixing a
# seed if the helper exposes one.
# NOTE(review): the output labels the score "Acc", but the estimator is a
# regressor; the metric is presumably R^2 -- confirm in regression.py.
r.random_forests_cross_val(X, Y, feature_names=train_cols)

Running Random Forests Cross Validation...
10-fold CV Acc Mean:  0.855926995364
CV Scores:  0.81078684457, 0.856100035512, 0.828549759205, 0.840965813851, 0.859371268925, 0.852462811679, 0.876835692646, 0.849486000041, 0.890574203209, 0.894137524004
OOB score: 0.86042393449
Feature Importances:
('wp_le', 0.39163916948025551)
('wp_LST.day', 0.24745766551908016)
('avg_air_temp', 0.16501872702145193)
('net_rad', 0.13914815345890422)
('wp_h', 0.05673628452030844)


RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='sqrt', max_leaf_nodes=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=200, n_jobs=1, oob_score=True, random_state=None,
           verbose=0, warm_start=False)

In [5]:
# Cross-validate gradient boosted trees on the same features and target.
# NOTE(review): despite the "xgb" in the helper name, the estimator repr in
# the output is a GradientBoostingRegressor, again with random_state=None.
r.xgb_trees_cross_val(X, Y, feature_names=train_cols)

Running Gradient Boosted Trees Cross Validation...
10-fold CV Acc Mean:  0.842574047149
CV Scores:  0.790883841031, 0.860954815832, 0.804814175612, 0.831959950823, 0.855813637432, 0.846880829285, 0.851889699044, 0.850039717368, 0.878731434854, 0.853772370211
Feature Importances:
('wp_LST.day', 0.28121623762406395)
('wp_le', 0.23823080616760997)
('avg_air_temp', 0.17820247907133868)
('net_rad', 0.17772011493616316)
('wp_h', 0.12463036220082452)


GradientBoostingRegressor(alpha=0.9, init=None, learning_rate=0.1, loss='ls',
             max_depth=3, max_features='sqrt', max_leaf_nodes=None,
             min_samples_leaf=1, min_samples_split=2,
             min_weight_fraction_leaf=0.0, n_estimators=200,
             presort='auto', random_state=None, subsample=1.0, verbose=0,
             warm_start=False)

In [None]:
# Cross-validate a support vector model on the same features and target.
# NOTE(review): the helper and its log line say "SVC", but the estimator repr
# in the output is an SVR (regressor) with an RBF kernel and C=1.0.
# NOTE(review): the mean score (~0.51) is far below the tree models; the
# hyperparameters shown look like untuned defaults -- confirm before comparing.
r.svc_cross_val(X, Y)

Running SVC Cross Validation...
10-fold CV Acc Mean:  0.512790551053
CV Scores:  0.446803658401, 0.518398433128, 0.520225529295, 0.522525083646, 0.463028629609, 0.524867474039, 0.544431386886, 0.533054897011, 0.497922339156, 0.55664807936


SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [None]:
# Cross-validate a neural network on the same features and target.
# The output is a long per-step training-loss log that restarts from
# "Step #1" several times (presumably once per CV fold -- confirm).
# NOTE(review): consider capturing or trimming this log before sharing the
# notebook, since it buries the final scores.
r.dnn_cross_val(X, Y)

Running Neural Network Cross Validation...
Step #1, avg. loss: 59912.69922
Step #501, epoch #50, avg. loss: 15513.64062
Step #1001, epoch #100, avg. loss: 6464.88721
Step #1501, epoch #150, avg. loss: 4836.75879
Step #2001, epoch #200, avg. loss: 3822.83203
Step #2501, epoch #250, avg. loss: 3183.04858
Step #3001, epoch #300, avg. loss: 2821.86841
Step #3501, epoch #350, avg. loss: 2438.83789
Step #4001, epoch #400, avg. loss: 2230.06201
Step #4501, epoch #450, avg. loss: 2031.02515
Step #1, avg. loss: 57986.80078
Step #501, epoch #50, avg. loss: 15363.63379
Step #1001, epoch #100, avg. loss: 6214.46240
Step #1501, epoch #150, avg. loss: 4699.26416
Step #2001, epoch #200, avg. loss: 3774.84058
Step #2501, epoch #250, avg. loss: 3152.45215
Step #3001, epoch #300, avg. loss: 2705.05347
Step #3501, epoch #350, avg. loss: 2470.75610
Step #4001, epoch #400, avg. loss: 2235.34692
Step #4501, epoch #450, avg. loss: 2037.89429
Step #1, avg. loss: 60443.48438
Step #501, epoch #50, avg. loss: 15