# ML Regression Modeling

In [15]:
import pandas as pd
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn.svm import SVR

In [16]:
# Load the previously pickled train/test frames (this cell only reads them).
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
train_path = 'data/SCUT-FBP5500_v2/train_df'
test_path = 'data/SCUT-FBP5500_v2/test_df'
train = pd.read_pickle(train_path)
test = pd.read_pickle(test_path)

In [17]:
# Feature columns shared by both splits; 'rating' is the regression target.
# Defining the list once keeps the train and test matrices on identical columns.
# NOTE(review): the traceback recorded under the grid-search cell fails inside the
# pipeline's transformer (scaler) step with "only size-1 arrays can be converted
# to Python scalars" -- presumably one of these columns (likely 'bovw') stores an
# array per row instead of a scalar. TODO confirm and expand it into numeric
# columns before scaling.
feature_cols = ['male', 'asian', 'bovw', 'PCA_1', 'PCA_2']
target_col = 'rating'

X_train, y_train = train[feature_cols], train[target_col]
X_test, y_test = test[feature_cols], test[target_col]

In [None]:
# --- Support Vector Regression baseline ---
# Standardise the features: the scaler is fitted on the training split only and
# its statistics are reused for the test split, so no test information leaks.
# NOTE(review): assumes every feature column is scalar-numeric -- TODO confirm.
sc_X = StandardScaler()
X_train_scaled = sc_X.fit_transform(X_train)
X_test_scaled = sc_X.transform(X_test)

# RBF kernel for now; a polynomial kernel (kernel='poly') is worth trying as well.
svr_reg = SVR(kernel='rbf')

# train -- the model has to be fitted before predict() can be called
svr_reg.fit(X_train_scaled, y_train)

# predict
y_pred = svr_reg.predict(X_test_scaled)

# score -- for a regressor, .score() reports R^2 on the given data
svr_r2 = svr_reg.score(X_test_scaled, y_test)
print('SVR test R^2: %.3f' % svr_r2)

# TODO: the original cell drew `plt.plot(X_grid, svr_reg.predict(X_grid))`, but
# neither `plt` nor `X_grid` is defined in the cells above, and the model is
# fitted on five features, so a single fitted curve cannot be drawn from it.
# Re-add a plot once matplotlib is imported and a one-feature grid is built.

In [None]:
# --- Random forest regression baseline ---
# The feature matrix and target are passed as selected above: the pandas
# objects produced by column selection have no `.reshape`, and flattening the
# multi-column features to a single column would break the row alignment with y.
rf_reg = RandomForestRegressor(n_estimators=10, random_state=0)
rf_reg.fit(X_train, y_train)

# Predictions for the held-out split
y_pred = rf_reg.predict(X_test)

In [18]:
# Grid-search two regression pipelines (random forest, SVR), compare them on the
# test split, and pickle the best search object plus a summary of all results.

# Pipelines: each one standardises the features before the estimator sees them.
pipe_rf = Pipeline([('scl', StandardScaler()),
                    ('clf', RandomForestRegressor(random_state=42))])

pipe_svr = Pipeline([('scl', StandardScaler()),
                     ('clf', SVR())])

# Set grid search params
param_range = [9, 10]
param_range_fl = [1.0, 0.5]

# 'gini' / 'entropy' are classifier split criteria and are rejected by
# RandomForestRegressor, so the criterion is left at the regressor's default.
grid_params_rf = [{'clf__max_depth': param_range,
                   'clf__min_samples_split': param_range[1:]}]

grid_params_svr = [{'clf__kernel': ['poly', 'rbf'],
                    'clf__C': param_range_fl}]

# Construct grid searches
jobs = -1  # use all available cores

# 'accuracy' is a classification metric and fails on a continuous target.
# R^2 is used instead: it is a regression metric where higher is better, so the
# "keep the highest score" logic below still applies.
scoring = 'r2'

gs_rf = GridSearchCV(estimator=pipe_rf,
                     param_grid=grid_params_rf,
                     scoring=scoring,
                     cv=5,
                     n_jobs=jobs)
gs_svr = GridSearchCV(estimator=pipe_svr,
                      param_grid=grid_params_svr,
                      scoring=scoring,
                      cv=5,
                      n_jobs=jobs)

# List of grid searches for iterating through each of them
grids = [gs_rf, gs_svr]

# Human-readable names, keyed by position in `grids`
grid_dict = {0: 'Random Forest',
             1: 'Support Vector Machine'}

# name -> [cv_results_, best_params_, best CV score, test score]
grid_dict_results = {}

# Fit the grid search objects
print('Performing model optimizations...')
# Names are kept from the original cell; the tracked score is now R^2.
# R^2 can be negative, so start below any possible score rather than at 0.0 --
# otherwise no model would be selected when every score is <= 0.
best_acc = float('-inf')
best_clf = 0
best_gs = None
for idx, gs in enumerate(grids):
    print('\nEstimator: %s' % grid_dict[idx])
    gs.fit(X_train, y_train)
    print('Best params are : %s' % gs.best_params_)
    # Mean cross-validated score of the best parameter combination
    print('Best mean cross-validated R^2: %.3f' % gs.best_score_)
    # Predict on test data with best params
    y_pred = gs.predict(X_test)
    # Test-set score of the refitted best estimator; GridSearchCV.score uses the
    # metric given in `scoring`, i.e. R^2 here.
    acc = gs.score(X_test, y_test)
    print('Test set R^2 for best params: %.3f ' % acc)
    grid_dict_results[grid_dict[idx]] = [gs.cv_results_, gs.best_params_, gs.best_score_, acc]
    # Track best (highest test score) model.
    # NOTE(review): choosing the winner on the test split makes the reported
    # test score optimistic; consider selecting on gs.best_score_ instead.
    if acc > best_acc:
        best_acc = acc
        best_gs = gs
        best_clf = idx
print('\nEstimator with best test set R^2: %s' % grid_dict[best_clf])

# Save best grid search pipeline and the results summary to file
gs_dump_file = 'best_grid_search_pipeline.pkl'
dict_dump_file = 'ml_gs_dict.pkl'
path1 = 'data/SCUT-FBP5500_v2/' + gs_dump_file
path2 = 'data/SCUT-FBP5500_v2/' + dict_dump_file
with open(path1, 'wb') as f:
    pickle.dump(best_gs, f)
with open(path2, 'wb') as f:
    pickle.dump(grid_dict_results, f)
# The original message referenced an undefined `dump_file`; report the real path.
print('\nSaved %s grid search pipeline to file: %s' % (grid_dict[best_clf], path1))

Performing model optimizations...

Estimator: Random Forest


TypeError: only size-1 arrays can be converted to Python scalars

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/pipeline.py", line 341, in fit
    Xt = self._fit(X, y, **fit_params_steps)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/pipeline.py", line 303, in _fit
    X, fitted_transformer = fit_transform_one_cached(
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/joblib/memory.py", line 352, in __call__
    return self.func(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/pipeline.py", l