In [81]:
# Random Forest Regression with standardization
# 1. Import libraries and modules
import numpy as np
import pandas as pd
 
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.externals import joblib 

data = pd.read_csv("makeup.csv")
data.head()


y = data[['Arousal', 'Valence']]
X = data.drop(['Arousal', 'Valence'], axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)

# 2. Declare data preprocessing steps
max_dept = 30
pipeline = make_pipeline(preprocessing.StandardScaler(), 
                         RandomForestRegressor(n_estimators=10,max_depth=max_depth, random_state=2))

# 3. Declare hyperparameters to tune
hyperparameters = { 'randomforestregressor__max_features' : ['auto', 'sqrt', 'log2'],
                  'randomforestregressor__max_depth': [None, 5, 3, 1]}

# 4. Tune model using cross-validation pipeline
clf = GridSearchCV(pipeline, hyperparameters, cv=5)
clf.fit(X_train, y_train)

# 5. check the model performance
pred = clf.predict(X_test)
print(pred)
print(r2_score(y_test, pred))
print(mean_squared_error(y_test, pred))

# 6. test one
d = {'Neutral': [0], 'Happy': [1], 'Sad': [0], 'Suprise': [0],
    'Fear': [1], 'Disgust': [0.1], 'Anger': [0], 'Contempt': [0]}
df2 = pd.DataFrame(data=d)
pred = clf.predict(df2)
print(pred)

# API call
def calculateVA(neutral, happy, sad, suprise, fear, disgust, anger, contemp):
    d = {'Neutral': [neutral], 'Happy': [happy], 'Sad': [sad], 'Suprise': [suprise],
    'Fear': [fear], 'Disgust': [disgust], 'Anger': [anger], 'Contempt': [cotemp]}
    df2 = pd.DataFrame(data=d)
    pred = clf.predict(df2)

[[0.68 0.65]
 [0.33 0.67]
 [0.71 0.49]
 [0.52 0.79]
 [0.62 0.81]
 [0.6  0.69]]
0.08604630772228339
0.05933333333333335
[[0.61 0.55]]


In [88]:
# Random Forest Regression without standardization
# 1. Import libraries and modules
import numpy as np
import pandas as pd
 
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.externals import joblib 

data = pd.read_csv("makeup.csv")
data.head()


y = data[['Arousal', 'Valence']]
X = data.drop(['Arousal', 'Valence'], axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)

# 2. Declare data preprocessing steps
max_depth = 30
pipeline = make_pipeline(RandomForestRegressor(n_estimators=10,max_depth=max_depth, random_state=2))
# rf = RandomForestRegressor(n_estimators=10, max_depth=max_depth, random_state=2)

# 3. Declare hyperparameters to tune
hyperparameters = { 'randomforestregressor__max_features' : ['auto', 'sqrt', 'log2'],
                  'randomforestregressor__max_depth': [None, 5, 3, 1]}

# 4. Tune model using cross-validation pipeline
rf = GridSearchCV(pipeline, hyperparameters, cv=5)

rf.fit(X_train, y_train)

# 5. check the model performance
pred = rf.predict(X_test)
print(pred)
print(r2_score(y_test, pred))
print(mean_squared_error(y_test, pred))

# 6. test one
d = {'Neutral': [0], 'Happy': [1], 'Sad': [0], 'Suprise': [0],
    'Fear': [1], 'Disgust': [0.1], 'Anger': [0], 'Contempt': [0]}
df2 = pd.DataFrame(data=d)
pred = rf.predict(df2)
print(pred)


[[0.71 0.73]
 [0.33 0.67]
 [0.71 0.49]
 [0.52 0.79]
 [0.62 0.81]
 [0.6  0.69]]
0.03090374748327579
0.06534166666666669
[[0.61 0.55]]


In [103]:
# multiple output regression without Cross Validation -> Cannot fit ???
# 1. Import libraries and modules
import numpy as np
import pandas as pd
 
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.externals import joblib 
from sklearn.multioutput import MultiOutputRegressor

data = pd.read_csv("makeup.csv")
data.head()


y = data[['Arousal', 'Valence']]
X = data.drop(['Arousal', 'Valence'], axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)

# 2. Declare data preprocessing steps
max_depth = 30
crf = MultiOutputRegressor(
    RandomForestRegressor(n_estimators=10, max_depth=max_depth, random_state=2))

# 4. Tune model using cross-validation pipeline
crf.fit(X_train, y_train)

# 5. check the model performance
pred = clf.predict(X_test)
print(pred)
print(r2_score(y_test, pred))
print(mean_squared_error(y_test, pred))

# 6. test one
d = {'Neutral': [0], 'Happy': [1], 'Sad': [0], 'Suprise': [0],
    'Fear': [1], 'Disgust': [0.1], 'Anger': [0], 'Contempt': [0]}
df2 = pd.DataFrame(data=d)
pred = clf.predict(df2)
print(pred)
 


NotFittedError: This GridSearchCV instance is not fitted yet. Call 'fit' with appropriate arguments before using this method.

In [None]:
# multiple output regression with Cross Validation -> format error ???
# 1. Import libraries and modules
import numpy as np
import pandas as pd
 
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.externals import joblib 
from sklearn.multioutput import MultiOutputRegressor

data = pd.read_csv("makeup.csv")
data.head()


y = data[['Arousal', 'Valence']]
X = data.drop(['Arousal', 'Valence'], axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)

# 2. Declare data preprocessing steps
max_depth = 30
pipeline = make_pipeline(MultiOutputRegressor(
    RandomForestRegressor(n_estimators=10, max_depth=max_depth, random_state=2)))

# 3. Declare hyperparameters to tune
hyperparameters = { 'randomforestregressor__max_features' : ['auto', 'sqrt', 'log2'],
                  'randomforestregressor__max_depth': [None, 5, 3, 1]}

# 4. Tune model using cross-validation pipeline
clf = GridSearchCV(pipeline, hyperparameters, cv=5)
print(clf.get_params().keys())
clf.fit(X_train, y_train)

# 5. check the model performance
pred = clf.predict(X_test)
print(pred)
print(r2_score(y_test, pred))
print(mean_squared_error(y_test, pred))

# 6. test one
d = {'Neutral': [0], 'Happy': [1], 'Sad': [0], 'Suprise': [0],
    'Fear': [1], 'Disgust': [0.1], 'Anger': [0], 'Contempt': [0]}
df2 = pd.DataFrame(data=d)
pred = clf.predict(df2)
print(pred)
 
