In [None]:
# Install the third-party packages this notebook imports, using the kernel's own pip.
# NOTE(review): no versions are pinned, so a fresh environment may resolve different releases.
%pip install pandas
%pip install scipy
%pip install numpy
%pip install seaborn
%pip install matplotlib
%pip install statsmodels
%pip install scikit-learn
%pip install shap

# Welcome to Machine Learning and Hyperparameter Tuning!

## Functions needed for this notebook

In [None]:
import pandas as pd
import numpy as np
import pprint

def pretty_print(dictionary):
    """Pretty-print ``dictionary`` to stdout and pad it with blank lines.

    The object is rendered by ``pprint`` with ``indent=1``. A newline
    character is printed afterwards, which (together with ``print``'s own
    line terminator) leaves two blank lines after the rendered object.
    """
    pprint.pprint(dictionary, indent=1)
    print('\n')


In [None]:
def print_df(df):
    """Render ``df`` with the notebook's rich ``display``.

    For the duration of the call pandas shows at most 20 rows and does not
    limit the number of columns; the options revert when the ``with`` block
    exits.
    """
    display_options = ('display.max_rows', 20, 'display.max_columns', None)
    with pd.option_context(*display_options):
        display(df)

### Evaluation Metrics

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import make_scorer
from scipy.stats import pearsonr


def pearson_correlation(y_true, y_pred):
    """Return Pearson's correlation coefficient between targets and predictions.

    Only element 0 of the ``scipy.stats.pearsonr`` result (the coefficient)
    is returned; the accompanying p-value is discarded.
    """
    correlation_result = pearsonr(y_true, y_pred)
    coefficient = correlation_result[0]
    return coefficient


# Scoring table handed to GridSearchCV(scoring=...) by every *_ml function:
# three scikit-learn scorer names passed through as strings, plus a custom
# scorer wrapping pearson_correlation.
scorers = dict(
    neg_mean_absolute_error='neg_mean_absolute_error',
    neg_mean_absolute_percentage_error='neg_mean_absolute_percentage_error',
    neg_median_absolute_error='neg_median_absolute_error',
    pearson_corr=make_scorer(pearson_correlation),
)

### Feature Importance

In [None]:
import shap
import matplotlib.pyplot as plt


# Initialise SHAP's JavaScript support once, before any plotting helper is called.
shap.initjs()

def feature_importance(explainer, X_test):
    """Print and plot SHAP-based feature importance for ``X_test``.

    Parameters
    ----------
    explainer : callable SHAP explainer; ``explainer(X_test)`` must return an
        explanation object exposing ``.values`` and ``.feature_names``.
    X_test : table of samples to explain; its ``.columns`` are used to name
        the most important feature.

    Side effects
    ------------
    Prints the name of the feature with the largest mean absolute SHAP value,
    then shows, in order: a sorted horizontal bar chart of mean |SHAP| per
    feature, a beeswarm plot, a scatter plot for the top feature, and force
    plots for the first and the last sample. Nothing is returned.
    """
    explanation = explainer(X_test)

    # Global importance of a feature = mean of |SHAP value| over all samples.
    importance = np.abs(explanation.values).mean(axis=0)

    top_feature = X_test.columns[np.argmax(importance)]
    print(f"The most contributing feature is: {top_feature}")

    # Rank features from most to least important for the bar chart.
    ranking = np.argsort(importance)[::-1]
    ranked_importance = importance[ranking]
    ranked_names = np.array(explanation.feature_names)[ranking]

    fig, ax = plt.subplots()
    bars = ax.barh(ranked_names, ranked_importance, color='orange')
    ax.set_xlabel("Mean |SHAP value| (impact on model output)")
    ax.set_title("SHAP Feature Importance")

    # Annotate every bar with its numeric value, anchored at the bar's tip.
    for bar in bars:
        bar_length = bar.get_width()
        bar_centre = bar.get_y() + bar.get_height() / 2
        ax.text(bar_length, bar_centre, f'{bar_length:.4f}', ha='left', va='center')

    # Flip the y axis so the most important feature is drawn at the top.
    ax.invert_yaxis()
    plt.tight_layout()
    plt.show()

    shap.plots.beeswarm(explanation)
    shap.plots.scatter(explanation[:, top_feature])

    for caption, sample_index in (
        ("Force Plot on First Sample", 0),
        ("Force Plot on Last Sample", -1),
    ):
        print(caption)
        shap.plots.force(explanation[sample_index], matplotlib=True)

### Regression Models

#### Linear Regression

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import median_absolute_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.linear_model import LinearRegression


def linr_ml(X, y, X_test, y_test):
    """Tune a ``LinearRegression`` by grid search and score it on held-out data.

    Every combination of ``fit_intercept`` and ``positive`` is evaluated with
    10-fold cross-validation on ``(X, y)`` using the notebook-level
    ``scorers``; the winner is chosen by ``neg_mean_absolute_error``.

    Parameters
    ----------
    X, y : training features and target passed to ``GridSearchCV.fit``.
    X_test, y_test : held-out features and target, used only for the
        reported metrics.

    Returns
    -------
    tuple
        ``(best_model, performances, best_params)``; ``performances`` maps
        each metric name to its value on the held-out data.
    """
    search = GridSearchCV(
        estimator=LinearRegression(),
        param_grid={
            'fit_intercept': [True, False],
            'positive': [True, False],
        },
        cv=10,
        scoring=scorers,
        refit='neg_mean_absolute_error',
        n_jobs=-1,
        verbose=2,
    )
    search.fit(X, y)

    tuned_model = search.best_estimator_
    predictions = tuned_model.predict(X_test)

    # Hold-out metrics, in the same key order the notebook prints them.
    hold_out_metrics = {
        'mean_absolute_error': mean_absolute_error(y_test, predictions),
        'median_absolute_error': median_absolute_error(y_test, predictions),
        'mean_absolute_percentage_error': mean_absolute_percentage_error(y_test, predictions),
        'pearson_corr': pearsonr(y_test, predictions)[0],
    }

    return tuned_model, hold_out_metrics, search.best_params_


#### Additive Regression

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import median_absolute_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.ensemble import GradientBoostingRegressor

def addr_ml(X, y, X_test, y_test):
    """Tune a ``GradientBoostingRegressor`` by grid search and score it on held-out data.

    The regressor uses ``loss='absolute_error'`` and ``random_state=42``.
    Candidates are compared with 10-fold cross-validation on ``(X, y)``
    using the notebook-level ``scorers``; the winner is chosen by
    ``neg_mean_absolute_error``.

    Parameters
    ----------
    X, y : training features and target passed to ``GridSearchCV.fit``.
    X_test, y_test : held-out features and target, used only for the
        reported metrics.

    Returns
    -------
    tuple
        ``(best_model, performances, best_params)``.
    """
    hyperparameter_grid = {
        'n_estimators': [10, 50, 100],
        'max_depth': [10, 20],
        'min_samples_split': [5, 10],
        'min_samples_leaf': [2, 4],
        'max_features': ['sqrt', 'log2'],
        'learning_rate': [0.001, 0.01, 0.1],
    }
    booster = GradientBoostingRegressor(random_state=42, loss='absolute_error')

    search = GridSearchCV(
        estimator=booster,
        param_grid=hyperparameter_grid,
        cv=10,
        scoring=scorers,
        refit='neg_mean_absolute_error',
        n_jobs=-1,
        verbose=2,
    )
    search.fit(X, y)

    tuned_model = search.best_estimator_
    chosen_params = search.best_params_

    # Score the refitted winner on data the search never saw.
    predictions = tuned_model.predict(X_test)
    hold_out_metrics = {
        'mean_absolute_error': mean_absolute_error(y_test, predictions),
        'median_absolute_error': median_absolute_error(y_test, predictions),
        'mean_absolute_percentage_error': mean_absolute_percentage_error(y_test, predictions),
        'pearson_corr': pearsonr(y_test, predictions)[0],
    }

    return tuned_model, hold_out_metrics, chosen_params


#### Decision Tree Regression

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import median_absolute_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.tree import DecisionTreeRegressor

def dest_ml(X, y, X_test, y_test):
    """Tune a ``DecisionTreeRegressor`` by grid search and score it on held-out data.

    The tree uses ``random_state=42``. Candidates are compared with 10-fold
    cross-validation on ``(X, y)`` using the notebook-level ``scorers``; the
    winner is chosen by ``neg_mean_absolute_error``.

    Parameters
    ----------
    X, y : training features and target passed to ``GridSearchCV.fit``.
    X_test, y_test : held-out features and target, used only for the
        reported metrics.

    Returns
    -------
    tuple
        ``(best_model, performances, best_params)``.
    """
    tree = DecisionTreeRegressor(random_state=42)
    hyperparameter_grid = {
        'criterion': ['friedman_mse', 'absolute_error'],
        'splitter': ['best', 'random'],
        'max_depth': [10, 20, 30, 50],
        'min_samples_split': [5, 10, 20, 30],
        'min_samples_leaf': [2, 4, 8, 10],
        'max_features': ['sqrt', 'log2'],
    }

    search = GridSearchCV(
        estimator=tree,
        param_grid=hyperparameter_grid,
        cv=10,
        scoring=scorers,
        refit='neg_mean_absolute_error',
        n_jobs=-1,
        verbose=2,
    )
    search.fit(X, y)
    tuned_model = search.best_estimator_

    # Evaluate the selected tree on the held-out split.
    predictions = tuned_model.predict(X_test)
    error_metrics = (
        ('mean_absolute_error', mean_absolute_error),
        ('median_absolute_error', median_absolute_error),
        ('mean_absolute_percentage_error', mean_absolute_percentage_error),
    )
    hold_out_metrics = {
        metric_name: metric_fn(y_test, predictions)
        for metric_name, metric_fn in error_metrics
    }
    hold_out_metrics['pearson_corr'] = pearsonr(y_test, predictions)[0]

    return tuned_model, hold_out_metrics, search.best_params_


#### K-Nearest Neighbor 

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import median_absolute_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.neighbors import KNeighborsRegressor

def knn_ml(X, y, X_test, y_test):
    """Tune a ``KNeighborsRegressor`` by grid search and score it on held-out data.

    Candidates are compared with 10-fold cross-validation on ``(X, y)`` using
    the notebook-level ``scorers``; the winner is chosen by
    ``neg_mean_absolute_error``.

    Parameters
    ----------
    X, y : training features and target passed to ``GridSearchCV.fit``.
    X_test, y_test : held-out features and target, used only for the
        reported metrics.

    Returns
    -------
    tuple
        ``(best_model, performances, best_params)``.
    """
    # NOTE(review): n_neighbors is not in the grid, so the estimator's default
    # is always used; `algorithm` and `leaf_size` presumably only change how
    # neighbours are searched, not which ones are found — confirm intent.
    hyperparameter_grid = {
        'weights': ['uniform', 'distance'],
        'algorithm': ['ball_tree', 'kd_tree', 'auto'],
        'leaf_size': [30, 40, 50],
        'p': [1, 2],
        'metric': ['minkowski', 'l1', 'l2'],
    }

    search = GridSearchCV(
        estimator=KNeighborsRegressor(),
        param_grid=hyperparameter_grid,
        cv=10,
        scoring=scorers,
        refit='neg_mean_absolute_error',
        n_jobs=-1,
        verbose=2,
    )
    search.fit(X, y)

    tuned_model = search.best_estimator_
    chosen_params = search.best_params_
    predictions = tuned_model.predict(X_test)

    # Hold-out metrics, in the same key order the notebook prints them.
    hold_out_metrics = {}
    hold_out_metrics['mean_absolute_error'] = mean_absolute_error(y_test, predictions)
    hold_out_metrics['median_absolute_error'] = median_absolute_error(y_test, predictions)
    hold_out_metrics['mean_absolute_percentage_error'] = mean_absolute_percentage_error(y_test, predictions)
    hold_out_metrics['pearson_corr'] = pearsonr(y_test, predictions)[0]

    return tuned_model, hold_out_metrics, chosen_params


#### Random Forest Regression

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import median_absolute_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.ensemble import RandomForestRegressor

def rf_ml(X, y, X_test, y_test):
    """Tune a ``RandomForestRegressor`` by grid search and score it on held-out data.

    The forest uses ``random_state=42``. Candidates are compared with 10-fold
    cross-validation on ``(X, y)`` using the notebook-level ``scorers``; the
    winner is chosen by ``neg_mean_absolute_error``.

    Parameters
    ----------
    X, y : training features and target passed to ``GridSearchCV.fit``.
    X_test, y_test : held-out features and target, used only for the
        reported metrics.

    Returns
    -------
    tuple
        ``(best_model, performances, best_params)``.
    """
    forest = RandomForestRegressor(random_state=42)
    hyperparameter_grid = {
        'n_estimators': [10, 50, 100, 200],
        'max_depth': [10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'max_features': ['sqrt', 'log2'],
    }

    search = GridSearchCV(
        estimator=forest,
        param_grid=hyperparameter_grid,
        cv=10,
        scoring=scorers,
        refit='neg_mean_absolute_error',
        n_jobs=-1,
        verbose=2,
    )
    search.fit(X, y)

    tuned_model = search.best_estimator_

    # Score the refitted winner on data the search never saw.
    predictions = tuned_model.predict(X_test)
    hold_out_metrics = {
        'mean_absolute_error': mean_absolute_error(y_test, predictions),
        'median_absolute_error': median_absolute_error(y_test, predictions),
        'mean_absolute_percentage_error': mean_absolute_percentage_error(y_test, predictions),
        'pearson_corr': pearsonr(y_test, predictions)[0],
    }

    return tuned_model, hold_out_metrics, search.best_params_


#### Support Vector Regression (Linear Kernel)

In [None]:
from sklearn.svm import LinearSVR
from sklearn.metrics import median_absolute_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.model_selection import GridSearchCV

def lin_svr_ml(X, y, X_test, y_test):
    """Tune a ``LinearSVR`` by grid search and score it on held-out data.

    The regressor uses ``random_state=42``. Candidates are compared with
    10-fold cross-validation on ``(X, y)`` using the notebook-level
    ``scorers``; the winner is chosen by ``neg_mean_absolute_error``.

    Parameters
    ----------
    X, y : training features and target passed to ``GridSearchCV.fit``.
    X_test, y_test : held-out features and target, used only for the
        reported metrics.

    Returns
    -------
    tuple
        ``(best_model, performances, best_params)``.
    """
    hyperparameter_grid = {
        'C': [0.1, 1, 10],
        'epsilon': [0.01, 0.1, 0.5],
        'loss': ['epsilon_insensitive', 'squared_epsilon_insensitive'],
        'intercept_scaling': [10, 50, 75],
        'max_iter': [3000, 4000, 5000],
    }
    linear_svr = LinearSVR(random_state=42)

    search = GridSearchCV(
        estimator=linear_svr,
        param_grid=hyperparameter_grid,
        cv=10,
        scoring=scorers,
        refit='neg_mean_absolute_error',
        n_jobs=-1,
        verbose=2,
    )
    search.fit(X, y)

    tuned_model = search.best_estimator_
    chosen_params = search.best_params_

    # Evaluate the selected model on the held-out split.
    predictions = tuned_model.predict(X_test)
    error_metrics = (
        ('mean_absolute_error', mean_absolute_error),
        ('median_absolute_error', median_absolute_error),
        ('mean_absolute_percentage_error', mean_absolute_percentage_error),
    )
    hold_out_metrics = {
        metric_name: metric_fn(y_test, predictions)
        for metric_name, metric_fn in error_metrics
    }
    hold_out_metrics['pearson_corr'] = pearsonr(y_test, predictions)[0]

    return tuned_model, hold_out_metrics, chosen_params

#### Support Vector Regression (RBF-Kernel)

In [None]:
from sklearn.svm import SVR
from sklearn.metrics import median_absolute_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.model_selection import GridSearchCV

def rbf_svr_ml(X,y,X_test, y_test):
    """Tune an RBF-kernel ``SVR`` by grid search and score it on held-out data.

    Candidates are compared with 10-fold cross-validation on ``(X, y)`` using
    the notebook-level ``scorers``; the winner is chosen by
    ``neg_mean_absolute_error``.

    Parameters
    ----------
    X, y : training features and target passed to ``GridSearchCV.fit``.
    X_test, y_test : held-out features and target, used only for the
        reported metrics.

    Returns
    -------
    tuple
        ``(best_model, performances, best_params)``; ``performances`` maps
        each metric name to its value on the held-out data.
    """
    # Define the parameter grid.
    # `degree` is deliberately absent: SVR only uses it for the 'poly' kernel,
    # so searching over it with kernel='rbf' tripled the number of fits
    # without changing any fitted model.
    param_grid = {
    'C': [0.1, 1, 10],
    'epsilon': [0.01, 0.1, 0.5],
    'gamma': ['scale', 'auto', 0.1, 1, 10],
    'shrinking': [True, False]
    }

    # Initialize the RBF-kernel support vector regressor
    regressor = SVR(kernel='rbf')

    # Perform Grid Search Cross-Validation
    grid_search = GridSearchCV(estimator=regressor, param_grid=param_grid, cv=10, scoring=scorers, refit='neg_mean_absolute_error', n_jobs=-1, verbose=2)
    grid_search.fit(X, y)

    # Get the best model
    best_model = grid_search.best_estimator_

    # Get the best hyperparameters
    best_params = grid_search.best_params_

    # Evaluate the best model on the held-out split
    y_pred = best_model.predict(X_test)

    performances = {
        'mean_absolute_error': mean_absolute_error(y_test, y_pred),
        'median_absolute_error': median_absolute_error(y_test, y_pred),
        'mean_absolute_percentage_error': mean_absolute_percentage_error(y_test, y_pred),
        'pearson_corr': pearsonr(y_test, y_pred)[0]
    }

    return best_model, performances, best_params

## Whole Dataset

### Original Dataset

#### Data Splitting and Feature Selection

In [None]:
# Load the whole-dataset CSV; `df` is rebound by each later dataset section.
df = pd.read_csv("whole.csv")

In [None]:
# Get the X Data for Valence: column names are read as strings from the text file and used to select from df
features_valence = np.genfromtxt('whole_df_val_sig.txt', dtype=str)
X_valence = df[features_valence]
print_df(X_valence)

In [None]:
# Get the X Data for Arousal: column names are read as strings from the text file and used to select from df
features_arousal = np.genfromtxt('whole_df_ars_sig.txt', dtype=str)
X_arousal = df[features_arousal]
print_df(X_arousal)

In [None]:
# Get the Y Labels: both regression targets (valence and arousal) kept together in one two-column frame
y = df[['valenceValue', 'arousalValue']]
print_df(y)

In [None]:
from sklearn.model_selection import train_test_split

# Split the valence data into training (80%) and testing (20%) sets with a fixed random_state
X_train_valence, X_test_valence, y_train_valence, y_test_valence = train_test_split(X_valence, y['valenceValue'], test_size=0.2, random_state=42)

# Print the shapes of the resulting datasets as a quick sanity check
print('Valence Training data shape:', X_train_valence.shape)
print('Valence Test data shape:', X_test_valence.shape)
print('Training labels shape (Valence):', y_train_valence.shape)
print('Test labels shape (Valence):', y_test_valence.shape)

In [None]:
# Split the arousal data into training (80%) and testing (20%) sets (same test_size and random_state as the valence split)
X_train_arousal, X_test_arousal, y_train_arousal, y_test_arousal = train_test_split(X_arousal, y['arousalValue'], test_size=0.2, random_state=42)

# Print the shapes of the resulting datasets as a quick sanity check
print('Arousal Training data shape:', X_train_arousal.shape)
print('Arousal Test data shape:', X_test_arousal.shape)
print('Training labels shape (Arousal):', y_train_arousal.shape)
print('Test labels shape (Arousal):', y_test_arousal.shape)


#### Linear Regression

In [None]:
# Grid-search linear regression on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = linr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# SHAP report on the valence test split for the currently bound `model`; the valence training features are passed to LinearExplainer.
feature_importance(shap.LinearExplainer(model, X_train_valence), X_test_valence)

In [None]:
# Grid-search linear regression on the arousal training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = linr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# SHAP report on the arousal test split for the currently bound `model`; the arousal training features are passed to LinearExplainer.
feature_importance(shap.LinearExplainer(model, X_train_arousal), X_test_arousal)

#### Additive Regression

In [None]:
# Grid-search gradient boosting on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = addr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Tree SHAP report on the valence test split for the currently bound `model`.
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
# Grid-search gradient boosting on the arousal training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = addr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Tree SHAP report on the arousal test split for the currently bound `model`.
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### Decision Tree Regression

In [None]:
# Grid-search a decision tree on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = dest_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Tree SHAP report on the valence test split for the currently bound `model`.
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
# Grid-search a decision tree on the arousal training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = dest_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Tree SHAP report on the arousal test split for the currently bound `model`.
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### K-Nearest Neighbor

In [None]:
# Grid-search k-nearest neighbours on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = knn_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Kernel SHAP for the currently bound `model`; the valence training features are summarised with shap.kmeans (k=5) as background.
explainer = shap.KernelExplainer(model.predict, shap.kmeans(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
# Grid-search k-nearest neighbours on the arousal training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = knn_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Kernel SHAP for the currently bound `model`; the arousal training features are summarised with shap.kmeans (k=5) as background.
explainer = shap.KernelExplainer(model.predict, shap.kmeans(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

#### Random Forest Regressor

In [None]:
# Grid-search a random forest on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = rf_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Tree SHAP report on the valence test split for the currently bound `model`.
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
# Grid-search a random forest on the arousal training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = rf_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Tree SHAP report on the arousal test split for the currently bound `model`.
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### Support Vector Regression (Linear Kernel)

In [None]:
# Grid-search the linear-kernel SVR on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = lin_svr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Kernel SHAP for the currently bound `model`, with 5 rows drawn from the valence training features by shap.sample as background.
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
# Grid-search the linear-kernel SVR on the arousal training split, then print the chosen
# hyperparameters and hold-out metrics. lin_svr_ml is the tuner that matches this
# section's heading; calling rf_ml here would report Random Forest results as SVR.
model, performances, best_params = lin_svr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Kernel SHAP for the currently bound `model`, with 5 rows drawn from the arousal training features by shap.sample as background.
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

#### Support Vector Regression (RBF-Kernel)

In [None]:
# Grid-search the RBF-kernel SVR on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = rbf_svr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Kernel SHAP for the currently bound `model`, with 5 rows drawn from the valence training features by shap.sample as background.
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
# Grid-search the RBF-kernel SVR on the arousal training split, then print the chosen
# hyperparameters and hold-out metrics. rbf_svr_ml is the tuner that matches this
# section's heading; calling rf_ml here would report Random Forest results as SVR.
model, performances, best_params = rbf_svr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Kernel SHAP for the currently bound `model`, with 5 rows drawn from the arousal training features by shap.sample as background.
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

### Extrapolated Dataset

#### Data Splitting and Feature Selection

In [None]:
# Load the extrapolated whole-dataset CSV; this rebinds `df`, replacing the frame loaded for the original dataset.
df = pd.read_csv("whole-extrapolated.csv")

In [None]:
# Get the X Data for Valence: column names are read as strings from the text file and used to select from df
features_valence = np.genfromtxt('whole_ex_val_sig.txt', dtype=str)
X_valence = df[features_valence]
print_df(X_valence)

In [None]:
# Get the X Data for Arousal: column names are read as strings from the text file and used to select from df
features_arousal = np.genfromtxt('whole_ex_ars_sig.txt', dtype=str)
X_arousal = df[features_arousal]
print_df(X_arousal)

In [None]:
# Get the Y Labels: both regression targets (valence and arousal) kept together in one two-column frame
y = df[['valenceValue', 'arousalValue']]
print_df(y)

In [None]:
from sklearn.model_selection import train_test_split

# Split the valence data into training (80%) and testing (20%) sets with a fixed random_state
X_train_valence, X_test_valence, y_train_valence, y_test_valence = train_test_split(X_valence, y['valenceValue'], test_size=0.2, random_state=42)

# Print the shapes of the resulting datasets as a quick sanity check
print('Valence Training data shape:', X_train_valence.shape)
print('Valence Test data shape:', X_test_valence.shape)
print('Training labels shape (Valence):', y_train_valence.shape)
print('Test labels shape (Valence):', y_test_valence.shape)

In [None]:
# Split the arousal data into training (80%) and testing (20%) sets (same test_size and random_state as the valence split)
X_train_arousal, X_test_arousal, y_train_arousal, y_test_arousal = train_test_split(X_arousal, y['arousalValue'], test_size=0.2, random_state=42)

# Print the shapes of the resulting datasets as a quick sanity check
print('Arousal Training data shape:', X_train_arousal.shape)
print('Arousal Test data shape:', X_test_arousal.shape)
print('Training labels shape (Arousal):', y_train_arousal.shape)
print('Test labels shape (Arousal):', y_test_arousal.shape)


#### Linear Regression

In [None]:
# Grid-search linear regression on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = linr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# SHAP report on the valence test split for the currently bound `model`; the valence training features are passed to LinearExplainer.
feature_importance(shap.LinearExplainer(model, X_train_valence), X_test_valence)

In [None]:
# Grid-search linear regression on the arousal training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = linr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# SHAP report on the arousal test split for the currently bound `model`; the arousal training features are passed to LinearExplainer.
feature_importance(shap.LinearExplainer(model, X_train_arousal), X_test_arousal)

#### Additive Regression

In [None]:
# Grid-search gradient boosting on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = addr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Tree SHAP report on the valence test split for the currently bound `model`.
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
# Grid-search gradient boosting on the arousal training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = addr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Tree SHAP report on the arousal test split for the currently bound `model`.
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### Decision Tree Regression

In [None]:
# Grid-search a decision tree on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = dest_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Tree SHAP report on the valence test split for the currently bound `model`.
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
# Grid-search a decision tree on the arousal training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = dest_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Tree SHAP report on the arousal test split for the currently bound `model`.
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### K-Nearest Neighbor

In [None]:
# Grid-search k-nearest neighbours on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = knn_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Kernel SHAP for the currently bound `model`; the valence training features are summarised with shap.kmeans (k=5) as background.
explainer = shap.KernelExplainer(model.predict, shap.kmeans(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
# Grid-search k-nearest neighbours on the arousal training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = knn_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Kernel SHAP for the currently bound `model`; the arousal training features are summarised with shap.kmeans (k=5) as background.
explainer = shap.KernelExplainer(model.predict, shap.kmeans(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

#### Random Forest Regressor

In [None]:
# Grid-search a random forest on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = rf_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Tree SHAP report on the valence test split for the currently bound `model`.
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
# Grid-search a random forest on the arousal training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = rf_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Tree SHAP report on the arousal test split for the currently bound `model`.
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### Support Vector Regression (Linear Kernel)

In [None]:
# Grid-search the linear-kernel SVR on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = lin_svr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Kernel SHAP for the currently bound `model`, with 5 rows drawn from the valence training features by shap.sample as background.
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
# Grid-search the linear-kernel SVR on the arousal training split, then print the chosen
# hyperparameters and hold-out metrics. lin_svr_ml is the tuner that matches this
# section's heading; calling rf_ml here would report Random Forest results as SVR.
model, performances, best_params = lin_svr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Kernel SHAP for the currently bound `model`, with 5 rows drawn from the arousal training features by shap.sample as background.
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

#### Support Vector Regression (RBF-Kernel)

In [None]:
# Grid-search the RBF-kernel SVR on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = rbf_svr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Kernel SHAP for the currently bound `model`, with 5 rows drawn from the valence training features by shap.sample as background.
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
# Grid-search the RBF-kernel SVR on the arousal training split, then print the chosen
# hyperparameters and hold-out metrics. rbf_svr_ml is the tuner that matches this
# section's heading; calling rf_ml here would report Random Forest results as SVR.
model, performances, best_params = rbf_svr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Kernel SHAP for the currently bound `model`, with 5 rows drawn from the arousal training features by shap.sample as background.
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

## Quadrant 1 (High Valence - High Arousal)

### Original Dataset

#### Data Splitting and Feature Selection

In [None]:
# Load the quadrant-1 CSV; this rebinds `df`, replacing the frame loaded for the previous dataset.
df = pd.read_csv("q1.csv")

In [None]:
# Get the X Data for Valence: column names are read as strings from the text file and used to select from df
features_valence = np.genfromtxt('q1_df_val_sig.txt', dtype=str)
X_valence = df[features_valence]
print_df(X_valence)

In [None]:
# Get the X Data for Arousal: column names are read as strings from the text file and used to select from df
features_arousal = np.genfromtxt('q1_df_ars_sig.txt', dtype=str)
X_arousal = df[features_arousal]
print_df(X_arousal)

In [None]:
# Get the Y Labels: both regression targets (valence and arousal) kept together in one two-column frame
y = df[['valenceValue', 'arousalValue']]
print_df(y)

In [None]:
from sklearn.model_selection import train_test_split

# Split the valence data into training (80%) and testing (20%) sets with a fixed random_state
X_train_valence, X_test_valence, y_train_valence, y_test_valence = train_test_split(X_valence, y['valenceValue'], test_size=0.2, random_state=42)

# Print the shapes of the resulting datasets as a quick sanity check
print('Valence Training data shape:', X_train_valence.shape)
print('Valence Test data shape:', X_test_valence.shape)
print('Training labels shape (Valence):', y_train_valence.shape)
print('Test labels shape (Valence):', y_test_valence.shape)

In [None]:
# Split the arousal data into training (80%) and testing (20%) sets (same test_size and random_state as the valence split)
X_train_arousal, X_test_arousal, y_train_arousal, y_test_arousal = train_test_split(X_arousal, y['arousalValue'], test_size=0.2, random_state=42)

# Print the shapes of the resulting datasets as a quick sanity check
print('Arousal Training data shape:', X_train_arousal.shape)
print('Arousal Test data shape:', X_test_arousal.shape)
print('Training labels shape (Arousal):', y_train_arousal.shape)
print('Test labels shape (Arousal):', y_test_arousal.shape)


#### Linear Regression

In [None]:
# Grid-search linear regression on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = linr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# SHAP report on the valence test split for the currently bound `model`; the valence training features are passed to LinearExplainer.
feature_importance(shap.LinearExplainer(model, X_train_valence), X_test_valence)

In [None]:
# Grid-search linear regression on the arousal training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = linr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# SHAP report on the arousal test split for the currently bound `model`; the arousal training features are passed to LinearExplainer.
feature_importance(shap.LinearExplainer(model, X_train_arousal), X_test_arousal)

#### Additive Regression

In [None]:
# Grid-search gradient boosting on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = addr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Tree SHAP report on the valence test split for the currently bound `model`.
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
# Grid-search gradient boosting on the arousal training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = addr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Tree SHAP report on the arousal test split for the currently bound `model`.
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### Decision Tree Regression

In [None]:
# Grid-search a decision tree on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = dest_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Tree SHAP report on the valence test split for the currently bound `model`.
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
# Grid-search a decision tree on the arousal training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = dest_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Tree SHAP report on the arousal test split for the currently bound `model`.
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### K-Nearest Neighbor

In [None]:
# Grid-search k-nearest neighbours on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = knn_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Kernel SHAP for the currently bound `model`; the valence training features are summarised with shap.kmeans (k=5) as background.
explainer = shap.KernelExplainer(model.predict, shap.kmeans(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
# Grid-search k-nearest neighbours on the arousal training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = knn_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Kernel SHAP for the currently bound `model`; the arousal training features are summarised with shap.kmeans (k=5) as background.
explainer = shap.KernelExplainer(model.predict, shap.kmeans(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

#### Random Forest Regressor

In [None]:
# Grid-search a random forest on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = rf_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Tree SHAP report on the valence test split for the currently bound `model`.
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
# Grid-search a random forest on the arousal training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = rf_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Tree SHAP report on the arousal test split for the currently bound `model`.
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### Support Vector Regression (Linear Kernel)

In [None]:
# Grid-search the linear-kernel SVR on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = lin_svr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Kernel SHAP for the currently bound `model`, with 5 rows drawn from the valence training features by shap.sample as background.
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
# Grid-search the linear-kernel SVR on the arousal training split, then print the chosen
# hyperparameters and hold-out metrics. lin_svr_ml is the tuner that matches this
# section's heading; calling rf_ml here would report Random Forest results as SVR.
model, performances, best_params = lin_svr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Kernel SHAP for the currently bound `model`, with 5 rows drawn from the arousal training features by shap.sample as background.
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

#### Support Vector Regression (RBF-Kernel)

In [None]:
# Grid-search the RBF-kernel SVR on the valence training split, then print the chosen hyperparameters and hold-out metrics.
model, performances, best_params = rbf_svr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Kernel SHAP for the currently bound `model`, with 5 rows drawn from the valence training features by shap.sample as background.
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
# Grid-search the RBF-kernel SVR on the arousal training split, then print the chosen
# hyperparameters and hold-out metrics. rbf_svr_ml is the tuner that matches this
# section's heading; calling rf_ml here would report Random Forest results as SVR.
model, performances, best_params = rbf_svr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Kernel SHAP for the currently bound `model`, with 5 rows drawn from the arousal training features by shap.sample as background.
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

### Extrapolated Dataset

#### Data Splitting and Feature Selection

In [None]:
# Load the extrapolated quadrant-1 CSV; this rebinds `df`, replacing the frame loaded for the previous dataset.
df = pd.read_csv("q1-extrapolated.csv")

In [None]:
# Get the X Data for Valence: column names are read as strings from the text file and used to select from df
features_valence = np.genfromtxt('q1_ex_val_sig.txt', dtype=str)
X_valence = df[features_valence]
print_df(X_valence)

In [None]:
# Get the X Data for Arousal: column names are read as strings from the text file and used to select from df
features_arousal = np.genfromtxt('q1_ex_ars_sig.txt', dtype=str)
X_arousal = df[features_arousal]
print_df(X_arousal)

In [None]:
# Get the Y Labels: both regression targets (valence and arousal) kept together in one two-column frame
y = df[['valenceValue', 'arousalValue']]
print_df(y)

In [None]:
from sklearn.model_selection import train_test_split

# Split the data into training and testing sets
X_train_valence, X_test_valence, y_train_valence, y_test_valence = train_test_split(X_valence, y['valenceValue'], test_size=0.2, random_state=42)

# Print the shapes of the resulting datasets
print('Valence Training data shape:', X_train_valence.shape)
print('Valence Test data shape:', X_test_valence.shape)
print('Training labels shape (Valence):', y_train_valence.shape)
print('Test labels shape (Valence):', y_test_valence.shape)

In [None]:
# Split the data into training and testing sets
X_train_arousal, X_test_arousal, y_train_arousal, y_test_arousal = train_test_split(X_arousal, y['arousalValue'], test_size=0.2, random_state=42)

# Print the shapes of the resulting datasets
print('Arousal Training data shape:', X_train_arousal.shape)
print('Arousal Test data shape:', X_test_arousal.shape)
print('Training labels shape (Arousal):', y_train_arousal.shape)
print('Test labels shape (Arousal):', y_test_arousal.shape)


#### Linear Regression

In [None]:
model, performances, best_params = linr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.LinearExplainer(model, X_train_valence), X_test_valence)

In [None]:
model, performances, best_params = linr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.LinearExplainer(model, X_train_arousal), X_test_arousal)

#### Additive Regression

In [None]:
model, performances, best_params = addr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
model, performances, best_params = addr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### Decision Tree Regression

In [None]:
model, performances, best_params = dest_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
model, performances, best_params = dest_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### K-Nearest Neighbor

In [None]:
model, performances, best_params = knn_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.kmeans(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
model, performances, best_params = knn_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.kmeans(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

#### Random Forest Regressor

In [None]:
model, performances, best_params = rf_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
model, performances, best_params = rf_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### Support Vector Regression (Linear Kernel)

In [None]:
model, performances, best_params = lin_svr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
# Fit the linear-kernel SVR on the arousal target. This cell belongs to the
# "Support Vector Regression (Linear Kernel)" section, so it must call
# lin_svr_ml (not rf_ml) for the metrics and SHAP values to describe the SVR.
model, performances, best_params = lin_svr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

#### Support Vector Regression (RBF-Kernel)

In [None]:
model, performances, best_params = rbf_svr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
# Fit the RBF-kernel SVR on the arousal target. This cell belongs to the
# "Support Vector Regression (RBF-Kernel)" section, so it must call rbf_svr_ml
# (not rf_ml) for the reported metrics and SHAP values to describe the SVR.
model, performances, best_params = rbf_svr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

## Quadrant 2 (Low Valence - High Arousal)

### Original Dataset

#### Data Splitting and Feature Selection

In [None]:
df = pd.read_csv("q2.csv")

In [None]:
# Get the X Data for Valence
# np.genfromtxt returns a 0-d array when the file lists a single feature;
# np.atleast_1d keeps the selector array-like so X_valence is always a DataFrame.
features_valence = np.atleast_1d(np.genfromtxt('q2_df_val_sig.txt', dtype=str))
X_valence = df[features_valence]
print_df(X_valence)

In [None]:
# Get the X Data for Arousal
# np.genfromtxt returns a 0-d array when the file lists a single feature;
# np.atleast_1d keeps the selector array-like so X_arousal is always a DataFrame.
features_arousal = np.atleast_1d(np.genfromtxt('q2_df_ars_sig.txt', dtype=str))
X_arousal = df[features_arousal]
print_df(X_arousal)

In [None]:
# Get the Y Labels
y = df[['valenceValue', 'arousalValue']]
print_df(y)

In [None]:
from sklearn.model_selection import train_test_split

# Split the data into training and testing sets
X_train_valence, X_test_valence, y_train_valence, y_test_valence = train_test_split(X_valence, y['valenceValue'], test_size=0.2, random_state=42)

# Print the shapes of the resulting datasets
print('Valence Training data shape:', X_train_valence.shape)
print('Valence Test data shape:', X_test_valence.shape)
print('Training labels shape (Valence):', y_train_valence.shape)
print('Test labels shape (Valence):', y_test_valence.shape)

In [None]:
# Split the data into training and testing sets
X_train_arousal, X_test_arousal, y_train_arousal, y_test_arousal = train_test_split(X_arousal, y['arousalValue'], test_size=0.2, random_state=42)

# Print the shapes of the resulting datasets
print('Arousal Training data shape:', X_train_arousal.shape)
print('Arousal Test data shape:', X_test_arousal.shape)
print('Training labels shape (Arousal):', y_train_arousal.shape)
print('Test labels shape (Arousal):', y_test_arousal.shape)


#### Linear Regression

In [None]:
model, performances, best_params = linr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.LinearExplainer(model, X_train_valence), X_test_valence)

In [None]:
model, performances, best_params = linr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.LinearExplainer(model, X_train_arousal), X_test_arousal)

#### Additive Regression

In [None]:
model, performances, best_params = addr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
model, performances, best_params = addr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### Decision Tree Regression

In [None]:
model, performances, best_params = dest_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
model, performances, best_params = dest_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### K-Nearest Neighbor

In [None]:
model, performances, best_params = knn_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.kmeans(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
model, performances, best_params = knn_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.kmeans(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

#### Random Forest Regressor

In [None]:
model, performances, best_params = rf_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
model, performances, best_params = rf_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### Support Vector Regression (Linear Kernel)

In [None]:
model, performances, best_params = lin_svr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
# Fit the linear-kernel SVR on the arousal target. This cell belongs to the
# "Support Vector Regression (Linear Kernel)" section, so it must call
# lin_svr_ml (not rf_ml) for the metrics and SHAP values to describe the SVR.
model, performances, best_params = lin_svr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

#### Support Vector Regression (RBF-Kernel)

In [None]:
model, performances, best_params = rbf_svr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
# Fit the RBF-kernel SVR on the arousal target. This cell belongs to the
# "Support Vector Regression (RBF-Kernel)" section, so it must call rbf_svr_ml
# (not rf_ml) for the reported metrics and SHAP values to describe the SVR.
model, performances, best_params = rbf_svr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

### Extrapolated Dataset

#### Data Splitting and Feature Selection

In [None]:
df = pd.read_csv("q2-extrapolated.csv")

In [None]:
# Get the X Data for Valence
# np.genfromtxt returns a 0-d array when the file lists a single feature;
# np.atleast_1d keeps the selector array-like so X_valence is always a DataFrame.
features_valence = np.atleast_1d(np.genfromtxt('q2_ex_val_sig.txt', dtype=str))
X_valence = df[features_valence]
print_df(X_valence)

In [None]:
# Get the X Data for Arousal
# np.genfromtxt returns a 0-d array when the file lists a single feature;
# np.atleast_1d keeps the selector array-like so X_arousal is always a DataFrame.
features_arousal = np.atleast_1d(np.genfromtxt('q2_ex_ars_sig.txt', dtype=str))
X_arousal = df[features_arousal]
print_df(X_arousal)

In [None]:
# Get the Y Labels
y = df[['valenceValue', 'arousalValue']]
print_df(y)

In [None]:
from sklearn.model_selection import train_test_split

# Split the data into training and testing sets
X_train_valence, X_test_valence, y_train_valence, y_test_valence = train_test_split(X_valence, y['valenceValue'], test_size=0.2, random_state=42)

# Print the shapes of the resulting datasets
print('Valence Training data shape:', X_train_valence.shape)
print('Valence Test data shape:', X_test_valence.shape)
print('Training labels shape (Valence):', y_train_valence.shape)
print('Test labels shape (Valence):', y_test_valence.shape)

In [None]:
# Split the data into training and testing sets
X_train_arousal, X_test_arousal, y_train_arousal, y_test_arousal = train_test_split(X_arousal, y['arousalValue'], test_size=0.2, random_state=42)

# Print the shapes of the resulting datasets
print('Arousal Training data shape:', X_train_arousal.shape)
print('Arousal Test data shape:', X_test_arousal.shape)
print('Training labels shape (Arousal):', y_train_arousal.shape)
print('Test labels shape (Arousal):', y_test_arousal.shape)


#### Linear Regression

In [None]:
model, performances, best_params = linr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.LinearExplainer(model, X_train_valence), X_test_valence)

In [None]:
model, performances, best_params = linr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.LinearExplainer(model, X_train_arousal), X_test_arousal)

#### Additive Regression

In [None]:
model, performances, best_params = addr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
model, performances, best_params = addr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### Decision Tree Regression

In [None]:
model, performances, best_params = dest_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
model, performances, best_params = dest_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### K-Nearest Neighbor

In [None]:
model, performances, best_params = knn_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.kmeans(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
model, performances, best_params = knn_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.kmeans(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

#### Random Forest Regressor

In [None]:
model, performances, best_params = rf_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
model, performances, best_params = rf_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### Support Vector Regression (Linear Kernel)

In [None]:
model, performances, best_params = lin_svr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
# Fit the linear-kernel SVR on the arousal target. This cell belongs to the
# "Support Vector Regression (Linear Kernel)" section, so it must call
# lin_svr_ml (not rf_ml) for the metrics and SHAP values to describe the SVR.
model, performances, best_params = lin_svr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

#### Support Vector Regression (RBF-Kernel)

In [None]:
model, performances, best_params = rbf_svr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
# Fit the RBF-kernel SVR on the arousal target. This cell belongs to the
# "Support Vector Regression (RBF-Kernel)" section, so it must call rbf_svr_ml
# (not rf_ml) for the reported metrics and SHAP values to describe the SVR.
model, performances, best_params = rbf_svr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

## Quadrant 3 (Low Valence - Low Arousal)

### Original Dataset

#### Data Splitting and Feature Selection

In [None]:
df = pd.read_csv("q3.csv")

In [None]:
# Get the X Data for Valence
# np.genfromtxt returns a 0-d array when the file lists a single feature;
# np.atleast_1d keeps the selector array-like so X_valence is always a DataFrame.
features_valence = np.atleast_1d(np.genfromtxt('q3_df_val_sig.txt', dtype=str))
X_valence = df[features_valence]
print_df(X_valence)

In [None]:
# Get the X Data for Arousal
# np.genfromtxt returns a 0-d array when the file lists a single feature;
# np.atleast_1d keeps the selector array-like so X_arousal is always a DataFrame.
features_arousal = np.atleast_1d(np.genfromtxt('q3_df_ars_sig.txt', dtype=str))
X_arousal = df[features_arousal]
print_df(X_arousal)

In [None]:
# Get the Y Labels
y = df[['valenceValue', 'arousalValue']]
print_df(y)

In [None]:
from sklearn.model_selection import train_test_split

# Split the data into training and testing sets
X_train_valence, X_test_valence, y_train_valence, y_test_valence = train_test_split(X_valence, y['valenceValue'], test_size=0.2, random_state=42)

# Print the shapes of the resulting datasets
print('Valence Training data shape:', X_train_valence.shape)
print('Valence Test data shape:', X_test_valence.shape)
print('Training labels shape (Valence):', y_train_valence.shape)
print('Test labels shape (Valence):', y_test_valence.shape)

In [None]:
# Split the data into training and testing sets
X_train_arousal, X_test_arousal, y_train_arousal, y_test_arousal = train_test_split(X_arousal, y['arousalValue'], test_size=0.2, random_state=42)

# Print the shapes of the resulting datasets
print('Arousal Training data shape:', X_train_arousal.shape)
print('Arousal Test data shape:', X_test_arousal.shape)
print('Training labels shape (Arousal):', y_train_arousal.shape)
print('Test labels shape (Arousal):', y_test_arousal.shape)


#### Linear Regression

In [None]:
model, performances, best_params = linr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.LinearExplainer(model, X_train_valence), X_test_valence)

In [None]:
model, performances, best_params = linr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.LinearExplainer(model, X_train_arousal), X_test_arousal)

#### Additive Regression

In [None]:
model, performances, best_params = addr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
model, performances, best_params = addr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### Decision Tree Regression

In [None]:
model, performances, best_params = dest_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
model, performances, best_params = dest_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### K-Nearest Neighbor

In [None]:
model, performances, best_params = knn_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.kmeans(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
model, performances, best_params = knn_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.kmeans(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

#### Random Forest Regressor

In [None]:
model, performances, best_params = rf_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
model, performances, best_params = rf_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### Support Vector Regression (Linear Kernel)

In [None]:
model, performances, best_params = lin_svr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
# Fit the linear-kernel SVR on the arousal target. This cell belongs to the
# "Support Vector Regression (Linear Kernel)" section, so it must call
# lin_svr_ml (not rf_ml) for the metrics and SHAP values to describe the SVR.
model, performances, best_params = lin_svr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

#### Support Vector Regression (RBF-Kernel)

In [None]:
model, performances, best_params = rbf_svr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
# Fit the RBF-kernel SVR on the arousal target. This cell belongs to the
# "Support Vector Regression (RBF-Kernel)" section, so it must call rbf_svr_ml
# (not rf_ml) for the reported metrics and SHAP values to describe the SVR.
model, performances, best_params = rbf_svr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

### Extrapolated Dataset

#### Data Splitting and Feature Selection

In [None]:
df = pd.read_csv("q3-extrapolated.csv")

In [None]:
# Get the X Data for Valence
# np.genfromtxt returns a 0-d array when the file lists a single feature;
# np.atleast_1d keeps the selector array-like so X_valence is always a DataFrame.
features_valence = np.atleast_1d(np.genfromtxt('q3_ex_val_sig.txt', dtype=str))
X_valence = df[features_valence]
print_df(X_valence)

In [None]:
# Get the X Data for Arousal
# np.genfromtxt returns a 0-d array when the file lists a single feature;
# np.atleast_1d keeps the selector array-like so X_arousal is always a DataFrame.
features_arousal = np.atleast_1d(np.genfromtxt('q3_ex_ars_sig.txt', dtype=str))
X_arousal = df[features_arousal]
print_df(X_arousal)

In [None]:
# Get the Y Labels
y = df[['valenceValue', 'arousalValue']]
print_df(y)

In [None]:
from sklearn.model_selection import train_test_split

# Split the data into training and testing sets
X_train_valence, X_test_valence, y_train_valence, y_test_valence = train_test_split(X_valence, y['valenceValue'], test_size=0.2, random_state=42)

# Print the shapes of the resulting datasets
print('Valence Training data shape:', X_train_valence.shape)
print('Valence Test data shape:', X_test_valence.shape)
print('Training labels shape (Valence):', y_train_valence.shape)
print('Test labels shape (Valence):', y_test_valence.shape)

In [None]:
# Split the data into training and testing sets
X_train_arousal, X_test_arousal, y_train_arousal, y_test_arousal = train_test_split(X_arousal, y['arousalValue'], test_size=0.2, random_state=42)

# Print the shapes of the resulting datasets
print('Arousal Training data shape:', X_train_arousal.shape)
print('Arousal Test data shape:', X_test_arousal.shape)
print('Training labels shape (Arousal):', y_train_arousal.shape)
print('Test labels shape (Arousal):', y_test_arousal.shape)


#### Linear Regression

In [None]:
model, performances, best_params = linr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.LinearExplainer(model, X_train_valence), X_test_valence)

In [None]:
model, performances, best_params = linr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.LinearExplainer(model, X_train_arousal), X_test_arousal)

#### Additive Regression

In [None]:
model, performances, best_params = addr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
model, performances, best_params = addr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### Decision Tree Regression

In [None]:
model, performances, best_params = dest_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
model, performances, best_params = dest_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### K-Nearest Neighbor

In [None]:
model, performances, best_params = knn_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.kmeans(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
model, performances, best_params = knn_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.kmeans(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

#### Random Forest Regressor

In [None]:
model, performances, best_params = rf_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
model, performances, best_params = rf_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### Support Vector Regression (Linear Kernel)

In [None]:
model, performances, best_params = lin_svr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
# Fit the linear-kernel SVR on the arousal target. This cell belongs to the
# "Support Vector Regression (Linear Kernel)" section, so it must call
# lin_svr_ml (not rf_ml) for the metrics and SHAP values to describe the SVR.
model, performances, best_params = lin_svr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

#### Support Vector Regression (RBF-Kernel)

In [None]:
model, performances, best_params = rbf_svr_ml(X_train_valence, y_train_valence, X_test_valence, y_test_valence)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_valence, 5))
feature_importance(explainer, X_test_valence)

In [None]:
# Fit the RBF-kernel SVR on the arousal target. This cell belongs to the
# "Support Vector Regression (RBF-Kernel)" section, so it must call rbf_svr_ml
# (not rf_ml) for the reported metrics and SHAP values to describe the SVR.
model, performances, best_params = rbf_svr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

print("\n Paramaters of Best Model: ")
pretty_print(best_params)

print("\n Model Performances: ")
pretty_print(performances)

In [None]:
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_arousal, 5))
feature_importance(explainer, X_test_arousal)

## Quadrant 4 (High Valence - Low Arousal)

### Original Dataset

#### Data Splitting and Feature Selection

In [None]:
df = pd.read_csv("q4.csv")

In [None]:
# Get the X Data for Valence
# np.genfromtxt returns a 0-d array when the file lists a single feature;
# np.atleast_1d keeps the selector array-like so X_valence is always a DataFrame.
features_valence = np.atleast_1d(np.genfromtxt('q4_df_val_sig.txt', dtype=str))
X_valence = df[features_valence]
print_df(X_valence)

In [None]:
# Get the X Data for Arousal
# np.genfromtxt returns a 0-d array when the file lists a single feature;
# np.atleast_1d keeps the selector array-like so X_arousal is always a DataFrame.
features_arousal = np.atleast_1d(np.genfromtxt('q4_df_ars_sig.txt', dtype=str))
X_arousal = df[features_arousal]
print_df(X_arousal)

In [None]:
# Get the Y Labels
y = df[['valenceValue', 'arousalValue']]
print_df(y)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the valence test set; the fixed random_state
# makes the split repeatable.
X_train_valence, X_test_valence, y_train_valence, y_test_valence = train_test_split(
    X_valence,
    y['valenceValue'],
    test_size=0.2,
    random_state=42,
)

# Show the shape of every piece of the split.
for shape_label, split_part in (
    ('Valence Training data shape:', X_train_valence),
    ('Valence Test data shape:', X_test_valence),
    ('Training labels shape (Valence):', y_train_valence),
    ('Test labels shape (Valence):', y_test_valence),
):
    print(shape_label, split_part.shape)

In [None]:
# Hold out 20% of the rows as the arousal test set; the fixed random_state
# makes the split repeatable.
X_train_arousal, X_test_arousal, y_train_arousal, y_test_arousal = train_test_split(
    X_arousal,
    y['arousalValue'],
    test_size=0.2,
    random_state=42,
)

# Show the shape of every piece of the split.
for shape_label, split_part in (
    ('Arousal Training data shape:', X_train_arousal),
    ('Arousal Test data shape:', X_test_arousal),
    ('Training labels shape (Arousal):', y_train_arousal),
    ('Test labels shape (Arousal):', y_test_arousal),
):
    print(shape_label, split_part.shape)


#### Linear Regression

In [None]:
# Run the linear regression helper (linr_ml) on the valence split and unpack
# its three results.
model, performances, best_params = linr_ml(
    X_train_valence, y_train_valence,
    X_test_valence, y_test_valence,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain `model` with shap.LinearExplainer, passing the valence training
# features as its reference data, then rank features by mean absolute SHAP
# value on the valence test set.
feature_importance(shap.LinearExplainer(model, X_train_valence), X_test_valence)

In [None]:
# Run the linear regression helper (linr_ml) on the arousal split and unpack
# its three results.
model, performances, best_params = linr_ml(
    X_train_arousal, y_train_arousal,
    X_test_arousal, y_test_arousal,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain `model` with shap.LinearExplainer, passing the arousal training
# features as its reference data, then rank features by mean absolute SHAP
# value on the arousal test set.
feature_importance(shap.LinearExplainer(model, X_train_arousal), X_test_arousal)

#### Additive Regression

In [None]:
# Run the additive regression helper (addr_ml) on the valence split and unpack
# its three results.
model, performances, best_params = addr_ml(
    X_train_valence, y_train_valence,
    X_test_valence, y_test_valence,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain `model` with shap.TreeExplainer and rank features by mean absolute
# SHAP value on the valence test set.
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
# Run the additive regression helper (addr_ml) on the arousal split and unpack
# its three results.
model, performances, best_params = addr_ml(
    X_train_arousal, y_train_arousal,
    X_test_arousal, y_test_arousal,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain `model` with shap.TreeExplainer and rank features by mean absolute
# SHAP value on the arousal test set.
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### Decision Tree Regression

In [None]:
# Run the decision tree regression helper (dest_ml) on the valence split and
# unpack its three results.
model, performances, best_params = dest_ml(
    X_train_valence, y_train_valence,
    X_test_valence, y_test_valence,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain `model` with shap.TreeExplainer and rank features by mean absolute
# SHAP value on the valence test set.
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
# Run the decision tree regression helper (dest_ml) on the arousal split and
# unpack its three results.
model, performances, best_params = dest_ml(
    X_train_arousal, y_train_arousal,
    X_test_arousal, y_test_arousal,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain `model` with shap.TreeExplainer and rank features by mean absolute
# SHAP value on the arousal test set.
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### K-Nearest Neighbor

In [None]:
# Run the k-nearest-neighbour helper (knn_ml) on the valence split and unpack
# its three results.
model, performances, best_params = knn_ml(
    X_train_valence, y_train_valence,
    X_test_valence, y_test_valence,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain `model` with a model-agnostic KernelExplainer; the background set is
# the valence training features summarised by shap.kmeans with k=5.
explainer = shap.KernelExplainer(model.predict, shap.kmeans(X_train_valence, 5))
# feature_importance computes mean absolute SHAP values on the valence test
# features and prints the most contributing feature.
feature_importance(explainer, X_test_valence)

In [None]:
# Run the k-nearest-neighbour helper (knn_ml) on the arousal split and unpack
# its three results.
model, performances, best_params = knn_ml(
    X_train_arousal, y_train_arousal,
    X_test_arousal, y_test_arousal,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain `model` with a model-agnostic KernelExplainer; the background set is
# the arousal training features summarised by shap.kmeans with k=5.
explainer = shap.KernelExplainer(model.predict, shap.kmeans(X_train_arousal, 5))
# feature_importance computes mean absolute SHAP values on the arousal test
# features and prints the most contributing feature.
feature_importance(explainer, X_test_arousal)

#### Random Forest Regressor

In [None]:
# Run the random forest helper (rf_ml) on the valence split and unpack its
# three results.
model, performances, best_params = rf_ml(
    X_train_valence, y_train_valence,
    X_test_valence, y_test_valence,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain `model` with shap.TreeExplainer and rank features by mean absolute
# SHAP value on the valence test set.
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
# Run the random forest helper (rf_ml) on the arousal split and unpack its
# three results.
model, performances, best_params = rf_ml(
    X_train_arousal, y_train_arousal,
    X_test_arousal, y_test_arousal,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain `model` with shap.TreeExplainer and rank features by mean absolute
# SHAP value on the arousal test set.
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### Support Vector Regression (Linear Kernel)

In [None]:
# Run the linear-kernel SVR helper (lin_svr_ml) on the valence split and
# unpack its three results.
model, performances, best_params = lin_svr_ml(
    X_train_valence, y_train_valence,
    X_test_valence, y_test_valence,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain the estimator currently bound to `model` with a model-agnostic
# KernelExplainer; the background set is 5 rows drawn from the valence
# training features via shap.sample.
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_valence, 5))
# feature_importance computes mean absolute SHAP values on the valence test
# features and prints the most contributing feature.
feature_importance(explainer, X_test_valence)

In [None]:
# Fit the linear-kernel SVR helper on the arousal split and unpack its three
# results. This cell sits under the "Support Vector Regression (Linear Kernel)"
# heading, so it must call lin_svr_ml; calling rf_ml here would report Random
# Forest results as if they were SVR results.
model, performances, best_params = lin_svr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

# Report the selected hyperparameters.
print("\n Paramaters of Best Model: ")
pretty_print(best_params)

# Report the evaluation results returned by the helper.
print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Explain the estimator currently bound to `model` with a model-agnostic
# KernelExplainer; the background set is 5 rows drawn from the arousal
# training features via shap.sample.
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_arousal, 5))
# feature_importance computes mean absolute SHAP values on the arousal test
# features and prints the most contributing feature.
feature_importance(explainer, X_test_arousal)

#### Support Vector Regression (RBF-Kernel)

In [None]:
# Run the RBF-kernel SVR helper (rbf_svr_ml) on the valence split and unpack
# its three results.
model, performances, best_params = rbf_svr_ml(
    X_train_valence, y_train_valence,
    X_test_valence, y_test_valence,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain the estimator currently bound to `model` with a model-agnostic
# KernelExplainer; the background set is 5 rows drawn from the valence
# training features via shap.sample.
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_valence, 5))
# feature_importance computes mean absolute SHAP values on the valence test
# features and prints the most contributing feature.
feature_importance(explainer, X_test_valence)

In [None]:
# Fit the RBF-kernel SVR helper on the arousal split and unpack its three
# results. This cell sits under the "Support Vector Regression (RBF-Kernel)"
# heading, so it must call rbf_svr_ml; calling rf_ml here would report Random
# Forest results as if they were SVR results.
model, performances, best_params = rbf_svr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

# Report the selected hyperparameters.
print("\n Paramaters of Best Model: ")
pretty_print(best_params)

# Report the evaluation results returned by the helper.
print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Explain the estimator currently bound to `model` with a model-agnostic
# KernelExplainer; the background set is 5 rows drawn from the arousal
# training features via shap.sample.
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_arousal, 5))
# feature_importance computes mean absolute SHAP values on the arousal test
# features and prints the most contributing feature.
feature_importance(explainer, X_test_arousal)

### Extrapolated Dataset

#### Data Splitting and Feature Selection

In [None]:
# Load the Quadrant 4 extrapolated dataset. This rebinds `df`, so every cell
# below works on the extrapolated data rather than the original q4.csv frame.
df = pd.read_csv("q4-extrapolated.csv")

In [None]:
# Get the X Data for Valence
# The text file holds the column names to keep (presumably the features found
# significant for valence on the extrapolated data - confirm against the
# feature-selection step). np.genfromtxt returns a 0-d array when the file
# contains a single name; np.atleast_1d keeps the selector list-like so
# X_valence is always a 2-D DataFrame rather than failing or collapsing to a
# Series.
features_valence = np.atleast_1d(np.genfromtxt('q4_ex_val_sig.txt', dtype=str))
X_valence = df[features_valence]
print_df(X_valence)

In [None]:
# Get the X Data for Arousal
# The text file holds the column names to keep (presumably the features found
# significant for arousal on the extrapolated data - confirm against the
# feature-selection step). np.genfromtxt returns a 0-d array when the file
# contains a single name; np.atleast_1d keeps the selector list-like so
# X_arousal is always a 2-D DataFrame rather than failing or collapsing to a
# Series.
features_arousal = np.atleast_1d(np.genfromtxt('q4_ex_ars_sig.txt', dtype=str))
X_arousal = df[features_arousal]
print_df(X_arousal)

In [None]:
# Collect both regression targets (valence and arousal) into one label frame
# and display it.
target_columns = ['valenceValue', 'arousalValue']
y = df[target_columns]
print_df(y)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the valence test set; the fixed random_state
# makes the split repeatable.
X_train_valence, X_test_valence, y_train_valence, y_test_valence = train_test_split(
    X_valence,
    y['valenceValue'],
    test_size=0.2,
    random_state=42,
)

# Show the shape of every piece of the split.
for shape_label, split_part in (
    ('Valence Training data shape:', X_train_valence),
    ('Valence Test data shape:', X_test_valence),
    ('Training labels shape (Valence):', y_train_valence),
    ('Test labels shape (Valence):', y_test_valence),
):
    print(shape_label, split_part.shape)

In [None]:
# Hold out 20% of the rows as the arousal test set; the fixed random_state
# makes the split repeatable.
X_train_arousal, X_test_arousal, y_train_arousal, y_test_arousal = train_test_split(
    X_arousal,
    y['arousalValue'],
    test_size=0.2,
    random_state=42,
)

# Show the shape of every piece of the split.
for shape_label, split_part in (
    ('Arousal Training data shape:', X_train_arousal),
    ('Arousal Test data shape:', X_test_arousal),
    ('Training labels shape (Arousal):', y_train_arousal),
    ('Test labels shape (Arousal):', y_test_arousal),
):
    print(shape_label, split_part.shape)


#### Linear Regression

In [None]:
# Run the linear regression helper (linr_ml) on the valence split and unpack
# its three results.
model, performances, best_params = linr_ml(
    X_train_valence, y_train_valence,
    X_test_valence, y_test_valence,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain `model` with shap.LinearExplainer, passing the valence training
# features as its reference data, then rank features by mean absolute SHAP
# value on the valence test set.
feature_importance(shap.LinearExplainer(model, X_train_valence), X_test_valence)

In [None]:
# Run the linear regression helper (linr_ml) on the arousal split and unpack
# its three results.
model, performances, best_params = linr_ml(
    X_train_arousal, y_train_arousal,
    X_test_arousal, y_test_arousal,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain `model` with shap.LinearExplainer, passing the arousal training
# features as its reference data, then rank features by mean absolute SHAP
# value on the arousal test set.
feature_importance(shap.LinearExplainer(model, X_train_arousal), X_test_arousal)

#### Additive Regression

In [None]:
# Run the additive regression helper (addr_ml) on the valence split and unpack
# its three results.
model, performances, best_params = addr_ml(
    X_train_valence, y_train_valence,
    X_test_valence, y_test_valence,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain `model` with shap.TreeExplainer and rank features by mean absolute
# SHAP value on the valence test set.
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
# Run the additive regression helper (addr_ml) on the arousal split and unpack
# its three results.
model, performances, best_params = addr_ml(
    X_train_arousal, y_train_arousal,
    X_test_arousal, y_test_arousal,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain `model` with shap.TreeExplainer and rank features by mean absolute
# SHAP value on the arousal test set.
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### Decision Tree Regression

In [None]:
# Run the decision tree regression helper (dest_ml) on the valence split and
# unpack its three results.
model, performances, best_params = dest_ml(
    X_train_valence, y_train_valence,
    X_test_valence, y_test_valence,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain `model` with shap.TreeExplainer and rank features by mean absolute
# SHAP value on the valence test set.
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
# Run the decision tree regression helper (dest_ml) on the arousal split and
# unpack its three results.
model, performances, best_params = dest_ml(
    X_train_arousal, y_train_arousal,
    X_test_arousal, y_test_arousal,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain `model` with shap.TreeExplainer and rank features by mean absolute
# SHAP value on the arousal test set.
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### K-Nearest Neighbor

In [None]:
# Run the k-nearest-neighbour helper (knn_ml) on the valence split and unpack
# its three results.
model, performances, best_params = knn_ml(
    X_train_valence, y_train_valence,
    X_test_valence, y_test_valence,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain `model` with a model-agnostic KernelExplainer; the background set is
# the valence training features summarised by shap.kmeans with k=5.
explainer = shap.KernelExplainer(model.predict, shap.kmeans(X_train_valence, 5))
# feature_importance computes mean absolute SHAP values on the valence test
# features and prints the most contributing feature.
feature_importance(explainer, X_test_valence)

In [None]:
# Run the k-nearest-neighbour helper (knn_ml) on the arousal split and unpack
# its three results.
model, performances, best_params = knn_ml(
    X_train_arousal, y_train_arousal,
    X_test_arousal, y_test_arousal,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain `model` with a model-agnostic KernelExplainer; the background set is
# the arousal training features summarised by shap.kmeans with k=5.
explainer = shap.KernelExplainer(model.predict, shap.kmeans(X_train_arousal, 5))
# feature_importance computes mean absolute SHAP values on the arousal test
# features and prints the most contributing feature.
feature_importance(explainer, X_test_arousal)

#### Random Forest Regressor

In [None]:
# Run the random forest helper (rf_ml) on the valence split and unpack its
# three results.
model, performances, best_params = rf_ml(
    X_train_valence, y_train_valence,
    X_test_valence, y_test_valence,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain `model` with shap.TreeExplainer and rank features by mean absolute
# SHAP value on the valence test set.
feature_importance(shap.TreeExplainer(model), X_test_valence)

In [None]:
# Run the random forest helper (rf_ml) on the arousal split and unpack its
# three results.
model, performances, best_params = rf_ml(
    X_train_arousal, y_train_arousal,
    X_test_arousal, y_test_arousal,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain `model` with shap.TreeExplainer and rank features by mean absolute
# SHAP value on the arousal test set.
feature_importance(shap.TreeExplainer(model), X_test_arousal)

#### Support Vector Regression (Linear Kernel)

In [None]:
# Run the linear-kernel SVR helper (lin_svr_ml) on the valence split and
# unpack its three results.
model, performances, best_params = lin_svr_ml(
    X_train_valence, y_train_valence,
    X_test_valence, y_test_valence,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain the estimator currently bound to `model` with a model-agnostic
# KernelExplainer; the background set is 5 rows drawn from the valence
# training features via shap.sample.
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_valence, 5))
# feature_importance computes mean absolute SHAP values on the valence test
# features and prints the most contributing feature.
feature_importance(explainer, X_test_valence)

In [None]:
# Fit the linear-kernel SVR helper on the arousal split and unpack its three
# results. This cell sits under the "Support Vector Regression (Linear Kernel)"
# heading, so it must call lin_svr_ml; calling rf_ml here would report Random
# Forest results as if they were SVR results.
model, performances, best_params = lin_svr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

# Report the selected hyperparameters.
print("\n Paramaters of Best Model: ")
pretty_print(best_params)

# Report the evaluation results returned by the helper.
print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Explain the estimator currently bound to `model` with a model-agnostic
# KernelExplainer; the background set is 5 rows drawn from the arousal
# training features via shap.sample.
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_arousal, 5))
# feature_importance computes mean absolute SHAP values on the arousal test
# features and prints the most contributing feature.
feature_importance(explainer, X_test_arousal)

#### Support Vector Regression (RBF-Kernel)

In [None]:
# Run the RBF-kernel SVR helper (rbf_svr_ml) on the valence split and unpack
# its three results.
model, performances, best_params = rbf_svr_ml(
    X_train_valence, y_train_valence,
    X_test_valence, y_test_valence,
)

# Print the best hyperparameters first, then the performance report.
for report_title, report_body in (
    ("\n Paramaters of Best Model: ", best_params),
    ("\n Model Performances: ", performances),
):
    print(report_title)
    pretty_print(report_body)

In [None]:
# Explain the estimator currently bound to `model` with a model-agnostic
# KernelExplainer; the background set is 5 rows drawn from the valence
# training features via shap.sample.
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_valence, 5))
# feature_importance computes mean absolute SHAP values on the valence test
# features and prints the most contributing feature.
feature_importance(explainer, X_test_valence)

In [None]:
# Fit the RBF-kernel SVR helper on the arousal split and unpack its three
# results. This cell sits under the "Support Vector Regression (RBF-Kernel)"
# heading, so it must call rbf_svr_ml; calling rf_ml here would report Random
# Forest results as if they were SVR results.
model, performances, best_params = rbf_svr_ml(X_train_arousal, y_train_arousal, X_test_arousal, y_test_arousal)

# Report the selected hyperparameters.
print("\n Paramaters of Best Model: ")
pretty_print(best_params)

# Report the evaluation results returned by the helper.
print("\n Model Performances: ")
pretty_print(performances)

In [None]:
# Explain the estimator currently bound to `model` with a model-agnostic
# KernelExplainer; the background set is 5 rows drawn from the arousal
# training features via shap.sample.
explainer = shap.KernelExplainer(model.predict, shap.sample(X_train_arousal, 5))
# feature_importance computes mean absolute SHAP values on the arousal test
# features and prints the most contributing feature.
feature_importance(explainer, X_test_arousal)