# Requirements

In [None]:
import numpy as np
import pandas as pd

# One seed value drives both numpy's global RNG and every seeded estimator
SEED = 42
np.random.seed(SEED)

# Preprocessing
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import IsolationForest

# Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.model_selection import learning_curve
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

# Evaluation
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score, log_loss, mean_absolute_error, mean_squared_error, r2_score

# Hyperparameter Tuning
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

# Data Preprocessing and Cleaning

## Preprocessing

The data's first two rows are headers.
The first row contains the configuration numbers, but spaced one apart.
The second row contains the completion time and mistakes under each configuration number column.

In [None]:
# Load the raw experiment export and preview its first rows
df = pd.read_csv(filepath_or_buffer='data.csv')
df.head(n=5)

In [None]:
# Reshape the wide table (one row per participant, two columns per config)
# into a long table with one row per (participant, config) pair.
# The result has 4 columns: nick, config_num, completion_time, mistakes,
# and 7 rows per participant (because there are 7 configs).
NUM_CONFIGS = 7
LONG_COLUMNS = ['nick', 'config_num', 'completion_time', 'mistakes']

# read_csv only consumed the first header row, so row 0 of df is still the
# second header row and the participants start at row 1.
num_participants = df.shape[0] - 1

# Collect plain rows and build the DataFrame once at the end: concatenating
# inside the loop re-copies the whole frame on every iteration.
long_rows = []
for i in range(num_participants):
    participant = df.iloc[i + 1]

    for config_num in range(NUM_CONFIGS):
        # Column 0 holds the nick; every config then owns two adjacent
        # columns: completion time first, mistakes second.
        completion_time_col = 2 * config_num + 1
        mistakes_col = completion_time_col + 1

        long_rows.append([
            participant.iloc[0],
            config_num,
            participant.iloc[completion_time_col],
            participant.iloc[mistakes_col],
        ])

converted_df: pd.DataFrame = pd.DataFrame(long_rows, columns=LONG_COLUMNS)

# The measurement columns share their column with the leftover header row, so
# pandas most likely parsed them as text. Convert them to numbers once here.
# NOTE(review): assumes every measurement cell is numeric or empty - confirm
# against data.csv.
for numeric_col in ('completion_time', 'mistakes'):
    converted_df[numeric_col] = pd.to_numeric(converted_df[numeric_col])

converted_df.head(14)

In [None]:
# Report how the reshape changed the table dimensions
for label, frame in (("Previous shape: ", df), ("New shape: ", converted_df)):
    print(label, frame.shape)

# From here on, df refers to the long-format table
df = converted_df

Our data currently identifies each trial only by its configuration number, which does not tell us what actually changed. We can add three new variables (`size`, `color`, and `position`) to give us more insight into our data.
- The baseline by default has `size: regular`, `color: yellow`, and `position: top`.
- Configs 1 and 2 change the size to `small` and `large`
- Configs 3 and 4 change the color to `blue` and `black`
- Configs 5 and 6 change the position to `sticky` and `bottom`

In [None]:
# Add new columns for size, color, and position, derived from config_num.
# Every configuration changes exactly one property of the baseline
# (regular / yellow / top), so each lookup lists only the configs that differ
# and falls back to the baseline value for all the others.
size_by_config = {1: 'small', 2: 'large'}
color_by_config = {3: 'blue', 4: 'black'}
position_by_config = {5: 'sticky', 6: 'bottom'}

# Configs 5 and 6 only move the button, so their size stays 'regular'
df['size'] = df['config_num'].apply(lambda x: size_by_config.get(x, 'regular'))
df['color'] = df['config_num'].apply(lambda x: color_by_config.get(x, 'yellow'))
df['position'] = df['config_num'].apply(lambda x: position_by_config.get(x, 'top'))

df.head(14)

## Cleaning

The data currently has outliers so we have to remove them from our dataset.

In [None]:
# Work on a copy so the unfiltered data in df stays available for comparison
df_cleaned = df.copy()

# Flag outliers based on completion time (add more columns to
# features_of_interest to take other features into account).
# IsolationForest.predict labels inliers as 1 and anomalies as -1.
features_of_interest = ['completion_time']
clf = IsolationForest(max_samples=100, random_state=SEED)
clf.fit(df_cleaned[features_of_interest])
df_cleaned['anomaly'] = clf.predict(df_cleaned[features_of_interest])

# Plot the measured completion times split by the IsolationForest verdict, so
# the histogram shows exactly the rows that are about to be removed.
completion_times = pd.to_numeric(df_cleaned['completion_time'])
is_outlier = df_cleaned['anomaly'] == -1

# Shared bin edges keep the two histograms directly comparable
bin_edges = np.histogram_bin_edges(completion_times, bins=20)

plt.figure(figsize=(10, 6))

# Normal data in blue, flagged outliers in red
plt.hist(completion_times[~is_outlier], bins=bin_edges, alpha=0.5, color='blue', edgecolor='black', label='Normal')
plt.hist(completion_times[is_outlier], bins=bin_edges, alpha=0.5, color='red', edgecolor='black', label='Outliers')

plt.xlabel('Completion Time')
plt.ylabel('Frequency')
plt.title('Distribution of Completion Time')
plt.xticks(rotation=45, ha='right')  # Rotate x-axis labels for better readability
plt.legend()
plt.tight_layout()  # Adjust layout for better spacing
plt.show()

# Keep only the rows the model considers normal
df_cleaned = df_cleaned[df_cleaned['anomaly'] == 1]

# The helper column is no longer needed once the filter has been applied
df_cleaned = df_cleaned.drop('anomaly', axis=1)

print("Previous shape:", df.shape)
print("New shape after removing outliers based on completion time:", df_cleaned.shape)

df_cleaned.head(14)


## Data Breakdown

Since we've cleaned and preprocessed the data, let's now see the completion times without outliers

In [None]:
# Summarise the measurements per configuration: drop the descriptive columns
# (nick and the derived features) and group what is left by config_num
df_analysis = df_cleaned.drop(columns=['nick', 'size', 'color', 'position']).groupby('config_num')

# Mean completion time of every configuration, reused for both extremes
mean_time_per_config = df_analysis['completion_time'].mean()

# Print best and worst configurations based on completion time
print("Best configuration based on completion time:")
print(mean_time_per_config.idxmin())
print("Worst configuration based on completion time:")
print(mean_time_per_config.idxmax())

df_analysis.describe()


# Model

## Initial Preprocessing

First we need to convert our initial categorical features into numerical values.

In [None]:
# Encode the categorical features as integers (0, 1, 2) for the model.
#   size:     small=1, regular=0, anything else (large)=2
#   color:    blue=1, black=2, anything else (yellow)=0
#   position: sticky=1, top=2, anything else (bottom)=0
size_codes = {'small': 1, 'regular': 0}
color_codes = {'blue': 1, 'black': 2}
position_codes = {'sticky': 1, 'top': 2}

df_cleaned['size'] = df_cleaned['size'].map(lambda label: size_codes.get(label, 2))
df_cleaned['color'] = df_cleaned['color'].map(lambda label: color_codes.get(label, 0))
df_cleaned['position'] = df_cleaned['position'].map(lambda label: position_codes.get(label, 0))

To help us decide whether to keep the mistakes column as a feature of our model, we first check how many mistakes people made in our experiment.

In [None]:
# Number of rows recorded for each distinct mistake count
mistakes_num = df_cleaned.loc[:, 'mistakes'].value_counts()
mistakes_num

In [None]:
# Share of rows in which at least one mistake was made. Computing it from the
# column itself (rather than indexing the counts with 0 and 1) also counts rows
# with more than one mistake and does not fail when a count is absent.
recorded_mistakes = pd.to_numeric(df_cleaned['mistakes']).dropna()
mistake_rate = (recorded_mistakes > 0).mean()
mistake_rate

Since the mistake rate is low, we can leave it out. The features of the model are the size, color, and position, and the predicted value is the completion time. The config_num is also disregarded, as it is redundant. It can be derived from the unique combination of size, color, and position. This avoids multicollinearity in our model.

We can now begin splitting the data for our models to use. We conduct an 80-20 train-test split. A separate validation split is not needed because the hyperparameter search later cross-validates on the training set.

In [None]:
# Features are the three encoded properties; the target is the completion time
feature_columns = ['size', 'color', 'position']
X = df_cleaned[feature_columns]
y = df_cleaned['completion_time']

# 80-20 split with a fixed seed
split_parts = train_test_split(X, y, test_size=0.2, random_state=SEED)
X_train, X_test, y_train, y_test = split_parts

# Count number of samples per split
for label, part in (
    ("Total number of samples:", X),
    ("Number of samples in training set:", X_train),
    ("Number of samples in test set:", X_test),
):
    print(label, len(part))

## Model Training

### Training

We use `RandomForestRegressor` from scikit-learn and fit it on our training data.

In [None]:
# Baseline random forest with default hyperparameters. The seed is fixed so
# the baseline metrics are reproducible regardless of how often or in which
# order the cells are executed.
rf = RandomForestRegressor(random_state=SEED)

rf.fit(X_train, y_train)

### Evaluation

In [None]:
# Predict with the baseline model on both the training and the test features
train_predictions, test_predictions = (rf.predict(part) for part in (X_train, X_test))

After making the predictions, we evaluate our model using the mean absolute error (MAE), mean squared error (MSE), R^2, and mean absolute percentage error (MAPE) metrics.

In [None]:
# Make sure both the targets and the predictions are numeric before scoring
y_test, test_predictions = pd.to_numeric(y_test), pd.to_numeric(test_predictions)

# Score the baseline model on the held-out test set
mae_test = mean_absolute_error(y_test, test_predictions)
mse_test = mean_squared_error(y_test, test_predictions)
r2_test = r2_score(y_test, test_predictions)

# MAPE is kept as a fraction (0.10 means 10%)
relative_errors = (y_test - test_predictions) / np.abs(y_test)
mape = np.mean(np.abs(relative_errors))

# One (metric, value) pair per row
baseline_scores = [
    ('Mean Absolute Error', mae_test),
    ('Mean Squared Error', mse_test),
    ('R^2', r2_test),
    ('Mean Absolute Percentage Error', mape),
]
training_evaluation_df = pd.DataFrame(baseline_scores, columns=['Metric', 'Value'])

print("Training Evaluation Metrics:")
print(training_evaluation_df)

To provide additional context to the MAE metric, we get the range of the completion times. 

In [None]:
# Store the target as float, then read off the span of observed completion times
completion_time_as_float = df_cleaned['completion_time'].astype(float)
df_cleaned['completion_time'] = completion_time_as_float

min_time, max_time = completion_time_as_float.min(), completion_time_as_float.max()
min_time, max_time

### Findings

The relatively low MAE indicates that, on average, the model's predictions are fairly close to the actual values. It is low relative to the range of the target (completion time), which spans from `21.826` to `50.157`.

The MSE metric is fairly high. This may be due to the limited number of samples in the dataset, where the model is not able to identify key patterns in the data to be able to make predictions effectively.

The low R^2 score suggests that the model explains a small portion of variability in the target variable.

The MAPE value indicates that on average, the predictions are `15.90%` off from the actual values.



## Hyperparameter Tuning

We use GridSearchCV to find the best parameters for the model that would lead to the best results.

### Hyperparameter Searching (GridSearchCV)

In [None]:
# Candidate values for the exhaustive hyperparameter search (3^4 = 81 combinations)
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# 5-fold cross-validated grid search. The estimator is seeded so that score
# differences between candidates come from the hyperparameters rather than
# from a different random forest being grown for each fit.
grid_search = GridSearchCV(RandomForestRegressor(random_state=SEED), param_grid, cv=5)

In [None]:
# Run the cross-validated search on the training split only
grid_search.fit(X=X_train, y=y_train)

In [None]:
# Hyperparameter combination that scored best in the search (kept as its own dict)
best_params = dict(grid_search.best_params_)
best_params

The above parameters turned out to be optimal for our model. 

### Model Training

We train a new model with our optimal parameters.

In [None]:
# Refit a forest with the best hyperparameters found by the grid search.
# The seed is fixed so the tuned metrics are reproducible between runs.
tuned_rf = RandomForestRegressor(**best_params, random_state=SEED)
tuned_rf.fit(X_train, y_train)

In [None]:
# Predict with the tuned model on both the training and the test features
train_predictions, test_predictions = (tuned_rf.predict(part) for part in (X_train, X_test))

### Evaluation

In [None]:
# Make sure both the targets and the predictions are numeric before scoring
y_test, test_predictions = pd.to_numeric(y_test), pd.to_numeric(test_predictions)

# Score the tuned model on the held-out test set
mae_test = mean_absolute_error(y_test, test_predictions)
mse_test = mean_squared_error(y_test, test_predictions)
r2_test = r2_score(y_test, test_predictions)

# MAPE is kept as a fraction (0.10 means 10%)
relative_errors = (y_test - test_predictions) / np.abs(y_test)
mape = np.mean(np.abs(relative_errors))

# One (metric, value) pair per row
tuned_scores = [
    ('Mean Absolute Error', mae_test),
    ('Mean Squared Error', mse_test),
    ('R^2', r2_test),
    ('Mean Absolute Percentage Error', mape),
]
tuned_evaluation_df = pd.DataFrame(tuned_scores, columns=['Metric', 'Value'])

print("Tuned Evaluation Metrics:")
print(tuned_evaluation_df)

In [None]:
# Compare the evaluation metrics of the baseline and tuned models by joining
# the two metric tables on the 'Metric' column
evaluation_metrics_df = pd.merge(training_evaluation_df, tuned_evaluation_df, on='Metric', suffixes=('_baseline', '_tuned'))

# Add a column in which a positive value always means "the tuned model is
# better": error metrics improve when they shrink, whereas R^2 improves when
# it grows, so its difference is taken in the opposite direction.
higher_is_better = evaluation_metrics_df['Metric'].isin(['R^2'])
baseline_minus_tuned = evaluation_metrics_df['Value_baseline'] - evaluation_metrics_df['Value_tuned']
evaluation_metrics_df['Improvement'] = baseline_minus_tuned.where(~higher_is_better, -baseline_minus_tuned)

evaluation_metrics_df

After getting the evaluation metrics of the baseline model and the hyperparameter tuned model, we observe:
- `01.43%` improvement in *Mean Absolute Error (MAE)*
- `16.26%` improvement in *Mean Squared Error (MSE)*
- `-00.35%` improvement in *R^2*; and
- `00.07%` improvement in *Mean Absolute Percentage Error (MAPE)*

### Findings

The evaluation metrics of the tuned model are very similar to those of the original model. The MAE and MSE are slightly lower for the tuned model than for the original model, which indicates that the tuned model's predictions are slightly closer to the actual values.

The R^2 score is slightly higher for the tuned model, which suggests that it explains more variance in the target variable compared to the original model.

The MAPE is slightly lower for the tuned model, indicating that the predicted completion times are slightly closer to the actual values.

Overall, despite the slight improvements, the tuning process did not significantly improve the model's performance. One possible reason is that the model was already performing near its best even before the tuning. The nature of the dataset, such as the features and the number of samples, is another possible reason for the insignificant difference in model performance before and after the tuning.

# Best Features

With our model trained, we can also figure out how important each feature is in predicting the completion time. Let's see what these are.

In [None]:
# Feature importances of the tuned forest as a long-format table
# (one row per feature, most important first) as expected by seaborn
importances_df = (
    pd.DataFrame(tuned_rf.feature_importances_, index=['size', 'color', 'position'], columns=['Importance'])
    .sort_values('Importance', ascending=False)
    .reset_index()
    .rename(columns={'index': 'Feature'})
)

# Horizontal bar plot of feature importances
plt.figure(figsize=(8, 2))
importance_ax = sns.barplot(x='Importance', y="Feature", data=importances_df, hue=importances_df.index, dodge=False)
importance_ax.set_title('Feature Importances')
importance_ax.set_xlabel('Importance')
importance_ax.set_ylabel('Feature')
plt.tight_layout()
plt.show()

# Print values
importances_df

It appears that `color` plays the largest role in affecting a participant's completion time, where it contributes `37.03%` in the model's predictions.

We can also figure out which features will provide us the lowest completion time. Let's grab the sample with the lowest completion time and see what these are.

In [None]:
# Predict the completion time of every possible encoded (size, color, position)
# combination and report the fastest and the slowest one.
# NOTE(review): the experiment only covered 7 configurations, so most of the 27
# combinations below were never observed and their predictions are extrapolations.
size_values = [0, 1, 2]
color_values = [0, 1, 2]
position_values = [0, 1, 2]

# Combine all possible values of size, color, and position
all_combinations = [(size, color, position) for size in size_values for color in color_values for position in position_values]

# Hand the grid to the model under the training column names, so it receives
# the same feature layout it was fitted on
combinations_df = pd.DataFrame(all_combinations, columns=['size', 'color', 'position'])
completion_times = tuned_rf.predict(combinations_df)

# Locate the fastest and the slowest combination
min_completion_time_idx = np.argmin(completion_times)
max_completion_time_idx = np.argmax(completion_times)
min_completion_time = completion_times[min_completion_time_idx]
max_completion_time = completion_times[max_completion_time_idx]
best_size, best_color, best_position = all_combinations[min_completion_time_idx]
worst_size, worst_color, worst_position = all_combinations[max_completion_time_idx]

# One row per combination. The best and worst rows are already part of this
# table, so they are selected below instead of being appended a second time.
completion_times_df = pd.DataFrame({
    'Size': [size for size, _, _ in all_combinations],
    'Color': [color for _, color, _ in all_combinations],
    'Position': [position for _, _, position in all_combinations],
    'Completion Time': completion_times
})

# Print the best and worst completion times and their corresponding configurations through the df
print("Best Configuration:")
print(completion_times_df[completion_times_df['Completion Time'] == min_completion_time])

print("\nWorst Configuration:")
print(completion_times_df[completion_times_df['Completion Time'] == max_completion_time])

# Plot the completion times for all possible combinations
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111, projection='3d')

# Each axis is one encoded feature; the marker color is the predicted time
sc = ax.scatter(completion_times_df['Size'], completion_times_df['Color'], completion_times_df['Position'], c=completion_times_df['Completion Time'], cmap='viridis', s=100)

# Add labels and title
ax.set_xlabel('Size')
ax.set_ylabel('Color')
ax.set_zlabel('Position')
plt.title('Completion Times for All Possible Combinations')

# Add a colorbar to show the completion time values
cbar = plt.colorbar(sc)
cbar.set_label('Completion Time')

plt.tight_layout()
plt.show()

In [None]:
# Translate the encoded best and worst combinations back into readable labels.
# The lookups must mirror the integer encoding applied before training:
# size regular=0/small=1/large=2, color yellow=0/blue=1/black=2 and
# position bottom=0/sticky=1/top=2.
size_dict = {0: 'Regular', 1: 'Small', 2: 'Large'}
color_dict = {0: 'Yellow', 1: 'Blue', 2: 'Black'}
position_dict = {0: 'Bottom', 1: 'Sticky', 2: 'Top'}

best_size_str = size_dict[best_size]
best_color_str = color_dict[best_color]
best_position_str = position_dict[best_position]

worst_size_str = size_dict[worst_size]
worst_color_str = color_dict[worst_color]
worst_position_str = position_dict[worst_position]

# One row for the best and one for the worst predicted configuration
configuration_df = pd.DataFrame({
    'Configuration': ['Best', 'Worst'],
    'Size': [best_size_str, worst_size_str],
    'Color': [best_color_str, worst_color_str],
    'Position': [best_position_str, worst_position_str],
    'Completion Time': [min_completion_time, max_completion_time]
})

configuration_df

Upon further inspection, we find out that the best configuration was our original `Configuration 1` which was the small button size.

### Heatmap Attention Score and Completion Time Analysis

Please refer to the heatmap_analysis.ipynb file for details on how the attention scores were derived.

In [None]:
# Load the attention scores exported by the heatmap analysis
heatmap_results = pd.read_csv(filepath_or_buffer='heatmap_analysis_results.csv')
heatmap_results

Here, we check the relationship between the attention score of the checkout button and the completion time. 

In [None]:
# Attention score of the checkout button
# (presumably one row per configuration - verify against the CSV)
checkout = heatmap_results['checkout']

# Mean completion time of every configuration
avg_completion_time = df_cleaned.groupby(['config_num'])['completion_time'].mean()

# One marker color per configuration
colors = ['red', 'blue', 'green', 'orange', 'purple', 'yellow', 'pink']

for config_num, color in zip(avg_completion_time.index, colors):
    # Boolean mask that selects the single entry of the current configuration
    is_current_config = avg_completion_time.index == config_num
    plt.scatter(
        checkout[is_current_config],
        avg_completion_time[is_current_config],
        color=color,
        label=f'Config Num: {config_num}',
    )

plt.xlabel('Checkout Attention Score')
plt.ylabel('Average Completion Time')
plt.title('Scatterplot of Checkout Attention Score vs Average Completion Time')
plt.legend()
plt.show()


In [None]:
# Correlation between the mean completion time and the checkout attention
# score; Series.corr pairs the values by index label
ct_checkout_corr = avg_completion_time.corr(other=checkout)
ct_checkout_corr

The negative correlation indicates that as the completion time increases, the attention put on the checkout button decreases. 

Now, we investigate the relationship between the attention score of the quantity dropdowns and the completion time.

In [None]:
# Mean of each of the first seven heatmap columns (one value per column).
# NOTE(review): this averages down the rows; if each row of the CSV is a
# configuration, a per-configuration score would need mean(axis=1) instead -
# confirm against heatmap_analysis_results.csv.
qty_score = heatmap_results.iloc[:, :7].mean(axis=0)

# Mean completion time of every configuration
avg_completion_time = df_cleaned.groupby(['config_num'])['completion_time'].mean()

# One marker color per configuration
colors = ['red', 'blue', 'green', 'orange', 'purple', 'yellow', 'pink']

for config_num, color in zip(avg_completion_time.index, colors):
    # Boolean mask that selects the single entry of the current configuration
    is_current_config = avg_completion_time.index == config_num
    plt.scatter(
        qty_score[is_current_config],
        avg_completion_time[is_current_config],
        color=color,
        label=f'Config Num: {config_num}',
    )

plt.xlabel('Qty Button Attention Score')
plt.ylabel('Average Completion Time')
plt.title('Scatterplot of Qty Button Attention Score vs Average Completion Time')
plt.legend()
plt.show()

In [None]:
# Replace the column-name index with positions 0..n-1 so the scores can be
# paired with the completion times by index label
qty_score = qty_score.reset_index(drop=True)

ct_qty_corr = avg_completion_time.corr(other=qty_score)
ct_qty_corr

The negative correlation indicates that as the completion time increases, the attention put on the quantity dropdowns decreases. The quantity dropdowns have a stronger negative correlation with the completion times compared to the checkout button.

# Model variations

Since our dataset is quite simple (a single configuration number) and its features were derived from that configuration number during preprocessing, let's see how the model behaves with different feature variations.

In [None]:
# Build three model variations, each trained with one of the three features left out.
cleaned_df_no_size, cleaned_df_no_color, cleaned_df_no_position = (
    df_cleaned.drop(columns=left_out) for left_out in ('size', 'color', 'position')
)

# Feature matrices of the variations and the shared target
X_no_size = cleaned_df_no_size[['color', 'position']]
X_no_color = cleaned_df_no_color[['size', 'position']]
X_no_position = cleaned_df_no_position[['size', 'color']]
y = df_cleaned['completion_time']

# A single call splits all matrices on the same shuffled row indices, so the
# three variations share one train/test partition and one target split
(
    X_train_no_size, X_test_no_size,
    X_train_no_color, X_test_no_color,
    X_train_no_position, X_test_no_position,
    y_train, y_test,
) = train_test_split(X_no_size, X_no_color, X_no_position, y, test_size=0.2, random_state=SEED)

# Train one seeded random forest per variation (fit returns the estimator itself)
rf_no_size = RandomForestRegressor(random_state=SEED).fit(X_train_no_size, y_train)
rf_no_color = RandomForestRegressor(random_state=SEED).fit(X_train_no_color, y_train)
rf_no_position = RandomForestRegressor(random_state=SEED).fit(X_train_no_position, y_train)

# Test-set predictions of each variation
test_predictions_no_size = rf_no_size.predict(X_test_no_size)
test_predictions_no_color = rf_no_color.predict(X_test_no_color)
test_predictions_no_position = rf_no_position.predict(X_test_no_position)

# Evaluation metrics of each variation, computed metric by metric
variation_predictions = (test_predictions_no_size, test_predictions_no_color, test_predictions_no_position)

mae_no_size, mae_no_color, mae_no_position = (
    mean_absolute_error(y_test, predicted) for predicted in variation_predictions
)
mse_no_size, mse_no_color, mse_no_position = (
    mean_squared_error(y_test, predicted) for predicted in variation_predictions
)
r2_no_size, r2_no_color, r2_no_position = (
    r2_score(y_test, predicted) for predicted in variation_predictions
)

In [None]:
def _variation_metrics(mae, mse, r2, left_out_feature):
    """Return the metric table of one variation, tagged with the feature it left out."""
    table = pd.DataFrame({
        'Metric': ['Mean Absolute Error', 'Mean Squared Error', 'R^2'],
        'Value': [mae, mse, r2]
    })
    table['Feature'] = left_out_feature
    return table


# One metric table per variation; 'Feature' names the feature that was removed
evaluation_metrics_no_size = _variation_metrics(mae_no_size, mse_no_size, r2_no_size, 'Size')
evaluation_metrics_no_color = _variation_metrics(mae_no_color, mse_no_color, r2_no_color, 'Color')
evaluation_metrics_no_position = _variation_metrics(mae_no_position, mse_no_position, r2_no_position, 'Position')

# Stack the three tables into a single DataFrame
evaluation_metrics_combined = pd.concat([evaluation_metrics_no_size, evaluation_metrics_no_color, evaluation_metrics_no_position])

evaluation_metrics_combined

In [None]:
# Predict the fastest completion time and its feature values with each of the
# two-feature model variations.
# Every variation has two features with three encoded values each, so all of
# them can share one grid of (first feature, second feature) pairs.
feature_one_values = [0, 1, 2]
feature_two_values = [0, 1, 2]

all_combinations = [(feature_one, feature_two) for feature_one in feature_one_values for feature_two in feature_two_values]

# Predict completion times for all possible combinations using the models trained on the variations
completion_times_no_size = rf_no_size.predict(all_combinations)
completion_times_no_color = rf_no_color.predict(all_combinations)
completion_times_no_position = rf_no_position.predict(all_combinations)

# Store the predictions per variation. The column names follow the feature
# order each model was trained with:
#   no_size     -> (color, position)
#   no_color    -> (size, position)
#   no_position -> (size, color)
completion_times_no_size_df = pd.DataFrame({
    'Color': [feature_one for feature_one, _ in all_combinations],
    'Position': [feature_two for _, feature_two in all_combinations],
    'Completion Time': completion_times_no_size
})

completion_times_no_color_df = pd.DataFrame({
    'Size': [feature_one for feature_one, _ in all_combinations],
    'Position': [feature_two for _, feature_two in all_combinations],
    'Completion Time': completion_times_no_color
})

completion_times_no_position_df = pd.DataFrame({
    'Size': [feature_one for feature_one, _ in all_combinations],
    'Color': [feature_two for _, feature_two in all_combinations],
    'Completion Time': completion_times_no_position
})

# Add the missing feature as -1 to each variation
completion_times_no_size_df['Size'] = -1
completion_times_no_color_df['Color'] = -1
completion_times_no_position_df['Position'] = -1

# Find the lowest completion time for each variation
min_completion_time_no_size = min(completion_times_no_size)
min_completion_time_no_color = min(completion_times_no_color)
min_completion_time_no_position = min(completion_times_no_position)

# Get feature values for the best completion time for each variation
best_no_size_features_idx = np.argmin(completion_times_no_size)
best_no_color_features_idx = np.argmin(completion_times_no_color)
best_no_position_features_idx = np.argmin(completion_times_no_position)

best_no_size_features = all_combinations[best_no_size_features_idx]
best_no_color_features = all_combinations[best_no_color_features_idx]
best_no_position_features = all_combinations[best_no_position_features_idx]

# Unpack each best pair under the names of the features it really contains, so
# every value ends up in the matching column of the summary table
best_color_no_size, best_position_no_size = best_no_size_features
best_size_no_color, best_position_no_color = best_no_color_features
best_size_no_position, best_color_no_position = best_no_position_features

# Combine the best configurations for each variation into a single DataFrame;
# 'Feature' names the feature that was left out and -1 marks it as absent
best_configurations_df = pd.DataFrame({
    'Feature': ['Size', 'Color', 'Position'],
    'Best Completion Time': [min_completion_time_no_size, min_completion_time_no_color, min_completion_time_no_position],
    'Size': [-1, best_size_no_color, best_size_no_position],
    'Color': [best_color_no_size, -1, best_color_no_position],
    'Position': [best_position_no_size, best_position_no_color, -1]
})

best_configurations_df

## Config number instead of features

In [None]:
# Use the configuration number as the only feature instead of size, color, and position
X = df_cleaned[['config_num']]
y = df_cleaned['completion_time']

# Same 80-20 seeded split as for the other models
split_parts = train_test_split(X, y, test_size=0.2, random_state=SEED)
X_train, X_test, y_train, y_test = split_parts

# Fit a seeded forest and predict the held-out rows
rf_config_num = RandomForestRegressor(random_state=SEED)
rf_config_num.fit(X_train, y_train)
test_predictions = rf_config_num.predict(X_test)

# Calculate evaluation metrics for the test set
mae_test = mean_absolute_error(y_test, test_predictions)
mse_test = mean_squared_error(y_test, test_predictions)
r2_test = r2_score(y_test, test_predictions)

# One (metric, value) pair per row
config_num_scores = [
    ('Mean Absolute Error', mae_test),
    ('Mean Squared Error', mse_test),
    ('R^2', r2_test),
]
evaluation_metrics_config_num = pd.DataFrame(config_num_scores, columns=['Metric', 'Value'])

evaluation_metrics_config_num

In [None]:
# Grid search for the model that only uses config_num
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# 5-fold cross-validated search. The estimator is seeded, like the untuned
# config_num model, so that candidates are compared on equal terms and the
# outcome is reproducible.
grid_search = GridSearchCV(RandomForestRegressor(random_state=SEED), param_grid, cv=5)

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_

# Train a random forest model on the config_num feature with the best
# hyperparameters, using the same seed as the untuned model
tuned_rf_config_num = RandomForestRegressor(**best_params, random_state=SEED)
tuned_rf_config_num.fit(X_train, y_train)

test_predictions = tuned_rf_config_num.predict(X_test)

# Calculate evaluation metrics for the test set
mae_test = mean_absolute_error(y_test, test_predictions)
mse_test = mean_squared_error(y_test, test_predictions)
r2_test = r2_score(y_test, test_predictions)

# Collect the metrics of the tuned config_num model in a table
tuned_evaluation_metrics_config_num = pd.DataFrame({
    'Metric': ['Mean Absolute Error', 'Mean Squared Error', 'R^2'],
    'Value': [mae_test, mse_test, r2_test]
})

tuned_evaluation_metrics_config_num

In [None]:
# Predict the completion time of every configuration number with both the
# untuned and the tuned config_num model, then report the extremes of each.
config_nums = np.arange(0, 7)

# The models expect a 2-D feature matrix with a single column
config_features = config_nums.reshape(-1, 1)
completion_times_config_num = rf_config_num.predict(config_features)
completion_times_config_num_tuned = tuned_rf_config_num.predict(config_features)

# Positions of the fastest / slowest prediction of the untuned model
fastest_idx = np.argmin(completion_times_config_num)
slowest_idx = np.argmax(completion_times_config_num)
min_completion_time_config_num = completion_times_config_num[fastest_idx]
max_completion_time_config_num = completion_times_config_num[slowest_idx]
best_config_num = config_nums[fastest_idx]
worst_config_num = config_nums[slowest_idx]

# Same for the tuned model
fastest_idx_tuned = np.argmin(completion_times_config_num_tuned)
slowest_idx_tuned = np.argmax(completion_times_config_num_tuned)
min_completion_time_config_num_tuned = completion_times_config_num_tuned[fastest_idx_tuned]
max_completion_time_config_num_tuned = completion_times_config_num_tuned[slowest_idx_tuned]
best_config_num_tuned = config_nums[fastest_idx_tuned]
worst_config_num_tuned = config_nums[slowest_idx_tuned]

# Predicted completion time of every configuration, per model
completion_times_config_num_df = pd.DataFrame({
    'Config Num': config_nums,
    'Completion Time': completion_times_config_num
})

completion_times_config_num_tuned_df = pd.DataFrame({
    'Config Num': config_nums,
    'Completion Time': completion_times_config_num_tuned
})

# Best and worst configuration of each model
configurations_df = pd.DataFrame({
    'Configuration': ['Best', 'Worst'],
    'Config Num': [best_config_num, worst_config_num],
    'Completion Time': [min_completion_time_config_num, max_completion_time_config_num]
})

configurations_tuned_df = pd.DataFrame({
    'Configuration': ['Best', 'Worst'],
    'Config Num': [best_config_num_tuned, worst_config_num_tuned],
    'Completion Time': [min_completion_time_config_num_tuned, max_completion_time_config_num_tuned]
})

# Print the best and worst completion times and their corresponding configurations
is_best = configurations_df['Completion Time'] == min_completion_time_config_num
is_worst = configurations_df['Completion Time'] == max_completion_time_config_num
is_best_tuned = configurations_tuned_df['Completion Time'] == min_completion_time_config_num_tuned
is_worst_tuned = configurations_tuned_df['Completion Time'] == max_completion_time_config_num_tuned

print("Best Configuration:")
print(configurations_df[is_best])

print("\nWorst Configuration:")
print(configurations_df[is_worst])

print("\nBest Configuration After Tuning:")
print(configurations_tuned_df[is_best_tuned])

print("\nWorst Configuration After Tuning:")
print(configurations_tuned_df[is_worst_tuned])