In [22]:
# Colab only: attach Google Drive so the project's relative paths resolve.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# Navigate to the project folder
# %cd YOUR PATH TO THE PROJECT FOLDER IN GOOGLE COLAB

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Colab_Notebooks/footprints_mtr/journal


In [23]:
import numpy as np
import pandas as pd
from sklearn.dummy import DummyClassifier
from sklearn.metrics import f1_score

In [24]:
from IPython.display import display, Latex

In [33]:
# Project-relative locations used by the cells below (relative to the current
# working directory of the notebook).
# Plain string literals: none of these contain placeholders, so no f-prefix.
DATA_PATH = "Data/Experiment_2"          # train/test CSV files
RESULT_PATH = "Bradley Trees/Results"    # Bradley Tree prediction CSV files
SAVE_PATH = "Bradley Trees/Results"      # output location (currently same folder as RESULT_PATH)
SAVE_PATH

'Bradley Trees/Results'

In [34]:
# Ground-truth table for the test split, indexed by the (f_id, i_id) pair.
y_test_index_cols = ["f_id", "i_id"]
y_test = pd.read_csv(f"{DATA_PATH}/y_test.csv", index_col=y_test_index_cols)

# Show a label, the first rows and the shape, one per line.
print("Preview y_test: ", y_test.head(), y_test.shape, sep="\n")

Preview y_test: 
               EMNA       PSO  RandomSearch
f_id i_id                                  
1    2     1.446916  1.147932      0.400261
     3     1.237159  1.054423      0.383157
     21    1.222564  1.050437      0.360819
     22    1.256922  1.065896      0.404110
     30    0.731482  0.689249      0.395334
(120, 3)


In [35]:
# Bradley Tree predictions for the test split: read the CSV and expose its
# "label" column under the name "predicted_algorithm".
y_predicted_best = pd.read_csv(f"{RESULT_PATH}/predictions_test.csv").rename(
    columns={"label": "predicted_algorithm"}
)
# The rows are given the (f_id, i_id) index of y_test positionally.
# NOTE(review): this assumes the CSV rows are in the same order as y_test — confirm.
y_predicted_best.index = y_test.index

print("Preview predictions: ", y_predicted_best.head(), y_predicted_best.shape, sep="\n")

Preview predictions: 
          predicted_algorithm
f_id i_id                    
1    2           RandomSearch
     3           RandomSearch
     21          RandomSearch
     22          RandomSearch
     30          RandomSearch
(120, 1)


In [36]:
# Per-row node assignment for the test split: read the CSV and expose its
# "label" column under the name "node_id".
y_predicted_node = pd.read_csv(f"{RESULT_PATH}/predictions_test_node.csv").rename(
    columns={"label": "node_id"}
)
# The rows are given the (f_id, i_id) index of y_test positionally.
# NOTE(review): this assumes the CSV rows are in the same order as y_test — confirm.
y_predicted_node.index = y_test.index

print("Preview predictions: ", y_predicted_node.head(5), y_predicted_node.shape, sep="\n")

Preview predictions: 
           node_id
f_id i_id         
1    2           4
     3           4
     21          4
     22          5
     30          5
(120, 1)


### LaTeX table of the Bradley Tree performance

In [37]:
# Load the feature matrices and the training targets, all indexed by (f_id, i_id).
INDEX_COLS = ["f_id", "i_id"]
X_train = pd.read_csv(f"{DATA_PATH}/X_train.csv", index_col=INDEX_COLS)
X_test = pd.read_csv(f"{DATA_PATH}/X_test.csv", index_col=INDEX_COLS)
y_train = pd.read_csv(f"{DATA_PATH}/y_train.csv", index_col=INDEX_COLS)

# "true_best" is the name of the column holding the smallest value in each row
# (presumably lower score = better algorithm — confirm against the data description).
# The label column itself is excluded before the reduction: y_test is mutated in
# place, so re-running this cell would otherwise feed the string "true_best"
# column into idxmin and fail. errors="ignore" makes the drop a no-op on the
# first run, when the column does not exist yet.
y_train["true_best"] = y_train.drop(columns="true_best", errors="ignore").idxmin(axis=1)
y_test["true_best"] = y_test.drop(columns="true_best", errors="ignore").idxmin(axis=1)

In [38]:
# Baseline model: always predicts the most frequent "true_best" label seen in
# training. Construction and fitting are chained; the fitted estimator is left
# as the last expression so the cell still displays it.
dummy_clf = DummyClassifier(strategy="most_frequent").fit(X_train, y_train["true_best"])
dummy_clf

In [40]:
# Weighted F1 of the baseline and of the Bradley Tree against the same test labels.
# All rows are built in a single DataFrame so the index is unique (0, 1, ...);
# concatenating one-row frames that each use index=[0] produced duplicate
# index labels in the rendered table.
y_true = y_test["true_best"]
predictions_by_model = {
    "dummy": dummy_clf.predict(X_test),
    "BT": y_predicted_best["predicted_algorithm"],
}
score_df = pd.DataFrame(
    [
        {"model": model_name, "f1_score": f1_score(y_true, y_pred, average="weighted")}
        for model_name, y_pred in predictions_by_model.items()
    ]
)
display(Latex(score_df.to_latex()))

<IPython.core.display.Latex object>

### LaTeX table summarizing the Bradley Tree predictions

In [13]:
# Put the predicted algorithm and the node id side by side, matched on the
# shared (f_id, i_id) index, then turn the index levels back into columns.
merged_predictions = y_predicted_best.merge(
    y_predicted_node, left_index=True, right_index=True
)
df = merged_predictions.reset_index()
df.head()

Unnamed: 0,f_id,i_id,predicted_algorithm,node_id
0,1,2,RandomSearch,4
1,1,3,RandomSearch,4
2,1,21,RandomSearch,4
3,1,22,RandomSearch,5
4,1,30,RandomSearch,5


In [14]:
# Sanity check: (rows, columns) of the current `df`.
df.shape

(120, 4)

In [15]:
# Collapse the merged predictions to one entry per (predicted_algorithm, node_id)
# pair; each entry is the group's distinct f_id values joined into a single
# comma-separated string.
# Note: this rebinds `df` from the merged frame to the grouped result.
def _join_ids(ids):
    """Render an iterable of ids as 'a, b, c'."""
    return ', '.join(str(value) for value in ids)


unique_f_ids = df.groupby(['predicted_algorithm', 'node_id'])['f_id'].unique()
df = unique_f_ids.map(_join_ids)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,f_id
predicted_algorithm,node_id,Unnamed: 2_level_1
EMNA,20,19
EMNA,21,9
EMNA,23,"10, 19"
RandomSearch,4,"1, 3, 6, 7, 8, 12, 13, 15"
RandomSearch,5,"1, 3, 4, 6, 8, 12, 13, 15, 18"


In [16]:
# Convert the grouped summary to LaTeX table source and hand it to IPython's
# Latex display object.
summary_latex = df.to_latex()
display(Latex(summary_latex))

<IPython.core.display.Latex object>