In [6]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC

# Seed NumPy's global random number generator so repeated runs draw the same numbers.
np.random.seed(42)
# Seed value; presumably meant to be passed as `random_state=` to scikit-learn estimators.
random_state = 42

In [7]:
# Names of the 21 models: r00..r10 with g fixed at 10, followed by g09..g00 with r fixed at 10.
modelNames = (
    [f"r{r:02d}_g10_3000k" for r in range(11)]
    + [f"r10_g{g:02d}_3000k" for g in range(9, -1, -1)]
)


# Directory holding the exported "act" and "shap" CSV files.
# NOTE(review): absolute, machine-specific path; change it here when running elsewhere.
DATA_DIR = "C:/Projects/public/XAI_Master/datasets"


def load_dataset(name):
    """Read ``<DATA_DIR>/<name>.csv`` into a DataFrame.

    The first row is used as the header, no column is used as the index, and
    pandas' high-precision float converter is requested so every dataset is
    parsed with the same options.

    Parameters
    ----------
    name : str
        File name without the ``.csv`` extension, e.g. ``"act_r00_g10_3000k"``.

    Returns
    -------
    pandas.DataFrame
    """
    return pd.read_csv(f"{DATA_DIR}/{name}.csv", header=0, index_col=None, float_precision='high')


# all 21 act datasets
act_r00_g10_3000k = load_dataset("act_r00_g10_3000k")
act_r01_g10_3000k = load_dataset("act_r01_g10_3000k")
act_r02_g10_3000k = load_dataset("act_r02_g10_3000k")
act_r03_g10_3000k = load_dataset("act_r03_g10_3000k")
act_r04_g10_3000k = load_dataset("act_r04_g10_3000k")
act_r05_g10_3000k = load_dataset("act_r05_g10_3000k")
act_r06_g10_3000k = load_dataset("act_r06_g10_3000k")
act_r07_g10_3000k = load_dataset("act_r07_g10_3000k")
act_r08_g10_3000k = load_dataset("act_r08_g10_3000k")
act_r09_g10_3000k = load_dataset("act_r09_g10_3000k")
act_r10_g00_3000k = load_dataset("act_r10_g00_3000k")
act_r10_g01_3000k = load_dataset("act_r10_g01_3000k")
act_r10_g02_3000k = load_dataset("act_r10_g02_3000k")
act_r10_g03_3000k = load_dataset("act_r10_g03_3000k")
act_r10_g04_3000k = load_dataset("act_r10_g04_3000k")
act_r10_g05_3000k = load_dataset("act_r10_g05_3000k")
act_r10_g06_3000k = load_dataset("act_r10_g06_3000k")
act_r10_g07_3000k = load_dataset("act_r10_g07_3000k")
act_r10_g08_3000k = load_dataset("act_r10_g08_3000k")
act_r10_g09_3000k = load_dataset("act_r10_g09_3000k")
act_r10_g10_3000k = load_dataset("act_r10_g10_3000k")

# all 21 shap datasets
shap_r00_g10_3000k = load_dataset("shap_r00_g10_3000k")
shap_r01_g10_3000k = load_dataset("shap_r01_g10_3000k")
shap_r02_g10_3000k = load_dataset("shap_r02_g10_3000k")
shap_r03_g10_3000k = load_dataset("shap_r03_g10_3000k")
shap_r04_g10_3000k = load_dataset("shap_r04_g10_3000k")
shap_r05_g10_3000k = load_dataset("shap_r05_g10_3000k")
shap_r06_g10_3000k = load_dataset("shap_r06_g10_3000k")
shap_r07_g10_3000k = load_dataset("shap_r07_g10_3000k")
shap_r08_g10_3000k = load_dataset("shap_r08_g10_3000k")
shap_r09_g10_3000k = load_dataset("shap_r09_g10_3000k")
shap_r10_g00_3000k = load_dataset("shap_r10_g00_3000k")
shap_r10_g01_3000k = load_dataset("shap_r10_g01_3000k")
shap_r10_g02_3000k = load_dataset("shap_r10_g02_3000k")
shap_r10_g03_3000k = load_dataset("shap_r10_g03_3000k")
shap_r10_g04_3000k = load_dataset("shap_r10_g04_3000k")
shap_r10_g05_3000k = load_dataset("shap_r10_g05_3000k")
shap_r10_g06_3000k = load_dataset("shap_r10_g06_3000k")
shap_r10_g07_3000k = load_dataset("shap_r10_g07_3000k")
shap_r10_g08_3000k = load_dataset("shap_r10_g08_3000k")
shap_r10_g09_3000k = load_dataset("shap_r10_g09_3000k")
shap_r10_g10_3000k = load_dataset("shap_r10_g10_3000k")


# Preview one of the loaded frames (rendered because it is the cell's last expression).
act_r10_g10_3000k.head()

Unnamed: 0,target,00r,00g,00b,01r,01g,01b,02r,02g,02b,...,a119,a120,a121,a122,a123,a124,a125,a126,a127,a128
0,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-0.346661,-0.041218,-0.399761,-0.15461,-0.082005,-0.385907,-0.02098,-0.095356,-0.057807,-0.046994
1,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-0.353112,-0.038465,-0.307965,-0.113183,-0.069152,-0.372982,-0.022941,-0.116544,-0.052728,-0.045544
2,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,...,-0.259241,-0.046841,-0.364604,-0.115042,-0.097405,-0.301718,-0.025251,-0.078899,-0.06258,-0.045875
3,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-0.183448,-0.045546,-0.467831,-0.117987,-0.079052,-0.253726,-0.027061,-0.006901,-0.054231,-0.047483
4,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-0.182716,-0.040429,-0.35208,-0.109351,-0.080584,-0.31818,-0.023514,0.003143,-0.05569,-0.045628


#### Create shap background data

In [8]:
# NOTE(review): disabled one-off export. When enabled it appears to concatenate all 21 act
# datasets, drop the a1..a128 and target columns, and write the remaining columns to a CSV.
#all_act_datasets = pd.concat([act_r00_g10_3000k, act_r01_g10_3000k, act_r02_g10_3000k, act_r03_g10_3000k, act_r04_g10_3000k, act_r05_g10_3000k, act_r06_g10_3000k, act_r07_g10_3000k, act_r08_g10_3000k, act_r09_g10_3000k, act_r10_g00_3000k, act_r10_g01_3000k, act_r10_g02_3000k, act_r10_g03_3000k, act_r10_g04_3000k, act_r10_g05_3000k, act_r10_g06_3000k, act_r10_g07_3000k, act_r10_g08_3000k, act_r10_g09_3000k, act_r10_g10_3000k])
#only_pixels_dataset = all_act_datasets.drop([f'a{i}' for i in range(1, 129)], axis=1).copy()
#only_pixels_dataset = only_pixels_dataset.drop(['target'], axis=1)

# save csv
#only_pixels_dataset.to_csv("C:/Projects/public/XAI_Master/datasets/42000_gamestates.csv", index=False)

In [None]:
# Merge the per-model datasets into train/test splits for every feature set compared below.
# The frames are concatenated in a fixed order; no shuffling is performed here.


def _split_target(frame):
    """Return ``(features, target)``: ``frame`` without its 'target' column, and that column."""
    return frame.drop('target', axis=1), frame['target']


def _split_and_scale(train_frame, test_frame):
    """Split both frames into features/target and min-max scale the features.

    The scaler is fitted on the training features only and then applied
    unchanged to the test features.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_test, y_test, scaler)`` where the feature frames
        are new DataFrames carrying the original column names.
    """
    x_train, y_train = _split_target(train_frame)
    x_test, y_test = _split_target(test_frame)
    scaler = MinMaxScaler()
    x_train = pd.DataFrame(scaler.fit_transform(x_train), columns=x_train.columns)
    x_test = pd.DataFrame(scaler.transform(x_test), columns=x_test.columns)
    return x_train, y_train, x_test, y_test, scaler


# --- Activation datasets ---
act_train = pd.concat([act_r01_g10_3000k, act_r02_g10_3000k, act_r03_g10_3000k, act_r04_g10_3000k, act_r05_g10_3000k, act_r06_g10_3000k, act_r07_g10_3000k, act_r08_g10_3000k, act_r09_g10_3000k, act_r10_g01_3000k, act_r10_g02_3000k, act_r10_g03_3000k, act_r10_g04_3000k, act_r10_g05_3000k, act_r10_g06_3000k, act_r10_g07_3000k, act_r10_g08_3000k, act_r10_g09_3000k], ignore_index=True)
act_test = pd.concat([act_r00_g10_3000k, act_r10_g00_3000k], ignore_index=True) # leave out dataset: act_r10_g10_3000k
act_x_train, act_y_train, act_x_test, act_y_test, act_scaler = _split_and_scale(act_train, act_test)

# --- SHAP datasets ---
shap_train = pd.concat([shap_r01_g10_3000k, shap_r02_g10_3000k, shap_r03_g10_3000k, shap_r04_g10_3000k, shap_r05_g10_3000k, shap_r06_g10_3000k, shap_r07_g10_3000k, shap_r08_g10_3000k, shap_r09_g10_3000k, shap_r10_g01_3000k, shap_r10_g02_3000k, shap_r10_g03_3000k, shap_r10_g04_3000k, shap_r10_g05_3000k, shap_r10_g06_3000k, shap_r10_g07_3000k, shap_r10_g08_3000k, shap_r10_g09_3000k], ignore_index=True)
shap_test = pd.concat([shap_r00_g10_3000k, shap_r10_g00_3000k], ignore_index=True) # leave out dataset: shap_r10_g10_3000k
shap_x_train, shap_y_train, shap_x_test, shap_y_test, shap_scaler = _split_and_scale(shap_train, shap_test)

# --- "shap_sum": add each (scaled) SHAP column onto its matching (scaled) pixel column ---
# Column names are built as "<i><j><c>" and "shap<i><j><c>" for i, j in 0..6 and c in r/g/b.
shap_cols = [f"shap{i}{j}{c}" for i in range(7) for j in range(7) for c in ['r', 'g', 'b']]
non_shap_cols = [f"{i}{j}{c}" for i in range(7) for j in range(7) for c in ['r', 'g', 'b']]
shap_sum_x_train = shap_x_train.drop(columns=shap_cols).copy()
# `.values` strips the column labels so the addition is positional rather than label-aligned.
shap_sum_x_train[non_shap_cols] += shap_x_train[shap_cols].values
shap_sum_x_test = shap_x_test.drop(columns=shap_cols).copy()
shap_sum_x_test[non_shap_cols] += shap_x_test[shap_cols].values
shap_sum_y_train = shap_y_train
shap_sum_y_test = shap_y_test

# --- Pixel-only baselines: the unscaled frames with the act / shap columns removed ---
act_cols = [f'a{i}' for i in range(1, 129)]
pixels_train_from_act = act_train.drop(act_cols, axis=1).copy()
pixels_train_from_shap = shap_train.drop(shap_cols, axis=1).copy()
pixels_test_from_act = act_test.drop(act_cols, axis=1).copy()
pixels_test_from_shap = shap_test.drop(shap_cols, axis=1).copy()
#pixels_train = pd.concat([pixels_train_from_act, pixels_train_from_shap], axis=1)
#pixels_test = pd.concat([pixels_test_from_act, pixels_test_from_shap], axis=1)
pixels_act_x_train, pixels_act_y_train = _split_target(pixels_train_from_act)
pixels_act_x_test, pixels_act_y_test = _split_target(pixels_test_from_act)
pixels_shap_x_train, pixels_shap_y_train = _split_target(pixels_train_from_shap)
pixels_shap_x_test, pixels_shap_y_test = _split_target(pixels_test_from_shap)

# Apply MinMaxScaler
#pixels_scaler = MinMaxScaler()
#pixels_x_train = pd.DataFrame(pixels_scaler.fit_transform(pixels_x_train), columns=pixels_x_train.columns)
#pixels_x_test = pd.DataFrame(pixels_scaler.transform(pixels_x_test), columns=pixels_x_test.columns)


# NOTE(review): only a cell's last expression is rendered, so the first .head() below shows
# nothing; wrap it in display(...) if both previews are wanted.
shap_sum_x_train.head()
pixels_train_from_act.head()

Unnamed: 0,target,00r,00g,00b,01r,01g,01b,02r,02g,02b,...,63b,64r,64g,64b,65r,65g,65b,66r,66g,66b
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [None]:
def train_and_test(model, x_train, y_train, x_test, y_test):
    """Fit ``model`` on the training split and return its accuracy on the test split.

    Floating-point predictions (e.g. from a regressor) are rounded to the
    nearest integer and clipped to the range [-1, 1] before scoring, so a
    regression model can be scored like a classifier.

    Parameters
    ----------
    model : estimator exposing ``fit(x, y)`` and ``predict(x)``
    x_train, y_train : training features and targets
    x_test, y_test : test features and targets

    Returns
    -------
    float
        The value returned by ``accuracy_score(y_test, y_pred)``.
    """
    model.fit(x_train, y_train)
    y_pred = np.asarray(model.predict(x_test))
    # Check the array dtype instead of the type of the first element: this also covers
    # float32/float16 output (not subclasses of Python's float) and empty predictions.
    if np.issubdtype(y_pred.dtype, np.floating):
        y_pred = np.clip(np.round(y_pred), -1, 1) # Round y_pred to int, max 1 and min -1
    return accuracy_score(y_test, y_pred)

# Feature sets to evaluate; each 'data' tuple is (x_train, y_train, x_test, y_test).
dataDict = {
    'pixels_act': {
        'data': (pixels_act_x_train, pixels_act_y_train, pixels_act_x_test, pixels_act_y_test)},
    'pixels_shap': {
        'data': (pixels_shap_x_train, pixels_shap_y_train, pixels_shap_x_test, pixels_shap_y_test)},
    'act': {
        'data': (act_x_train, act_y_train, act_x_test, act_y_test)},
    'shap': {
        'data': (shap_x_train, shap_y_train, shap_x_test, shap_y_test)},
    'shap_sum': {
        'data': (shap_sum_x_train, shap_sum_y_train, shap_sum_x_test, shap_sum_y_test)}}

# Factories rather than instances, so every (dataset, model) pair gets a new, unfitted estimator.
# The stochastic estimators receive the notebook-wide seed so their results do not depend on
# how much of NumPy's global random stream earlier cells or iterations have consumed.
models = {
    'linear_regression': lambda: LinearRegression(),
    'decision_tree': lambda: DecisionTreeClassifier(random_state=random_state),
    'random_forest': lambda: RandomForestClassifier(random_state=random_state),
    'hist_gradient_boosting': lambda: HistGradientBoostingClassifier(random_state=random_state)}

# Iterate over dataDict itself (insertion order) so newly added datasets are picked up automatically.
for dataset, entry in dataDict.items():
    for model_name, model_fn in models.items():
        model = model_fn()  # New instance each time; nothing fitted carries over between runs
        accuracy = train_and_test(model, *entry['data'])
        print(f"{dataset}: {model_name} - Accuracy: {accuracy:.2%}")

pixels_act: decision_tree - Accuracy: 50.28%
pixels_act: hist_gradient_boosting - Accuracy: 49.15%
pixels_shap: decision_tree - Accuracy: 48.68%
pixels_shap: hist_gradient_boosting - Accuracy: 50.85%
act: decision_tree - Accuracy: 33.27%
act: hist_gradient_boosting - Accuracy: 89.62%
shap: decision_tree - Accuracy: 66.40%
shap: hist_gradient_boosting - Accuracy: 78.47%
shap_sum: decision_tree - Accuracy: 66.33%
shap_sum: hist_gradient_boosting - Accuracy: 70.23%


In [11]:
# Linear regression on the activation features, scored as a classifier:
# the continuous predictions are rounded and clipped to the values -1, 0 and 1.
actLinRegModel = LinearRegression()
actLinRegModel.fit(act_x_train, act_y_train)
act_y_pred = actLinRegModel.predict(act_x_test)
act_y_pred = np.clip(np.round(act_y_pred), -1, 1) # Round y_pred to int, max 1 and min -1
total_correct = (act_y_test == act_y_pred).sum()
total_samples = len(act_y_test)

# Fix the class order explicitly. Without `labels=`, confusion_matrix only includes classes
# that occur in the data, so the matrix could shrink below 3x3 and no longer match `labels`.
class_values = [-1, 0, 1]
labels=["red", "fail", "green"]  # display names for class_values, in the same order
cm = confusion_matrix(act_y_test, act_y_pred, labels=class_values)

print("Linear regression, Activation dataset")
print(f"Total correct predictions: {total_correct} out of {total_samples}")
print(f"Accuracy: {accuracy_score(act_y_test, act_y_pred):.2%}")
print(f"\nConfusion matrix:\n{cm}")

pd.DataFrame(cm, index=[f"Actual {label}" for label in labels], columns=[f"Predicted {label}" for label in labels])



Linear regression, Activation dataset
Total correct predictions: 2022 out of 4000
Accuracy: 50.55%

Confusion matrix:
[[1992    1    0]
 [   2    0    0]
 [1754  221   30]]


Unnamed: 0,Predicted red,Predicted fail,Predicted green
Actual red,1992,1,0
Actual fail,2,0,0
Actual green,1754,221,30


Earlier..

Done:
- Train 21 models
- Create all activation and shap datasets
- Predicted unseen network datasets

Answered questions:
- Should the models retain the 5% epsilon during dataset creation? Yes
- Does the SHAP background data seem sound? Compare
- Should the decision tree receive coordinates too? Format? Yes, no feature engineering

### Meeting 2025-02-24

Next up:
- Implement Gradcam

Questions:
- Should I switch to an environment where the agent can't see the goal? Yes
- What other XAI methods make sense to implement after gradcam? Wait
- Should I spend time visualising these results to be able to put them into the thesis? Yes

Todo:
- Visualise using shap
- Alter environment




- Create a new baseline for how the models perform if one only plots the pixels