In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC

np.random.seed(42)
random_state = 42

In [3]:
modelNames = ["r00_g10_1500k",
              "r01_g10_1500k",
              "r02_g10_1500k",
              "r03_g10_1500k",
              "r04_g10_1500k",
              "r05_g10_1500k",
              "r06_g10_1500k",
              "r07_g10_1500k",
              "r08_g10_1500k",
              "r09_g10_1500k",
              "r10_g00_1500k",
              "r10_g01_1500k",
              "r10_g02_1500k",
              "r10_g03_1500k",
              "r10_g04_1500k",
              "r10_g05_1500k",
              "r10_g06_1500k",
              "r10_g07_1500k",
              "r10_g08_1500k",
              "r10_g09_1500k",
              "r10_g10_1500k"]

# all 21 act datasets
act_r00_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r00_g10_1500k.csv", header=0, index_col=None, float_precision='high')
act_r01_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r01_g10_1500k.csv", header=0, index_col=None, float_precision='high')
act_r02_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r02_g10_1500k.csv", header=0, index_col=None, float_precision='high')
act_r03_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r03_g10_1500k.csv", header=0, index_col=None, float_precision='high')
act_r04_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r04_g10_1500k.csv", header=0, index_col=None, float_precision='high')
act_r05_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r05_g10_1500k.csv", header=0, index_col=None, float_precision='high')
act_r06_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r06_g10_1500k.csv", header=0, index_col=None, float_precision='high')
act_r07_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r07_g10_1500k.csv", header=0, index_col=None, float_precision='high')
act_r08_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r08_g10_1500k.csv", header=0, index_col=None, float_precision='high')
act_r09_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r09_g10_1500k.csv", header=0, index_col=None, float_precision='high')
act_r10_g00_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r10_g00_1500k.csv", header=0, index_col=None, float_precision='high')
act_r10_g01_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r10_g01_1500k.csv", header=0, index_col=None, float_precision='high')
act_r10_g02_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r10_g02_1500k.csv", header=0, index_col=None, float_precision='high')
act_r10_g03_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r10_g03_1500k.csv", header=0, index_col=None, float_precision='high')
act_r10_g04_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r10_g04_1500k.csv", header=0, index_col=None, float_precision='high')
act_r10_g05_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r10_g05_1500k.csv", header=0, index_col=None, float_precision='high')
act_r10_g06_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r10_g06_1500k.csv", header=0, index_col=None, float_precision='high')
act_r10_g07_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r10_g07_1500k.csv", header=0, index_col=None, float_precision='high')
act_r10_g08_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r10_g08_1500k.csv", header=0, index_col=None, float_precision='high')
act_r10_g09_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r10_g09_1500k.csv", header=0, index_col=None, float_precision='high')
act_r10_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/act_r10_g10_1500k.csv", header=0, index_col=None, float_precision='high')

# all 21 shap datasets
shap_r00_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r00_g10_1500k.csv", header=0, index_col=None, float_precision='high')
shap_r01_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r01_g10_1500k.csv", header=0, index_col=None, float_precision='high')
shap_r02_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r02_g10_1500k.csv", header=0, index_col=None, float_precision='high')
shap_r03_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r03_g10_1500k.csv", header=0, index_col=None, float_precision='high')
shap_r04_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r04_g10_1500k.csv", header=0, index_col=None, float_precision='high')
shap_r05_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r05_g10_1500k.csv", header=0, index_col=None, float_precision='high')
shap_r06_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r06_g10_1500k.csv", header=0, index_col=None, float_precision='high')
shap_r07_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r07_g10_1500k.csv", header=0, index_col=None, float_precision='high')
shap_r08_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r08_g10_1500k.csv", header=0, index_col=None, float_precision='high')
shap_r09_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r09_g10_1500k.csv", header=0, index_col=None, float_precision='high')
shap_r10_g00_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r10_g00_1500k.csv", header=0, index_col=None, float_precision='high')
shap_r10_g01_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r10_g01_1500k.csv", header=0, index_col=None, float_precision='high')
shap_r10_g02_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r10_g02_1500k.csv", header=0, index_col=None, float_precision='high')
shap_r10_g03_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r10_g03_1500k.csv", header=0, index_col=None, float_precision='high')
shap_r10_g04_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r10_g04_1500k.csv", header=0, index_col=None, float_precision='high')
shap_r10_g05_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r10_g05_1500k.csv", header=0, index_col=None, float_precision='high')
shap_r10_g06_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r10_g06_1500k.csv", header=0, index_col=None, float_precision='high')
shap_r10_g07_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r10_g07_1500k.csv", header=0, index_col=None, float_precision='high')
shap_r10_g08_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r10_g08_1500k.csv", header=0, index_col=None, float_precision='high')
shap_r10_g09_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r10_g09_1500k.csv", header=0, index_col=None, float_precision='high')
shap_r10_g10_1500k = pd.read_csv("C:/Projects/public/XAI_Master/datasets/shap_r10_g10_1500k.csv", header=0, index_col=None, float_precision='high')





act_r10_g10_1500k.head()

Unnamed: 0,target,00r,00g,00b,01r,01g,01b,02r,02g,02b,...,a119,a120,a121,a122,a123,a124,a125,a126,a127,a128
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,...,-0.370958,0.015664,0.200085,-0.10426,-0.042845,-0.14723,-0.252146,-0.114353,-0.054936,-0.079697
1,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-0.310895,0.02599,0.219086,-0.10122,-0.038705,-0.135953,-0.253984,-0.115242,-0.059519,-0.069593
2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,...,-0.283055,-0.372314,-0.212476,-0.132545,-0.052872,-0.147475,-0.231721,-0.065279,-0.038018,-0.078415
3,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-0.424393,-0.338818,-0.157856,-0.135356,-0.053728,-0.122333,-0.116633,-0.060886,-0.031313,-0.087665
4,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,-0.308029,-0.315605,0.258038,-0.096551,-0.037616,-0.135054,-0.302021,-0.082572,-0.049799,-0.0616


In [4]:
shap_r10_g10_1500k.head()

Unnamed: 0,target,00r,00g,00b,01r,01g,01b,02r,02g,02b,...,shap63b,shap64r,shap64g,shap64b,shap65r,shap65g,shap65b,shap66r,shap66g,shap66b
0,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.057482,-0.136571,0.0
1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.069929,-0.179187,0.0


#### Create shap background data

In [5]:
#all_act_datasets = pd.concat([act_r00_g10_1500k, act_r01_g10_1500k, act_r02_g10_1500k, act_r03_g10_1500k, act_r04_g10_1500k, act_r05_g10_1500k, act_r06_g10_1500k, act_r07_g10_1500k, act_r08_g10_1500k, act_r09_g10_1500k, act_r10_g00_1500k, act_r10_g01_1500k, act_r10_g02_1500k, act_r10_g03_1500k, act_r10_g04_1500k, act_r10_g05_1500k, act_r10_g06_1500k, act_r10_g07_1500k, act_r10_g08_1500k, act_r10_g09_1500k, act_r10_g10_1500k])
#only_pixels_dataset = all_act_datasets.drop([f'a{i}' for i in range(1, 129)], axis=1).copy()
#only_pixels_dataset = only_pixels_dataset.drop(['target'], axis=1)

# save csv
#only_pixels_dataset.to_csv("C:/Projects/public/XAI_Master/datasets/42000_gamestates.csv", index=False)

In [6]:
# Merge and shuffle(?) the datasets
act_train = pd.concat([act_r01_g10_1500k, act_r02_g10_1500k, act_r03_g10_1500k, act_r04_g10_1500k, act_r05_g10_1500k, act_r06_g10_1500k, act_r07_g10_1500k, act_r08_g10_1500k, act_r09_g10_1500k, act_r10_g01_1500k, act_r10_g02_1500k, act_r10_g03_1500k, act_r10_g04_1500k, act_r10_g05_1500k, act_r10_g06_1500k, act_r10_g07_1500k, act_r10_g08_1500k, act_r10_g09_1500k], ignore_index=True)
act_test = pd.concat([act_r00_g10_1500k, act_r10_g00_1500k], ignore_index=True) # leave out on both datasets: act_r10_g10_1500k
act_x_train = act_train.drop('target', axis=1)
act_y_train = act_train['target']
act_x_test = act_test.drop('target', axis=1)
act_y_test = act_test['target']
# Apply MinMaxScaler
act_scaler = MinMaxScaler()
act_x_train = pd.DataFrame(act_scaler.fit_transform(act_x_train), columns=act_x_train.columns)
act_x_test = pd.DataFrame(act_scaler.transform(act_x_test), columns=act_x_test.columns)

shap_train = pd.concat([shap_r01_g10_1500k, shap_r02_g10_1500k, shap_r03_g10_1500k, shap_r04_g10_1500k, shap_r05_g10_1500k, shap_r06_g10_1500k, shap_r07_g10_1500k, shap_r08_g10_1500k, shap_r09_g10_1500k, shap_r10_g01_1500k, shap_r10_g02_1500k, shap_r10_g03_1500k, shap_r10_g04_1500k, shap_r10_g05_1500k, shap_r10_g06_1500k, shap_r10_g07_1500k, shap_r10_g08_1500k, shap_r10_g09_1500k], ignore_index=True)
shap_test = pd.concat([shap_r00_g10_1500k, shap_r10_g00_1500k], ignore_index=True) # leave out on both datasets: shap_r10_g10_1500k
shap_x_train = shap_train.drop('target', axis=1)
shap_y_train = shap_train['target']
shap_x_test = shap_test.drop('target', axis=1)
shap_y_test = shap_test['target']
# Apply MinMaxScaler
shap_scaler = MinMaxScaler()
shap_x_train = pd.DataFrame(shap_scaler.fit_transform(shap_x_train), columns=shap_x_train.columns)
shap_x_test = pd.DataFrame(shap_scaler.transform(shap_x_test), columns=shap_x_test.columns)

shap_cols = [f"shap{i}{j}{c}" for i in range(7) for j in range(7) for c in ['r', 'g', 'b']]
non_shap_cols = [f"{i}{j}{c}" for i in range(7) for j in range(7) for c in ['r', 'g', 'b']]
shap_sum_x_train = shap_x_train.drop(columns=shap_cols).copy()
shap_sum_x_train[non_shap_cols] += shap_x_train[shap_cols].values
shap_sum_x_test = shap_x_test.drop(columns=shap_cols).copy()
shap_sum_x_test[non_shap_cols] += shap_x_test[shap_cols].values
shap_sum_y_train = shap_y_train
shap_sum_y_test = shap_y_test

pixels_train_from_act = act_train.drop([f'a{i}' for i in range(1, 129)], axis=1).copy()
pixels_train_from_shap = shap_train.drop([f'shap{i}{j}{c}' for i in range(7) for j in range(7) for c in ['r','g','b']], axis=1).copy()
pixels_test_from_act = act_test.drop([f'a{i}' for i in range(1, 129)], axis=1).copy()
pixels_test_from_shap = shap_test.drop([f'shap{i}{j}{c}' for i in range(7) for j in range(7) for c in ['r','g','b']], axis=1).copy()
pixels_train = pd.concat([pixels_train_from_act, pixels_train_from_shap], axis=1)
pixels_test = pd.concat([pixels_test_from_act, pixels_test_from_shap], axis=1)
pixels_x_train = shap_train.drop('target', axis=1)
pixels_y_train = shap_train['target']
pixels_x_test = shap_test.drop('target', axis=1)
pixels_y_test = shap_test['target']
# Apply MinMaxScaler
pixels_scaler = MinMaxScaler()
pixels_x_train = pd.DataFrame(pixels_scaler.fit_transform(pixels_x_train), columns=pixels_x_train.columns)
pixels_x_test = pd.DataFrame(pixels_scaler.transform(pixels_x_test), columns=pixels_x_test.columns)

shap_sum_x_train.head()

Unnamed: 0,00r,00g,00b,01r,01g,01b,02r,02g,02b,03r,...,63b,64r,64g,64b,65r,65g,65b,66r,66g,66b
0,1.389624,1.328807,1.934878,1.263418,1.298278,1.817573,1.467477,0.424117,0.494902,0.845285,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.719807,0.732796,0.0
1,1.389624,1.328807,1.934878,1.263418,1.298278,1.817573,1.467477,1.495076,1.488282,1.410624,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0
2,1.389624,1.328807,1.934878,0.491502,1.298278,0.700803,1.467477,1.495076,1.488282,1.410624,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0
3,1.389624,1.328807,1.934878,1.263418,1.298278,1.817573,1.467477,1.495076,1.488282,1.410624,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0
4,1.389624,1.328807,1.934878,1.263418,1.298278,1.817573,1.467477,1.495076,1.488282,1.410624,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0


In [7]:
def train_and_test(model, x_train, y_train, x_test, y_test):
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    if isinstance(y_pred[0], float):
        y_pred = np.clip(np.round(y_pred), -1, 1) # Round y_pred to int, max 1 and min -1
    return accuracy_score(y_test, y_pred)

dataDict = {
    'pixels': {
        'data': (pixels_x_train, pixels_y_train, pixels_x_test, pixels_y_test)},
    'act': {
        'data': (act_x_train, act_y_train, act_x_test, act_y_test)},
    'shap': {
        'data': (shap_x_train, shap_y_train, shap_x_test, shap_y_test)},
    'shap_sum': {
        'data': (shap_sum_x_train, shap_sum_y_train, shap_sum_x_test, shap_sum_y_test)}}

models = {
    'linear_regression': lambda: LinearRegression(),
    'decision_tree': lambda: DecisionTreeClassifier(),
    'random_forest': lambda: RandomForestClassifier(),
    'hist_gradient_boosting': lambda: HistGradientBoostingClassifier()}#,
    #'svm': lambda: SVC(kernel='linear')}

for dataset in ['pixels', 'act', 'shap', 'shap_sum']:  
    for model_name, model_fn in models.items():
        model = model_fn()  # Creating a new model instance, ensuring no transfer learning
        accuracy = train_and_test(model, *dataDict[dataset]['data'])
        print(f"{dataset}: {model_name} - Accuracy: {accuracy:.2%}")

pixels: linear_regression - Accuracy: 19.85%
pixels: decision_tree - Accuracy: 72.85%
pixels: random_forest - Accuracy: 75.02%
pixels: hist_gradient_boosting - Accuracy: 77.65%
act: linear_regression - Accuracy: 20.90%
act: decision_tree - Accuracy: 75.60%
act: random_forest - Accuracy: 67.92%
act: hist_gradient_boosting - Accuracy: 44.98%
shap: linear_regression - Accuracy: 19.85%
shap: decision_tree - Accuracy: 73.22%
shap: random_forest - Accuracy: 73.08%
shap: hist_gradient_boosting - Accuracy: 78.08%
shap_sum: linear_regression - Accuracy: 10.45%
shap_sum: decision_tree - Accuracy: 71.78%
shap_sum: random_forest - Accuracy: 73.05%
shap_sum: hist_gradient_boosting - Accuracy: 81.17%


In [8]:
actLinRegModel = LinearRegression()
actLinRegModel.fit(act_x_train, act_y_train)
act_y_pred = actLinRegModel.predict(act_x_test)
act_y_pred = np.clip(np.round(act_y_pred), -1, 1) # Round y_pred to int, max 1 and min -1
total_correct = (act_y_test == act_y_pred).sum()
total_samples = len(act_y_test)
print(f"Linear regression, Activation dataset")
print(f"Total correct predictions: {total_correct} out of {total_samples}")
print(f"Accuracy: {accuracy_score(act_y_test, act_y_pred):.2%}")
print(f"\nConfusion matrix:\n{confusion_matrix(act_y_test, act_y_pred)}")


Linear regression, Activation dataset
Total correct predictions: 836 out of 4000
Accuracy: 20.90%

Confusion matrix:
[[ 118  589 1283]
 [   2    6    5]
 [1029  256  712]]


In [9]:
shapLinRegModel = LinearRegression()
shapLinRegModel.fit(shap_x_train, shap_y_train)
shap_y_pred = shapLinRegModel.predict(shap_x_test)
shap_y_pred = np.clip(np.round(shap_y_pred), -1, 1) # Round y_pred to int, max 1 and min -1
total_correct = (shap_y_test == shap_y_pred).sum()
total_samples = len(shap_y_test)
print(f"Linear regression, Shap dataset")
print(f"Total correct predictions: {total_correct} out of {total_samples}")
print(f"Accuracy: {accuracy_score(shap_y_test, shap_y_pred):.2%}")
print(f"\nConfusion matrix:\n{confusion_matrix(shap_y_test, shap_y_pred)}")

# Checka koefficienter
"""
# Get the coefficients
coefficients = shapLinRegModel.coef_
intercept = shapLinRegModel.intercept_

# If your model has a single target (not multiclass), reshape coefficients
if len(coefficients.shape) > 1:
    # For multiclass, you might want to plot coefficients for each class
    # This example plots coefficients for the first class
    coefficients = coefficients[0]

# Create a list of feature names (replace with your actual feature names if available)
feature_names = [f"Feature {i}" for i in range(len(coefficients))]

# Sort coefficients by absolute value to highlight the most significant ones
sorted_indices = np.argsort(np.abs(coefficients))[::-1]
sorted_coefficients = coefficients[sorted_indices]
sorted_features = [feature_names[i] for i in sorted_indices]

# Plot
plt.figure(figsize=(12, 8))
bars = plt.bar(range(len(sorted_coefficients)), sorted_coefficients)

# Color code the bars (positive: blue, negative: red)
for i, bar in enumerate(bars):
    if sorted_coefficients[i] < 0:
        bar.set_color('r')
    else:
        bar.set_color('b')

plt.xlabel('Features')
plt.ylabel('Coefficient Value')
plt.title('Linear Regression Coefficients')
# plt.xticks(range(len(sorted_coefficients)), sorted_features, rotation=90)
plt.tight_layout()

# Add a horizontal line at y=0
plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)

# Add intercept value as text
plt.figtext(0.7, 0.01, f'Intercept: {intercept:.4f}', ha='center')

plt.show()
"""

Linear regression, Shap dataset
Total correct predictions: 794 out of 4000
Accuracy: 19.85%

Confusion matrix:
[[ 148 1760   82]
 [   4   11    5]
 [ 262 1093  635]]


'\n# Get the coefficients\ncoefficients = shapLinRegModel.coef_\nintercept = shapLinRegModel.intercept_\n\n# If your model has a single target (not multiclass), reshape coefficients\nif len(coefficients.shape) > 1:\n    # For multiclass, you might want to plot coefficients for each class\n    # This example plots coefficients for the first class\n    coefficients = coefficients[0]\n\n# Create a list of feature names (replace with your actual feature names if available)\nfeature_names = [f"Feature {i}" for i in range(len(coefficients))]\n\n# Sort coefficients by absolute value to highlight the most significant ones\nsorted_indices = np.argsort(np.abs(coefficients))[::-1]\nsorted_coefficients = coefficients[sorted_indices]\nsorted_features = [feature_names[i] for i in sorted_indices]\n\n# Plot\nplt.figure(figsize=(12, 8))\nbars = plt.bar(range(len(sorted_coefficients)), sorted_coefficients)\n\n# Color code the bars (positive: blue, negative: red)\nfor i, bar in enumerate(bars):\n    if s

In [10]:
actTreeModel = DecisionTreeClassifier(random_state=42)
actTreeModel.fit(act_x_train, act_y_train)
act_y_pred = actTreeModel.predict(act_x_test)
total_correct = (act_y_test == act_y_pred).sum()
total_samples = len(act_y_test)
print(f"Decicion tree, Activation dataset")
print(f"Total correct predictions: {total_correct} out of {total_samples}")
print(f"Accuracy: {accuracy_score(act_y_test, act_y_pred):.2%}")
print(f"\nConfusion matrix:\n{confusion_matrix(act_y_test, act_y_pred)}")

Decicion tree, Activation dataset
Total correct predictions: 3170 out of 4000
Accuracy: 79.25%

Confusion matrix:
[[1270    0  720]
 [   2    0   11]
 [  76   21 1900]]


In [11]:
shapTreeModel = DecisionTreeClassifier(random_state=42)
shapTreeModel.fit(shap_x_train, shap_y_train)
shap_y_pred = shapTreeModel.predict(shap_x_test)
total_correct = (shap_y_test == shap_y_pred).sum()
total_samples = len(shap_y_test)
print(f"Decicion tree, Shap dataset")
print(f"Total correct predictions: {total_correct} out of {total_samples}")
print(f"Accuracy: {accuracy_score(shap_y_test, shap_y_pred):.2%}")
print(f"\nConfusion matrix:\n{confusion_matrix(shap_y_test, shap_y_pred)}")

Decicion tree, Shap dataset
Total correct predictions: 2914 out of 4000
Accuracy: 72.85%

Confusion matrix:
[[1524   76  390]
 [  13    0    7]
 [ 571   29 1390]]


In [12]:
# Ensamble model
actForestModel = RandomForestClassifier(random_state=42)
actForestModel.fit(act_x_train, act_y_train)
act_y_pred = actForestModel.predict(act_x_test)
total_correct = (act_y_test == act_y_pred).sum()
total_samples = len(act_y_test)
print(f"Random forest, Activation dataset")
print(f"Total correct predictions: {total_correct} out of {total_samples}")
print(f"Accuracy: {accuracy_score(act_y_test, act_y_pred):.2%}")
print(f"\nConfusion matrix:\n{confusion_matrix(act_y_test, act_y_pred)}")

Random forest, Activation dataset
Total correct predictions: 782 out of 4000
Accuracy: 19.55%

Confusion matrix:
[[  90    0 1900]
 [   3    0   10]
 [1305    0  692]]


In [13]:

shapForestModel = RandomForestClassifier(random_state=42)
shapForestModel.fit(shap_x_train, shap_y_train)
shap_y_pred = shapForestModel.predict(shap_x_test)
total_correct = (shap_y_test == shap_y_pred).sum()
total_samples = len(shap_y_test)
print(f"Random forest, Shap dataset")
print(f"Total correct predictions: {total_correct} out of {total_samples}")
print(f"Accuracy: {accuracy_score(shap_y_test, shap_y_pred):.2%}")
print(f"\nConfusion matrix:\n{confusion_matrix(shap_y_test, shap_y_pred)}")

Random forest, Shap dataset
Total correct predictions: 2995 out of 4000
Accuracy: 74.88%

Confusion matrix:
[[1503    0  487]
 [  10    0   10]
 [ 495    3 1492]]


In [14]:

actGradientBoostModel = HistGradientBoostingClassifier(random_state=42)
actGradientBoostModel.fit(act_x_train, act_y_train)
act_y_pred = actGradientBoostModel.predict(act_x_test)
total_correct = (act_y_test == act_y_pred).sum()
total_samples = len(act_y_test)
print(f"HistGradientBoostingClassifier, Activation dataset")
print(f"Total correct predictions: {total_correct} out of {total_samples}")
print(f"Accuracy: {accuracy_score(act_y_test, act_y_pred):.2%}")
print(f"\nConfusion matrix:\n{confusion_matrix(act_y_test, act_y_pred)}")

HistGradientBoostingClassifier, Activation dataset
Total correct predictions: 2487 out of 4000
Accuracy: 62.18%

Confusion matrix:
[[1670    0  320]
 [  10    0    3]
 [1180    0  817]]


In [15]:
shapGradientBoostModel = HistGradientBoostingClassifier(random_state=42)
shapGradientBoostModel.fit(shap_x_train, shap_y_train)
shap_y_pred = shapGradientBoostModel.predict(shap_x_test)
total_correct = (shap_y_test == shap_y_pred).sum()
total_samples = len(shap_y_test)
print(f"HistGradientBoostingClassifier, Shap dataset")
print(f"Total correct predictions: {total_correct} out of {total_samples}")
print(f"Accuracy: {accuracy_score(shap_y_test, shap_y_pred):.2%}")
print(f"\nConfusion matrix:\n{confusion_matrix(shap_y_test, shap_y_pred)}")

HistGradientBoostingClassifier, Shap dataset
Total correct predictions: 3154 out of 4000
Accuracy: 78.85%

Confusion matrix:
[[1624    4  362]
 [   9    0   11]
 [ 435   25 1530]]


Earlier..

Done:
- Train 21 models
- Create all activation and shap datasets
- Precicted unseen network datasets

Answered questions:
- Should the models retain the 5% epsilon during dataset creation? Yes
- Does the SHAP background data sound sound? Compare
- Should the decision tree recieve coordinates too? Format? Yes, no feature engineering

### Meeting 2025-02-24

Next up:
- Implement Gradcam

Questions:
- Should I switch to an environment where the agent can't see the goal? Yes
- What other XAI methods makes sense to implement after gradcam? Wait
- Should I spend time visualising these results to be able to put them into the thesis? Yes

Todo:
- Visualise using shap
- Alter environment




- Create a new baseline for how the models perform if one only plots the pixels