# Install the necessary packages

In [None]:
pip install scikit-learn==1.2.2



In [2]:
pip install interpret

Collecting interpret
  Downloading interpret-0.5.1-py3-none-any.whl (1.4 kB)
Collecting interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.5.1 (from interpret)
  Downloading interpret_core-0.5.1-py3-none-any.whl (13.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.0/13.0 MB[0m [31m45.7 MB/s[0m eta [36m0:00:00[0m
Collecting dash>=1.0.0 (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.5.1->interpret)
  Downloading dash-2.15.0-py3-none-any.whl (10.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m65.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting dash-core-components>=1.0.0 (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.5.1->interpret)
  Downloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Collecting dash-html-components>=1.0.0 (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.5.1->interpret)
  Downloading 

# Import necessary packages

In [37]:
import pandas as pd
import openpyxl
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error
from sklearn.naive_bayes import MultinomialNB
from sklearn.multioutput import MultiOutputClassifier
from sklearn.svm import SVC
from interpret import show
from interpret.blackbox import ShapKernel

# Utility functions

In [38]:
def get_bold_elements(sheet, column_index):
    """Collect the values of the bold cells in one worksheet column.

    Rows 2 through ``sheet.max_row`` of column ``column_index`` are scanned
    (row 1 is skipped). A cell contributes its value only when its font is
    bold and the value itself is truthy, so empty cells are ignored.

    Args:
        sheet: Worksheet-like object exposing ``iter_rows`` and ``max_row``.
        column_index: 1-based index of the column to scan.

    Returns:
        list: Values of the matching cells, in row order.
    """
    column_rows = sheet.iter_rows(
        min_row=2,
        max_row=sheet.max_row,
        min_col=column_index,
        max_col=column_index,
    )
    return [
        cell.value
        for row in column_rows
        for cell in row
        if cell.font.bold and cell.value
    ]

def extractProcessNumbers(file_path):
    """Return, for every column of the active sheet, the list of its bold values.

    Args:
        file_path: Path of the Excel workbook to open with openpyxl.

    Returns:
        list[list]: One list per worksheet column (left to right), each holding
        the bold, non-empty values found below the first row of that column.
    """
    workbook = openpyxl.load_workbook(file_path)

    # The active sheet is used; switch to workbook[<name>] if the data lives elsewhere.
    worksheet = workbook.active

    # Walk the first row only to discover which column indices exist.
    first_row_columns = worksheet.iter_cols(
        min_row=1, max_row=1, min_col=1, max_col=worksheet.max_column
    )
    bold_by_column = [
        get_bold_elements(worksheet, header_cells[0].column)
        for header_cells in first_row_columns
    ]

    workbook.close()
    return bold_by_column

def get_output_classifier(classifier, result_df, testSize, sample=None,
                          n_targets=49, top_k=9, max_attempts=100):
    """Fit a multi-output classifier and rank the targets for a single sample.

    The last ``n_targets`` columns of ``result_df`` are the binary targets and
    everything before them is the feature matrix. The data is split with
    ``train_test_split`` starting from ``random_state=40``; when the split or
    the fit raises, the seed is incremented and the attempt is repeated, at
    most ``max_attempts`` times.

    Args:
        classifier: Unfitted scikit-learn classifier, wrapped in a
            ``MultiOutputClassifier`` (one fitted copy per target column).
        result_df: DataFrame with feature columns followed by the target columns.
        testSize: Value forwarded to ``train_test_split(test_size=...)``.
        sample: Series holding the row to score. Defaults to the notebook-level
            ``test_data`` variable, which is what the original code always used.
        n_targets: Number of trailing target columns in ``result_df``.
        top_k: How many of the highest-ranked targets to return.
        max_attempts: Upper bound on the number of seeds tried.

    Returns:
        list: ``[top_indices, result_prediction, random_state]`` where
        ``top_indices`` are the positions of the ``top_k`` targets with the
        highest probability for class 1, ``result_prediction`` are the values
        of ``sample`` at those positions, and ``random_state`` is the seed of
        the split that was fitted successfully.

    Raises:
        RuntimeError: If no attempt succeeded; the last underlying error is
            attached as ``__cause__``.
    """
    if sample is None:
        # Backward-compatible default: fall back to the notebook global.
        sample = test_data

    X = result_df.iloc[:, :-n_targets]  # feature columns
    y = result_df.iloc[:, -n_targets:]  # the trailing n_targets target columns
    random_state = 40

    last_error = None
    for _ in range(max_attempts):
        try:
            X_train, _X_test, y_train, _y_test = train_test_split(
                X, y, test_size=testSize, random_state=random_state
            )
            multi_output_classifier = MultiOutputClassifier(classifier)
            multi_output_classifier.fit(X_train, y_train)
            break
        except Exception as exc:
            # Some splits cannot be fitted; try the next seed. The loop is
            # bounded so a failure unrelated to the seed cannot spin forever.
            last_error = exc
            random_state += 1
    else:
        raise RuntimeError(
            f"Could not fit the classifier after {max_attempts} attempts"
        ) from last_error

    sample_values = sample.values
    probabilities = multi_output_classifier.predict_proba(sample_values.reshape(1, -1))

    # Probability of class 1 for every target. The column is looked up through
    # classes_ because an estimator trained on a single class returns only one
    # column: if that class is 0 the positive probability is 0.0, if it is 1
    # the lone column already is the positive probability.
    positive_probs = []
    for estimator, proba in zip(multi_output_classifier.estimators_, probabilities):
        classes = list(estimator.classes_)
        positive_probs.append(proba[0][classes.index(1)] if 1 in classes else 0.0)

    # Stable descending sort keeps ties in their original target order.
    ranked = sorted(range(len(positive_probs)), key=positive_probs.__getitem__, reverse=True)
    top_indices = ranked[:top_k]

    sample_list = sample_values.tolist()
    result_prediction = [sample_list[i] for i in top_indices]

    return [top_indices, result_prediction, random_state]

def build_SHAP_graphs(classifier, result_df, testSize, y_actual, random_state,
                      sample=None, n_targets=49, top_k=9):
    """Fit ``classifier`` once per target and build a local SHAP explanation for each.

    The last ``n_targets`` columns of ``result_df`` are the binary targets
    (named ``picked_1`` ... ``picked_<n_targets>``) and everything before them
    is the feature matrix. For each target the same ``classifier`` object is
    refitted in place on the training split, a ``ShapKernel`` explanation of
    the sample is computed, and the class probabilities of the sample are
    recorded.

    Args:
        classifier: scikit-learn classifier; it is refitted in place for every
            target, so after the call it holds the fit for the last target.
        result_df: DataFrame with feature columns followed by the target columns.
        testSize: Value forwarded to ``train_test_split(test_size=...)``.
        y_actual: Sequence with the true 0/1 label of the sample for each target.
        random_state: Seed forwarded to ``train_test_split``.
        sample: Series holding the row to explain. Defaults to the notebook-level
            ``test_data`` variable, which is what the original code always used.
        n_targets: Number of trailing target columns in ``result_df``.
        top_k: How many of the highest-ranked targets to select.

    Returns:
        list: ``[top_indices, final_prediction, result_prediction, shap_graphs,
        predictions]``: positions of the ``top_k`` targets with the highest
        class-1 probability, a 0/1 list of length ``n_targets`` flagging those
        positions, the values of ``sample`` at those positions, the local SHAP
        explanations (one per target), and the ``(1, 2)`` probability arrays
        ``[[P(0), P(1)]]`` (one per target).
    """
    if sample is None:
        # Backward-compatible default: fall back to the notebook global.
        sample = test_data

    X = result_df.iloc[:, :-n_targets]  # feature columns
    y = result_df.iloc[:, -n_targets:]  # the trailing n_targets target columns

    X_train, _X_test, y_train, _y_test = train_test_split(
        X, y, test_size=testSize, random_state=random_state
    )

    sample_row = sample.values.reshape(1, -1)
    shap_graphs = []
    predictions = []

    for i in range(n_targets):
        classifier.fit(X_train, y_train[f'picked_{i+1}'])

        explainer = ShapKernel(classifier, X_train)
        shap_graphs.append(explainer.explain_local(sample_row, y_actual[i]))

        proba = classifier.predict_proba(sample_row)
        print(classifier.classes_)

        if proba.shape[1] != 2:
            # Only one class was seen during training, so there is a single
            # probability column. Place it according to classes_ instead of
            # always treating the missing column as the positive class.
            classes = list(classifier.classes_)
            positive = proba[0][classes.index(1)] if 1 in classes else 0.0
            proba = np.array([[1.0 - positive, positive]])
        predictions.append(proba)

    # Rank targets by P(class 1); the stable descending sort keeps ties in
    # their original target order.
    ranked = sorted(range(len(predictions)), key=lambda idx: predictions[idx][0][1], reverse=True)
    top_indices = ranked[:top_k]

    # 0/1 vector with a 1 at every selected target position.
    final_prediction = [0] * n_targets
    for index in top_indices:
        final_prediction[index] = 1

    sample_list = sample.values.tolist()
    result_prediction = [sample_list[i] for i in top_indices]

    return [top_indices, final_prediction, result_prediction, shap_graphs, predictions]



# Initialize testSize

In [39]:
# Value passed as `test_size` to every train_test_split call in this notebook.
testSize=0.2

In [31]:
pip install scikit-learn==1.2.2



# Importing Data
## Make sure you have uploaded the data to the Files section of Google Colab before loading it

In [40]:
import pandas as pd
import openpyxl

# Replace 'your_excel_file.xlsx' with the path to your Excel file

# Upload your data in file section of Google colab
file_name = 'TEST SHEET 394-1 (91 Data sests 303 to 393 and target 394  dated  03 02 2024'
excel_file_path = f'/content/{file_name}.xlsx'

# Read the workbook, keep row labels 1..50 and column labels up to 394
# (adjust both slices to your dataset), then discard the first remaining column.
df = pd.read_excel(excel_file_path).loc[1:50, :394].iloc[:, 1:]
df

Unnamed: 0,303,304,305,306,307,308,309,310,311,312,...,385,386,387,388,389,390,391,392,393,394
1,82,117,146,151,151,6,6,13,20,27,...,119,119,69,69,69,69,69,69,69,69
2,63,84,112,119,21,28,35,35,7,14,...,198,123,235,1,1,1,1,1,1,1
3,30,12,18,24,30,30,40,76,111,146,...,78,75,78,66,67,62,63,64,86,107
4,76,79,129,39,39,43,54,72,90,104,...,70,77,14,21,28,35,35,2,2,14
5,62,85,126,168,210,210,1,1,1,1,...,88,90,92,185,85,28,21,24,45,55
6,1,1,1,1,1,1,2,7,14,41,...,205,216,231,229,159,159,159,160,159,112
7,96,120,125,14,13,35,58,77,76,95,...,63,84,105,105,2,4,2,4,7,17
8,98,98,98,98,98,98,98,98,98,98,...,77,136,1,2,3,5,3,15,30,45
9,148,148,8,8,8,8,53,2,2,2,...,9,12,23,20,4,8,28,16,21,28
10,2,9,19,27,36,64,69,83,93,21,...,155,103,160,160,7,6,4,5,3,2


# Data Formatting

In [41]:
# Transpose the sheet: the former column labels (303, 304, ...) become the row index
# and the former rows become the columns.
new_df = df.T
new_df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,40,41,42,43,44,45,46,47,48,49
303,82,63,30,76,62,1,96,98,148,2,...,117,140,92,127,31,49,139,68,8,91
304,117,84,12,79,85,1,120,98,148,9,...,141,112,50,6,37,70,66,19,7,99
305,146,112,18,129,126,1,125,98,8,19,...,115,113,24,5,37,84,70,30,7,71
306,151,119,24,39,168,1,14,98,8,27,...,139,3,4,5,16,85,76,42,7,35
307,151,21,30,39,210,1,13,98,8,36,...,141,18,4,5,42,91,78,57,40,43
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,69,1,62,35,28,159,4,5,8,6,...,71,20,41,36,85,3,105,80,82,37
391,69,1,63,35,21,159,2,3,28,4,...,89,20,26,24,96,12,105,23,85,37
392,69,1,64,2,24,160,4,15,16,5,...,91,14,23,36,82,45,105,3,84,37
393,69,1,86,2,45,159,7,30,21,3,...,5,11,23,42,83,46,105,4,82,65


In [42]:
# Keep the last row of new_df as the sample to predict. The utility functions read this
# variable as a global, so it must be defined before they are called.
test_data = new_df.iloc[-1,:]
test_data

1      69
2       1
3     107
4      14
5      55
6     112
7      17
8      45
9      28
10      2
11     18
12     15
13     91
14     94
15      3
16    116
17     57
18    105
19      5
20    101
21     56
22     66
23     39
24      4
25    136
26      6
27    140
28      7
29     50
30     48
31     80
32     42
33    145
34     16
35    125
36     40
37    103
38     51
39     63
40     36
41      8
42     23
43     49
44     52
45     70
46    100
47     24
48     71
49     93
Name: 394, dtype: object

# Build y_actual

In [43]:
# Known outcome for the held-out sample.
actual_out = [28, 57, 66 ,136, 7 ,103, 49]

# One flag per entry of test_data: 1 when that value is part of the known outcome, else 0.
y_actual = [1 if num in actual_out else 0 for num in test_data]

In [44]:
# Remove the held-out sample (already copied into `test_data`) from the training frame.
# The row is dropped by the sample's own label and a missing label is ignored, so this
# cell is idempotent: re-running it no longer strips one more row each time.
new_df = new_df.drop(index=test_data.name, errors="ignore")
new_df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,40,41,42,43,44,45,46,47,48,49
303,82,63,30,76,62,1,96,98,148,2,...,117,140,92,127,31,49,139,68,8,91
304,117,84,12,79,85,1,120,98,148,9,...,141,112,50,6,37,70,66,19,7,99
305,146,112,18,129,126,1,125,98,8,19,...,115,113,24,5,37,84,70,30,7,71
306,151,119,24,39,168,1,14,98,8,27,...,139,3,4,5,16,85,76,42,7,35
307,151,21,30,39,210,1,13,98,8,36,...,141,18,4,5,42,91,78,57,40,43
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
389,69,1,67,28,85,159,2,3,4,7,...,134,16,39,32,81,66,105,80,74,37
390,69,1,62,35,28,159,4,5,8,6,...,71,20,41,36,85,3,105,80,82,37
391,69,1,63,35,21,159,2,3,28,4,...,89,20,26,24,96,12,105,23,85,37
392,69,1,64,2,24,160,4,15,16,5,...,91,14,23,36,82,45,105,3,84,37


In [45]:
# Bold values per worksheet column; skip the first column and keep one list
# for every remaining row of new_df.
process_numbers = extractProcessNumbers(excel_file_path)[1:len(new_df) + 1]
print(process_numbers)

[[2, 5, 126, 64, 40, 92, 127], [79, 123, 2, 4, 5, 62, 25], [129, 21, 3, 57, 77, 113, 24], [98, 41, 10, 171, 3, 16, 7], [36, 92, 72, 35, 55, 141, 40], [8, 83, 70, 82, 24, 18, 57], [2, 13, 88, 72, 10, 78, 8], [84, 15, 56, 11, 20, 147, 63], [93, 89, 35, 88, 8, 9, 64], [14, 21, 42, 18, 70, 43, 126], [62, 1, 96, 3, 140, 180, 4], [98, 42, 12, 135, 75, 6, 62], [1, 115, 21, 99, 140, 35, 32], [108, 120, 33, 112, 121, 64, 36], [41, 45, 21, 47, 28, 89, 26], [25, 72, 5, 15, 39, 62, 33], [98, 1, 16, 137, 17, 19, 216], [110, 3, 15, 16, 98, 29, 175], [92, 48, 71, 22, 113, 62, 148], [195, 15, 5, 8, 74, 10, 12], [44, 123, 28, 105, 78, 147, 50], [35, 40, 20, 90, 75, 46, 109], [62, 66, 16, 86, 10, 43, 113], [116, 6, 122, 14, 24, 10, 11], [21, 96, 37, 27, 12, 8, 9], [47, 40, 71, 123, 18, 11, 39], [56, 20, 125, 5, 9, 37, 34], [1, 2, 84, 28, 15, 132, 11], [229, 1, 67, 11, 158, 4, 15], [123, 11, 70, 80, 196, 96, 43], [114, 68, 100, 7, 182, 124, 71], [1, 64, 42, 106, 21, 12, 36], [6, 113, 1, 173, 5, 9, 133], 

In [46]:
# Display the training frame again before building the binary targets.
new_df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,40,41,42,43,44,45,46,47,48,49
303,82,63,30,76,62,1,96,98,148,2,...,117,140,92,127,31,49,139,68,8,91
304,117,84,12,79,85,1,120,98,148,9,...,141,112,50,6,37,70,66,19,7,99
305,146,112,18,129,126,1,125,98,8,19,...,115,113,24,5,37,84,70,30,7,71
306,151,119,24,39,168,1,14,98,8,27,...,139,3,4,5,16,85,76,42,7,35
307,151,21,30,39,210,1,13,98,8,36,...,141,18,4,5,42,91,78,57,40,43
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
389,69,1,67,28,85,159,2,3,4,7,...,134,16,39,32,81,66,105,80,74,37
390,69,1,62,35,28,159,4,5,8,6,...,71,20,41,36,85,3,105,80,82,37
391,69,1,63,35,21,159,2,3,28,4,...,89,20,26,24,96,12,105,23,85,37
392,69,1,64,2,24,160,4,15,16,5,...,91,14,23,36,82,45,105,3,84,37


In [47]:
# For every row of new_df, flag each value with 1 when it appears in the bold
# ("picked") numbers extracted for that same row position, otherwise 0.
process_binary = [
    [1 if value in process_numbers[row_pos] else 0 for value in row_values]
    for row_pos, row_values in enumerate(new_df.values.tolist())
]
process_binary

[[0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  1,
  1,
  0,
  0,
  0,
  0,
  0,
  0],
 [0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 [0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  1,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 [0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  1,
  0,
  0,
  1,
  0,
  0,
  1,
  0,
  0,
  0,
  1,
  0],
 [0,
  0,
  0,
 

In [48]:
# Convert the list to a DataFrame
# Wrap the 0/1 flags in a DataFrame aligned with new_df's index; columns are picked_1 .. picked_49.
process_bin_df = pd.DataFrame(
    process_binary,
    index=new_df.index,
    columns=[f'picked_{position}' for position in range(1, 50)],
)

# Show the frame
process_bin_df

Unnamed: 0,picked_1,picked_2,picked_3,picked_4,picked_5,picked_6,picked_7,picked_8,picked_9,picked_10,...,picked_40,picked_41,picked_42,picked_43,picked_44,picked_45,picked_46,picked_47,picked_48,picked_49
303,0,0,0,0,0,0,0,0,0,1,...,0,0,1,1,0,0,0,0,0,0
304,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
305,0,0,0,1,0,0,0,0,0,0,...,0,1,1,0,0,0,0,0,0,0
306,0,0,0,0,0,0,0,1,0,0,...,0,1,0,0,1,0,0,0,1,0
307,0,0,0,0,0,0,0,0,0,1,...,1,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
389,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
390,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,1,0
391,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,1,0,1,0,0
392,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [49]:
# Concatenate the original dataframe with the new binary columns
# Place the picked_* flag columns to the right of the original value columns.
result_df = pd.concat([new_df, process_bin_df], axis="columns")

# Show the combined frame
result_df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,picked_40,picked_41,picked_42,picked_43,picked_44,picked_45,picked_46,picked_47,picked_48,picked_49
303,82,63,30,76,62,1,96,98,148,2,...,0,0,1,1,0,0,0,0,0,0
304,117,84,12,79,85,1,120,98,148,9,...,0,0,0,0,0,0,0,0,0,0
305,146,112,18,129,126,1,125,98,8,19,...,0,1,1,0,0,0,0,0,0,0
306,151,119,24,39,168,1,14,98,8,27,...,0,1,0,0,1,0,0,0,1,0
307,151,21,30,39,210,1,13,98,8,36,...,1,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
389,69,1,67,28,85,159,2,3,4,7,...,0,0,0,0,0,1,0,0,0,0
390,69,1,62,35,28,159,4,5,8,6,...,0,0,0,0,0,0,0,0,1,0
391,69,1,63,35,21,159,2,3,28,4,...,0,0,0,1,0,1,0,1,0,0
392,69,1,64,2,24,160,4,15,16,5,...,0,0,0,0,0,0,0,0,0,0


# View training data set

In [50]:
from sklearn.model_selection import train_test_split

# Preview of the split the utility functions start from (random_state=40).
# result_df holds the value columns followed by the 49 picked_* flag columns.
# Extracting features (X) and target variables (y)
X = result_df.iloc[:, :-49]  # everything except the last 49 columns: the features
y = result_df.iloc[:, -49:]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=testSize, random_state=40)

In [51]:
# Display the training features produced by the preview split.
X_train

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,40,41,42,43,44,45,46,47,48,49
305,146,112,18,129,126,1,125,98,8,19,...,115,113,24,5,37,84,70,30,7,71
383,99,102,90,77,88,173,21,63,158,25,...,35,40,6,24,79,42,106,64,50,37
323,38,84,6,77,120,35,43,122,80,1,...,24,11,147,12,22,19,13,50,39,91
357,29,1,3,36,110,137,24,43,33,78,...,13,77,42,138,30,45,72,41,98,51
350,1,2,22,75,5,113,57,116,82,141,...,21,69,79,71,160,42,31,22,19,43
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
353,1,2,106,71,6,190,158,83,44,15,...,13,48,43,85,34,49,50,47,33,5
359,85,1,28,29,174,169,96,43,83,71,...,14,82,33,31,18,45,57,148,151,108
340,7,31,9,70,75,5,43,36,1,10,...,77,206,69,6,52,51,87,30,29,71
310,13,35,76,72,1,7,77,98,2,83,...,44,20,147,28,16,29,74,73,12,63


In [52]:
# Display the test_size value currently in effect.
testSize

0.2

In [53]:
from ipywidgets import interact, widgets

# Name of the classifier chosen in the dropdown; written by the callback below.
# NOTE(review): later cells depend on this interactive choice, so a plain
# "Run All" is only reproducible if the intended option is selected — confirm.
selected_classifier = None

def handle_dropdown_change(selected_option):
    """Store the chosen dropdown label in the global ``selected_classifier`` and print it."""
    global selected_classifier
    print(f"{selected_option} chosen.")
    selected_classifier = selected_option

# Create a dropdown widget; these labels are the exact strings the selection cell compares against.
options = ['Random Forest Classifier', 'SVM linear Classifier', 'SVM rbf Classifier',
           'SVM poly Classifier', 'Naive Bayes']
dropdown = widgets.Dropdown(options=options, description='Select a classifier:')

# Attach the callback function to the dropdown's change event
# NOTE(review): interact presumably also calls the callback once with the initial option — confirm.
value = interact(handle_dropdown_change, selected_option=dropdown)


interactive(children=(Dropdown(description='Select a classifier:', options=('Random Forest Classifier', 'SVM l…

In [54]:
# Display the classifier label stored by the dropdown callback.
selected_classifier

'SVM linear Classifier'

In [55]:
# Map every dropdown label to a factory so that only the chosen estimator is built.
classifier_factories = {
    'Random Forest Classifier': lambda: RandomForestClassifier(random_state=40),
    'SVM linear Classifier': lambda: SVC(kernel='linear', probability=True, random_state=40),
    'SVM rbf Classifier': lambda: SVC(kernel='rbf', probability=True, random_state=40),
    'SVM poly Classifier': lambda: SVC(kernel='poly', probability=True, random_state=40),
    'Naive Bayes': lambda: MultinomialNB(),
}

# Fail loudly on an unknown label: merely printing a message would leave `classifier`
# undefined, or silently reuse the estimator from a previous run of this cell.
if selected_classifier not in classifier_factories:
    raise ValueError(
        f"Invalid model selected: {selected_classifier!r}. "
        f"Expected one of {list(classifier_factories)}."
    )

classifier = classifier_factories[selected_classifier]()



# Recreate the model

In [56]:
# Fit the selected classifier on all targets at once and keep the seed of the split that
# could be fitted; that seed is passed to build_SHAP_graphs afterwards.
top_indices, result_prediction, random_state = get_output_classifier(classifier, result_df, testSize)

print("Indices of the top 9 predictions where label '1' probability is higher:", top_indices)

# Values of the held-out sample at the selected positions.
print(selected_classifier, ": ",  result_prediction, "\n")

Indices of the top 9 predictions where label '1' probability is higher: [16, 26, 9, 24, 21, 37, 27, 29, 2]
SVM linear Classifier :  [57, 140, 2, 136, 66, 51, 7, 48, 107] 



# Build SHAP graphs

In [57]:
# Refit the classifier target by target with the seed found above and collect one local
# SHAP explanation plus the class probabilities for each target.
top_indices, final_prediction, result_prediction, shap_graphs, predictions = build_SHAP_graphs(classifier, result_df, testSize, y_actual, random_state)

print("Indices of the top 9 predictions where label '1' probability is higher:", top_indices)

# Values of the held-out sample at the selected positions.
print(selected_classifier, ": ",  result_prediction, "\n")

  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]


  0%|          | 0/1 [00:00<?, ?it/s]

[0 1]
Indices of the top 9 predictions where label '1' probability is higher: [16, 26, 9, 24, 21, 37, 27, 29, 2]
SVM linear Classifier :  [57, 140, 2, 136, 66, 51, 7, 48, 107] 



# SHAP inference

In [58]:
# One probability array per target, as returned by build_SHAP_graphs.
print(predictions)

[array([[0.88875946, 0.11124054]]), array([[0.93730676, 0.06269324]]), array([[0.78038176, 0.21961824]]), array([[0.87050589, 0.12949411]]), array([[0.91887752, 0.08112248]]), array([[0.86424662, 0.13575338]]), array([[0.96286488, 0.03713512]]), array([[0.92861511, 0.07138489]]), array([[0.8704611, 0.1295389]]), array([[0.58160498, 0.41839502]]), array([[0.82478914, 0.17521086]]), array([[0.88236841, 0.11763159]]), array([[0.88218875, 0.11781125]]), array([[0.82556065, 0.17443935]]), array([[0.92769728, 0.07230272]]), array([[0.89639423, 0.10360577]]), array([[0.48930433, 0.51069567]]), array([[0.88251622, 0.11748378]]), array([[0.87447561, 0.12552439]]), array([[0.8830161, 0.1169839]]), array([[0.83267732, 0.16732268]]), array([[0.73616857, 0.26383143]]), array([[0.90417958, 0.09582042]]), array([[0.8722707, 0.1277293]]), array([[0.64493407, 0.35506593]]), array([[0.84537961, 0.15462039]]), array([[0.5, 0.5]]), array([[0.77360529, 0.22639471]]), array([[0.96731796, 0.03268204]]), arra

In [59]:
# Compare the 0/1 selection vector with the true 0/1 vector, position by position.
print("Predicted:", final_prediction)
print("Actual:", y_actual)

Prediced: [0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Actual: [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]


In [60]:
from sklearn.metrics import accuracy_score

# Position-wise agreement between the two 0/1 vectors. Most positions are 0 in both,
# so this score stays high even when few of the selected values are correct.
accuracy = accuracy_score(y_actual, final_prediction)

# Convert the sample to a list once instead of once per selected index.
sample_values = test_data.values.tolist()
result_prediction = [sample_values[i] for i in top_indices]

print("Predicted:", result_prediction)
print("Actual:", actual_out)
print("Common Elements: ", set(result_prediction) & set(actual_out))
print("\nFinal Model Accuracy:", accuracy)

Prediced: [57, 140, 2, 136, 66, 51, 7, 48, 107]
Actual: [28, 57, 66, 136, 7, 103, 49]
Common Elements:  {136, 57, 66, 7}

Final Model Accuracy: 0.8367346938775511


In [61]:
# Positions (0-based) of the selected targets within the held-out sample.
print("Result Indices: ", top_indices)

Result Indices:  [16, 26, 9, 24, 21, 37, 27, 29, 2]


In [62]:
# Lookup table from 0-based position to the sample value, to read the indices above.
for position, value in enumerate(test_data.values.tolist()):
    print(f"Index: {position}, Element: {value}")

Index: 0, Element: 69
Index: 1, Element: 1
Index: 2, Element: 107
Index: 3, Element: 14
Index: 4, Element: 55
Index: 5, Element: 112
Index: 6, Element: 17
Index: 7, Element: 45
Index: 8, Element: 28
Index: 9, Element: 2
Index: 10, Element: 18
Index: 11, Element: 15
Index: 12, Element: 91
Index: 13, Element: 94
Index: 14, Element: 3
Index: 15, Element: 116
Index: 16, Element: 57
Index: 17, Element: 105
Index: 18, Element: 5
Index: 19, Element: 101
Index: 20, Element: 56
Index: 21, Element: 66
Index: 22, Element: 39
Index: 23, Element: 4
Index: 24, Element: 136
Index: 25, Element: 6
Index: 26, Element: 140
Index: 27, Element: 7
Index: 28, Element: 50
Index: 29, Element: 48
Index: 30, Element: 80
Index: 31, Element: 42
Index: 32, Element: 145
Index: 33, Element: 16
Index: 34, Element: 125
Index: 35, Element: 40
Index: 36, Element: 103
Index: 37, Element: 51
Index: 38, Element: 63
Index: 39, Element: 36
Index: 40, Element: 8
Index: 41, Element: 23
Index: 42, Element: 49
Index: 43, Element:

In [63]:
# Render the local SHAP explanation of every target, labelled with its 0-based position.
for i, shap_local in enumerate(shap_graphs):
    print(f"Index {i}:")
    show(shap_local, 0)

Index 0:


Index 1:


Index 2:


Index 3:


Index 4:


Index 5:


Index 6:


Index 7:


Index 8:


Index 9:


Index 10:


Index 11:


Index 12:


Index 13:


Index 14:


Index 15:


Index 16:


Index 17:


Index 18:


Index 19:


Index 20:


Index 21:


Index 22:


Index 23:


Index 24:


Index 25:


Index 26:


Index 27:


Index 28:


Index 29:


Index 30:


Index 31:


Index 32:


Index 33:


Index 34:


Index 35:


Index 36:


Index 37:


Index 38:


Index 39:


Index 40:


Index 41:


Index 42:


Index 43:


Index 44:


Index 45:


Index 46:


Index 47:


Index 48:
