In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from explanation_generation import find_features_to_change, find_top_influential_instances
from autoencoder import Autoencoder, EnhancedLR, AutoEncoderClassifier
import torch
from sklearn.metrics import accuracy_score
from torch.utils.data import  TensorDataset

In [2]:
# Read the feature table and the label table; in both files the first CSV
# column is used as the row index (index_col=0).
# At this point the two names hold the *full* data set; the split happens in
# the next cell.
X_train, y_train = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ("data/dataset.csv", "data/labels.csv")
)

In [3]:
# Split into a very small training part and a large held-out part:
# test_size=0.998 keeps roughly 0.2% of the rows for training (138 rows in the
# recorded output), random_state=2 fixes the shuffle.
# NOTE(review): this cell overwrites its own inputs (X_train / y_train), so it
# is not idempotent -- running it a second time splits the already-reduced
# training set again. Re-run the loading cell first.
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.998, random_state=2)
len(X_train)

138

In [4]:
# Seed PyTorch's global RNG before any model object is created so that
# torch-based randomness in the project classes (presumably weight
# initialisation -- confirm in autoencoder.py) is repeatable under
# Restart & Run All. The value mirrors the random_state used for the split.
torch.manual_seed(2)

# Autoencoder over all input columns with a 64-dimensional embedding; both the
# reconstruction and the classification objective use binary cross-entropy.
embedder = Autoencoder(
    X_train.shape[1],
    embedding_dim=64,
    reconstruction_loss=torch.nn.BCELoss(),
    classification_loss=torch.nn.BCELoss(),
)

# `model` and `autoencoder` are two names for the same classifier object; the
# alias is kept so any code that refers to `autoencoder` keeps working.
model = autoencoder = AutoEncoderClassifier(embedder=embedder)

In [5]:
# Train on the small training split. The third positional argument is
# presumably the number of epochs (the training log prints one line per epoch).
model.fit(X_train.values, y_train.values, 25)

# scikit-learn's signature is accuracy_score(y_true, y_pred): ground truth
# first, predictions second. Accuracy itself is symmetric, so the number is
# unchanged, but the conventional order keeps the call correct if it is ever
# swapped for an asymmetric metric.
accuracy_score(y_test, model.predict(X_test))

Epoch: 1, Training Loss: 2.18087899684906, Recon Loss: 0.34290212392807007, Classification Loss: 0.34198999404907227
Epoch: 2, Training Loss: 2.121616840362549, Recon Loss: 0.3369373083114624, Classification Loss: 0.3309010863304138
Epoch: 3, Training Loss: 2.0765026807785034, Recon Loss: 0.33102497458457947, Classification Loss: 0.3289940357208252
Epoch: 4, Training Loss: 2.0155235528945923, Recon Loss: 0.32409825921058655, Classification Loss: 0.31214553117752075
Epoch: 5, Training Loss: 1.980546772480011, Recon Loss: 0.3162600100040436, Classification Loss: 0.3250485062599182
Epoch: 6, Training Loss: 1.8776084184646606, Recon Loss: 0.30288025736808777, Classification Loss: 0.27233463525772095
Epoch: 7, Training Loss: 1.8482618927955627, Recon Loss: 0.29452723264694214, Classification Loss: 0.2984786927700043
Epoch: 8, Training Loss: 1.7477986216545105, Recon Loss: 0.27398478984832764, Classification Loss: 0.26288285851478577
Epoch: 9, Training Loss: 1.7342920303344727, Recon Loss: 0

0.781185354492456

In [6]:
# Sum of the predictions over the held-out split. predict() returns 0./1.
# float values in the later cells, so this is presumably the number of rows
# predicted as the positive class.
model.predict(X_test).sum()

40610.0

In [7]:
# Number of rows in the held-out split, for comparison with the positive count.
X_test.shape[0]

69127

In [8]:
# Highest value in column 1 of predict_proba over the held-out split
# (presumably the probability of the positive class -- confirm column order).
# It is stored under its own name: `recommendation` is used further down for
# the selected recipe, and reusing that name for a float made the variable's
# type depend on which cell ran last.
top_probability = pd.DataFrame(model.predict_proba(X_test))[1].max()
top_probability

0.9875273

In [9]:
# Index of the row with the highest column-1 probability. The DataFrame is
# built from the raw predict_proba output and therefore has a default integer
# index, so the label returned by idxmax() is the row *position* in X_test
# (the next cell uses it with .iloc).
# NOTE(review): at this point `recommendation` is an integer position, not a
# recipe; the next cell rebinds the same name to the selected row.
recommendation = pd.DataFrame(model.predict_proba(X_test))[1].idxmax()
recommendation

58499

In [10]:
# Replace the integer position by an independent copy of that row of X_test.
# NOTE(review): the cell reads and rebinds `recommendation`, so it is not
# idempotent -- after one run the name holds a Series and a second run no
# longer selects by position. Re-run the idxmax cell before re-running this.
recommendation = X_test.iloc[recommendation].copy()
# Sum of the row's feature values (the number of ingredients, assuming 0/1
# features -- the ingredient listing further down has the same length).
recommendation.sum()

19

In [11]:
# Ask the project helper which features of the selected row to change.
# The displayed result is a tuple of integers; the following cells use them
# as column positions (X_train.columns.values[i]).
features = find_features_to_change(model, recommendation) 
features

(3, 86, 117, 126, 183, 233, 236, 266, 299, 332)

In [12]:
# Translate the positional feature ids into their column (ingredient) names
# and show them alphabetically.
sorted(X_train.columns.values[position] for position in features)

['anise',
 'cornstarch',
 'flour',
 'ginger',
 'milk',
 'pork',
 'powder',
 'sauce',
 'sugar',
 'water']

In [13]:
# Counterfactual copy of the selected recipe; the original stays untouched.
temp = recommendation.copy()

# `features` holds integer column positions, while `temp` is indexed by
# ingredient names. Plain `temp[int] = 0` only worked through pandas'
# deprecated positional fallback and would set/append by *label* in newer
# versions, leaving the real features unchanged. `.iloc` states the positional
# intent explicitly; list() is required because a tuple would be read as a
# multi-axis key.
temp.iloc[list(features)] = 0

# Prediction for the modified recipe (first and only element of the result).
model.predict(temp.values)[0]

0.0

In [14]:
# Sum of the feature values left in the counterfactual copy after the selected
# features were set to 0.
temp.sum()

9

In [15]:
def get_ingridients(recommendation):
    """Return the labels of all entries of ``recommendation`` that are > 0.

    The input is turned into a one-row frame (one column per entry), every
    value that is not strictly positive is masked to NaN, and the columns
    containing NaN are dropped; the surviving column labels are returned.

    Parameters
    ----------
    recommendation : pandas.Series
        One recipe, indexed by ingredient name.

    Returns
    -------
    numpy.ndarray
        The labels whose value is greater than zero, in their original order.
    """
    as_row = pd.DataFrame(recommendation).transpose()
    strictly_positive = as_row > 0
    masked_row = as_row[strictly_positive]
    return masked_row.dropna(axis="columns").columns.values

# Ingredients present (value > 0) in the selected recipe, as a plain list.
list(get_ingridients(recommendation))

['anise',
 'bamboo',
 'clove',
 'cornstarch',
 'flour',
 'ginger',
 'gingerroot',
 'milk',
 'mushroom',
 'oil',
 'onion',
 'paper',
 'pork',
 'powder',
 'sauce',
 'shortening',
 'sugar',
 'water',
 'yeast']

In [16]:
def concatenate_weights_biases(grad):
    """Merge consecutive pairs of tensors into one flat tensor per pair.

    ``grad`` is walked two entries at a time -- entries 0 and 1, 2 and 3, and
    so on (by the function's naming, a weight gradient followed by its bias
    gradient). Each pair is flattened and joined into a single 1-D tensor.
    A trailing entry without a partner is ignored.

    Parameters
    ----------
    grad : sequence of torch.Tensor
        Tensors ordered as consecutive pairs.

    Returns
    -------
    list of torch.Tensor
        One 1-D tensor per complete pair, in order.
    """
    pair_starts = range(0, len(grad) - 1, 2)
    return [
        torch.cat((torch.flatten(grad[start]), torch.flatten(grad[start + 1])))
        for start in pair_starts
    ]

In [17]:
# Wrap the training split as float32 tensors in a TensorDataset for the
# influence computation below.
X_tensor = torch.tensor(X_train.values, dtype=torch.float32)
# NOTE(review): y_train is displayed as a one-column table further down, so
# its .values is already 2-D; the extra axis inserted here then yields a
# 3-D tensor (n, 1, 1). Confirm this is what the influence helper expects.
y_tensor = torch.tensor(y_train.values, dtype=torch.float32)[:, None]
dataset = TensorDataset(X_tensor, y_tensor)

In [18]:
# Ask the project helper for the training instances most influential for the
# selected recipe. It receives the embedder (not the wrapping classifier) and
# the TensorDataset built from the training split. The result shown in the
# next cell is a list of (position, score) pairs.
most_influencial = find_top_influential_instances(model.embedder ,dataset, recommendation )

In [19]:
# Display the (position, score) pairs returned by the influence helper.
most_influencial

[(132, 0.0651148334145546),
 (122, -0.0605943463742733),
 (46, 0.058781854808330536),
 (133, 0.05548936128616333),
 (19, 0.0512673556804657)]

In [20]:
# Keep only the first element of every (position, score) pair; the following
# cells use these values as row positions into the training split.
indices = [pair[0] for pair in most_influencial]

In [21]:
# One column per influential training instance (labelled 0, 1, ... in the
# order of `indices`), listing the ingredients present in that instance.
#
# Each column is built as its own Series and the frame is created from the
# dict in one go, so pandas aligns on the union of the row indices and pads
# shorter lists with NaN. The previous column-by-column assignment aligned
# every column to the index of the *first* one, which silently cut off any
# later instance with more ingredients. It also recomputed the whole filtered
# list on each iteration and hard-coded the number of instances to 5.
top = pd.DataFrame(
    {
        rank: pd.Series(get_ingridients(X_train.iloc[position]))
        for rank, position in enumerate(indices)
    }
)

In [22]:
# Show the table with missing cells (shorter ingredient lists) rendered as a
# blank; fillna returns a new frame, `top` itself is not modified.
top.fillna(" ")

Unnamed: 0,0,1,2,3,4
0,breadcrumb,butter,breadcrumb,avocado,bacon
1,cheese,juice,cheese,chive,cheese
2,clove,orange,ground,cream,mayonnaise
3,egg,plum,leaf,dill,onion
4,fillet,sugar,liquid,egg,potato
5,milk,water,oil,juice,salt
6,oil,,pepper,lemon,
7,oregano,,salt,lobster,
8,pepper,,seasoning,milk,
9,salt,,,oil,


In [23]:
# Ingredients of the selected recipe again, for side-by-side comparison with
# the influential training instances above (same call as in the earlier cell).
get_ingridients(recommendation)

array(['anise', 'bamboo', 'clove', 'cornstarch', 'flour', 'ginger',
       'gingerroot', 'milk', 'mushroom', 'oil', 'onion', 'paper', 'pork',
       'powder', 'sauce', 'shortening', 'sugar', 'water', 'yeast'],
      dtype=object)

In [24]:
# Labels of the influential training rows, in the same order as `indices`.
y_train.iloc[indices]

Unnamed: 0,0
34544,1
8506,0
49938,1
55809,1
23074,1


In [25]:
# Prediction for the unmodified selected recipe.
model.predict(recommendation)

array([1.], dtype=float32)

In [26]:
# Prediction for the counterfactual copy `temp`, in which the features
# returned by find_features_to_change were set to 0 in an earlier cell.
model.predict(temp)

array([0.], dtype=float32)

In [27]:
# Sanity check: the original recipe still has its full feature sum, because
# the zeroing was applied to the copy `temp`, not to `recommendation`.
recommendation.sum()

19