In [22]:
import os
import torch
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

from model_lp import BaseLabelPropagation, LabelPropagation, LabelSpreading
from sklearn.metrics import f1_score

In [23]:
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
print(parent_dir)

/home/erf6575/Desktop/KTN-main/LP Model


In [24]:
def create_boolean_tensor(length, train_percent):
    num_true = int(length * train_percent)
    numpy_array = np.concatenate((np.ones(num_true, dtype=np.bool), np.zeros(length - num_true, dtype=np.bool)))
    np.random.seed(42)
    np.random.shuffle(numpy_array)
    return numpy_array

In [26]:
product_features = np.load(parent_dir + '/data/nodes-product.npy').astype(np.float32)
product_features = torch.from_numpy(product_features).to(torch.float)
print(product_features)
print(product_features.shape)

product_labels = np.load(parent_dir + '/data/labels-product.npy').astype(np.int32).flatten()
product_labels = torch.from_numpy(product_labels).to(torch.long)
print(product_labels)
print(product_labels.shape)

# product_df = pd.read_csv(parent_dir + '/data/product_train_test.csv').drop(columns=['product_asin'])
# product_is_train = product_df.to_numpy(dtype=bool).flatten()

product_is_train = create_boolean_tensor(product_labels.shape[0], 0.003)
product_is_test = ~torch.from_numpy(product_is_train)
print(product_is_test)
print(product_is_test.shape)

tensor([[1.2650e+03, 1.2648e-01, 1.0435e-01,  ..., 9.1885e-01, 0.0000e+00,
         1.3000e+01],
        [2.8600e+02, 8.0420e-02, 1.9930e-01,  ..., 4.1765e+00, 0.0000e+00,
         3.5000e+01],
        [1.5190e+03, 2.0145e-01, 9.4799e-02,  ..., 4.7784e-01, 0.0000e+00,
         5.0000e+00],
        ...,
        [2.0000e+00, 5.0000e-01, 5.0000e-01,  ..., 0.0000e+00, 1.3800e+02,
         1.3800e+02],
        [1.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [1.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00]])
torch.Size([3408, 17])
tensor([0, 0, 1,  ..., 1, 0, 1])
torch.Size([3408])
tensor([True, True, True,  ..., True, True, True])
torch.Size([3408])


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  numpy_array = np.concatenate((np.ones(num_true, dtype=np.bool), np.zeros(length - num_true, dtype=np.bool)))


In [27]:
# Create the product graph
half_1 = pd.read_csv(parent_dir + '/data/edges-to-product-from-product.csv')
half_2 = half_1[['Target', 'Source']].rename(columns={'Target': 'Source', 'Source': 'Target'})
half_3 = pd.DataFrame(data={'Source': list(range(0, 3408)), 'Target': list(range(0, 3408))})
adj_df = pd.concat([half_1, half_2, half_3], axis=0, ignore_index=True)
adj_df['Weight'] = 1
#print(adj_df)

vals = np.unique(adj_df[['Source', 'Target']])
my_adj_matrix = pd.DataFrame(0, index=vals, columns=vals)
f = my_adj_matrix.index.get_indexer
my_adj_matrix.values[f(adj_df.Source), f(adj_df.Target)] = adj_df.Weight.values

print(my_adj_matrix)


# Labels
my_labels = product_labels.detach().clone()
print(my_labels)
# All the labels that are boolean test will turn into -1 (empty)
my_labels[product_is_test] = -1
print(my_labels)


# Create input tensors
adj_matrix_t = torch.FloatTensor(my_adj_matrix.values)
labels_t = my_labels

print(adj_matrix_t.shape)
print(labels_t.shape)

      0     1     2     3     4     5     6     7     8     9     ...  3398  \
0        1     1     1     0     1     1     1     0     0     0  ...     0   
1        1     1     1     0     0     0     1     0     0     1  ...     0   
2        1     1     1     0     1     1     1     0     1     1  ...     0   
3        0     0     0     1     0     0     1     0     0     0  ...     0   
4        1     0     1     0     1     1     1     0     0     1  ...     0   
...    ...   ...   ...   ...   ...   ...   ...   ...   ...   ...  ...   ...   
3403     0     0     0     0     0     0     0     0     0     0  ...     0   
3404     0     0     0     0     0     0     0     0     0     0  ...     0   
3405     0     0     0     0     0     0     0     0     0     0  ...     0   
3406     0     0     0     0     0     0     0     0     0     0  ...     0   
3407     0     0     0     0     0     0     0     0     0     0  ...     0   

      3399  3400  3401  3402  3403  3404  3405  340

In [28]:
# Learn with Label Propagation
label_propagation = LabelPropagation(adj_matrix_t)
print("Label Propagation: ", end="")
label_propagation.fit(labels_t)
label_propagation_output_labels = label_propagation.predict_classes()

f1_score(product_labels, label_propagation_output_labels, average='macro')

Label Propagation: 

0.3462757056018203

In [29]:
# Learn with Label Spreading
label_spreading = LabelSpreading(adj_matrix_t)
print("Label Spreading: ", end="")
label_spreading.fit(labels_t, alpha=0.8)
label_spreading_output_labels = label_spreading.predict_classes()

f1_score(product_labels, label_propagation_output_labels, average='macro')

Label Spreading: The method stopped after 13 iterations, variation=0.0005.


0.3462757056018203