In [3]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [4]:
import numpy as np
from sklearn.model_selection import train_test_split

In [53]:
# Pre-computed embeddings loaded from the working directory.
api_emb = np.load('api_emb.npy')
app_emb = np.load('app_tr_emb.npy') # training-app embeddings, (# of apps, 64)

# API-usage feature matrices: one row per app, one column per API.
bm_tr = np.load('bm_tr.npy') # (# of tr apps, 1000 apis)
counts_tr = np.load('counts_tr.npy')
counts_tst = np.load('counts_tst.npy')

In [52]:
ls

api_emb.npy     counts_tr.npy            metaPrediction.ipynb
app_emb.npy     counts_tst.npy           node2vec.ipynb
app_tr_emb.npy  feature_selection.ipynb  node2vec_StellarGraph.ipynb
bm_tr.npy       infer.ipynb              test.w2v
bm_tst.npy      metapath2vec.ipynb       word2vec.ipynb


In [6]:
# Sanity check: shapes of the two training feature matrices and the API embeddings.
tuple(arr.shape for arr in (bm_tr, counts_tr, api_emb))

((1335, 1000), (1335, 1000), (1000, 64))

In [7]:
# Hold out a validation split using the `counts_tr` features as inputs and the
# app embeddings as regression targets. The fixed random_state makes the split
# (and therefore the reported losses) reproducible across kernel restarts.
# Note: this rebinds X_train / X_val / y_train / y_val, which the other
# train_test_split cells in this notebook also assign.
X_train, X_val, y_train, y_val = train_test_split(counts_tr, app_emb, random_state=42)

In [49]:
# Binarise the counts: every non-zero entry becomes 1, zeros are kept as they are.
multihot_tr = np.where(counts_tr != 0, 1, counts_tr)

In [67]:
# Hold out a validation split using the binarised `multihot_tr` features as
# inputs and the app embeddings as regression targets. The fixed random_state
# makes the split (and therefore the reported losses) reproducible.
# Note: this rebinds X_train / X_val / y_train / y_val, which the other
# train_test_split cells in this notebook also assign.
X_train, X_val, y_train, y_val = train_test_split(multihot_tr, app_emb, random_state=42)

In [10]:
# Hold out a validation split using the `bm_tr` features as inputs and the app
# embeddings as regression targets. The fixed random_state makes the split
# (and therefore the reported losses) reproducible.
# Note: this rebinds X_train / X_val / y_train / y_val, which the other
# train_test_split cells in this notebook also assign.
X_train, X_val, y_train, y_val = train_test_split(bm_tr, app_emb, random_state=42)

In [62]:
# Use the first CUDA device when one is available; otherwise fall back to the
# CPU so the notebook still runs on machines without a GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [68]:
# Convert each split to a float32 tensor and move it to the compute device.
X_train, X_val, y_train, y_val = (
    torch.tensor(split, dtype=torch.float).to(device)
    for split in (X_train, X_val, y_train, y_val)
)

In [69]:
# Shape of the API embedding matrix.
np.shape(api_emb)

(1000, 64)

In [97]:
class CustomNet(torch.nn.Module):
    """Single linear layer on top of a fixed API-embedding projection.

    An input row (one value per API) is multiplied by the fixed API embedding
    matrix and the result is mapped through one ``nn.Linear`` layer.
    """

    def __init__(self, api_embeddings=None, out_dim=64):
        """
        Args:
            api_embeddings: array-like of shape (n_apis, emb_dim) with the
                fixed API embeddings. When omitted, the notebook-level
                ``api_emb`` array is used.
            out_dim: size of the predicted vector (defaults to 64).
        """
        super(CustomNet, self).__init__()
        if api_embeddings is None:
            api_embeddings = api_emb
        emb = torch.tensor(api_embeddings, dtype=torch.float)
        self.w = torch.nn.Linear(emb.shape[1], out_dim)
        # Registered as a buffer (not a plain attribute) so that it follows the
        # module through .to()/.cuda()/.double() and is saved in state_dict,
        # while staying out of model.parameters() (it is never trained).
        self.register_buffer('api_emb', emb)

    def forward(self, x):
        """
        Args:
            x: float tensor of shape (batch, n_apis).

        Returns:
            Tensor of shape (batch, out_dim).
        """
        # (batch, n_apis) @ (n_apis, emb_dim) -> (batch, emb_dim)
        projected = x.matmul(self.api_emb)
        # (batch, emb_dim) -> (batch, out_dim)
        return self.w(projected)

In [71]:
# Exploration: display the embedding layer class exposed by torch.nn.
torch.nn.Embedding

torch.nn.modules.sparse.Embedding

In [135]:
class CustomNet(torch.nn.Module):
    """Two-layer MLP on top of a fixed API-embedding projection.

    An input row (one value per API) is multiplied by the fixed API embedding
    matrix, then passed through Linear -> leaky ReLU -> Linear.
    """

    def __init__(self, api_embeddings=None, hidden_dim=128, out_dim=64):
        """
        Args:
            api_embeddings: array-like of shape (n_apis, emb_dim) with the
                fixed API embeddings. When omitted, the notebook-level
                ``api_emb`` array is used.
            hidden_dim: width of the hidden layer (defaults to 128).
            out_dim: size of the predicted vector (defaults to 64).
        """
        super(CustomNet, self).__init__()
        if api_embeddings is None:
            api_embeddings = api_emb
        emb = torch.tensor(api_embeddings, dtype=torch.float)
        self.w1 = torch.nn.Linear(emb.shape[1], hidden_dim)
        self.w2 = torch.nn.Linear(hidden_dim, out_dim)
        # Registered as a buffer (not a plain attribute) so that it follows the
        # module through .to()/.cuda()/.double() and is saved in state_dict,
        # while staying out of model.parameters() (it is never trained).
        self.register_buffer('api_emb', emb)

    def forward(self, x):
        """
        Args:
            x: float tensor of shape (batch, n_apis).

        Returns:
            Tensor of shape (batch, out_dim).
        """
        # (batch, n_apis) @ (n_apis, emb_dim) -> (batch, emb_dim)
        projected = x.matmul(self.api_emb)
        hidden = F.leaky_relu(self.w1(projected))
        return self.w2(hidden)

In [142]:
# Build the network, move it to the compute device, and score predictions
# with the mean squared error.
model = CustomNet()
model = model.to(device)
criterion = nn.MSELoss(reduction='mean')

In [143]:
# Full-batch training of `model` against the target embeddings with Adamax.
learning_rate = 1e-4
n_steps = 20000    # number of full-batch gradient steps
log_every = 1000   # report train / validation loss every `log_every` steps

optimizer = torch.optim.Adamax(model.parameters(), lr=learning_rate)
for t in range(n_steps):
    # Forward pass and training loss on the whole training split.
    model.train()
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Periodic report. `loss` is the training loss from before this step's
    # update; `valid_loss` is measured after it, in eval mode and without
    # building an autograd graph.
    if t % log_every == log_every - 1 or t == 0:
        model.eval()
        with torch.no_grad():
            valid_loss = criterion(model(X_val), y_val)
        print(t, loss.item(), valid_loss.item())

    # TODO: add svm here and print score

999 0.42082446813583374 0.44589319825172424
1999 0.22428525984287262 0.24331176280975342
2999 0.13483192026615143 0.15035301446914673
3999 0.08810924738645554 0.09803062677383423
4999 0.06548546254634857 0.07347427308559418
5999 0.05406970903277397 0.06190504506230354
6999 0.048183586448431015 0.056709930300712585
7999 0.04533015564084053 0.0545327253639698
8999 0.0433502234518528 0.053624775260686874
9999 0.0417759045958519 0.05295892804861069
10999 0.0407695434987545 0.05267821252346039
11999 0.0400400310754776 0.05247397720813751
12999 0.0394720733165741 0.05218688026070595
13999 0.039038460701704025 0.05210603028535843
14999 0.03870140016078949 0.05212204158306122


KeyboardInterrupt: 

In [None]:
# 0.05 0.07  -- scratch note (presumably train / validation loss; TODO confirm)

In [144]:
# Embed the test apps from their binarised (multi-hot) counts.
multihot_tst = np.where(counts_tst == 0, counts_tst, 1)
multihot_tst = torch.tensor(multihot_tst, dtype=torch.float).to(device)
# Call the module itself (not .forward) so registered hooks run, and disable
# autograd because this is inference only.
with torch.no_grad():
    app_tst_emb = model(multihot_tst).cpu().numpy()

In [37]:
# Embed the test apps from the `bm_tst` feature matrix.
bm_tst = np.load('bm_tst.npy')
bm_tst = torch.tensor(bm_tst, dtype=torch.float).to(device)
# Call the module itself (not .forward) so registered hooks run, and disable
# autograd because this is inference only.
with torch.no_grad():
    app_tst_emb = model(bm_tst).cpu().numpy()

In [None]:
# Embed the test apps from the `counts_tst` feature matrix.
# Note: `counts_tst` is rebound from a NumPy array to a tensor on `device` here.
counts_tst = np.load('counts_tst.npy')
counts_tst = torch.tensor(counts_tst, dtype=torch.float).to(device)
# Call the module itself (not .forward) so registered hooks run, and disable
# autograd because this is inference only.
with torch.no_grad():
    app_tst_emb = model(counts_tst).cpu().numpy()

In [76]:
import pandas as pd

In [145]:
# Read the train / test metadata tables (first column as index) and derive
# boolean targets that are True where the `label` column equals 'class1'.
meta_tr, meta_tst = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ('../data/processed/meta_tr.csv', '../data/processed/meta_tst.csv')
)
y_tr = meta_tr['label'].eq('class1')
y_tst = meta_tst['label'].eq('class1')

In [146]:
from sklearn.svm import SVC

# RBF-kernel SVM classifier. gamma is pinned to 'auto' (1 / n_features): that
# is what the unset default resolved to in the recorded run (its repr shows
# gamma='auto_deprecated'), whereas newer scikit-learn releases default to
# 'scale'. Pinning it keeps the scores comparable across library versions and
# avoids the deprecation warning.
svm = SVC(kernel='rbf', gamma='auto')

In [147]:
# Fit the classifier on the training-app embeddings.
svm.fit(X=app_emb, y=y_tr)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='rbf', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [148]:
# Mean accuracy on the data the classifier was fitted on.
svm.score(X=app_emb, y=y_tr)

0.9730337078651685

In [149]:
# Mean accuracy on the test apps, using the embeddings predicted by the network.
svm.score(X=app_tst_emb, y=y_tst)

0.9588014981273408