# Signed Graph Convolutional Network(SGCN)を用いたFraud User Detection

```sh
pip install -r /home/ubuntu/SGCN/requirements.txt
pip install torch_scatter
pip install torch_sparse
pip install easydict
```


In [None]:
from sgcn import SignedGCNTrainer
from parser import parameter_parser
from utils import tab_printer, read_graph, score_printer, save_logs
import argparse
import pandas as pd
import numpy as np
import torch
import json
from sklearn.metrics import roc_auc_score
%matplotlib inline

In [None]:
import easydict

# Run configuration, exposed with attribute access (args.edge_path, ...).
args = easydict.EasyDict(dict(
    # Input files (an alternative edge list is '../input/amazon/user_network.csv').
    edge_path='../input/amazon/amazon_network.csv',
    features_path='../input/amazon/amazon_node_feature.csv',
    nodes_path='../input/amazon/amazon_gt.csv',
    # Output files; the embedding lives in a tmp folder used for cross-validation.
    embedding_path='../tmp/embedding/amazon_sgcn_feature05.csv',
    regression_weights_path='../tmp/weights/amazon_sgcn_feature05.csv',
    log_path='../logs/amazon_logs_feature05.json',
    # Training settings.
    epochs=500,
    test_size=0.33,
    reduction_iterations=128,
    reduction_dimensions=30,
    seed=42,
    lamb=1.0,
    learning_rate=0.005,
    # NOTE(review): 1e-4 is exactly the value of the previous literal 10e-5;
    # confirm that 1e-5 was not what was intended.
    weight_decay=1e-4,
    layers=[64, 32, 16, 8],
    # Feature source switches.
    spectral_features=False,
    general_features=True,
))

In [None]:
# Show the run configuration as a single-column table (one row per setting).
# pd.json_normalize is the public entry point; the pd.io.json alias is deprecated.
pd.json_normalize(args).T

## 10-fold cross-validation
- train : validation : test = 6:3:1

In [None]:
# tab_printer(args)  # disabled call, kept for reference
# Load the graph described by args. nodes_dict['indice'] holds the node ids and
# nodes_dict['label'] the corresponding labels.
edges, nodes_dict = read_graph(args)

In [None]:
from sklearn.model_selection import StratifiedKFold

In [None]:
# Collects one AUC value per fold.
auc_scores = []
# Node ids and labels that the folds are drawn from.
all_indice, all_labels = nodes_dict['indice'], nodes_dict['label']
# Ten stratified folds.
# NOTE(review): no shuffle/random_state is passed, so args.seed does not
# influence the fold assignment - confirm this is intended.
kf = StratifiedKFold(n_splits=10)

In [None]:
# Stratified k-fold evaluation. For every fold: train on the training nodes
# only, then score the held-out nodes with the embedding and regression weights
# read back from disk, and record the fold's ROC-AUC in auc_scores.
for fold, (train_index, test_index) in enumerate(kf.split(X=all_indice, y=all_labels)):
    print(f'{fold}-th fold')

    # --- training -----------------------------------------------------------
    # The trainer only gets the ids/labels of this fold's training nodes; the
    # total node count is passed through unchanged.
    tmp_nodes_dict = {
        'all_ncount': nodes_dict['all_ncount'],
        'indice': all_indice[train_index],
        'label': all_labels[train_index],
    }
    trainer = SignedGCNTrainer(args, edges, tmp_nodes_dict)
    trainer.setup_dataset()
    trainer.create_and_train_model()
    if args.test_size > 0:
        # NOTE(review): save_model() presumably writes args.embedding_path and
        # args.regression_weights_path, which the test step below reads. With
        # test_size == 0 the files of an earlier run would be scored - confirm.
        trainer.save_model()
        score_printer(trainer.logs)
        save_logs(args, trainer.logs)

    # --- test ---------------------------------------------------------------
    test_node_indice = all_indice[test_index]
    test_node_labels = all_labels[test_index]
    # Rows are selected by position, i.e. this assumes the embedding file is
    # ordered by node id - TODO confirm against the trainer's output.
    feature = pd.read_csv(args.embedding_path, index_col='id').values
    test_feature = feature[test_node_indice]
    weight = pd.read_csv(args.regression_weights_path)
    predictions = np.dot(test_feature, weight.values.T)
    # dim=1 normalises each row; relying on the implicit dim is deprecated.
    probabilities = torch.nn.functional.softmax(torch.from_numpy(predictions), dim=1).numpy()
    # Label -1 becomes class 0, every other label class 1.
    y_true = [0 if label == -1 else 1 for label in test_node_labels]
    auc_score = roc_auc_score(y_true=y_true, y_score=probabilities[:, 1])
    auc_scores.append(auc_score)

In [None]:
# Average ROC-AUC over all folds collected so far.
np.asarray(auc_scores).mean()

結果
- amazon : 
- alpha : 
- epinions : 

## single-validation

In [None]:
# Single run: the trainer receives the full nodes_dict instead of one fold's subset.
trainer = SignedGCNTrainer(args, edges, nodes_dict)
# NOTE(review): setup_dataset() is presumably where the args.test_size hold-out
# is created - confirm in sgcn.py.
trainer.setup_dataset()
trainer.create_and_train_model()

In [None]:
# Save the model and report/store the logs only when test_size is positive.
if args.test_size > 0:
    # NOTE(review): presumably writes args.embedding_path and
    # args.regression_weights_path, which later cells read back - confirm.
    trainer.save_model()
    score_printer(trainer.logs)
    save_logs(args, trainer.logs)

In [None]:
import json

In [None]:
# Load the per-epoch metrics of the configured run. The log is read from
# args.log_path (the path handed to save_logs via args) instead of a hard-coded
# file belonging to another dataset, and the file handle is closed after reading.
with open(args.log_path, 'r') as log_file:
    header, *records = json.load(log_file)['performance']

# The first entry of the log holds the column names, the rest the metric rows.
# Building the frame from the data rows only keeps the columns numeric; the
# 1-based index matches the row positions of the previous header-stripping code.
performance = pd.DataFrame(records, columns=header, index=range(1, len(records) + 1))

In [None]:
# Line plot of the logged AUC values against the frame's index.
performance.loc[:, 'AUC'].plot()

In [None]:
# Read back the stored node embedding, using its 'id' column as the row index.
feature = pd.read_csv(args.embedding_path,index_col='id')

# Read back the stored regression weights.
weight = pd.read_csv(args.regression_weights_path)

In [None]:
# Per-node scores: embedding matrix times the transposed weight matrix.
predictions = feature.values @ weight.values.T

In [None]:
# Turn the per-node scores into probabilities. dim=1 normalises across the
# columns of each row; omitting dim is deprecated and triggers a warning.
probabilities = torch.nn.functional.softmax(torch.from_numpy(predictions), dim=1).numpy()

In [None]:
# Predicted class per node = column with the highest probability.
predict_labels = np.argmax(probabilities, axis=1)

In [None]:
# ROC-AUC over the nodes listed in nodes_dict: label -1 maps to class 0, every
# other label to class 1; the score is the probability in column 1.
roc_auc_score(
    y_true=np.where(np.asarray(nodes_dict['label']) == -1, 0, 1),
    y_score=probabilities[:, 1][nodes_dict['indice']],
)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Confusion matrix of true classes (label -1 -> 0, otherwise 1) against the
# predicted classes of the nodes listed in nodes_dict.
confusion_matrix(
    np.where(np.asarray(nodes_dict['label']) == -1, 0, 1),
    predict_labels[nodes_dict['indice']],
)