In [11]:
import numpy as np
import tensorflow as tf
import networkx as nx
import time
import pandas as pd
from tqdm import tqdm
import tensorflow.contrib.eager as tfe
from tensorflow import keras
import math
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
# Switch TensorFlow to eager mode. This entry point belongs to the TF 1.x API
# (the same generation as the `tensorflow.contrib` import above).
tf.enable_eager_execution()

In [None]:
# Load the raw training file. Each line is split on tabs; the first field is
# recorded as the source node and every field is recorded as a known user.
with open('train.txt', 'r') as content:
    lines = content.readlines()

source_list = []   # first field of every non-blank line
sink_list = []     # declared here; nothing in this cell fills it
user_set = set()   # every node id that appears anywhere in the file
for line in lines:
    stripped = line.strip()
    if not stripped:
        # A blank (e.g. trailing) line would otherwise register '' as both a
        # source and a user, which can later be sampled as a bogus node.
        continue
    nodes = stripped.split('\t')
    source_list.append(nodes[0])
    user_set.update(nodes)

In [None]:
# Adjacency rows with surrounding whitespace (including the newline) removed.
edgeList = [row.strip() for row in lines]

# Parse the tab-delimited rows into a graph with string node ids, then keep
# its edges as a set of (u, v) tuples.
FG = nx.parse_adjlist(edgeList, delimiter='\t', nodetype=str)
edges = set(FG.edges)

In [None]:
# Draw the labelled sample: 10000 existing edges to use as positives and 1000
# nodes that a later cell pairs with random partners to form negatives.
import random

# random.sample() needs a sequence: passing a set was deprecated in Python 3.9
# and raises TypeError from 3.11 on, so materialise each population as a list.
valid_edges = random.sample(list(edges), 10000)
fake_source = random.sample(list(user_set), 1000)
fake_edges = []

In [None]:
# Pair each sampled source with one uniformly chosen node and keep the pair
# as a negative example when the two nodes are not connected.
user_pool = list(user_set)  # built once; sampling straight from the set re-copies it on every draw
for source in fake_source:
    sink = random.choice(user_pool)
    # FG is parsed without `create_using`, i.e. as an undirected graph, so the
    # `edges` set holds each edge in a single orientation. Testing
    # `(source, sink) in edges` therefore misses real edges stored as
    # (sink, source); has_edge() checks the connection in either direction.
    # Self-pairs are dropped too: they are not meaningful negatives and can
    # make the Adamic-Adar score divide by log(1) == 0.
    if source != sink and not FG.has_edge(source, sink):
        fake_edges.append((source, sink))

In [None]:
# Label sampled real edges with 1 and sampled non-edges with 0, positives
# first, then shuffle in place so the two classes are interleaved.
totalEdges = [(src, dst, 1) for src, dst in valid_edges]
totalEdges.extend((src, dst, 0) for src, dst in fake_edges)

random.shuffle(totalEdges)

In [None]:
# Read the pairs to score. The first line is skipped (presumably a header);
# every other line must hold exactly three tab-separated fields, of which the
# second and third form the node pair.
with open('test-public.txt') as fc:
    pred = fc.readlines()

predEdge = []
for line in pred[1:]:
    row = line.strip()
    if not row:
        # A blank (e.g. trailing) line would make the 3-way unpack below
        # raise ValueError; there is no pair to record for it.
        continue
    _, src, dst = row.split('\t')  # first field (presumably a row id) is not used
    predEdge.append((src, dst))

In [None]:
import math

def salton(u, v):
    """Return the Salton index of nodes ``u`` and ``v`` in the global graph ``FG``.

    The score is ``|common neighbors| / sqrt(deg(u) * deg(v))``.

    Parameters
    ----------
    u, v : node ids present in ``FG``.

    Returns
    -------
    float
        The Salton index, or ``0.0`` when either node has degree zero
        (the ratio is undefined there and no neighbor can be shared).
    """
    # Count shared neighbors first, as before, so that any error networkx
    # raises for unknown nodes still surfaces from the same call.
    shared = len(list(nx.common_neighbors(FG, u, v)))
    degree_product = FG.degree(u) * FG.degree(v)
    if degree_product == 0:
        # Isolated node: avoid ZeroDivisionError from sqrt(0) in the denominator.
        return 0.0
    return shared / math.sqrt(degree_product)

In [None]:
# Build the training matrix: one row per labelled pair, six columns:
#   0 common-neighbor count   1 Jaccard coefficient   2 resource allocation
#   3 Adamic-Adar index       4 preferential attachment   5 Salton index
# NOTE(review): positive pairs are sampled from FG's own edges and FG is not
# modified before scoring, so degree-based scores for positives include the
# very edge being predicted — confirm this is intended.
edge_name = [(s, d) for s, d, _ in totalEdges]
Y_train = np.array([label for _, _, label in totalEdges])
# Zero-filled rather than np.empty so a cell that is never written cannot
# expose uninitialised memory as a feature value.
X_train = np.zeros((len(edge_name), 6))

for i, (u, v) in enumerate(edge_name):
    X_train[i, 0] = len(list(nx.common_neighbors(FG, u, v)))
    X_train[i, 5] = salton(u, v)

# The networkx link-prediction functions yield one (u, v, score) triple per
# input pair, in input order, so rows are filled by position. This replaces a
# list.index() scan per score, which was quadratic in the number of pairs.
pair_scorers = (
    (1, nx.jaccard_coefficient),
    (2, nx.resource_allocation_index),
    (3, nx.adamic_adar_index),
    (4, nx.preferential_attachment),
)
for col, scorer in pair_scorers:
    for i, (_, _, score) in enumerate(scorer(FG, edge_name)):
        X_train[i, col] = score

In [None]:
# Build the test matrix with the same six columns as the training matrix:
#   0 common-neighbor count   1 Jaccard coefficient   2 resource allocation
#   3 Adamic-Adar index       4 preferential attachment   5 Salton index
# Zero-filled rather than np.empty so no row can carry uninitialised memory.
testF = np.zeros((len(predEdge), 6))

for i, (u, v) in enumerate(predEdge):
    testF[i, 0] = len(list(nx.common_neighbors(FG, u, v)))
    testF[i, 5] = salton(u, v)

# The networkx link-prediction functions yield one (u, v, score) triple per
# input pair, in input order, so rows are filled by position. The previous
# predEdge.index((u, v)) lookup always resolved to the FIRST occurrence of a
# pair, so any repeated pair in the test file left its later rows unfilled
# (and was quadratic in the number of pairs).
pair_scorers = (
    (1, nx.jaccard_coefficient),
    (2, nx.resource_allocation_index),
    (3, nx.adamic_adar_index),
    (4, nx.preferential_attachment),
)
for col, scorer in pair_scorers:
    for i, (_, _, score) in enumerate(scorer(FG, predEdge)):
        testF[i, col] = score



In [2]:
# Load previously saved feature tables; the cells below take their training
# and test inputs from these frames.
# NOTE(review): presumably these hold the features computed earlier in the
# notebook, but the code that wrote the pickles is not in view — confirm.
# pd.read_pickle can execute arbitrary code while unpickling, so only load
# files that come from a trusted source.
train_collection = pd.read_pickle("1W1k.pkl")
test_collection = pd.read_pickle("test_6F.pkl")

In [7]:
# Names of the three columns used as model inputs, in the order they are fed
# to the classifier.
features = ["resource_allocation_index","salton","common_neighbors"]
# Rebinds X_train / Y_train to the values stored in the loaded table.
X_train = train_collection[features]
Y_train = train_collection["Label"]  # target column

In [8]:
# Same columns, in the same order, selected from the table of pairs to score.
X_test = test_collection[features]

In [14]:
# Min-max scaler constructed with its default arguments; it is fitted in a later cell.
scaler = MinMaxScaler()

In [15]:
# Fit the scaler on the training features and transform them in one step.
X_train_scaled = scaler.fit_transform(X_train)

In [16]:
# Apply the already-fitted scaler to the test features (transform only, no refit).
X_test_scaled = scaler.transform(X_test)

In [17]:
# Multi-layer perceptron with hidden layers of 10, 10 and 5 units, logistic
# activations, the L-BFGS solver, regularisation strength alpha=5 and a fixed
# random_state.
clf = MLPClassifier(
    hidden_layer_sizes=[10, 10, 5],
    activation='logistic',
    solver='lbfgs',
    alpha=5,
    random_state=0,
    verbose=0,
)
clf.fit(X_train_scaled, Y_train)

# Keep only the second predict_proba column, i.e. the probability of
# clf.classes_[1] (presumably label 1 — confirm against the "Label" values).
test_proba = clf.predict_proba(X_test_scaled)[:, 1]

In [36]:
import csv

# (Re)create the output file containing only the header row. The `with`
# block closes the file on exit.
with open('test7.csv', 'w', newline='', encoding='utf-8') as csv_fp:
    csv.writer(csv_fp, delimiter=',').writerow(["Id", "Prediction"])

In [37]:
# Append one row per entry of test_proba: a 1-based position as the Id,
# followed by the predicted probability.
# The file is opened once for the whole batch instead of once per row, and the
# `with` block closes it, so there is no trailing close() on a name that would
# be undefined (NameError) when test_proba is empty.
with open('test7.csv', 'a', newline='', encoding='utf-8') as csv_fp2:
    csv_fp_writer2 = csv.writer(csv_fp2)
    csv_fp_writer2.writerows(
        (i + 1, test_proba[i]) for i in range(test_proba.shape[0])
    )