In [None]:
import pandas as pd

# Load the node-pair tables; the id columns (and the training label) are
# parsed as integers.
train = pd.read_csv(
    r'Data/new_train_data.csv',
    dtype={'node1': int, 'node2': int, 'label': int},
)
test = pd.read_csv(
    r'Data/new_test_data.csv',
    dtype={'node1': int, 'node2': int},
)

In [None]:
from core import Graph

# Build the graph from the positive training pairs only: every row with
# label == 1 contributes one edge between its node1 and node2.
graph = Graph()

# Select just the two id columns and walk them with itertuples rather than
# iterrows. iterrows wraps every row in a Series with a single common dtype,
# so if `train` already carries float columns (e.g. this cell is re-run after
# the feature columns were added) the ids would reach add_edge as floats.
# itertuples keeps each column's dtype and avoids building a Series per row.
positive_pairs = train.loc[train['label'] == 1, ['node1', 'node2']]
for node1, node2 in positive_pairs.itertuples(index=False, name=None):
    graph.add_edge(node1, node2)

In [None]:
def neighbors(node):
    """Return ``graph.get_neighbor_size(node)`` for the module-level ``graph``.

    Presumably this is the number of neighbours of ``node``; the exact meaning
    is defined by ``Graph.get_neighbor_size`` -- TODO confirm.
    """
    neighbor_size = graph.get_neighbor_size(node)
    return neighbor_size

def common_neighbors(row):
    """Row-wise adapter around ``graph.common_neighbors``.

    ``row`` must support ``row['node1']`` and ``row['node2']`` (e.g. a
    DataFrame row). Both ids are forwarded to the module-level ``graph`` and
    its result is returned unchanged.
    """
    first, second = row['node1'], row['node2']
    return graph.common_neighbors(first, second)

def jaccard_coefficient(row):
    """Row-wise adapter around ``graph.jaccard_coefficient``.

    Reads ``row['node1']`` and ``row['node2']``, passes them to the
    module-level ``graph`` and returns whatever it computes for that pair.
    """
    first, second = row['node1'], row['node2']
    return graph.jaccard_coefficient(first, second)

def adamic_adar(row):
    """Row-wise adapter around ``graph.adamic_adar``.

    Reads ``row['node1']`` and ``row['node2']``, passes them to the
    module-level ``graph`` and returns whatever it computes for that pair.
    """
    first, second = row['node1'], row['node2']
    return graph.adamic_adar(first, second)

In [None]:
def cal_func_score(df: pd.DataFrame):
    """Add the graph-based feature columns to ``df`` and return it.

    Columns written (any existing column of the same name is overwritten):

    * ``node1_neighbors`` / ``node2_neighbors`` -- ``neighbors`` applied to
      each id in ``node1`` / ``node2``.
    * ``common_neighbors``, ``jaccard_coefficient``, ``adamic_adar`` -- the
      matching row helper applied to every ``(node1, node2)`` pair.

    Args:
        df: Frame with ``node1`` and ``node2`` columns.

    Returns:
        The same ``df`` object. It is modified in place, so the caller's frame
        gains the new columns even if the return value is ignored.
    """
    # A row-wise apply hands each row to the helper as a Series with a single
    # common dtype. Applying over the whole frame would therefore turn the ids
    # into floats as soon as any float column is present (one added below, or
    # feature columns left over from a previous run on the same frame).
    # Restricting the row-wise work to the two id columns keeps their dtype.
    node_pairs = df[['node1', 'node2']]

    df['node1_neighbors'] = df['node1'].apply(neighbors)
    df['node2_neighbors'] = df['node2'].apply(neighbors)
    df['common_neighbors'] = node_pairs.apply(common_neighbors, axis = 1)
    df['jaccard_coefficient'] = node_pairs.apply(jaccard_coefficient, axis = 1)
    df['adamic_adar'] = node_pairs.apply(adamic_adar, axis = 1)

    return df

In [None]:
# Attach the graph-derived feature columns to both splits (train first).
train, test = cal_func_score(train), cal_func_score(test)

In [None]:
# Feature columns fed to the models, followed by the target column.
x_col = [
    'node1_neighbors',
    'node2_neighbors',
    'common_neighbors',
    'jaccard_coefficient',
    'adamic_adar',
]
y_col = 'label'

In [None]:
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression

# Three classifiers, all seeded with random_state=0.
rfc = RandomForestClassifier(n_estimators=5, max_depth=1, random_state=0)
xgb = XGBClassifier(n_estimators=5, max_depth=5, random_state=0)
lr = LogisticRegression(random_state=0)

# Fit each one on the same feature columns and label. The last fit stays a
# bare expression so the cell still displays the fitted estimator.
rfc.fit(train[x_col], train[y_col])
xgb.fit(train[x_col], train[y_col])
lr.fit(train[x_col], train[y_col])

In [None]:
# One column of test-set predictions per fitted model, in rfc / xgb / lr order.
s = pd.DataFrame(
    {
        name: model.predict(test[x_col])
        for name, model in (("rfc", rfc), ("xgb", xgb), ("lr", lr))
    }
)

In [None]:
# How often each model predicts each value, shown as a tuple (rfc, xgb, lr).
tuple(s[name].value_counts() for name in ("rfc", "xgb", "lr"))

In [None]:
# Rows where the rfc and xgb predictions disagree.
s.loc[s["rfc"] != s["xgb"]]

In [None]:
# Rows where the rfc and lr predictions disagree.
s.loc[s["rfc"] != s["lr"]]

In [None]:
# Rows where the xgb and lr predictions disagree.
s.loc[s["xgb"] != s["lr"]]