In [3]:
import networkx as nx
import numpy as np
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the edge list as space-delimited text; the first line of the file is used as the header.
# NOTE(review): a stock Matrix Market (.mtx) file opens with a "%%MatrixMarket" banner and a
# size line rather than column names - presumably this copy was edited to carry the
# "Source"/"Target" header that the graph-building cell expects; confirm.
df = pd.read_csv("econ-beacxc.mtx", delimiter=" ")
df.head()

In [4]:
# Build the graph from the edge-list frame, using its "Source" and "Target" columns as endpoints.
G = nx.from_pandas_edgelist(df, source="Source", target="Target")

In [5]:
# Pin the layout seed: when no positions are passed, nx.draw computes a randomly
# initialised spring layout, so the picture would otherwise change on every run.
layout = nx.spring_layout(G, seed=42)
nx.draw(G, pos=layout)

In [6]:
# Per-node centrality scores; each call returns a dict keyed by node.
degree_centrality, closeness_centrality, betweeness_centrality = (
    measure(G)
    for measure in (
        nx.degree_centrality,
        nx.closeness_centrality,
        nx.betweenness_centrality,
    )
)

In [7]:
# Empty frame that fixes the column layout: three centrality features plus the influence score.
# The "betwennes" spelling is kept verbatim because it is the column key.
data = pd.DataFrame(
    {column: [] for column in ("degree", "closeness", "betwennes", "influence")}
)

In [8]:
# The three centrality dicts are keyed by node; the positional pairing below is only
# valid if they list the nodes in the same order, so check that up front.
assert (
    list(degree_centrality)
    == list(closeness_centrality)
    == list(betweeness_centrality)
), "centrality dicts are not aligned node-for-node"

# Build the table in one shot instead of appending row by row with
# data.loc[len(data)]: that pattern pays for an insert per node and, because it
# appends to whatever is already in `data`, duplicates every row when the cell is re-run.
data = pd.DataFrame(
    {
        "degree": list(degree_centrality.values()),
        "closeness": list(closeness_centrality.values()),
        "betwennes": list(betweeness_centrality.values()),
    }
)
# Influence score = plain average of the three centralities.
data["influence"] = (data["degree"] + data["closeness"] + data["betwennes"]) / 3

In [9]:
# A node counts as influential when its score is at least this many times the mean score.
INFLUENCE_THRESHOLD_FACTOR = 2

avg_influence = data['influence'].mean()
# Evaluate the comparison once against the untouched scores and overwrite the column in a
# single assignment. Writing the 1s first and then testing "< threshold" on the same column
# re-examines the freshly written 1s, which get reset to 0 whenever the threshold exceeds 1.
is_influential = data['influence'] >= INFLUENCE_THRESHOLD_FACTOR * avg_influence
data['influence'] = is_influential.astype(float)  # 1.0 = influential, 0.0 = not

In [10]:
# Preview the first five rows of the feature table.
data.head(n=5)

In [11]:
# Features are every column except the last; the last column (influence) is the target.
X = data.iloc[:, :-1].to_numpy()
Y = data.iloc[:, -1].to_numpy()

In [14]:
# Balance the classes by randomly duplicating minority-class rows. The seed is fixed so the
# resampled set - and every metric computed from it - is the same on each run.
# NOTE(review): this runs before the train/test split, so copies of the same row can end up
# on both sides of the split and flatter the test scores; consider resampling only the
# training portion.
over = RandomOverSampler(random_state=42)
X, Y = over.fit_resample(X, Y)

In [15]:
def Scaling(X):
    """Standardise each feature column to zero mean and unit variance.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix to standardise.

    Returns
    -------
    numpy.ndarray
        The standardised feature matrix, produced by fitting a fresh
        StandardScaler on ``X`` and transforming it.
    """
    scaler = StandardScaler()
    return scaler.fit_transform(X)


# The scaled matrix is the function's return value, so it has to be bound back to X -
# a bare call discards it and leaves the classifier training on the unscaled features.
# NOTE(review): the scaler is fitted on all rows, including those that later become the
# test set; fitting on the training portion only would avoid that leak.
X = Scaling(X)
X[:5]

In [16]:
# k-nearest-neighbours classifier looking at the 11 closest points under Euclidean distance.
classifier = KNeighborsClassifier(
    n_neighbors=11,
    p=2,
    metric='euclidean',
)

In [18]:
# Hold out 25% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=42,
)

In [19]:
# Fit the classifier on the training portion.
classifier.fit(X=X_train, y=y_train)

In [20]:
# Predict labels for the held-out rows and display them.
y_pred = classifier.predict(X=X_test)
y_pred

In [21]:
# Confusion matrix (rows: true labels, columns: predicted labels), then the F1 score.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
print(f1_score(y_true=y_test, y_pred=y_pred))