In [None]:
import pickle
from concurrent.futures import ThreadPoolExecutor

import numpy
import pandas as pd
from matplotlib import pyplot as plt

from modules.Modularity import RecursiveModularity
from modules.support_functions import Utils

# NOTE(review): later cells also use `sns` (seaborn), `train_test_split`
# (scikit-learn) and `CatBoostRegressor` (catboost), none of which is imported
# in any visible cell. Add those imports here so the notebook survives
# Restart Kernel -> Run All.

# Thread-pool size: passed as `max_workers` to the ThreadPoolExecutor in the
# Regression section.
num_workers = 4

In [None]:
from littleballoffur import DegreeBasedSampler, \
    PageRankBasedSampler, \
    RandomEdgeSampler, \
    SnowBallSampler, \
    ForestFireSampler, \
    CommunityStructureExpansionSampler, \
    ShortestPathSampler, \
    RandomWalkSampler, \
    RandomWalkWithJumpSampler, \
    MetropolisHastingsRandomWalkSampler, \
    NonBackTrackingRandomWalkSampler, \
    CirculatedNeighborsRandomWalkSampler, \
    CommonNeighborAwareRandomWalkSampler, \
    LoopErasedRandomWalkSampler

# Sampler classes under evaluation. The regression cell derives each method's
# lookup key from the class name via `str(method)`, so entries must be classes
# (not instances).
methods = [
    # Random node sampling
    DegreeBasedSampler,
    PageRankBasedSampler,

    # Random edge sampling
    RandomEdgeSampler,
    SnowBallSampler,
    CommunityStructureExpansionSampler,
    ShortestPathSampler,
    # Random-walk based
    RandomWalkSampler,
    RandomWalkWithJumpSampler,
    MetropolisHastingsRandomWalkSampler,
    NonBackTrackingRandomWalkSampler,
    CirculatedNeighborsRandomWalkSampler,
    CommonNeighborAwareRandomWalkSampler,
    LoopErasedRandomWalkSampler,
    RecursiveModularity
]

**загрузка данных**

In [None]:
# NOTE(review): pickle.load can execute arbitrary code embedded in the file.
# Only load pickles that were produced by this project.
with open('all_graphs.pickle', 'rb') as f:
    graphs = pickle.load(f)

# Motif distributions computed on the full graphs.
X_full_f1 = numpy.load('DataHelp/motifs_matrix_full_f1.npy')
X_full_f3 = numpy.load('DataHelp/motifs_matrix_full_f3.npy')
X_full_nodif = numpy.load('DataHelp/motifs_matrix_full.npy')

# Dictionaries of motif distributions computed on the samples.
with open('./DataHelp/motifs_samples_forMSE.pickle', 'rb') as f:
    X_sample = pickle.load(f)
with open('./DataHelp/motifs_samples_f1_forMSE.pickle', 'rb') as f:
    X_sample_f1 = pickle.load(f)
with open('./DataHelp/motifs_samples_f3_forMSE.pickle', 'rb') as f:
    X_sample_f3 = pickle.load(f)


def _first_replicate(samples, n_rows):
    """Return a new two-level dict whose leaves are cut to their first ``n_rows`` rows.

    Args:
        samples: mapping of outer key -> inner mapping; every inner value
            must support slicing.
        n_rows: number of leading rows to keep from every inner value.

    Returns:
        A new dict with the same outer and inner keys, where each inner value
        is ``value[:n_rows]``. The input mapping is not modified.
    """
    return {
        outer_key: {inner_key: rows[:n_rows] for inner_key, rows in inner.items()}
        for outer_key, inner in samples.items()
    }


# Every subgraph except the Modularity one is repeated 10 times because the
# sampling methods are stochastic. A single replicate is therefore the first
# len(graphs) rows (or the second len(graphs) rows, and so on).
# NOTE(review): the original comment says 170 rows -- presumably
# len(graphs) == 170; confirm.
X_sample_regression = _first_replicate(X_sample, len(graphs))
X_sample_f3_regression = _first_replicate(X_sample_f3, len(graphs))
X_sample_f1_regression = _first_replicate(X_sample_f1, len(graphs))

In [None]:
# The MSE values were computed ahead of time, so they are read back from disk
# here: one pickle per file name suffix (f1, f3, nodif).
with open('./DataHelp/MSE_methods_f1.pickle', 'rb') as mse_f1_file:
    MSE_methods_f1 = pickle.load(mse_f1_file)

with open('./DataHelp/MSE_methods_f3.pickle', 'rb') as mse_f3_file:
    MSE_methods_f3 = pickle.load(mse_f3_file)

with open('./DataHelp/MSE_methods_nodif.pickle', 'rb') as mse_nodif_file:
    MSE_methods_nodif = pickle.load(mse_nodif_file)

**отрисовка MSE**

In [None]:
def plot(MSE_dict, name_of_method):
    """Draw a two-panel MSE figure for one sampling method.

    The left panel is a seaborn box plot with one box per column of the frame
    built from ``MSE_dict``. The right panel is a scatter plot of every
    column's mean against the column label converted to ``int``. Both panels
    use a logarithmic y-axis.

    Args:
        MSE_dict: mapping handed to ``pd.DataFrame``; its keys become the
            columns and must be convertible with ``int``.
        name_of_method: text used as the figure's super-title.

    Returns:
        None. The figure is left as the current pyplot figure.
    """
    mse_frame = pd.DataFrame(MSE_dict, columns=list(MSE_dict.keys()))

    fig, (box_ax, mean_ax) = plt.subplots(1, 2, figsize=(20, 6))
    fig.suptitle(name_of_method, fontsize=22)

    # Left panel: distribution of MSE per column.
    box_ax.set_xlabel("number of nodes")
    box_ax.set_ylabel("MSE")
    sns.boxplot(data=mse_frame, ax=box_ax)
    box_ax.set_yscale('log')

    # Right panel: mean MSE per column against the integer column label.
    mean_ax.set_xlabel("number of nodes")
    mean_ax.set_ylabel("MSE")
    column_means = list(mse_frame.mean())
    node_counts = [int(label) for label in mse_frame.columns]
    sns.scatterplot(x=node_counts, y=column_means, ax=mean_ax)
    mean_ax.set_yscale('log')


In [None]:
def _scatter_mean_mse(mse_by_method, label_prefix):
    """Plot the mean MSE per column for every method on one log-scaled figure.

    One scatter series is drawn per key of ``mse_by_method``. Each legend entry
    is built from the key being plotted (text before the first apostrophe), so
    labels cannot drift out of step with the plotted series.

    Args:
        mse_by_method: mapping of method key -> mapping accepted by
            ``pd.DataFrame`` whose keys are convertible with ``int``.
        label_prefix: text placed in front of the method name in the legend.

    Returns:
        List with the overall mean MSE of every method, in plotting order.
    """
    plt.figure(figsize=(10, 6))
    overall_means = []
    legend_labels = []
    for name, mse_dict in mse_by_method.items():
        mse_frame = pd.DataFrame(mse_dict, columns=list(mse_dict.keys()))
        column_means = list(mse_frame.mean())
        node_counts = [int(label) for label in mse_frame.columns]
        plt.scatter(x=node_counts, y=column_means)

        overall_mean = sum(column_means) / len(column_means)
        overall_means.append(overall_mean)
        # `numpy` is the name this notebook imports; `np` is never defined.
        legend_labels.append(
            label_prefix + ' ' + name.split("'")[0] + ': '
            + str(numpy.round(overall_mean, decimals=3))
        )

    plt.yscale('log')
    plt.legend(legend_labels)
    plt.show()
    return overall_means


# One detailed figure (box plot + mean scatter) per method and motif variant.
for mse_by_method, title_suffix in (
    (MSE_methods_f1, ' Motifs of different types. F1'),
    (MSE_methods_f3, ' Motifs of different types. F3'),
    (MSE_methods_nodif, ' Not different types of motifs'),
):
    for name in mse_by_method:
        plot(mse_by_method[name], name.split("'")[0] + title_suffix)

# One summary figure per motif variant comparing all methods.
mean_MSEs = _scatter_mean_mse(MSE_methods_f1, 'mean MSE. Motifs of different types. F1')
mean_MSEs = _scatter_mean_mse(MSE_methods_f3, 'mean MSE. Motifs of different types. F3')
mean_MSEs = _scatter_mean_mse(MSE_methods_nodif, 'mean MSE. Not different types of motifs.')


**Regression**

In [None]:
# Regression targets: Utils.count applied, on a thread pool, to the second
# field of every entry in `graphs`.
with ThreadPoolExecutor(max_workers=num_workers) as executor:
    y = list(executor.map(Utils.count, list(zip(*graphs))[1]))

# Feature (motif) names used as DataFrame column labels for the training data.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('DataHelp/names_of_all_motifs.pickle', 'rb') as f:
    names_of_all_motifs = pickle.load(f)

with open('DataHelp/names_of_all_motifs_diff.pickle', 'rb') as f:
    names_of_all_motifs_diff = pickle.load(f)


def _report_catboost_mape(features, targets, feature_names, header, method_name, n_nodes):
    """Fit a CatBoost regressor on a 70/30 split and print its test MAPE.

    Args:
        features: feature matrix, one row per target.
        targets: regression targets aligned with ``features``.
        feature_names: column labels applied to the training part.
        header: leading text of the printed report line.
        method_name: sampler name included in the report line.
        n_nodes: value included in the report line after 'Number of nodes: '.

    Returns:
        None. The result is printed.
    """
    # NOTE(review): the split is unseeded, so the reported MAPE changes from
    # run to run; consider passing random_state for reproducibility.
    X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.3)
    X_train = pd.DataFrame(X_train, columns=feature_names)

    model = CatBoostRegressor(iterations=100, silent=True)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    # Optional SHAP inspection, disabled in the original notebook:
    # explainer = shap.Explainer(model)
    # shap_values = explainer(X_train)
    # shap.plots.beeswarm(shap_values)

    print(header, method_name, ' Number of nodes: ' + str(n_nodes), ' MAPE ',
          Utils.mean_absolute_percentage_error(y_test, preds))


for method in methods:
    # "<class 'pkg.mod.Name'>" -> "Name"
    name_of_method = str(method).split('.')[-1].split("'")[0]
    # NOTE(review): `l`, `r` and `step` are not defined in any visible cell;
    # they must be set before this cell runs.
    for n in range(l, r, step):
        size_key = 'Number of nodes: ' + str(n)

        _report_catboost_mape(
            X_sample_f1_regression[name_of_method][size_key], y,
            names_of_all_motifs_diff,
            'Motifs of different types, F1. Method: ', name_of_method, n)

        # Fix: this run previously re-used the F1 features, so the "F3" line
        # reported a second F1 model.
        _report_catboost_mape(
            X_sample_f3_regression[name_of_method][size_key], y,
            names_of_all_motifs_diff,
            'Motifs of different types. F3. Method: ', name_of_method, n)

        _report_catboost_mape(
            X_sample_regression[name_of_method][size_key], y,
            names_of_all_motifs,
            'Motifs. Not different types. Method: ', name_of_method, n)