In [1]:
from EXPERIMENT_HYPER_EMPIRICAL import *
from _FigureJiazeHelper import *
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, Circle
import matplotlib.colors as colors
from scipy.sparse.linalg import eigs, eigsh
from scipy.linalg import eig
from _HyperCommunityDetection import *
import warnings
import pandas
warnings.filterwarnings('ignore', category=FutureWarning)

%load_ext autoreload
%autoreload 2

In [9]:
# Matplotlib settings
# Global figure defaults for the whole notebook, applied in one rcParams update.
# `basic_line_color` is not defined in this notebook; it is expected to come
# from one of the star imports in the first cell.

# plt.style.use('seaborn-whitegrid')
plt.rcParams.update({
    # figure and fonts
    'figure.figsize': (8, 5),
    'font.size': 16,
    'font.family': 'sans-serif',
    'font.sans-serif': 'verdana',
    # lines and markers
    'lines.linewidth': 4,
    # axes: hide the right/top spines and colour the remaining frame and ticks
    'axes.spines.right': False,
    'axes.spines.top': False,
    'axes.edgecolor': basic_line_color,
    'xtick.color': basic_line_color,
    'ytick.color': basic_line_color,
    'axes.labelsize': 'large',
    'lines.markersize': 12,
})

%config InlineBackend.figure_format = 'retina'

In [None]:
def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
    """Return a new colormap built from the ``[minval, maxval]`` slice of ``cmap``.

    ``cmap`` is sampled at ``n`` evenly spaced positions between ``minval``
    and ``maxval``; the sampled colours define a ``LinearSegmentedColormap``
    named ``trunc(<cmap.name>,<minval>,<maxval>)``.
    """
    label = 'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval)
    sample_positions = np.linspace(minval, maxval, n)
    sampled_colors = cmap(sample_positions)
    return colors.LinearSegmentedColormap.from_list(label, sampled_colors)

In [None]:
def plot_cm(confusionMatrix, fig=None, ax=None):
    """Draw a row-normalised confusion matrix as an annotated heat map.

    Every row of ``confusionMatrix`` is divided by its row sum and rounded to
    two decimals. The result is shown with ``ax.matshow`` on a fixed 0..1
    colour scale, and each cell is annotated with its value.

    Parameters
    ----------
    confusionMatrix : 2-D array-like
        Matrix of counts (rows are normalised independently).
    fig : matplotlib.figure.Figure, optional
        Figure in which to create the axes when ``ax`` is not supplied.
        A new 3x3 inch figure is created when both ``fig`` and ``ax`` are None.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on. When given, ``fig`` is not used.

    Returns
    -------
    The image object returned by ``ax.matshow`` (usable for a colour bar).
    """
    # Imported locally so the function does not depend on `mpl` leaking in
    # through one of the notebook's star imports.
    import matplotlib as mpl

    counts = np.asarray(confusionMatrix, dtype=float)
    subTrueNumgroup, subBHNumgroup = counts.shape

    # Row-normalise by broadcasting. Rows that sum to zero stay all-zero
    # instead of becoming NaN (0/0) and emitting a RuntimeWarning.
    rowsum = counts.sum(axis=1, keepdims=True)
    normalised = np.divide(counts, rowsum, out=np.zeros_like(counts), where=rowsum != 0)
    normConfusionMatrix = np.round(normalised, 2)

    # Create whatever the caller did not provide. Previously axes were only
    # created when BOTH fig and ax were None, so passing just `fig` crashed
    # on `ax.matshow` with ax=None.
    if ax is None:
        if fig is None:
            fig = plt.figure(figsize=(3, 3))
        spec = fig.add_gridspec(ncols=1, nrows=1)
        ax = fig.add_subplot(spec[0, 0])

    # Use only the central 0.3..0.7 band of the reversed "seismic" map.
    cmap = truncate_colormap(mpl.colormaps["seismic_r"], 0.3, 0.7)
    im = ax.matshow(normConfusionMatrix, cmap=cmap, vmin=0, vmax=1)

    # NOTE(review): fontsize=1 makes the labels practically invisible at normal
    # figure sizes; presumably chosen for very large matrices - confirm.
    for i in range(subTrueNumgroup):
        for j in range(subBHNumgroup):
            c = normConfusionMatrix[i, j]
            ax.text(j, i, str(c), va='center', ha='center', fontsize=1)

    # Style the ticks of the axes we drew on. plt.xticks/plt.yticks act on the
    # *current* axes, which is not necessarily `ax`.
    ax.tick_params(axis='both', labelsize=10)
    return im

In [5]:
def visualMetaCM(partition, meta):
    """Cross-tabulate a detected partition against metadata labels.

    Parameters
    ----------
    partition : 1-D array-like
        Group label of every item.
    meta : 1-D array-like
        Metadata label of every item, aligned element-wise with ``partition``.

    Returns
    -------
    cm : numpy.ndarray of float, shape (n_partition_labels, n_meta_labels)
        ``cm[i, j]`` is the number of items whose partition label is the i-th
        sorted unique partition label and whose metadata label is the j-th
        sorted unique metadata label.
    df : pandas.DataFrame
        The same counts, indexed by the unique partition labels (rows) and
        the unique metadata labels (columns).

    Raises
    ------
    ValueError
        If ``partition`` and ``meta`` do not have the same number of items.
    """
    # Coerce to flat arrays so plain lists (e.g. straight from a pickle) work.
    # With a list, `partition == label` is a single bool rather than an
    # element-wise mask, which broke the previous implementation.
    partition = np.asarray(partition).ravel()
    meta = np.asarray(meta).ravel()
    if partition.size != meta.size:
        raise ValueError(
            f'partition and meta must have the same length, '
            f'got {partition.size} and {meta.size}'
        )

    # The inverse indices map each item to the row/column of its label.
    uniquePartition, rowIndex = np.unique(partition, return_inverse=True)
    uniqueMeta, colIndex = np.unique(meta, return_inverse=True)

    # Single pass over the items instead of one np.where scan per
    # (partition label, meta label) pair. np.add.at accumulates repeated
    # (row, col) pairs correctly, unlike plain fancy-index assignment.
    cm = np.zeros((uniquePartition.size, uniqueMeta.size))
    np.add.at(cm, (rowIndex, colIndex), 1)

    df = pandas.DataFrame(cm, index=uniquePartition, columns=uniqueMeta)
    return cm, df

In [6]:
# Yelp: cross-tabulate the stored partition against the 'state' metadata
# field and export the table to Excel.
name = 'yelp'

# NOTE: pickle.load can execute arbitrary code; only load trusted, locally
# produced result/data files here.
with open('./result/hyperEmpirical/yelp_BHPartition.pkl', 'rb') as fr:
    partition = pickle.load(fr)
with open('./net_data/yelp/yelp_data.pkl', 'rb') as fr:
    yelp_data = pickle.load(fr)

# One label per entry 0..n-1: the 'state' field of that entry's metadata.
meta = np.array([yelp_data['meta'][i]['state'] for i in range(yelp_data['n'])])

cm, df = visualMetaCM(partition, meta)
print(f'number of partition {np.size(np.unique(partition))}, number of meta {np.size(np.unique(meta))}')

path = f'./result/hyperEmpirical/{name}_cm.xlsx'
# The context manager writes and closes the workbook on exit. An explicit
# writer.save() is redundant and raises AttributeError on pandas >= 2.0,
# where ExcelWriter.save() was removed.
with pandas.ExcelWriter(path) as writer:
    df.to_excel(excel_writer=writer)

# fig = plt.figure(figsize=(8, 180))
# widths = [4]
# heights = [4]
# spec5 = fig.add_gridspec(ncols=1, nrows=1, width_ratios=widths, height_ratios=heights)
# row = 0
# col = 0
# ax = fig.add_subplot(spec5[row, col])
# plot_cm(cm, fig=fig, ax=ax)

number of partition 615, number of meta 27


In [9]:
# Enron: cross-tabulate the stored partition against the first metadata
# field of every entry and export the table to Excel.
name = 'enron'

# NOTE: pickle.load can execute arbitrary code; only load trusted, locally
# produced result/data files here.
with open('./result/hyperEmpirical/enron_BHPartition.pkl', 'rb') as fr:
    partition = pickle.load(fr)
with open('./net_data/enron/enron_data.pkl', 'rb') as fr:
    enron_data = pickle.load(fr)

# Entries without metadata (None) or with an 'N/A' first field are all
# collected under the single label 'None'.
meta = []
for i in range(enron_data['n']):
    entry = enron_data['meta'][i]
    is_missing = entry is None or entry[0] == 'N/A'
    meta.append('None' if is_missing else entry[0])
meta = np.array(meta)

cm, df = visualMetaCM(partition, meta)
print(f'number of partition {np.size(np.unique(partition))}, number of meta {np.size(np.unique(meta))}')

path = f'./result/hyperEmpirical/{name}_cm.xlsx'
# The context manager writes and closes the workbook on exit. An explicit
# writer.save() is redundant and raises AttributeError on pandas >= 2.0,
# where ExcelWriter.save() was removed.
with pandas.ExcelWriter(path) as writer:
    df.to_excel(excel_writer=writer)

number of partition 22, number of meta 10


In [7]:
# Enron, assortative-only partition: cross-tabulate against the first
# metadata field of every entry and export the table to Excel.
name = 'enron'
givenNumGroup = None
only_assortative = True

# The same filename suffix selects the input partition and names the output.
suffix = (f'_given{givenNumGroup}Groups' if givenNumGroup is not None else '') \
    + ('_assort' if only_assortative else '')
partition_path = f'./result/hyperEmpirical/{name}_BHPartition{suffix}.pkl'

# NOTE: pickle.load can execute arbitrary code; only load trusted, locally
# produced result/data files here.
with open(partition_path, 'rb') as fr:
    partition = pickle.load(fr)
with open('./net_data/enron/enron_data.pkl', 'rb') as fr:
    enron_data = pickle.load(fr)

# Entries without metadata (None) or with an 'N/A' first field are all
# collected under the single label 'None'.
meta = []
for i in range(enron_data['n']):
    entry = enron_data['meta'][i]
    is_missing = entry is None or entry[0] == 'N/A'
    meta.append('None' if is_missing else entry[0])
meta = np.array(meta)

cm, df = visualMetaCM(partition, meta)
print(f'number of partition {np.size(np.unique(partition))}, number of meta {np.size(np.unique(meta))}')

path = f'./result/hyperEmpirical/{name}_cm{suffix}.xlsx'
# The context manager writes and closes the workbook on exit. An explicit
# writer.save() is redundant and raises AttributeError on pandas >= 2.0,
# where ExcelWriter.save() was removed.
with pandas.ExcelWriter(path) as writer:
    df.to_excel(excel_writer=writer)

number of partition 14, number of meta 10


In [11]:
# High school contacts, 9 groups requested: cross-tabulate the stored
# partition against the per-entry metadata and export the table to Excel.
name = 'highschool'
givenNumGroup = 9
only_assortative = False

# The same filename suffix selects the input partition and names the output.
suffix = (f'_given{givenNumGroup}Groups' if givenNumGroup is not None else '') \
    + ('_assort' if only_assortative else '')
partition_path = f'./result/hyperEmpirical/{name}_BHPartition{suffix}.pkl'

# NOTE: pickle.load can execute arbitrary code; only load trusted, locally
# produced result/data files here.
with open(partition_path, 'rb') as fr:
    partition = pickle.load(fr)
with open('./net_data/contact-high-school/highschool_data.pkl', 'rb') as fr:
    _data = pickle.load(fr)

# One metadata label per entry 0..n-1.
meta = np.array([_data['meta'][i] for i in range(_data['n'])])

cm, df = visualMetaCM(partition, meta)
print(f'number of partition {np.size(np.unique(partition))}, number of meta {np.size(np.unique(meta))}')

path = f'./result/hyperEmpirical/{name}_cm{suffix}.xlsx'
# The context manager writes and closes the workbook on exit. An explicit
# writer.save() is redundant and raises AttributeError on pandas >= 2.0,
# where ExcelWriter.save() was removed.
with pandas.ExcelWriter(path) as writer:
    df.to_excel(excel_writer=writer)

number of partition 9, number of meta 9


In [14]:
# High school contacts, 9 groups requested, assortative-only partition:
# cross-tabulate against the per-entry metadata and export to Excel.
name = 'highschool'
givenNumGroup = 9
only_assortative = True

# The same filename suffix selects the input partition and names the output.
suffix = (f'_given{givenNumGroup}Groups' if givenNumGroup is not None else '') \
    + ('_assort' if only_assortative else '')
partition_path = f'./result/hyperEmpirical/{name}_BHPartition{suffix}.pkl'

# NOTE: pickle.load can execute arbitrary code; only load trusted, locally
# produced result/data files here.
with open(partition_path, 'rb') as fr:
    partition = pickle.load(fr)
with open('./net_data/contact-high-school/highschool_data.pkl', 'rb') as fr:
    _data = pickle.load(fr)

# One metadata label per entry 0..n-1.
meta = np.array([_data['meta'][i] for i in range(_data['n'])])

cm, df = visualMetaCM(partition, meta)
print(f'number of partition {np.size(np.unique(partition))}, number of meta {np.size(np.unique(meta))}')

path = f'./result/hyperEmpirical/{name}_cm{suffix}.xlsx'
# The context manager writes and closes the workbook on exit. An explicit
# writer.save() is redundant and raises AttributeError on pandas >= 2.0,
# where ExcelWriter.save() was removed.
with pandas.ExcelWriter(path) as writer:
    df.to_excel(excel_writer=writer)

number of partition 9, number of meta 9


In [12]:
# Primary school contacts, 10 groups requested: cross-tabulate the stored
# partition against the per-entry metadata and export the table to Excel.
name = 'primary'
givenNumGroup = 10
only_assortative = False

# The same filename suffix selects the input partition and names the output.
suffix = (f'_given{givenNumGroup}Groups' if givenNumGroup is not None else '') \
    + ('_assort' if only_assortative else '')
partition_path = f'./result/hyperEmpirical/{name}_BHPartition{suffix}.pkl'

# NOTE: pickle.load can execute arbitrary code; only load trusted, locally
# produced result/data files here.
with open(partition_path, 'rb') as fr:
    partition = pickle.load(fr)
with open('./net_data/contact-primary-school/primary_data.pkl', 'rb') as fr:
    _data = pickle.load(fr)

# One metadata label per entry 0..n-1.
meta = np.array([_data['meta'][i] for i in range(_data['n'])])

cm, df = visualMetaCM(partition, meta)
print(f'number of partition {np.size(np.unique(partition))}, number of meta {np.size(np.unique(meta))}')

path = f'./result/hyperEmpirical/{name}_cm{suffix}.xlsx'
# The context manager writes and closes the workbook on exit. An explicit
# writer.save() is redundant and raises AttributeError on pandas >= 2.0,
# where ExcelWriter.save() was removed.
with pandas.ExcelWriter(path) as writer:
    df.to_excel(excel_writer=writer)

number of partition 10, number of meta 11
