## Draw bar plots showing the node category and edge category distributions

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
# Reset matplotlib to its built-in defaults first, then apply the explicit
# override. `plt.style.use('default')` rewrites rcParams, so any setting made
# before it would be discarded.
plt.style.use('default')
plt.rcParams['axes.facecolor'] = 'white'

In [None]:
# Load the graph's edge table from a tab-separated file; the first row
# supplies the column names.
graph_edge = pd.read_csv(
    'data/graph_edges.txt',
    sep='\t',
    header=0,
)

In [None]:
# Every node that appears at either end of an edge, then how many there are.
all_nodes = set(graph_edge['source']) | set(graph_edge['target'])
len(all_nodes)

In [None]:
# Load the per-node information table from a tab-separated file; the first
# row supplies the column names.
all_node_info = pd.read_csv(
    'data/all_graph_nodes_info.txt',
    sep='\t',
    header=0,
)

In [None]:
# Number of distinct values in the node 'category' column.
distinct_node_categories = set(all_node_info['category'])
len(distinct_node_categories)

In [None]:
# Number of distinct values in the edge 'predicate' column.
distinct_predicates = set(graph_edge['predicate'])
len(distinct_predicates)

In [None]:
# How many nodes fall into each category (pandas orders by count, descending).
category_column = all_node_info['category']
category_column.value_counts()

In [None]:
# Read the header-less, tab-separated node-count summary and name its columns.
node_table = pd.read_csv(
    'draw_figures/number_of_nodes_by_category.txt',
    sep='\t',
    header=None,
)
node_table.columns = ['node_category', 'node_count']

# Smallest counts first, with a fresh 0..n-1 index.
node_table = node_table.sort_values('node_count').reset_index(drop=True)

# Share of each category in the total node count, as a percentage.
total_node_count = node_table['node_count'].sum()
node_table['percent'] = node_table['node_count'] / total_node_count * 100

In [None]:
# Horizontal bar chart of node counts per category on a log-scaled x-axis,
# saved as an SVG.
fig, ax = plt.subplots(figsize=(8, 8), dpi=300)
ax.margins(y=0.01)
ax.barh(
    # Strip the literal 'biolink:' prefix so the y-axis labels stay short.
    y=node_table['node_category'].str.replace('biolink:', '', regex=False),
    width=node_table['node_count'],
    alpha=0.5,
    log=True,
)
ax.set_xlim(left=1)
ax.minorticks_off()
# Tick labels are left to matplotlib's log formatter. A previous
# `ax.set_xticklabels(xlabels)` call referenced a name whose definition had
# been commented out, which raises NameError on a fresh kernel.
ax.grid(True, which='major', axis='x', alpha=0.45)
# ax.set_title('Number of Nodes by Category in Customized Knowledge Graph');
fig.tight_layout()
fig.savefig('draw_figures/number_of_nodes_by_category.svg', dpi=300)

In [None]:
# The header-less summary file stores two (category, count) column pairs side
# by side: columns 0-1 and columns 2-3. Stack them into one two-column table.
edge_table = pd.read_csv(
    'draw_figures/number_of_edges_by_category.txt',
    sep='\t',
    header=None,
)
left_pair = edge_table[[0, 1]].rename(columns={0: 'edge_category', 1: 'edge_count'})
right_pair = edge_table[[2, 3]].rename(columns={2: 'edge_category', 3: 'edge_count'})
# NOTE(review): if the two column pairs differ in length, the padded rows
# would remain as NaN after stacking -- confirm against the input file.
edge_table = pd.concat([left_pair, right_pair], axis=0).reset_index(drop=True)

# Smallest counts first, with a fresh 0..n-1 index.
edge_table = edge_table.sort_values('edge_count').reset_index(drop=True)

# Share of each edge category in the total edge count, as a percentage.
total_edge_count = edge_table['edge_count'].sum()
edge_table['percent'] = edge_table['edge_count'] / total_edge_count * 100

In [None]:
# Horizontal bar chart of edge counts per edge category on a log-scaled
# x-axis, saved as an SVG.
fig, ax = plt.subplots(figsize=(15, 15), dpi=300)
ax.margins(y=0.01)
ax.barh(
    y=edge_table['edge_category'],
    width=edge_table['edge_count'],
    alpha=0.5,
    log=True,
)
ax.set_xlim(left=1)
ax.minorticks_off()
# Keep the automatically generated x tick labels and only enlarge them.
# A previous `ax.set_xticklabels(xlabels, fontsize=18)` call referenced a name
# whose definition had been commented out, which raises NameError on a fresh
# kernel.
ax.tick_params(axis='x', labelsize=18)
ax.grid(True, which='major', axis='x', alpha=0.45)
# ax.set_title('Number of Edges by Predicate in Customized Knowledge Graph', size=22);
fig.tight_layout()
fig.savefig('draw_figures/number_of_edges_by_category.svg', dpi=300)

In [None]:
# Load the model evaluation results (tab-separated, first row is the header).
plotdata = pd.read_csv(
    'draw_figures/model_evaluation.txt',
    sep='\t',
    header=0,
)

# Use spaces instead of underscores in the method names.
method_labels = plotdata['method'].str.replace('_', ' ')
plotdata['method'] = method_labels

In [None]:
# One grouped bar chart per evaluation metric, arranged in a 2x2 grid, with a
# single legend shared by the whole figure.
metric_list = ['MRR', 'Hit@1', 'Hit@3', 'Hit@5']

fig = plt.figure(figsize=(15, 10), dpi=300)

for position, metric in enumerate(metric_list, start=1):
    ax = fig.add_subplot(2, 2, position)

    # Rows of the evaluation table that belong to the current metric.
    metric_rows = plotdata[plotdata['metric'] == metric].reset_index(drop=True)
    sns.barplot(data=metric_rows, x="model", y='value', hue="method", ax=ax)

    ax.set_xlabel('')
    for tick_label in ax.get_xticklabels():
        tick_label.set(rotation=30, fontsize=10)
    ax.set_ylabel(metric, fontsize=15)
    for tick_label in ax.get_yticklabels():
        tick_label.set(fontsize=10)

    # Drop the per-panel legend; a figure-level legend is added after the loop.
    ax.get_legend().remove()
    ax.set_title(metric, fontsize=18)

fig.subplots_adjust(hspace=0.5)

# Build the shared legend from the last panel's handles and labels.
handles, labels = ax.get_legend_handles_labels()
fig.legend(
    handles,
    labels,
    loc='upper right',
    ncol=3,
    bbox_to_anchor=(.90, .98),
    fontsize=13,
)

fig.savefig('draw_figures/model_evaluation.svg')