# Plot graph examples

Plots a few randomly chosen example graphs for both concept maps and co-occurrence graphs.

In [None]:
from notebook_prelude import *

In [None]:
def get_candidates(X, min_nodes=10, max_nodes=15):
    """Keep only the graphs in X whose node count is in [min_nodes, max_nodes).

    The node count is the value reported by ``nx.number_of_nodes``. The upper
    bound is exclusive, so the defaults keep graphs with 10 to 14 nodes.

    Returns a new list; the order of X is preserved.
    """
    allowed_sizes = range(min_nodes, max_nodes)
    selected = []
    for graph in X:
        if nx.number_of_nodes(graph) in allowed_sizes:
            selected.append(graph)
    return selected

# Tuples of: (dataset_name, graph_type, (X, Y))
# For co-occurrence graphs, it holds one randomly chosen cached dataset per window size
graph_datasets = []


def get_window_size(graph_cache_file):
    """Return the text between 'cooccurrence_' and the following '_' in a cache file name.

    The window size is returned as a string, exactly as it appears in the name.
    Raises IndexError when the name does not contain 'cooccurrence_'.
    """
    return graph_cache_file.split('cooccurrence_')[1].split('_')[0]


def window_size_sort_key(window_size):
    """Sort key that orders numeric window sizes by value rather than as text.

    Plain string sorting would put '10' before '2'. Window sizes that are not
    integers are sorted alphabetically after all numeric ones.
    """
    try:
        return (0, int(window_size), window_size)
    except ValueError:
        return (1, 0, window_size)


for dataset in dataset_helper.get_dataset_names_with_concept_map():
    # Only the 'ng20' dataset is used for the example plots
    if dataset != 'ng20': continue
    print('{:30} start'.format(dataset))
    graph_cache_files = dataset_helper.get_all_cached_graph_datasets(dataset_name=dataset)
    # First cache file whose name contains 'concept'; IndexError if there is none
    gml_graph_cache = [x for x in graph_cache_files if 'concept' in x][0]
    coo_graph_caches = [x for x in graph_cache_files if 'cooc' in x]

    # Group the co-occurrence cache files by the window size encoded in their name
    coo_graphs_by_window_size = collections.defaultdict(list)
    for cache_file in coo_graph_caches:
        coo_graphs_by_window_size[get_window_size(cache_file)].append(cache_file)

    X_cmap, Y_cmap = dataset_helper.get_dataset_cached(gml_graph_cache)
    X_cmap = graph_helper.get_graphs_only(X_cmap)
    graph_datasets.append((dataset, 'Concept Map', (get_candidates(X_cmap), Y_cmap)))

    window_sizes_in_order = sorted(
        coo_graphs_by_window_size.items(),
        key=lambda entry: window_size_sort_key(entry[0]),
    )
    for window_size, cached_files in window_sizes_in_order:
        # Take a random element from the co-occurrence graph datasets of this window size
        coo_graph_cache = np.random.choice(cached_files)
        print('\tRetrieving co-occurence graphs for window_size={} ({})'.format(window_size, coo_graph_cache))
        X, Y = dataset_helper.get_dataset_cached(coo_graph_cache)
        X = graph_helper.get_graphs_only(X)
        X = get_candidates(X)
        graph_datasets.append((dataset, 'Cooccurrence\n$w={}$'.format(window_size), (X, Y)))
    # Stop after the first dataset that was processed
    break

# The unfiltered datasets loaded above are no longer referenced; reclaim them
gc.collect()

In [None]:
# One row per (dataset, graph type); 'graph_dataset' holds the (X, Y) tuple
df = pd.DataFrame(
    graph_datasets,
    columns=['dataset', 'graph_type', 'graph_dataset'],
)

In [None]:
# Number of example graphs (figure rows) drawn for every graph type (figure column)
NUM_GRAPHS_PER_TYPE = 3
# Only graphs with a node count in this range (upper bound exclusive) are drawn
PLOT_NODE_COUNT_RANGE = range(4, 14)

for dataset, data in df.groupby('dataset'):
    # squeeze=False keeps `axes` two-dimensional even when there is only a
    # single column or row, so the nested row/column iteration below always works.
    fig, axes = plt.subplots(
        ncols=data.graph_type.value_counts().size,
        nrows=NUM_GRAPHS_PER_TYPE,
        squeeze=False,
    )

    # The candidate graphs of a column do not depend on the row, so filter them once.
    columns = []
    for _, item in data.iterrows():
        X, _ = item.graph_dataset
        candidates = [x for x in X if nx.number_of_nodes(x) in PLOT_NODE_COUNT_RANGE]
        if not candidates:
            print('No graphs with a node count in {} for graph type {!r}'.format(PLOT_NODE_COUNT_RANGE, item.graph_type))
        columns.append((item.graph_type, candidates))

    for idx, row_ax in enumerate(axes):
        print('Row: {}/{}'.format(idx + 1, len(axes)))
        for (graph_type, candidates), ax in zip(columns, row_ax):
            if idx == 0:
                ax.set_title(graph_type)

            if candidates:
                # Draw a random index instead of passing the graphs to
                # np.random.choice, which would first try to convert the list
                # of graph objects into an array.
                random_graph = candidates[np.random.choice(len(candidates))]
                # NOTE(review): a circular layout used to be computed for concept
                # maps but was never passed on; the default layout is kept here.
                nx.draw_networkx(random_graph, ax=ax, node_size=16, with_labels=False, node_color='#000000', arrows=False)
            # An empty panel is left for graph types without candidates
            ax.grid(False)
            ax.set_xticks([])
            ax.set_yticks([])
    fig.tight_layout(h_pad=2, w_pad=2)

    plt.show()
    # NOTE(review): the file name is the same for every dataset, so with more
    # than one dataset in `df` later figures would reuse it - confirm save_fig.
    save_fig(fig, 'examples_graphs')