In [6]:
import numpy as np
import scipy as sp
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import networkx as nx
from tqdm import tqdm
import itertools
import time

from pathlib import Path
import sys

In [7]:
%load_ext autoreload
%autoreload 2
from graph_search import random_start_graphsearch, k2_search, local_search
from utils import get_bscore

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [15]:
# Project directories, all resolved relative to the current working directory.
filepath = Path.cwd()
datapath = filepath.joinpath('data')               # CSV inputs are read from here
imagepath = filepath.joinpath('writeup/results')   # figures are saved here

## Check example

In [16]:
# check against example
# Score a hand-built structure on the 'example' dataset and compare the result
# with the given reference score, so a mismatch fails loudly instead of
# relying on a visual comparison.
filename = 'example'
bscore_example_given = -132.02362143513778
raw_data = pd.read_csv(datapath / f'{filename}.csv')
df, nodes = raw_data.values, list(raw_data.columns)

# Build the example graph: every column is a node, plus four directed edges.
G = nx.DiGraph()
G.add_nodes_from(nodes)
G.add_edges_from([
    ('parent1', 'child1'),
    ('parent2', 'child1'),
    ('parent2', 'child2'),
    ('parent3', 'child3'),
])
bscore_example = get_bscore(G, df, nodes)

# Tolerance-based comparison: the score is a float, so exact equality is too strict.
np.testing.assert_allclose(
    bscore_example,
    bscore_example_given,
    rtol=1e-6,
    err_msg='get_bscore does not reproduce the given example score',
)

## Estimate how long the search will take on the large dataset

In [37]:
# Load the 'large' dataset: keep the raw values as an array and use the
# column names as node labels.
filename = 'large'
raw_data = pd.read_csv(datapath / f'{filename}.csv')
df = raw_data.values
nodes = list(raw_data.columns)

In [38]:
# Shape of the loaded array as (rows, columns); each column corresponds to one entry of `nodes`.
df.shape

(10000, 50)

In [34]:
# Run one K2 search on the loaded data to gauge its cost.
# NOTE(review): `tt` is presumably the elapsed time reported by k2_search — confirm units in graph_search.
G, bayescore, tt = k2_search(nodes, df)
print(tt)

8.673135995864868


In [36]:
# Continue from the current graph `G` with local_search and print the time value it returns.
# NOTE(review): max_iter=20 presumably caps the number of search iterations — confirm in graph_search.
Gl, bayescorel, ttl = local_search(G, nodes, df, max_iter=20)
print(ttl)

2.1494619846343994


In [17]:
# get graph with best bayescore
# Note: this rebinds `G` and `bayescore`, which the drawing and results cells below read.
# NOTE(review): the saved output of this cell ends in KeyboardInterrupt, so `total_times`
# is only defined once the cell has been rerun to completion.
G, bayescore, total_times = random_start_graphsearch(nodes, df, iterations=1)
print('(total time, mean iteration time):', total_times)

iteration 0 1675191893.134825


KeyboardInterrupt: 

In [48]:
# draw graph
# savefig does not create missing parent directories, so make sure the output
# folder exists before writing the figure.
imagepath.mkdir(parents=True, exist_ok=True)
nx.draw(G, with_labels=True)
plt.savefig(imagepath / f'{filename}_path.png')

In [None]:
# Summarise the search outcome as a one-column table and print it as LaTeX.
# Each value is wrapped in a list so the DataFrame has a single row, which is
# then transposed so the labels become the row index.
results = {
    'Bayes score': [bayescore],
    'Graph structure': [f'(N={len(G.nodes)}, E={len(G.edges)})'],
    'Total time': [total_times[0]],
    'Mean iteration time': [total_times[1]],
}
print(pd.DataFrame(results).T.to_latex())