In [2]:
# some_file.py
import sys

sys.path.insert(0, './shared_code')



import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import sqlite3 as db
import json
import pandas as pd
import importlib
from IPython.core.display import display, HTML

import os
import RHgenerate_states
import RHcomponents
import RHdisplay
import RHdistance_partition
import RHutilities

%matplotlib inline

In [17]:
# Open the SQLite database file used by the cells below (`db` is the sqlite3 module).
con = db.connect('data/rush_hour.db')
# Cursor on that connection; not referenced by any of the cells visible here.
cur = con.cursor()

# Disabled: would append a `states` DataFrame to a `states` table.
#states.to_sql('states',con,if_exists="append")

In [4]:
# Identifier written to the `comb_class` column for every component saved below.
combinatorial_class = 36
# Generate the states of this class.
# NOTE(review): presumably the (2,2) arguments are what select class 36 — confirm against RHgenerate_states.
class_36 = RHgenerate_states.generate_states(2,2)

In [21]:
# Number of generated states, plus one sample entry (index 23) to show what a state looks like.
len(class_36),class_36[23]

(116650, (278873228533880682009745564893224, 1))

In [22]:
# Build the full list of component graphs for the generated states (kept in memory as `comps`).
comps = RHcomponents.components(class_36)

In [32]:

# Write one summary row per component of the class to the `component` table.
# Rows are buffered and flushed in batches so the whole result never has to be
# held in a single DataFrame.
SAVE_BATCH_SIZE = 2000  # Number of component rows written per to_sql call

component_columns = ['comb_class', 'repr_board_int_s1', 'repr_board_int_s2',
                     'repr_red_col', 'is_solvable', 'num_nodes',
                     'density', 'max_solution_distance']


def _save_component_batch(rows):
    """Append ``rows`` to the ``component`` table and return them as a DataFrame.

    Every batch is written with ``index=False`` so the DataFrame's positional
    index is never sent to the database as an extra column.
    """
    batch = pd.DataFrame(rows, columns=component_columns)
    batch.to_sql('component', con, if_exists="append", index=False)
    return batch


db_components = []    # rows buffered since the last flush
solvable_count = 0
unsolvable_count = 0
save_counter = 0      # total number of rows written to the database so far

for g in RHcomponents.gen_components(class_36):

    if g.graph['solvable']:
        solvable_count += 1

        RHdistance_partition.distance_partition(g)

        # Representative node: any node at the greatest solution distance.
        # next(iter(...)) raises on an empty set instead of silently reusing
        # a `node` left over from an earlier component.
        max_soln_distance = max(g.graph['distance_partition'].keys())
        node = next(iter(g.graph['distance_partition'][max_soln_distance]))
        node_dict = g.nodes[node]
    else:
        unsolvable_count += 1
        # No solution distance exists; node 0 is used as the representative.
        node_dict = g.nodes[0]
        max_soln_distance = None

    # The board integer is stored as two separate integer columns.
    # NOTE(review): presumably because board_int does not fit a 64-bit SQLite INTEGER — confirm.
    s1, s2 = RHutilities.split_int(node_dict['board_int'])
    db_components.append([
        combinatorial_class,
        s1,
        s2,
        node_dict['red_col'],
        g.graph['solvable'],
        len(g.nodes()),
        nx.density(g),
        max_soln_distance,
    ])

    if len(db_components) >= SAVE_BATCH_SIZE:
        df = _save_component_batch(db_components)
        save_counter += len(db_components)
        db_components = []

# Flush whatever is left over after the last full batch.
if db_components:
    df = _save_component_batch(db_components)
    save_counter += len(db_components)

# TODO: add update/insert into the Combinatorial Class table
# (see that table for its data fields).
        
    

In [33]:
# Display `df`, the DataFrame of the most recent batch written by the save loop.
df

Unnamed: 0,comb_class,repr_board_int_s1,repr_board_int_s2,repr_red_col,is_solvable,num_nodes,density,max_solution_distance
0,36,11254635381874688,1071644672,1,True,308,0.019184,4.0
1,36,21990316663104,34493825024,1,True,112,0.037162,12.0
2,36,9007233614700544,35116724256768,1,True,297,0.019429,4.0
3,36,15221639059038528,133955584,1,True,136,0.031264,10.0
4,36,221238,22058901700928,1,False,24,0.141304,
5,36,21990325051744,133955584,1,True,114,0.038503,11.0
6,36,21990316663108,275011862528,1,True,128,0.035064,11.0
7,36,35115652836736,11259042018263040,3,True,57,0.075815,2.0
8,36,343598919045,11259042018263040,3,True,33,0.102273,5.0
9,36,224640,11259043089907712,3,True,51,0.083137,2.0


In [34]:
# Read the whole `component` table back from the database to inspect what was stored.
df2 = pd.read_sql_query("select * from component",con)
df2

Unnamed: 0,id,comb_class,repr_board_int_s1,repr_board_int_s2,repr_red_col,is_solvable,num_nodes,density,max_solution_distance
0,1,36,11254635381874688,1071644672,1,1,308,0.019184,4.0
1,2,36,21990316663104,34493825024,1,1,112,0.037162,12.0
2,3,36,9007233614700544,35116724256768,1,1,297,0.019429,4.0
3,4,36,15221639059038528,133955584,1,1,136,0.031264,10.0
4,5,36,221238,22058901700928,1,0,24,0.141304,
5,6,36,21990325051744,133955584,1,1,114,0.038503,11.0
6,7,36,21990316663108,275011862528,1,1,128,0.035064,11.0
7,8,36,35115652836736,11259042018263040,3,1,57,0.075815,2.0
8,9,36,343598919045,11259042018263040,3,1,33,0.102273,5.0
9,10,36,224640,11259043089907712,3,1,51,0.083137,2.0


In [69]:
# Summarise the component currently bound to `g`: choose a representative
# node, split its board integer, and show the summary values as a tuple.
if g.graph['solvable']:
    RHdistance_partition.distance_partition(g)
    max_distance = max(g.graph['distance_partition'].keys())
    # Any node at the greatest solution distance serves as representative.
    # next(iter(...)) raises on an empty set rather than reusing a stale `node`.
    node = next(iter(g.graph['distance_partition'][max_distance]))
    repr_node_dict = g.nodes[node]
else:
    # Unsolvable component: no distances exist, so node 0 is the representative.
    repr_node_dict = g.nodes[0]
    max_distance = None

s1, s2 = RHutilities.split_int(repr_node_dict['board_int'])
red_col = repr_node_dict['red_col']

36, g.graph['solvable'], s1, s2, red_col, len(g.nodes()), nx.density(g), max_distance

(36, True, 67330304, 11259042018267128, 1, 204, 0.024244180430793006, 5)

In [11]:
# Number of components, and the total node count summed over all components.
len(comps),sum(len(x.nodes()) for x in comps)

(1361, 116650)

In [12]:
# Node count of the first component.
len(comps[0].nodes())

308

In [14]:
def _split_by_solvable(components):
    """Return ``(solvable, unsolvable)`` lists, keeping the order of ``components``.

    A graph goes into the first list when its 'solvable' graph attribute
    equals True and into the second when it equals False.
    """
    solvable, unsolvable = [], []
    for component in components:
        flag = component.graph['solvable']
        if flag == True:
            solvable.append(component)
        if flag == False:
            unsolvable.append(component)
    return solvable, unsolvable


solvable_comps, unsolvable_comps = _split_by_solvable(comps)

In [15]:
def _component_sizes(components):
    """Return a one-column DataFrame holding the node count of each graph in ``components``."""
    return pd.DataFrame([len(component.nodes()) for component in components])


df_solvable = _component_sizes(solvable_comps)
df_unsolvable = _component_sizes(unsolvable_comps)


## Here -
* Select representative for each component
* save json to file for each component (class_2_2_comp_39823484994949)
* draw graph with d3

* What I really want is real-time, back-and-forth access that pulls out individual components on the fly, rather than building them all and saving a mass of files to the hard drive.



In [18]:
#df.describe()
#%matplotlib inline
# Histogram (20 bins) of node counts across the solvable components.
df_solvable.hist(bins=20)

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x00000250BD956EB8>]], dtype=object)

In [19]:
# Histogram (20 bins) of node counts across the unsolvable components.
df_unsolvable.hist(bins=20)

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x00000250BC07E9B0>]], dtype=object)

In [82]:
# Smallest board integer among the nodes of the first component.
# `comp.nodes[...]` is used for attribute lookup because the `Graph.node`
# alias no longer exists in current networkx releases.
comp = comps[0]
min_int = min(comp.nodes[node]['board_int'] for node in comp.nodes())
min_int

632587360075802098027458330624

In [16]:
# Summary statistics of node counts for the solvable components.
df_solvable.describe()

Unnamed: 0,0
count,1024.0
mean,107.013672
std,100.871674
min,1.0
25%,20.0
50%,60.0
75%,178.0
max,400.0


In [17]:
# Summary statistics of node counts for the unsolvable components.
df_unsolvable.describe()

Unnamed: 0,0
count,337.0
mean,20.973294
std,20.752708
min,1.0
25%,6.0
50%,15.0
75%,27.0
max,96.0


In [83]:
# Find the node(s) of `comp` whose board integer equals `min_int`.
# NOTE(review): `comp[node]` yields the node's adjacency view (its neighbours),
# not the node id or its attribute dict — confirm that this is what is wanted.
comp_repr_node = [comp[node] for node in comp.nodes() if comp.nodes[node]['board_int'] == min_int]
comp_repr_node

[AtlasView({233: {}, 270: {}, 231: {}, 284: {}})]

In [113]:
# Number of components that consist of a single node.
len([g for g in comps if len(g.nodes())==1])

18

In [37]:
# Render node 0 of one solvable component (index 40) as an SVG board.
# `.nodes[...]` replaces the `.node` alias, which current networkx no longer provides.
n = solvable_comps[40].nodes[0]

HTML(RHdisplay.svg_from_state(n['board_int'], n['red_col']))

In [39]:
# One row per component: [node count, solvable flag]; columns are left unnamed (0 and 1).
# NOTE(review): this rebinds `df`, which the batch-save cell also assigns — run order decides what `df` holds.
df = pd.DataFrame( [  [len(g.nodes()),g.graph['solvable'] ] for g in comps])

In [55]:
# Bind `g` to one solvable component (index 40) for the inspection cells that follow.
g = solvable_comps[40]
g

<networkx.classes.graph.Graph at 0x250b11594a8>

In [58]:
# Run the distance partition on `g`. Other cells read g.graph['distance_partition'] and
# each node's 'soln_distance' after this call, so it presumably populates both — confirm in RHdistance_partition.
RHdistance_partition.distance_partition(g)


{'board_int': 207885568651978971296788249575424,
 'inner_nbrs': {11, 49},
 'is_soln_state': False,
 'outer_nbrs': {9},
 'red_col': 2,
 'soln_distance': 4}

In [72]:
# One row per node of `g`, holding that node's 'soln_distance' attribute.
# `g.nodes[...]` replaces the `g.node` alias, which current networkx no longer provides.
df_dist = pd.DataFrame([g.nodes[n]['soln_distance'] for n in g.nodes()], columns=['distance'])

In [74]:
# Number of nodes of `g` at each solution distance.
df_dist.groupby(['distance']).size()

distance
0    41
1    41
2    67
3    71
4    41
5    11
6     1
dtype: int64

In [76]:
# Render node 0 of `g` as an SVG board.
# `g.nodes[...]` replaces the `g.node` alias, which current networkx no longer provides.
HTML(RHdisplay.svg_from_state(g.nodes[0]['board_int'], g.nodes[0]['red_col']))

In [77]:
# Counts of solvable and unsolvable components.
len(solvable_comps) , len(unsolvable_comps)

(1024, 337)

In [79]:
# Sanity check: the two groups together should account for every component in `comps`.
len(solvable_comps) + len(unsolvable_comps)

1361

In [85]:
# Reload RHdistance_partition so edits made to the module since import take effect,
# then run distance_partition on every solvable component.
# Note: the loop leaves `g` bound to the last solvable component.
importlib.reload(RHdistance_partition)
for g in solvable_comps:
    RHdistance_partition.distance_partition(g)
    

In [142]:
# Record, on each solvable component, its maximum solution distance and a
# representative node at that distance.
# Reads g.graph['distance_partition'], so distance_partition must already
# have been run on every graph in `solvable_comps`.
for g in solvable_comps:
    g.graph['max_distance'] = max(g.graph['distance_partition'].keys())
    # Any node at the greatest distance will do; next(iter(...)) raises on an
    # empty set instead of silently reusing `node` from the previous graph.
    node = next(iter(g.graph['distance_partition'][g.graph['max_distance']]))
    node_dict = g.nodes[node]
    # The board integer is kept as two separate parts.
    split_board_int = RHutilities.split_int(node_dict['board_int'])
    g.graph['repr_board_int_s1'] = split_board_int[0]
    g.graph['repr_board_int_s2'] = split_board_int[1]
    g.graph['repr_red_col'] = node_dict['red_col']
    

In [162]:
# Tabulate, for each solvable component, the attributes stored on the graph
# plus its node count, edge count and density.
max_dist_columns = ['distance','int_s1','int_s2','repr_red_col','num_nodes','num_edges','density']

df_max_dist = pd.DataFrame(
    [
        [
            component.graph['max_distance'],
            component.graph['repr_board_int_s1'],
            component.graph['repr_board_int_s2'],
            component.graph['repr_red_col'],
            len(component.nodes()),
            component.size(),
            nx.density(component),
        ]
        for component in solvable_comps
    ],
    columns=max_dist_columns,
)

In [163]:
# Components ordered from largest to smallest maximum solution distance.
df_max_dist.sort_values(['distance'],ascending=False)

Unnamed: 0,distance,int_s1,int_s2,repr_red_col,num_nodes,num_edges,density
168,19,2748789558696,179633383279104,3,56,86,0.055844
769,17,2748789558696,175922545691136,3,80,149,0.047152
642,17,343598694837,25701168185664,4,116,260,0.038981
739,16,2748789776424,25701168185664,1,102,228,0.044263
830,15,2748789558696,670828544,3,84,172,0.049340
870,15,343598694837,21990330597696,4,144,344,0.033411
387,15,2748789558696,175922531535414,3,104,214,0.039955
572,15,21990316469568,670828544,2,86,161,0.044049
408,14,2748789776424,21990330597696,1,126,299,0.037968
506,14,175922531756544,68586307588,1,112,231,0.037162


In [164]:

# Rebuild a board integer from its two stored parts and render it as SVG.
# NOTE(review): these literals do not appear in the tables displayed in this notebook — confirm their origin.
red_col = 5
board_int = RHutilities.combine_ints(21990316605814, 11276634271252480)
board_svg = RHdisplay.svg_from_state(board_int, red_col)
HTML(board_svg)

In [166]:
# Number of solvable components at each maximum solution distance (the count repeats for every column).
df_max_dist.groupby(['distance']).agg(['count'])

Unnamed: 0_level_0,int_s1,int_s2,repr_red_col,num_nodes,num_edges,density
Unnamed: 0_level_1,count,count,count,count,count,count
distance,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
0,34,34,34,34,34,34
1,165,165,165,165,165,165
2,73,73,73,73,73,73
3,18,18,18,18,18,18
4,221,221,221,221,221,221
5,93,93,93,93,93,93
6,55,55,55,55,55,55
7,72,72,72,72,72,72
8,110,110,110,110,110,110
9,53,53,53,53,53,53


In [119]:
# Components ordered from largest to smallest maximum solution distance.
# NOTE(review): same query as the earlier sort_values cell; the output saved here has fewer columns, so it likely predates the current df_max_dist.
df_max_dist.sort_values(by=['distance'] , ascending = False)

Unnamed: 0,distance,int_s1,int_s2,repr_red_col
168,19,175922531535792,240243301154856,4
769,17,2748789558696,175922644781568,3
642,17,343598722053,25701168185664,2
739,16,28008,1924695198793728,2
830,15,140738035933224,2749850714112,1
870,15,27968,22341162893317,2
387,15,671312424,178670650027008,1
572,15,83886454,162728391737344,5
408,14,221504,24746353754152,1
506,14,175922531756544,343464214528,1


In [135]:
# Select the component(s) whose maximum solution distance is 19.
df_max_dist.loc[df_max_dist['distance'] == 19]
#HTML(RHdisplay.svg_from_state(n['board_int'],n['red_col']))

Unnamed: 0,distance,int_s1,int_s2,repr_red_col
168,19,175922531535792,240243301154856,4


In [137]:
# Rebuild a board integer from its two stored parts and render it as SVG.
# NOTE(review): the first literal differs from the int_s1 shown for the distance-19 row
# in the output saved just before this cell — confirm which value is intended.
red_col = 4
board_int = RHutilities.combine_ints(2748789558696, 240243301154856)
board_svg = RHdisplay.svg_from_state(board_int, red_col)
HTML(board_svg)

{0: {0}, 1: {1}}