In [2]:
import warnings
warnings.simplefilter("ignore", UserWarning)
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import sys
sys.path.append("../")
from spn.structure.Base import Context
from spn.algorithms.LearningWrappers import learn_parametric
from spn.structure.leaves.parametric.Parametric import Bernoulli
try:
    from time import perf_counter
except:
    from time import time as perf_counter
from spn.io.Graphics import *
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
from spn.algorithms.MPE import mpe
from spn.algorithms.Inference import log_likelihood
from spn.algorithms.Sampling import sample_instances
from numpy.random.mtrand import RandomState
from scipy.stats import pearsonr
from copy import deepcopy



"""


print("Reading dataframes \n")
# Seed for the row shuffles below. Without it every run writes the rows in a
# different order, so the saved train/test files could not be reproduced.
SHUFFLE_SEED = 123
# Load each split, shuffle all rows (frac=1) and renumber the index from zero.
df_train = pd.read_csv('train.csv')
df_train = df_train.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)
df_test = pd.read_csv('test.csv')
df_test = df_test.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)
# Plain ndarray views of the shuffled frames for the numeric code that follows.
data_train = df_train.values
data_test = df_test.values



def binning(data_array):
    '''Binarise each column of a 2-D array against that column's mean.

    Parameters
    ----------
    data_array : array-like
        Numeric data with one row per instance and one column per feature.
        Anything np.asarray accepts is allowed (ndarray, nested lists, ...).

    Returns
    -------
    numpy.ndarray
        float64 array of the same shape holding 1.0 where the value is greater
        than or equal to its column mean and 0.0 otherwise.
    '''
    # NOTE: the docstring uses single quotes on purpose - this code currently
    # sits inside a triple-double-quoted string that disables it.
    values = np.asarray(data_array)
    # Column means are accumulated in float64 regardless of the input dtype.
    column_means = np.mean(values, axis=0, dtype=np.float64)
    # Broadcasting compares every row against the per-column means in one step;
    # NaN entries compare as False and therefore map to 0.0.
    return (values >= column_means).astype(np.float64)


print("Binning data \n")
# Each split is thresholded by binning() on its own data.
train_data = binning(data_train)
test_data = binning(data_test)
assert train_data.shape[0]

# Cast the binned values to integers before they are written out.
eagle_train = train_data.astype(int)
eagle_test = test_data.astype(int)
# Leftover, never enabled: eagle_valid = eaglez10[test_size:,:].astype(np.int)

# Sanity checks: both splits share the column count but differ in row count.
assert eagle_train.shape[1] == eagle_test.shape[1]
assert eagle_train.shape[0] != eagle_test.shape[0]

# Persist both splits as comma-separated integer text files.
for out_path, matrix in (('dark.ts.data', eagle_train), ('dark.test.data', eagle_test)):
    np.savetxt(out_path, matrix, fmt = '%1d', delimiter = ",")

# The source DataFrames are not used again in this step.
del df_train, df_test
"""


print("Learning SPN \n")
name = 'dark'
# Load the comma-separated train/test matrices (the disabled preprocessing code
# earlier in this cell writes files with exactly these names).
train = np.loadtxt( name + ".ts.data", dtype=float, delimiter=",", skiprows=0)
test = np.loadtxt( name + ".test.data", dtype=float, delimiter=",", skiprows=0)
# Stack both splits to read off the shared column count; np.vstack raises if
# the two files disagree on the number of columns.
D = np.vstack((train, test))
F = D.shape[1]
# Column labels V0..V(F-1); not referenced again in the visible cells, kept in
# case later cells rely on them.
features = ["V" + str(i) for i in range(F)]
# A bare name.upper() statement was removed here: str.upper() returns a new
# string, so discarding its result had no effect on name.
# Every column is declared as a Bernoulli variable. add_domains(train)
# presumably derives each column's value domain from the training data -
# TODO confirm against the spn library documentation.
ds_context = Context(parametric_types= np.asarray([Bernoulli] * F)).add_domains(train)
# Time structure learning only; file loading is outside the measured region.
eval_start_t = perf_counter()
spn = learn_parametric(train, ds_context, min_instances_slice=20)
eval_end_t = perf_counter()
print('Structure of SPN learnt in %f secs' % (eval_end_t - eval_start_t))  
print("............................................\n\n")



"""
print("Plotting SPN \n")


def _render_spn_graph(spn, fname, figsize, edge_width, label_font_size, show_edge_weights):
    '''Draw spn as a graph with a graphviz dot layout and save it to fname.

    Shared implementation behind plot_network and plot_network_with_weights.

    Parameters
    ----------
    spn : the network handed to get_networkx_obj.
    fname : path the image is written to.
    figsize : tuple or None. When given, a new matplotlib figure of that size
        is opened before drawing; otherwise the current figure is reused.
    edge_width : line width passed to networkx for the edges.
    label_font_size : font size of the node labels.
    show_edge_weights : when true, each edge is annotated with its 'weight'
        attribute.
    '''
    # NOTE: docstrings here use single quotes on purpose - this code currently
    # sits inside a triple-double-quoted string that disables it.
    import networkx as nx
    from networkx.drawing.nx_pydot import graphviz_layout
    import matplotlib.pyplot as plt
    # Imported locally so the function does not depend on a wildcard import
    # elsewhere in the notebook happening to expose NullLocator.
    from matplotlib.ticker import NullLocator

    plt.clf()
    g, labels = get_networkx_obj(spn)
    pos = graphviz_layout(g, prog='dot')
    if figsize is not None:
        plt.figure(figsize=figsize)
    ax = plt.gca()
    ax.invert_yaxis()
    nx.draw(g, pos, with_labels=True, arrows=False, node_color='lightgrey', edge_color='grey',
            width=edge_width, node_size=1, labels=labels, font_size=label_font_size)
    # Recolour the outline of the first drawn collection (presumably the node
    # markers - TODO confirm).
    ax.collections[0].set_edgecolor("#333333")
    if show_edge_weights:
        nx.draw_networkx_edge_labels(g, pos=pos, edge_labels=nx.get_edge_attributes(g, 'weight'),
                                     font_size=1, clip_on=False, alpha=0.6)
    # Fit the axes to the layout with a fixed margin of 20 layout units.
    xpos = [point[0] for point in pos.values()]
    ypos = [point[1] for point in pos.values()]
    ax.set_xlim(min(xpos)-20, max(xpos)+20)
    ax.set_ylim(min(ypos)-20, max(ypos)+20)
    plt.tight_layout()
    plt.margins(0, 0)
    # Remove the tick marks so the saved image contains only the graph.
    plt.gca().xaxis.set_major_locator(NullLocator())
    plt.gca().yaxis.set_major_locator(NullLocator())
    plt.savefig(fname, bbox_inches='tight', pad_inches=0)


def plot_network(spn, fname="spn.png"):
    '''Save a plain drawing of spn to fname.

    Draws into the current figure with edge width 1 and label font size 1;
    edge weights are not shown.
    '''
    _render_spn_graph(spn, fname, figsize=None, edge_width=1, label_font_size=1,
                      show_edge_weights=False)


def plot_network_with_weights(spn, fname="spnWeights.png"):
    '''Save a drawing of spn with edge-weight labels to fname.

    Opens a new figure with figsize (200, 100) and draws with edge width 2 and
    label font size 4.
    '''
    _render_spn_graph(spn, fname, figsize=(200, 100), edge_width=2, label_font_size=4,
                      show_edge_weights=True)
# Render the learned SPN to disk twice: a plain drawing and one annotated with
# edge weights. No file name is passed, so each helper uses its default.
plot_network(spn)
plot_network_with_weights(spn)






print('Running Inference \n')
# Blank out columns 0-3 on a private copy of the test set, then let the SPN
# complete them twice: once through sample_instances and once through mpe
# (both presumably fill the NaN entries - TODO confirm).
test_copy = deepcopy(test)
test_copy[:,0:4] = np.nan
sampled_data = sample_instances(spn, test_copy, RandomState(123))
mpe_data = mpe(spn, test_copy)


def _print_column_scores(template, column, predicted):
    '''Print MSE, R^2, accuracy and Pearson r for one column.

    Compares test[:, column] with predicted[:, column] and substitutes the
    four scores, in that order, into template.
    '''
    truth = test[:, column]
    guess = predicted[:, column]
    scores = (
        mean_squared_error(truth, guess),
        r2_score(truth, guess),
        accuracy_score(truth, guess),
        pearsonr(truth, guess)[0],
    )
    print(template % scores)


print("............................................\n")
print('\n Sampled Data \n\n')
# One template per blanked column; position in the tuple is the column index.
sampled_templates = (
    "\n Number of Subhalos  MSE %f,  R_2 %f ,  Accuray_Score %f , pearsonr %f ",
    "\n halo group Mass  MSE %f, R_2 %f , Accuray_Score %f , pearsonr %f ",
    "\n halo 200Crtitical mass   MSE %f, R_2 %f , Accuray_Score %f , pearsonr %f ",
    "\n Halo radius200 critical  MSE %f, R_2 %f , Accuray_Score %f , pearsonr %f ",
)
for column, template in enumerate(sampled_templates):
    _print_column_scores(template, column, sampled_data)


print("............................................")
print('\n MPE Data \n')
mpe_templates = (
    "\n Number of Subhalos  MSE %f, R_2 %f , Accuray_Score %f , pearsonr %f ",
    "\n halo group Mass  MSE %f, R_2 %f , Accuray_Score %f , pearsonr %f ",
    "\n halo 200Crtitical mass  MSE %f, R_2 %f , Accuray_Score %f , pearsonr %f ",
    "\n Halo radius200 critical  MSE %f, R_2 %f , Accuray_Score %f , pearsonr %f ",
)
for column, template in enumerate(mpe_templates):
    _print_column_scores(template, column, mpe_data)

"""


Learning SPN 

Structure of SPN learnt in 173.806662 secs
............................................




'\nprint("Plotting SPN \n")\ndef plot_network(spn, fname="spn.png"):\n\n    import networkx as nx\n    from networkx.drawing.nx_pydot import graphviz_layout\n    import matplotlib.pyplot as plt\n    plt.clf()\n    g, labels = get_networkx_obj(spn)\n    pos = graphviz_layout(g, prog=\'dot\')\n    #plt.figure(figsize=(200, 100))\n    ax = plt.gca()\n    ax.invert_yaxis()\n    nx.draw(g, pos, with_labels=True, arrows=False, node_color=\'lightgrey\', edge_color=\'grey\', width=1, node_size=1,\n            labels=labels, font_size=1)\n    ax.collections[0].set_edgecolor("#333333")\n    #edge_labels = nx.draw_networkx_edge_labels(g, pos=pos, edge_labels=nx.get_edge_attributes(g, \'weight\'), font_size=1,\n                                              # clip_on=False, alpha=0.6)\n    xpos = list(map(lambda p: p[0], pos.values()))\n    ypos = list(map(lambda p: p[1], pos.values()))\n    ax.set_xlim(min(xpos)-20,max(xpos)+20)\n    ax.set_ylim(min(ypos)-20,max(ypos)+20)\n    plt.tight_layout()\n

In [3]:
from spn.algorithms.Statistics import get_structure_stats
# Report the size of the learned network; the recorded output lists node,
# edge and layer counts.
structure_summary = get_structure_stats(spn)
print(structure_summary)

---Structure Statistics---
# nodes             144
    # sum nodes     19
    # prod nodes    40
    # leaf nodes    85
# edges             143
# layers            9
