In [72]:
import numpy as np
import graphviz
import networkx as nx

import os

import digraph

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

## Subgraph Generation

In [68]:
def get_subgraphs(graph, root='1-0-1', subcall = False):    
    root = graph[root]
    current_node = root
    
    joins = []
    terminals = []
    
    # skip until joins/terminals
    while len(current_node.get_children()) == 1:
        current_node = current_node.get_children()[0]
    
    # if we hit a join, split and recursively call on each child
    if len(current_node.get_children()) > 1:
        for child in current_node.get_children():
            c_joins, c_terms = get_subgraphs(graph, child.idx, True)
            terminals += c_terms
            joins += c_joins
        joins += [(current_node.idx, terminals)]
        
    if len(current_node.get_children()) == 0:
        terminals += [current_node.idx]
    
    if subcall:
        return joins, terminals 
    else:
        return joins

## qep2vec

In [89]:
def qep2vec(graph, degree = 0):
    joins = graph.get_joins()

    if degree > 0:
        joins = np.array([join for join in joins if len(join[1]) <= degree], dtype=object)

    # table names => integers
    joins[:,0] = np.vectorize(int)(joins[:,0])

    # create the costs column
    joins = np.hstack([joins, np.zeros((len(joins), 1))])

    # get the costs
    for i in range(len(joins[:,0])):
        joins[:,2][i] = np.vectorize(float)(graph.plan_details[joins[:,0][i]])

    # table names => integers
    joins[:,0] = np.vectorize(str)(joins[:,0])

    # get the corresponding named types from the node labels
    joins[:,0] = np.vectorize(graph.labels.get)(joins[:,0])

    # load the canonical node types; match the labels to their canonical symbol
    node_types = digraph.load_dict('node_types')
    joins[:,0] = np.vectorize(node_types.get)(joins[:,0])

    # load the canonical table names...
    terminal_dict = digraph.load_dict('terminal_dict')
    for i in range(len(joins[:,1])):
        a = np.array(joins[:,1][i][0])
        for j in range(len(joins[:,1][i])):
            joins[:,1][i][j] = terminal_dict[digraph.canonical(joins[:,1][i][j])]

    # using the lengths from the global dicts, and np.put, we create the table/type indicator vectors
    # and combine them with the cost
    type_ind = np.zeros((len(joins), len(node_types)))
    table_ind = np.zeros((len(joins), len(terminal_dict)))

    # turn the indices into indicator vectors
    for i in range(len(joins)):
        np.put(table_ind[i], joins[:,1][i],1)
        np.put(type_ind[i], joins[:,0][i], 1)

    # concat the axis along Nx7 ( 7 costs being measured )
    costs = np.concatenate(joins[:,2], axis=0).reshape(len(joins),7)
    return [len(node_types), len(terminal_dict), np.hstack((type_ind, table_ind, costs))]

In [90]:
# get the subgraphs
explain_plans = [f for f in os.listdir(digraph.fdir) if f[-len(".sql.exfmt"):] == ".sql.exfmt"]

n_operators = 0
n_tables = 0

graphs =  []
subgraphs = []

for i in range(len(explain_plans)):
    try:
        graph = digraph.digraph(explain_plans[i])
        subgraph = qep2vec(graph)
        
        graphs.append(graph)
        subgraphs.append(subgraph)
        
        n_operators = subgraph[0]
        n_tables = subgraph[1]
        
    except (ValueError, IndexError) as error:
        print('<OVERLAP>')
        continue


<OVERLAP>
<OVERLAP>
<OVERLAP>
<OVERLAP>
<OVERLAP>
<OVERLAP>
<OVERLAP>
<OVERLAP>
<OVERLAP>
<OVERLAP>
<OVERLAP>
<OVERLAP>
<OVERLAP>
<OVERLAP>
<OVERLAP>
<OVERLAP>
<OVERLAP>
<OVERLAP>
<OVERLAP>
<OVERLAP>


In [104]:
class Network(nn.Module):
    def __init__(self, input_size, output_size):
        super().__init__()
        
        #Hidden Layer
        self.input_layer = nn.Linear(input_size, output_size)
        
        #Output Layer
        self.output_layer = nn.Softmax(dim=1)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return x
    
    def loss(self, x, recon_x):
        return F.binary_cross_entropy(x, recon_x)

In [92]:
np.random.seed(0)

# NN hyper parameters
EPOCHS = 10
GRAPH_LATENT_SIZE = 10
LEARNING_RATE = 1e-3

network = Network(GRAPH_LATENT_SIZE, len(graphs))
optimizer = optim.Adam(network.parameters(), lr=LEARNING_RATE)

# Initialize embeddings
graph_embeddings = torch.Tensor(np.random.rand(len(graphs), GRAPH_LATENT_SIZE))


for i in range(EPOCHS):
    for j in range(len(graphs)):
        for k in range(len(subgraphs[j])):
            sg = 1

In [99]:
a = np.array([1,2,3,4,5])

In [102]:
a[a > 1] = 0

In [103]:
a

array([1, 0, 0, 0, 0])

In [118]:
sg = np.array(subgraphs)
sg = sg[:,2:]

In [129]:
sg_dict = {}
for sg in subgraphs[:2]:
    for s in sg[2]:
        sg_dict[s] = 1


TypeError: unhashable type: 'numpy.ndarray'

TypeError: unhashable type: 'list'