### Issues and Things to Add

- Add in a read/load method for Graph

In [1]:
""" build network graph of prior purchases """
import csv
import itertools as it
import json
from collections import defaultdict
from io import StringIO as sio

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pygraphviz as pgv

In [2]:
def read_order_product_sets(filename):
    """
    Lazily yield the products bought in each order from a csv file.

    The first row is treated as a header and skipped. For every remaining
    non-empty row, the last column is decoded as JSON and yielded.

    Args:
        filename: Path of the csv file to read.

    Yields:
        The decoded JSON value stored in the last column of each data row
        (presumably a list of product ids -- confirm against the query that
        produces the file).

    Raises:
        json.JSONDecodeError: If the last column of a row is not valid JSON.
    """
    with open(filename, newline='') as f:
        prod_csv = csv.reader(f)
        # Skip the header row. The default keeps an empty file from raising
        # StopIteration inside the generator, which would surface as a
        # RuntimeError.
        next(prod_csv, None)
        for row in prod_csv:
            if not row:
                # csv.reader yields [] for blank lines; nothing to parse.
                continue
            yield json.loads(row[-1])

            
def dict_sum(*dicts):
    """
    Merge any number of dictionaries into one, adding together the values
    of keys that appear in more than one of them.

    Returns a ``defaultdict(int)``, so looking up an absent key gives 0.
    """
    merged = defaultdict(int)
    # Walk every (key, value) pair of every input, in argument order.
    all_pairs = it.chain.from_iterable(mapping.items() for mapping in dicts)
    for key, amount in all_pairs:
        merged[key] += amount
    return merged


class Vertex(object):
    """A single vertex of an undirected, weighted graph.

    A vertex remembers which vertices it is connected to and how many times
    each of those connections has been recorded.

    Attributes:
        key: Unique key/ID to identify vertex.
        neighbors: A dictionary mapping each neighbor's key to the neighbor
            Vertex object.
        weights: A dictionary mapping each neighbor's key to the weight of
            the connecting edge (the number of times the edge was added).
    """

    def __init__(self, key):
        """Initialize an unconnected vertex identified by ``key``."""
        self.key = key
        self.neighbors = defaultdict(dict)
        self.weights = defaultdict(int)

    def add_neighbor(self, neighbor):
        """Record one more edge to ``neighbor``.

        The neighbor is stored under its key and the weight of the edge to
        it is incremented by one.

        Returns:
            This vertex, so calls can be chained.
        """
        self.neighbors[neighbor.key] = neighbor
        self.weights[neighbor.key] += 1
        return self

    def __str__(self):
        # Iterating ``neighbors`` yields the neighbor keys themselves, so
        # they are listed directly rather than via a ``.key`` attribute.
        return f'id: {self.key} -- neighbors: {list(self.neighbors)}'

    def __repr__(self):
        return f'{type(self).__name__}: id: {self.key}'

    def __add__(self, new):
        """Merge ``new`` into this vertex in place and return this vertex.

        The neighbors of ``new`` are added to this vertex's neighbors and the
        edge weights of the two vertices are summed per neighbor key.

        Raises:
            ValueError: If the two vertices do not share the same key;
                merging them would silently mix unrelated vertices.
        """
        if self.key != new.key:
            raise ValueError(
                f'cannot add vertices with different keys: '
                f'{self.key!r} and {new.key!r}'
            )
        self.neighbors.update(new.neighbors)
        self.weights = dict_sum(self.weights, new.weights)
        return self

    def get_connections(self):
        """Return a ``(neighbor keys, weights dictionary)`` tuple."""
        return self.neighbors.keys(), self.weights

    def get_weight(self, neighbor):
        """Return the weight of the edge to ``neighbor`` (0 if not connected).

        Uses ``get`` so that asking about an unconnected vertex does not
        insert a zero-weight entry into ``weights`` as a side effect.
        """
        return self.weights.get(neighbor.key, 0)

    def toDict(self):
        """Return a plain-dict copy of the neighbor-key -> weight mapping."""
        return dict(self.weights)

    
class Graph(object):
    """Graph class for undirected network graphs.

    Vertices are stored by key. Adding an edge updates both endpoint
    vertices, so every edge is visible from either side.

    Attributes:
        vertices: A dictionary mapping each vertex key to its vertex object.
    """

    def __init__(self):
        """Create an empty graph."""
        self.vertices = {}

    def add_vertex(self, vertex):
        """Store ``vertex`` under its key, replacing any vertex already there."""
        self.vertices[vertex.key] = vertex

    def get_vertex(self, key):
        """Return the vertex stored under ``key``, or None if there is none."""
        return self.vertices.get(key)

    def __contains__(self, key):
        return key in self.vertices

    def add_edge(self, from_key, to_key):
        """Add vertices with the to and from keys to the graph and then
        update the neighbors for both vertices since graph is undirected.

        Vertices that do not exist yet are created. Each call increases the
        weight of the edge by one on both endpoint vertices.
        """
        if from_key not in self.vertices:
            self.add_vertex(Vertex(from_key))
        if to_key not in self.vertices:
            self.add_vertex(Vertex(to_key))
        self.vertices[from_key].add_neighbor(self.vertices[to_key])
        self.vertices[to_key].add_neighbor(self.vertices[from_key])

    def get_vertices(self):
        """Return a view of the keys of all vertices in the graph."""
        return self.vertices.keys()

    def __iter__(self):
        # Iterating a graph yields the vertex objects, not their keys.
        return iter(self.vertices.values())

    def __str__(self):
        return f'Graph: vertices: {len(self.vertices)}'

    def toDict(self):
        """Return ``{vertex key: {neighbor key: weight}}`` as plain dicts."""
        return {key: dict(vertex.weights) for key, vertex in self.vertices.items()}

    def toJson(self):
        """Return the ``toDict`` mapping as an indented JSON string with sorted keys."""
        return json.dumps(self.toDict(), sort_keys=True, indent=2)

    def saveJson(self, filename):
        """Write the JSON representation of the graph to ``filename``."""
        with open(filename, 'w') as f:
            f.write(self.toJson())

def build_graph(product_generator, limit=100):
    """
    Use generator of the products bought in each order to create an
    undirected graph of items purchased together.

    Every unordered pair of products within one order adds one to the
    weight of the edge between those two products.

    Args:
        product_generator: Iterable yielding, for each order, an iterable of
            the product keys bought in that order.
        limit: Maximum number of orders to consume, as an integer. ``None``
            consumes every order; values below zero are treated as zero.

    Returns:
        The Graph built from the consumed orders.
    """
    graph = Graph()
    if limit is None:
        orders = product_generator
    else:
        # islice stops after exactly ``limit`` orders, so no extra order is
        # pulled from the generator just to discover the limit was reached.
        orders = it.islice(product_generator, max(limit, 0))
    for product_list in orders:
        for from_key, to_key in it.combinations(product_list, 2):
            graph.add_edge(from_key, to_key)

    return graph

## Load products list for each order and create an undirected network graph

Here we load the CSV file produced by running the SQL query in `../src/data/queries/ ????`. It contains the list of products purchased in each order. Looping over each order, we create every pair of products in that order, where the order within a pair is not important. For each product pair, we add a vertex for each product id if it does not already exist, and then update the weight of the edge between the two product vertices.

In [11]:
# Create a generator over the per-order product lists. Because
# read_order_product_sets is a generator function, the file is not opened or
# read until the generator is iterated.
products_from_order = read_order_product_sets("../data/interim/product_lists_by_order.csv")

In [12]:
# Upper bound on the number of orders folded into the graph; the same value
# is embedded in the output file names written further down.
num_orders = 50000
# Consumes products_from_order, so the generator must be recreated before
# this cell can be re-run.
g = build_graph(products_from_order, num_orders)

With the graph built, we can save it to disk as a JSON file.

In [13]:
# Write the graph's {vertex: {neighbor: weight}} mapping to disk as JSON.
g.saveJson(f'../data/interim/products_{num_orders}_orders_json_graph.json')

### Use graph dictionary to create graph in NetworkX

In [14]:
# Build the NetworkX graph edge by edge so that every co-purchase count is
# stored as the edge's ``weight`` attribute. g.toDict() maps
# vertex -> {neighbor: count}, whereas NetworkX's dict-of-dicts input expects
# the innermost values to be attribute dictionaries, so handing the mapping
# straight to nx.Graph does not carry the counts over as weights.
built_graph = nx.Graph()
# Register every vertex first so one without edges is still kept.
built_graph.add_nodes_from(g.get_vertices())
built_graph.add_weighted_edges_from(
    (source, target, weight)
    for source, neighbors in g.toDict().items()
    for target, weight in neighbors.items()
)
nx.write_graphml(built_graph, f'../data/interim/products_{num_orders}_orders_json_graph.graphml')

In [15]:
# Convert the NetworkX graph to a PyGraphviz AGraph and write its string
# form to a .dot file next to the other graph exports.
pgv_agraph = nx.nx_agraph.to_agraph(built_graph)
with open(f'../data/interim/products_{num_orders}_orders_json_graph.dot', 'w') as f:
    f.write(pgv_agraph.string())



In [9]:
# # testing the different layout formats
# for layout in ['dot', 'twopi', 'neato', 'circo', 'fdp', 'sfdp']:
#     pgv_agraph.layout(prog=f'{layout}')
#     pgv_agraph.draw(f'../data/processed/products_graph_{num_orders}_orders_{layout}.png',
#            format='png', prog=f'{layout}')

In [119]:
# graph_dict = g.toDict()

In [120]:
# for key in graph_dict.keys():
#     for to_key in graph_dict[key].keys():
#         G.add_edge(key, to_key, weight=graph_dict[key][to_key])