In [39]:
""" build network graph of prior purchases """
import csv
import itertools as it
import json
from collections import defaultdict
from io import StringIO as sio

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pygraphviz as pgv

In [30]:
def read_order_product_sets(filename):
    with open(filename, newline='') as f:
        prod_csv = csv.reader(f)
        next(prod_csv)
        for row in prod_csv:
            yield json.load(sio(row[-1]))

In [31]:
products_from_order = read_order_product_sets("../data/interim/product_lists_by_order.csv")

In [34]:
prod_li = next(products_from_order)

In [35]:
prod_li

[49302, 11109, 10246, 49683, 43633, 13176, 47209, 22035]

In [41]:
pairs = list(it.combinations(prod_li, 2))
pairs

[(49302, 11109),
 (49302, 10246),
 (49302, 49683),
 (49302, 43633),
 (49302, 13176),
 (49302, 47209),
 (49302, 22035),
 (11109, 10246),
 (11109, 49683),
 (11109, 43633),
 (11109, 13176),
 (11109, 47209),
 (11109, 22035),
 (10246, 49683),
 (10246, 43633),
 (10246, 13176),
 (10246, 47209),
 (10246, 22035),
 (49683, 43633),
 (49683, 13176),
 (49683, 47209),
 (49683, 22035),
 (43633, 13176),
 (43633, 47209),
 (43633, 22035),
 (13176, 47209),
 (13176, 22035),
 (47209, 22035)]

In [66]:
dict1 = {'a': 10, 'c': 8, 'd': 10} 
dict2 = {'d': 6, 'c': 4}

In [57]:
{**dict1, **dict2}

{'a': 8, 'c': 4, 'd': 6}

In [58]:
dict1.update(dict2)

In [59]:
dict1

{'a': 8, 'c': 4, 'd': 6}

In [68]:
def dict_sum(*dicts):
    ret = defaultdict(int)
    for d in dicts:
        for k, v in d.items():
            ret[k] += v
    return (ret)

In [70]:
a=dsum(dict1, dict2)

In [72]:
type(a)

dict

In [123]:

class Vertex(object):
    def __init__(self, key):
        self.key = key
        self.neighbors = defaultdict(dict)
        self.weights = defaultdict(int)
        self.size = 1

    def add_neighbor(self, neighbor):
        self.neighbors[neighbor.key] = neighbor
        self.weights[neighbor.key] += 1
        return self

    def __str__(self):
        return (f'id: {self.key} -- neighbors: {[x.key for x in self.neighbors]}')
    
    def __repr__(self):
        return f'Vertex {object}: id: {self.key}'
    
    def __add__(self, new):
        if self.key != new.key:
            raise ValueError("Vertices must have same keys")
        self.size += new.size
        self.neighbors = dict_sum(self.neighbors, new.neighbors)
        return self
        
    def get_connections(self):
        return self.neighbors.keys(), self.weights

    def get_weight(self, neighbor):
        return self.weights[neighbor.key]

In [124]:
print(Vertex(345))

id: 345 -- neighbors: []


In [125]:
v = Vertex(234)

In [126]:
print(v)

id: 234 -- neighbors: []


In [127]:
v.add_neighbor(Vertex(20))

Vertex <class 'object'>: id: 234

In [128]:
a = v.add_neighbor(Vertex(342)).get_connections()

In [129]:
a
b = a[1]

In [130]:
print(b.items())

dict_items([(20, 1), (342, 1)])


In [131]:
[x for x in a[1].items()]

[(20, 1), (342, 1)]

In [133]:
v + v

TypeError: unsupported operand type(s) for +=: 'int' and 'Vertex'

In [None]:



class Graph(object):
    def __init__(self):
        self.verticies = defaultdict(dict)

    def add_vertex(self, vertex):
        self.verticies[vertex.key] = vertex

    def get_vertex(self, key):
        try:
            return self.verticies[key]
        except KeyError:
            return None

    def __contains__(self, key):
        return key in self.verticies

    def add_edge(self, from_key, to_key, weight=0):
        if from_key not in self.verticies:
            self.add_vertex(Vertex(from_key))
        if to_key not in self.verticies:
            self.add_vertex(Vertex(to_key))
        self.verticies[from_key].add_neighbor(self.verticies[to_key], weight)

    def get_vertices(self):
        return self.verticies.keys()

    def __iter__(self):
        return iter(self.verticies.values())
    
    
    
    
def build_graph(words):
    buckets = defaultdict(list)
    graph = defaultdict(set)

    for word in words:
        for i in range(len(word)):
            bucket = '{}_{}'.format(word[:i], word[i + 1:])
            buckets[bucket].append(word)

    # add vertices and edges for words in the same bucket
    for bucket, mutual_neighbors in buckets.items():
        for word1, word2 in product(mutual_neighbors, repeat=2):
            if word1 != word2:
                graph[word1].add(word2)
                graph[word2].add(word1)

    return graph

In [47]:
class Graph:
    def __init__(self):
        self.vertices = defaultdict(dict)

In [46]:
Graph().vertices

defaultdict(dict, {})