In [1]:
import numpy as np
import networkx as nx
from networkx.generators.random_graphs import erdos_renyi_graph, barabasi_albert_graph
import matplotlib.pyplot as plt
from pylab import *
import pandas as pd

In [2]:
def gen_network(p, n, m, net_type, drop_prob=None, seed=34):
    """Generate a random graph and return it with its adjacency matrix.

    Parameters
    ----------
    p : float
        Edge probability; only read when ``net_type == 'erdos'``.
    n : int
        Number of nodes.
    m : int
        Number of edges attached from each new node; only read when
        ``net_type == 'barabasi'``.
    net_type : {'erdos', 'barabasi'}
        Which generator to use.
    drop_prob : float or None, optional
        When not None, every generated edge is removed if a uniform draw
        from ``np.random.random`` is ``<= drop_prob``.
    seed : int, optional
        Seed handed to the graph generator and, when ``drop_prob`` is given,
        to ``np.random.seed`` (this reseeds NumPy's global generator).

    Returns
    -------
    N_i_j : numpy.matrix
        Integer adjacency matrix of the (possibly thinned) graph.
    g : networkx.Graph
        The graph itself.

    Raises
    ------
    ValueError
        If ``net_type`` is not one of the supported generators.
    """
    if net_type == 'erdos':
        g = erdos_renyi_graph(n=n, p=p, seed=seed)
    elif net_type == 'barabasi':
        g = barabasi_albert_graph(n=n, m=m, seed=seed)
    else:
        # Fail loudly instead of hitting an unbound `g` further down.
        raise ValueError(
            f"unknown net_type {net_type!r}; expected 'erdos' or 'barabasi'"
        )

    if drop_prob is not None:
        np.random.seed(seed)
        # Snapshot the edges first: removing edges while walking the live
        # edge view would depend on the view's iteration internals.
        edges = list(g.edges)
        # One draw per edge, in edge order (same stream as drawing one at a time).
        drop_mask = np.random.random(size=len(edges)) <= drop_prob
        g.remove_edges_from(
            [edge for edge, dropped in zip(edges, drop_mask) if dropped]
        )

    # nx.to_numpy_matrix and np.int0 no longer exist in current NetworkX / NumPy;
    # to_numpy_array + asmatrix with np.intp keeps the same np.matrix result.
    N_i_j = np.asmatrix(nx.to_numpy_array(g, dtype=np.intp))

    return N_i_j, g


In [3]:
# Number of customers; also used as the node count of the generated network.
NUMBER_OF_CUSTOMERS = 35
# Number of distinct products; product ids are 0..NUMBER_OF_PRODUCTS-1.
NUMBER_OF_PRODUCTS = 10
# Number of sales rows generated for each calendar day.
SALES_PER_DAY = 1

In [4]:
%%time
# Build the customer network. For net_type='barabasi' gen_network only reads n, m and seed,
# so p=0 is just a placeholder here.
net,grp = gen_network(p=0,n=NUMBER_OF_CUSTOMERS, m=4, net_type='barabasi', drop_prob=None, seed=34)


Wall time: 1.98 ms


Columns of each generated sales record: id, product, date

- For each day:
    - Select a random batch of customers (`np.random.choice()`)
    - Select a random batch of products (`np.random.choice()`)

In [5]:
# Id pools that the sales generator samples from.
customers = np.arange(NUMBER_OF_CUSTOMERS)
products = np.arange(NUMBER_OF_PRODUCTS)
# Day-resolution bounds for 2017; np.arange excludes end_date.
start_date = np.datetime64('2017-01-01')
end_date = np.datetime64('2018-01-01')
# NOTE(review): theCalender is not referenced in the visible cells, and
# start_date/end_date are reassigned at month resolution by the generation cell —
# confirm whether this calendar is still needed.
theCalender = np.arange(start_date, end_date)

In [6]:
# Generate one year of synthetic sales, one month at a time.
# Each row is (customer id, product id, running day number starting at 1).
start_date = np.datetime64('2017-01')
end_date = np.datetime64('2018-01')
m_delta = np.timedelta64(1, 'M')
d_delta = np.timedelta64(1, 'D')
np.random.seed(34)

y_d = 1  # running day counter across the whole period
monthly_rows = []
for month_start in np.arange(start_date, end_date, m_delta):
    days_in_month = np.arange(month_start, month_start + m_delta, d_delta).size
    n_sales = days_in_month * SALES_PER_DAY

    # Draw order matters for reproducibility: customers first, then products.
    # Customers are drawn without replacement, so n_sales must not exceed customers.size.
    month_customers = np.random.choice(customers, n_sales, replace=False)
    month_products = np.random.choice(products, n_sales, replace=True)

    # Consecutive runs of SALES_PER_DAY draws belong to consecutive days.
    day_numbers = np.repeat(np.arange(y_d, y_d + days_in_month), SALES_PER_DAY)

    monthly_rows.append(
        np.column_stack((month_customers, month_products, day_numbers))
    )
    y_d += days_in_month

data = np.concatenate(monthly_rows)
df = pd.DataFrame(data, columns=("cust","prd","date"))


In [7]:
# Show the purchases of two sample customers, ordered by product id.
for cust_id in (4, 0):
    display(df.loc[df["cust"] == cust_id].sort_values(by="prd"))

Unnamed: 0,cust,prd,date
34,4,0,35
260,4,1,261
296,4,1,297
354,4,2,355
127,4,3,128
325,4,3,326
81,4,7,82
118,4,8,119
156,4,9,157
225,4,9,226


Unnamed: 0,cust,prd,date
251,0,1,252
343,0,1,344
132,0,2,133
172,0,3,173
229,0,4,230
19,0,6,20
302,0,6,303
78,0,8,79
115,0,8,116
184,0,8,185


In [15]:
# For every pair of connected customers, report per product how far apart
# (in day numbers) their purchases of that product were.

# Index the purchase days of every (customer, product) pair once, instead of
# re-filtering the whole DataFrame twice for each edge and product.
sales_days = {
    key: days.to_numpy()  # Series.ravel() is deprecated in recent pandas
    for key, days in df.groupby(["cust", "prd"])["date"]
}
no_sales = np.empty(0, dtype=df["date"].dtype)

# All ordered pairs are visited, so an edge stored in both directions
# (presumably every edge, if `net` is a symmetric adjacency matrix) is
# reported as (i, j) and again as (j, i).
for i in range(NUMBER_OF_CUSTOMERS):
    for j in range(NUMBER_OF_CUSTOMERS):
        if net[i, j] != 1:
            continue
        print(f"{(i,j)} ->\n")
        for p in range(NUMBER_OF_PRODUCTS):
            c1_sales = sales_days.get((i, p), no_sales)
            c2_sales = sales_days.get((j, p), no_sales)
            if c1_sales.size > 0 and c2_sales.size > 0:
                # Absolute day gap for every (c1 purchase, c2 purchase) combination,
                # flattened and sorted ascending.
                sales_likely = np.sort(
                    np.abs(np.subtract.outer(c1_sales, c2_sales)), axis=None
                )
                likely_size = len(sales_likely)
                likely_diff = sales_likely[0:3]  # the three closest purchase pairs
                print(f"\tcust-{i} ~ cust-{j} ~ prod-{p} ==> size: {likely_size}, diffs:{likely_diff}")

(0, 1) ->

	cust-0 ~ cust-1 ~ prod-2 ==> size: 1, diffs:[207]
	cust-0 ~ cust-1 ~ prod-3 ==> size: 1, diffs:[10]
	cust-0 ~ cust-1 ~ prod-4 ==> size: 1, diffs:[65]
	cust-0 ~ cust-1 ~ prod-6 ==> size: 2, diffs:[ 25 258]
	cust-0 ~ cust-1 ~ prod-8 ==> size: 9, diffs:[ 2 10 27]
(0, 2) ->

	cust-0 ~ cust-2 ~ prod-1 ==> size: 6, diffs:[14 25 78]
	cust-0 ~ cust-2 ~ prod-3 ==> size: 2, diffs:[ 49 112]
	cust-0 ~ cust-2 ~ prod-4 ==> size: 1, diffs:[221]
	cust-0 ~ cust-2 ~ prod-8 ==> size: 3, diffs:[140 209 246]
(0, 3) ->

	cust-0 ~ cust-3 ~ prod-1 ==> size: 2, diffs:[ 39 131]
	cust-0 ~ cust-3 ~ prod-2 ==> size: 1, diffs:[27]
	cust-0 ~ cust-3 ~ prod-3 ==> size: 1, diffs:[186]
	cust-0 ~ cust-3 ~ prod-4 ==> size: 2, diffs:[21 56]
	cust-0 ~ cust-3 ~ prod-6 ==> size: 4, diffs:[ 38 115 168]
(0, 4) ->

	cust-0 ~ cust-4 ~ prod-1 ==> size: 4, diffs:[ 9 45 47]
	cust-0 ~ cust-4 ~ prod-2 ==> size: 1, diffs:[222]
	cust-0 ~ cust-4 ~ prod-3 ==> size: 2, diffs:[ 45 153]
	cust-0 ~ cust-4 ~ prod-8 ==> size: 3, diff

In [112]:
# NOTE(review): scratch inspection of c1_sales, which holds whatever the
# pair-analysis loop assigned last — TODO remove, or move next to that loop.
c1_sales.ravel()

array([23, 50])