In [42]:
import numpy as np
import networkx as nx
from networkx.generators.random_graphs import erdos_renyi_graph, barabasi_albert_graph
import matplotlib.pyplot as plt
from pylab import *
import pandas as pd

In [43]:
def gen_network(p, n, m, net_type, drop_prob=None, seed=34) :
    if net_type == 'erdos':
        g = erdos_renyi_graph(n=n, p=p, seed=seed)
    elif net_type == 'barabasi':
        g = barabasi_albert_graph(n=n, m=m, seed=seed)
    if drop_prob is not None:
        np.random.seed(seed)
        for e in g.edges:
            if np.random.random(size=1)<=drop_prob:
                g.remove_edge(e[0],e[1])
    N_i_j = nx.to_numpy_matrix(g, dtype=np.int0)

    return N_i_j, g


In [112]:
NUMBER_OF_CUSTOMERS = 3500
NUMBER_OF_PRODUCTS = 10
SALES_PER_DAY = 10

In [61]:
%%time
net,grp = gen_network(p=0,n=NUMBER_OF_CUSTOMERS, m=4, net_type='barabasi', drop_prob=None, seed=34)


CPU times: user 203 ms, sys: 91.6 ms, total: 295 ms
Wall time: 368 ms


id, product, date

- For each Day
    - Select a bunch of Random customer (np.choice())
    - Select a bunch of Random products (np.choice())

In [65]:
customers = np.arange(NUMBER_OF_CUSTOMERS)
products = np.arange(NUMBER_OF_PRODUCTS)
start_date = np.datetime64('2017-01-01')
end_date = np.datetime64('2018-01-01')
theCalender = np.arange(start_date, end_date)

In [150]:
start_date = np.datetime64('2017-01')
end_date = np.datetime64('2018-01')
m_delta = np.timedelta64(1, 'M')
d_delta = np.timedelta64(1, 'D')
np.random.seed(34)
y_d = 1
for m in np.arange(start_date, end_date, m_delta):
    diff_range = np.arange(m, m+m_delta, d_delta)
    diff_range_size = diff_range.size
    selectedCustomers = np.random.choice(customers, diff_range_size*SALES_PER_DAY, replace=False)
    selectedProducts = np.random.choice(products, diff_range_size*SALES_PER_DAY, replace=True)
    d_i = 1
    for d in diff_range:
        cust = selectedCustomers[(d_i-1)*SALES_PER_DAY:d_i*SALES_PER_DAY]
        prd = selectedProducts[(d_i-1)*SALES_PER_DAY:d_i*SALES_PER_DAY]
        o = np.vstack((cust,prd, np.repeat(y_d, SALES_PER_DAY))).T
        print(o)
        d_i += 1
        y_d += 1



[[2436    7    1]
 [2149    0    1]
 [ 755    2    1]
 [  10    4    1]
 [ 496    7    1]
 [1591    7    1]
 [2079    3    1]
 [1154    5    1]
 [3017    0    1]
 [1889    5    1]]
[[3195    8    2]
 [ 985    1    2]
 [ 361    1    2]
 [1497    3    2]
 [1668    8    2]
 [ 626    8    2]
 [ 169    1    2]
 [ 794    0    2]
 [1357    0    2]
 [1191    5    2]]
[[ 229    2    3]
 [ 406    4    3]
 [1620    8    3]
 [2941    5    3]
 [ 483    2    3]
 [ 482    4    3]
 [2222    4    3]
 [2589    6    3]
 [1673    9    3]
 [1803    4    3]]
[[ 247    0    4]
 [1932    8    4]
 [3355    7    4]
 [1067    4    4]
 [1873    9    4]
 [1627    0    4]
 [  84    1    4]
 [ 903    7    4]
 [2062    8    4]
 [ 841    0    4]]
[[2837    5    5]
 [1612    6    5]
 [2011    9    5]
 [3021    0    5]
 [2314    7    5]
 [  79    2    5]
 [1594    3    5]
 [1375    4    5]
 [3474    3    5]
 [1553    3    5]]
[[ 958    0    6]
 [1766    4    6]
 [1417    4    6]
 [2028    2    6]
 [1077    1    6]
 [322

In [72]:
np.arange(np.datetime64('2017-01-01'), np.datetime64('2017-01-15')).reshape(data_size,1).ravel()

array(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04',
       '2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08',
       '2017-01-09', '2017-01-10', '2017-01-11', '2017-01-12',
       '2017-01-13', '2017-01-14'], dtype='datetime64[D]')

In [84]:
data_size=30

sales_data_type = np.empty((0,),
                       dtype='int,int,datetime64[m]')
[pd.DataFrame(
    dict(customer=np.random.choice(a=35000, size=data_size, replace=False).reshape(data_size,1).ravel(), 
    product=np.random.choice(a=40, size=data_size, replace=True).reshape(data_size,1).ravel(),
    date=np.arange(np.datetime64('2017-01-01'), np.datetime64('2017-01-31')).reshape(data_size,1).ravel())
),
pd.DataFrame(
    dict(customer=np.random.choice(a=35000, size=data_size, replace=False).reshape(data_size,1).ravel(), 
    product=np.random.choice(a=40, size=data_size, replace=True).reshape(data_size,1).ravel(),
    date=np.arange(np.datetime64('2017-03-01'), np.datetime64('2017-03-31')).reshape(data_size,1).ravel())
),]

[    customer  product       date
 0       1650       29 2017-01-01
 1      11560       38 2017-01-02
 2        964       11 2017-01-03
 3      31033       21 2017-01-04
 4      12294       27 2017-01-05
 5      21586       26 2017-01-06
 6       8690       25 2017-01-07
 7      19449       15 2017-01-08
 8       2497       11 2017-01-09
 9      18312       24 2017-01-10
 10      2597        3 2017-01-11
 11      2506       13 2017-01-12
 12     21348       38 2017-01-13
 13     18113       12 2017-01-14
 14     31559       28 2017-01-15
 15     19352       18 2017-01-16
 16      3488        6 2017-01-17
 17      8352        1 2017-01-18
 18      1594       11 2017-01-19
 19     28638       16 2017-01-20
 20     26639        1 2017-01-21
 21      8530       22 2017-01-22
 22     24326        8 2017-01-23
 23     12449       33 2017-01-24
 24     28185       25 2017-01-25
 25     23711       12 2017-01-26
 26     17146       13 2017-01-27
 27     15729        1 2017-01-28
 28     22223 