In [42]:
from faker import Faker
from datetime import datetime
import random
import pandas as pd
import numpy as np

In [186]:
fake = Faker(seed=2)

# Sessions

In [85]:
def create_sessions(num):
    sessions = [
        {'date': fake.date_between_dates(date_start=datetime(2021, 1, 1), date_end=datetime(2023, 6, 30)),
        #  'session_id': f'{np.random.randint(0, 999999):06}',
        'session_id': f'{x+1:09}',
         'session_type': np.random.choice(['Web', 'Playstore', 'App Store'], p=[0.4, 0.3, 0.3]),
         'lead': np.random.choice(['Yes', 'No'], p=[0.3, 0.7])
    } for x in range(num)]
    
    return sessions


id can be index in dataframe

In [87]:
engagement_df = pd.DataFrame(create_sessions(num=100000))
engagement_df

Unnamed: 0,date,session_id,session_type,lead
0,2021-06-19,000000001,Playstore,No
1,2021-03-17,000000002,Web,Yes
2,2021-01-02,000000003,Web,Yes
3,2023-01-03,000000004,Playstore,No
4,2023-05-03,000000005,Web,No
...,...,...,...,...
99995,2022-07-30,000099996,Playstore,No
99996,2022-12-26,000099997,Playstore,No
99997,2023-05-10,000099998,Web,No
99998,2022-09-13,000099999,Playstore,No


# Product table

In [88]:
from mimesis import Generic

In [142]:
generic = Generic(seed=2)

In [213]:
def create_product(num):
    category = ['fruit', 'vegetable', 'drink', 'dish']
    product_name = [generic.food.fruit, generic.food.vegetable, generic.food.drink, generic.food.dish]
    price_range = [(1.99, 20), (0.99, 10), (5,10), (15,50)]

    product=[]
    for i in range(4):
        product.extend([{
            'product_id': f'{len(product)+x+1:03}',
            'product_name': product_name[i](),
            'category': category[i],
            'unit_price': round(np.random.uniform(*price_range[i]), 2)
        } for x in range(num)])

    return product

In [214]:
product_df = pd.DataFrame(create_product(10))
product_df

Unnamed: 0,product_id,product_name,category,unit_price
0,1,Berberis vulgaris,fruit,6.55
1,2,Louvi,fruit,2.87
2,3,Dragonfrui,fruit,15.66
3,4,Kaffir lime,fruit,6.45
4,5,Desert banana,fruit,7.5
5,6,Kakadu lime,fruit,19.9
6,7,Loganberry,fruit,6.88
7,8,Hackberry,fruit,9.03
8,9,Redcurrant,fruit,3.47
9,10,Gambooge,fruit,19.57


In [176]:
def create_product(num):
    
    dict={
        'fruit':    {'product': generic.food.fruit, 
                    'price_range': (1.99, 30)},
        'vegetable':{'product': generic.food.vegetable, 
                    'price_range': (0.99, 25)},
        'drink':    {'product': generic.food.drink, 
                    'price_range': (5,10)},
        'dish':     {'product': generic.food.dish, 
                    'price_range': (15,50)},
    }
    product=[]
    for key in dict.keys():
        product.extend([{
            'product_id': f'{len(product)+x+1:03}',
            'product_name': dict[key]['product'](),
            'category': key,
            'unit_price': round(np.random.uniform(*dict[key]['price_range']), 2)
        } for x in range(num)])
    return product

# Users

In [190]:
def create_customer(num):
    customers=[fake.simple_profile() for x in range(num)]

    return customers

In [192]:
customer_df = pd.DataFrame(create_customer(10))
customer_df['customer_id'] = customer_df.index+1
customer_df

Unnamed: 0,username,name,sex,address,mail,birthdate,customer_id
0,nadkins,Gregory Williamson,M,"PSC 2389, Box 7037\nAPO AE 89390",saramays@gmail.com,1929-03-22,1
1,brandon22,Douglas Bass,M,"586 Charles Parks\nNew Soniaberg, ID 37224",ggilmore@yahoo.com,1964-12-25,2
2,jonalvarez,Shannon Martinez,F,Unit 2523 Box 8287\nDPO AE 52265,melissa32@gmail.com,1978-03-28,3
3,christopher82,Melissa Nguyen,F,USS Fischer\nFPO AA 68638,manuel59@gmail.com,1999-11-22,4
4,john63,Penny Page,F,"PSC 3211, Box 2295\nAPO AP 85459",ericmurphy@yahoo.com,2005-11-26,5
5,snydervictor,Clinton Hart,M,"62209 Jenkins Shores\nColemanfurt, WA 02717",schwartzyvonne@gmail.com,1978-02-06,6
6,ryan36,Teresa Jones,F,USNS Gibbs\nFPO AA 51035,rclark@gmail.com,1990-03-17,7
7,william07,Amy Christensen,F,USNV Williams\nFPO AE 73802,thompsongary@gmail.com,1969-02-18,8
8,josephmitchell,Samantha Stevens,F,"682 Mendoza Corner\nMorenofort, MS 63203",wilsondeanna@hotmail.com,1974-02-07,9
9,nglass,Sharon Campbell,F,Unit 5994 Box 3026\nDPO AA 09882,lindacantu@gmail.com,1912-09-14,10


# Sales

In [304]:
def create_sales(num):
    sales=[]
    for x in range(num):
        
        date = fake.date_between_dates(date_start=datetime(2021, 1, 1), date_end=datetime(2023, 6, 30))
        
        transaction={
        'date': date,
        'order_id': f'{date.strftime("%Y%m%d")}{x:04}',
        'customer_id': customer_df['customer_id'].iloc[np.random.randint(0, len(customer_df.index))],
        'session_type': np.random.choice(['Web', 'Playstore', 'App Store'], p=[0.4, 0.3, 0.3])
        }
        
        # one order may contain multiple products sold
        # create row for each product within the order
        for y in range(1, np.random.randint(1,10)):
            ind = np.random.randint(0, len(product_df.index))
            transaction['product_id'] = product_df['product_id'].iloc[ind]
            transaction['product_name'] = product_df['product_name'].iloc[ind]
            transaction['unit_price'] = product_df['unit_price'].iloc[ind]
            transaction['unit_count']= np.random.randint(1,10)
            # transaction['amount'] = round(transaction['unit_price'] * transaction['unit_count'],2)
            transaction['amount'] = transaction['unit_price'] * transaction['unit_count']

            add_trans = transaction.copy()

            sales.append(add_trans)
    
    return sales

In [305]:
sales_df = pd.DataFrame(create_sales(1000))
sales_df.head(30)

Unnamed: 0,date,order_id,customer_id,session_type,product_id,product_name,unit_price,unit_count,amount
0,2023-02-21,202302210000,7,Web,26,Madeira,7.73,9,69.57
1,2022-09-11,202209110001,5,Web,22,Four Score,8.45,6,50.7
2,2022-09-11,202209110001,5,Web,14,Okra,1.06,2,2.12
3,2022-09-11,202209110001,5,Web,2,Louvi,2.87,2,5.74
4,2022-09-11,202209110001,5,Web,13,Tomato,8.35,8,66.8


In [306]:
sales_df.sample(30)

Unnamed: 0,date,order_id,customer_id,session_type,product_id,product_name,unit_price,unit_count,amount
3171,2021-04-30,202104300810,4,Web,39,Celery Victor,23.38,2,46.76
1954,2022-05-22,202205220501,6,Web,5,Desert banana,7.5,8,60.0
3054,2022-08-18,202208180783,2,Web,16,Purslane,5.84,2,11.68
735,2022-03-22,202203220179,2,App Store,22,Four Score,8.45,2,16.9
1698,2022-09-11,202209110433,10,Playstore,1,Berberis vulgaris,6.55,4,26.2
3637,2022-06-13,202206130921,3,Web,12,Potato,8.29,6,49.74
228,2023-03-27,202303270052,5,Web,25,Ale,9.45,7,66.15
2691,2022-06-10,202206100709,5,Playstore,11,Parsnip,1.56,2,3.12
3392,2021-09-10,202109100861,10,App Store,4,Kaffir lime,6.45,8,51.6
1073,2023-06-28,202306280263,3,Playstore,6,Kakadu lime,19.9,8,159.2
