In [37]:
import numpy as np
from datetime import datetime, timedelta
import time

In [131]:
# some probabilities should be dynamics, for example:
# buying probability depends on the number of available items
# listing probability increases if user has sold something in the past
# probability of churn increases if user hasn't listed + hasn't bought anything + doesn't have anything in the basket
# instead of using random choise for time, we should use distribution (exponential, binomial, normal etc)
events = {
    'visit': {
        'condition': True,
        'inputs': 'timestamp'
        'time': [0, 0],
        'next_events': ['search', 'list_item', 'do_nothing'],
        'probabilities': [0.5, 0.05, 0.5]
    },
    'list_item': {
        'condition': 'registered',
        'time': [90, 300],
        'next_events': ['search', 'list_item', 'do_nothing'],
        'probabilities': [0.45, 0.05, 0.5]        
    },
    'search': {
        'time': [10, 120],
        'next_events': ['search', 'view_item', 'list_item', 'do_nothing'],
        'probabilities': [0.65, 0.2, 0.01, 0.14]       
    },
    'view_item': {
        'time': [10, 30],
        'next_events': ['view_item', 'search', 'add_to_basket', 'list_item', 'do_nothing'],
        'probabilities': [0.6, 0.25, 0.02, 0.01, 0.12]          
    },
    'add_to_basket': {
        'time': [5, 120],
        'next_events': ['search', 'remove_from_basket', 'pay',  'list_item', 'do_nothing'],
        'probabilities': [0.3, 0.25, 0.35, 0.01, 0.09]        
    },
    'remove_from_basket': {
        'time': [1, 20],
        'next_events': ['search', 'remove_from_basket', 'pay',  'list_item', 'do_nothing'],
        'probabilities': [0.3, 0.1, 0.25, 0.01, 0.34]        
    },
    'pay': {
        'time': [180, 1800],
        'next_events': ['search', 'list_item', 'do_nothing'],
        'probabilities': [0.2, 0.01, 0.79]        
    }
}



# the idea was to get every next event, but it may be better just to execute events with this function
# so the function not return anything, but run the next event, which in it's order will run the next event etc.
def get_next_event(event_name, user_id, timestamp, params):
    seed = datetime.now().microsecond
    rand = np.random.default_rng(seed=seed)
    
    next_event = rand.choice(a=params[event_name]['next_events'], p=params[event_name]['probabilities'], size=1)[0]
    
    if next_event == False:
        return False
    
    elif params[event_name]['condition'] or eval('users[{}].{}'.format(user_id, params[event_name]['condition'])):
        seconds = rand.choice(a=range(params[event_name]['time'][0], params[event_name]['time'][1]), size=1)[0]
        next_timestamp = timestamp + timedelta(seconds=seconds)
        return next_event, next_timestamp
    
    else:
        get_next_event(event_name, user_id, timestamp, params)

In [51]:
def create_event_data(event_name, user_id, timestamp, properties=None):
    d = {
        'event_name': event_name,
        'user_id': user_id,
        'timestamp': timestamp
    }
    
    if properties is not None:
        for p in properties.keys():
            d[p] = properties
    
    return d

In [79]:
users = dict()
items = dict()
messages = dict()

In [47]:
current_date = datetime(2021,4,14,23,10,11)

In [80]:
class Item:
    def __init__(self, item_id, lister_id, listing_date):
        self.item_id = item_id
        self.lister_id = lister_id
        self.listing_date = listing_date
        self.status = 'active'
        

        

class Message:
    def __init__(self, sender_id, recepient_id, message_id, timestamp):
        self.sender_id = sender_id
        self.recepient_id = recepient_id
        self.message_id = message_id
        self.timestamp = timestamp
        



class User:
    def __init__(self, name, user_id):
        self.name = name
        self.user_id = user_id
        self.registered = False
        self.irritation = 0

    
    
    satisfaction_impact = {
        'registration': 10,
        'message_sent': 1,
        'message_read': 1,
        'list_item': 10,
        'purchase': 20,
        'sale': 20,
        'delete_item': -20,
        'days_listed': -1,
        'search': -1,
        'item_view': -1
    }
    
    
    
    events = {
        'visit': {
            'time': [0, 0],
            'next_events': ['search', 'list_item', 'do_nothing'],
            'probabilities': [0.45, 0.05, 0.5]
        },
        'list_item': {
            'time': [90, 300],
            'next_events': ['search', 'list_item', 'do_nothing'],
            'probabilities': [0.45, 0.05, 0.5]        
        },
        'search': {
            'time': [10, 120],
            'next_events': ['search', 'view_item', 'list_item', 'do_nothing'],
            'probabilities': [0.65, 0.2, 0.01, 0.14]       
        },
        'view_item': {
            'time': [10, 30],
            'next_events': ['view_item', 'search', 'add_to_basket', 'list_item', 'do_nothing'],
            'probabilities': [0.6, 0.25, 0.02, 0.01, 0.12]          
        },
        'add_to_basket': {
            'time': [5, 120],
            'next_events': ['search', 'remove_from_basket', 'pay',  'list_item', 'do_nothing'],
            'probabilities': [0.3, 0.25, 0.35, 0.01, 0.09]        
        },
        'remove_from_basket': {
            'time': [1, 20],
            'next_events': ['search', 'remove_from_basket', 'pay',  'list_item', 'do_nothing'],
            'probabilities': [0.3, 0.1, 0.25, 0.01, 0.34]        
        },
        'pay': {
            'time': [180, 1800],
            'next_events': ['search', 'list_item', 'do_nothing'],
            'probabilities': [0.2, 0.01, 0.79]        
        }
    }
    
    
    
    
    @property
    def satisfaction(self, satisfaction_impact):
        """Calculate user satisfaction level.
        """
        satisfaction = 0
        
        if self.registered:
            satisfaction += satisfaction_impact['registration']
        
        if hasattr(self, 'messages_sent'):
            satisfaction += self.messages_sent * satisfaction_impact['message_sent']

        if hasattr(self, 'messages_read'):
            satisfaction += self.messages_read * satisfaction_impact['message_read']

        if hasattr(self, 'n_listed_items'):
            satisfaction += self.n_listed_items * satisfaction_impact['list_item']           
        
        if hasattr(self, 'n_purchases'):
            satisfaction += self.n_purchases * satisfaction_impact['purchase'] 
        
        if hasattr(self, 'n_sales'):
            satisfaction += self.n_sales * satisfaction_impact['sale'] 

        if hasattr(self, 'item_views'):
            satisfaction += self.item_views * satisfaction_impact['item_view'] 
            
        if hasattr(self, 'searches'):
            satisfaction += self.searches * satisfaction_impact['search'] 
        
        if hasattr(self, 'n_deleted_items'):
            satisfaction += self.n_deleted_items * satisfaction_impact['delete_item'] 
            
        if hasattr(self, 'active_items'):
            for item_id in self.active_items:
                satisfaction += (current_date - items[item_id].listing_date).days * satisfaction_impact['days_listed'] 
            
        return satisfaction


    
    @property
    def churn(self):
        """Indicate churn depends on satisfaction.
        """
        if self.satisfaction < -10:
            self.churned = True
        
        
    
    def visit(self, platform, country, timestamp):
        """User visit event. 
        It's the first touch with the app within a session.
        Event creates / updates user attributes:
            visits: number of visits.
            last_visit: time of the last visit.
            last_activity: time of the last activity.
            last_properties: properties like platform and country.
        
        Parameters:
            timestamp: time of the event.
            platform: platform of the visit: 'ios', 'android', 'web'.
            country: country code of the visit: 'US', 'DE', 'GB' etc.
        """
        self.active_session = True
        self.last_event = 'visit'
        self.last_activity = timestamp
        self.visits = self.visits + 1 if hasattr(self, 'visits') else 1
        self.last_visit = timestamp
        
        self.last_properties = {
            'platform': platform,
            'country': country
        }
    

    
    def create_account(self, timestamp):
        """User creates an account. 
        Parameters:
            timestamp: time of the event.
        """
        self.last_event = 'create_account'
        self.last_activity = timestamp
        self.registered = True
        self.registration_date = timestamp
    
  
    
    def send_message(self, recepient_id, timestamp):
        """User sends message to another user. 
        Parameters:
            recepient_id: id of the user who receives the message.
            timestamp: time of the event.
        """
        self.last_event = 'send_message'
        self.last_activity = timestamp
        self.messages_sent = self.messages_sent + 1 if hasattr(self, 'messages_sent') else 1
        
        # create message id
        message_id = hash(str(self.user_id) + str(recepient_id) + str(timestamp))
        
        # store data to messages dict
        messages[message_id] = Message(sender_id=self.user_id, 
                                       recepient_id=recepient_id, 
                                       message_id=message_id, 
                                       timestamp=timestamp)      
        
        # call receive message function for recepient
        users[recepient_id].receive_message(message_id=message_id, timestamp=timestamp)

    
    
    def receive_message(self, message_id, timestamp):
        """User receives message from another user. 
        The function is executed automatically with send message.
        Parameters:
            recepient_id: id of the user who receives the message.
            timestamp: time of the event.
        """
        self.last_event = 'receive_message'
        self.messages_received = self.messages_received + 1 if hasattr(self, 'messages_received') else 1
        
        # store message to user's unread messages
        if hasattr(self, 'unread_messages'):
            self.unread_messages.append(message_id)
        else:
            self.unread_messages = [message_id]
        
    
    
    def read_message(self, message_id, timestamp):
        """User reads message from another user. 
        Parameters:
            message_id: id of the message.
            timestamp: time of the event.
        """
        self.last_event = 'read_message'
        self.last_activity = timestamp
        self.messages_read = self.messages_read + 1 if hasattr(self, 'messages_read') else 1
        self.unread_messages.remove(message_id)
        
        # store message to user's read messages
        if hasattr(self, 'read_messages'):
            self.read_messages.append(message_id)
        else:
            self.read_messages = [message_id]        



    def list_item(self, timestamp):
        """User lists an item. 
        Parameters:
            timestamp: time of the event.
        """
        self.last_event = 'list_item'
        self.last_activity = timestamp
        item_id = hash(str(self.user_id) + str(timestamp))
        
        self.n_listed_items = self.n_listed_items + 1 if hasattr(self, 'n_listed_items') else 1
        self.listed_items = self.listed_items.append(item_id) if hasattr(self, 'listed_items') else [item_id]
        
        self.active_items = self.n_active_items + 1 if hasattr(self, 'n_active_items') else 1
        self.active_items = self.active_items.append(item_id) if hasattr(self, 'active_items') else [item_id]

        items[item_id] = Item(item_id=item_id, 
                              lister_id=self.user_id, 
                              listing_date=timestamp)
        
    
    
    def search(self, timestamp):
        """User performs a search. 
        Parameters:
            timestamp: time of the event.
        """
        self.last_event = 'search'
        self.searches = self.searches + 1 if hasattr(self, 'searches') else 1
        self.last_activity = timestamp

    
        
    def view_item(self, item_id, timestamp):
        """User views an item. 
        Parameters:
            item_id: id of the item user views.
            timestamp: time of the event.
        """  
        self.last_event = 'view_item'
        self.last_activity = timestamp
        self.item_views = self.item_views + 1 if hasattr(self, 'item_views') else 1

        items[item_id].views = items[item_id].views + 1 if hasattr(items[item_id].views, 'views') else 1
    
    
    
    def add_to_basket(self, item_id, timestamp):
        """User adds an item to the basket. 
        Parameters:
            item_id: id of the item user views.
            timestamp: time of the event.
        """
        self.last_event = 'add_to_basket'
        self.last_activity = timestamp
        self.items_in_basket = self.items_in_basket + 1 if hasattr(self, 'basket') else 1
        
        if hasattr(self, 'basket'):
            self.basket.append(item_id)
        else:
            self.basket = [item_id]

        
    
    def remove_from_basket(self, item_id, timestamp):
        """User removes an item to the basket. 
        Parameters:
            item_id: id of the item user views.
            timestamp: time of the event.
        """
        self.last_event = 'remove_from_basket'
        self.last_activity = timestamp
        self.items_in_basket = self.items_in_basket - 1
        self.basket.remove(item_id)
    
    
    
    def pay(self, timestamp):
        """User pays for item / set of items. 
        Parameters:
            item_id: id of the item user views.
            timestamp: time of the event.
        """
        self.last_event = 'pay'
        self.last_activity = timestamp
        
        for item_id in self.basket:  
            # updateitems attributes
            items[item_id].status = 'sold'
            items[item_id].buyer = self.user_id
            items[item_id].date_sold = timestamp
        
            # update lister's attributes
            lister_id = items[item_id].lister_id
            users[lister_id].sell(timestamp, item_id)
        
        # update buyer's attributes
        if hasattr(self, 'purchased_items'):
            self.purchased_items.extend(self.basket)
            self.n_purchases = self.n_purchases + len(self.basket)
        else:
            self.purchased_items = self.basket
            self.n_purchases = len(self.basket)
        
        # empy basket
        self.basket = []
    
    
    
    def sell(self, item_id, timestamp):
        """User sells an item. 
        Parameters:
            item_id: id of the item user views.
            timestamp: time of the event.
        """
        self.last_event = 'sell'
        self.last_activity = timestamp
        self.active_items.remove(item_id)
        
        if hasattr(self, 'n_sales'):
            self.sold_items.append(item_id)
            self.n_sales = self.n_sales + 1
        else:
            self.sold_items = [item_id]
            self.n_sales = 1



    def delete_items(self, item_id, timestamp):
        """User removes an item. 
        Parameters:
            item_id: id of the item user views.
            timestamp: time of the event.
        """
        self.last_event = 'delete_items'
        self.last_activity = timestamp
        self.active_items.remove(item_id)
        items[item_id].status = 'deleted'
        items[item_id].date_deleted = timestamp
        
        self.n_deleted_items = self.n_deleted_items + 1 if hasattr(self, 'n_deleted_items') else 1
        self.deleted_items = self.deleted_items.append(item_id) if hasattr(self, 'deleted_items') else [item_id]
        
    
    
    def do_nothing(self):
        self.active_session = False

In [None]:
def session(user_id, timestamp):
    if user_id not in users.keys():
        users[user_id] = User(name=str(user_id), user_id='user_id')
    
    users[user_id].visit(timestamp=timestamp, os='ios', country='DE')
    
    while users[user_id].active_session:
        last_event = users[user_id].last_event
        
        next_events = events[last_event]['next_events'].copy()
        probabilities = events[last_event]['probabilities'].copy()
        
        if users[user_id].registered == False:
            next_events.append('create_account')
            probabilities = [prob * 0.8 for prob in probabilities]
            probabilities.append(0.2)

In [81]:
# create randomizer
rand = np.random.default_rng(seed=1)

In [82]:
for i in range(1, 1001):
    users[i] = User(name=str(i), user_id=i)
    users[i].visit(timestamp=datetime.now().strftime('%Y-%m-%d %H:%M:%S'), os='ios', country='DE')
    
    if rand.binomial(n=1, p=0.8) == 1:
        users[i].create_account(timestamp=datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    
    if hasattr(users[i], 'registered') and users[i].registered:
        if rand.binomial(n=1, p=0.05) == 1:
            users[i].list_item(timestamp=datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

In [84]:
# Check users
for i in range(1, 21):
    args = {
        'user_id': users[i].user_id, 
        'registration_date': users[i].registration_date if hasattr(users[i], 'registered') else 'not registered'
    }
    
    print('user_id: {user_id}; registration date: {registration_date}'.format(**args))

user_id: 1; registration date: 2021-04-06 23:08:29
user_id: 2; registration date: 2021-04-06 23:08:29
user_id: 3; registration date: 2021-04-06 23:08:29
user_id: 4; registration date: not registered
user_id: 5; registration date: 2021-04-06 23:08:29
user_id: 6; registration date: 2021-04-06 23:08:29
user_id: 7; registration date: 2021-04-06 23:08:29
user_id: 8; registration date: 2021-04-06 23:08:29
user_id: 9; registration date: 2021-04-06 23:08:29
user_id: 10; registration date: 2021-04-06 23:08:29
user_id: 11; registration date: 2021-04-06 23:08:29
user_id: 12; registration date: 2021-04-06 23:08:29
user_id: 13; registration date: not registered
user_id: 14; registration date: not registered
user_id: 15; registration date: 2021-04-06 23:08:29
user_id: 16; registration date: 2021-04-06 23:08:29
user_id: 17; registration date: not registered
user_id: 18; registration date: 2021-04-06 23:08:29
user_id: 19; registration date: 2021-04-06 23:08:29
user_id: 20; registration date: 2021-04-0

In [85]:
# Check items
for i in range(1, 21):
    args = {
        'item_id': items[i].item_id, 
        'lister_id': items[i].lister_id,
        'listing_date': items[i].listing_date
    }
    
    print('item_id: {item_id}; lister_id date: {lister_id}; listing date: {listing_date}'.format(**args))

item_id: 1; lister_id date: 1; listing date: 2021-04-06 23:08:29
item_id: 2; lister_id date: 62; listing date: 2021-04-06 23:08:29
item_id: 3; lister_id date: 91; listing date: 2021-04-06 23:08:29
item_id: 4; lister_id date: 95; listing date: 2021-04-06 23:08:29
item_id: 5; lister_id date: 123; listing date: 2021-04-06 23:08:29
item_id: 6; lister_id date: 129; listing date: 2021-04-06 23:08:29
item_id: 7; lister_id date: 163; listing date: 2021-04-06 23:08:29
item_id: 8; lister_id date: 219; listing date: 2021-04-06 23:08:29
item_id: 9; lister_id date: 232; listing date: 2021-04-06 23:08:29
item_id: 10; lister_id date: 258; listing date: 2021-04-06 23:08:29
item_id: 11; lister_id date: 327; listing date: 2021-04-06 23:08:29
item_id: 12; lister_id date: 330; listing date: 2021-04-06 23:08:29
item_id: 13; lister_id date: 358; listing date: 2021-04-06 23:08:29
item_id: 14; lister_id date: 373; listing date: 2021-04-06 23:08:29
item_id: 15; lister_id date: 391; listing date: 2021-04-06 23:

## Writing data to bigquery

In [27]:
from google.cloud import storage
from google.cloud import bigquery

import sys
import os

In [44]:
bigquery_client = bigquery.Client.from_service_account_json('../../credentials/data-analysis-sql-309220-6ce084250abd.json')

In [41]:
countries = ['UK', 'DE', 'AT']
countries_probs = [0.5, 0.4, 0.1]

agents = ['android', 'ios', 'web']
agents_probs = [0.4, 0.3, 0.3]

rand = np.random.default_rng(seed=1)

objects = []
for i in range(1000):
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    object = {
        'timestamp': timestamp,
        'id': str(hash(timestamp)),
        'nested': {
            'os': rand.choice(a=agents, p=agents_probs),
            'country': rand.choice(a=countries, p=countries_probs)
        }
    }
    
    objects.append(object)
    
    time.sleep(0.01)

In [45]:
bq_error = bigquery_client.insert_rows_json('data-analysis-sql-309220.synthetic.nested_test', objects)
if bq_error != []:
    print(bq_error) 