In [5]:
import numpy as np
from datetime import datetime, timedelta
import time

In [53]:
# some probabilities should be dynamics, for example:
# buying probability depends on the number of available items
# listing probability increases if user has sold something in the past
# probability of churn increases if user hasn't listed + hasn't bought anything + doesn't have anything in the basket
# instead of using random choise for time, we should use distribution (exponential, binomial, normal etc)
events = {
    'visit': {
        'condition': True,
        'inputs': 'timestamp',
        'time': [0, 20],
        'next_events': ['search', 'list_item', 'do_nothing'],
        'probabilities': [0.6, 0.05, 0.35]
    },
    'create_account': {
        'time': [30, 150],
        'next_events': ['search', 'list_item', 'do_nothing'],
        'probabilities': [0.65, 0.15, 0.2]        
    },
    'list_item': {
        'conditions': ['registered'],
        'time': [90, 300],
        'next_events': ['search', 'list_item', 'do_nothing'],
        'probabilities': [0.3, 0.1, 0.6]        
    },
    'search': {
        'time': [10, 120],
        'next_events': ['search', 'view_item', 'list_item', 'do_nothing'],
        'probabilities': [0.6, 0.25, 0.01, 0.14]       
    },
    'view_item': {
        'time': [10, 30],
        'next_events': ['view_item', 'send_message', 'search', 'add_to_basket', 'list_item', 'do_nothing'],
        'probabilities': [0.4, 0.1, 0.25, 0.05, 0.01, 0.19]          
    },
    'send_message': {
        'time': [10, 30],
        'next_events': ['view_item', 'search', 'add_to_basket', 'list_item', 'do_nothing'],
        'probabilities': [0.5, 0.25, 0.05, 0.01, 0.19]          
    },
    'add_to_basket': {
        'conditions': ['registered'],
        'time': [5, 120],
        'next_events': ['search', 'remove_from_basket', 'pay',  'list_item', 'do_nothing'],
        'probabilities': [0.2, 0.2, 0.4, 0.01, 0.19]        
    },
    'remove_from_basket': {
        'conditions': ['registered'],
        'time': [1, 20],
        'next_events': ['search', 'remove_from_basket', 'pay',  'list_item', 'do_nothing'],
        'probabilities': [0.2, 0.2, 0.2, 0.01, 0.39]        
    },
    'pay': {
        'conditions': ['registered'],
        'time': [180, 1800],
        'next_events': ['search', 'list_item', 'do_nothing'],
        'probabilities': [0.2, 0.01, 0.79]        
    },
    'do_nothing': {}
}



# the idea was to get every next event, but it may be better just to execute events with this function
# so the function not return anything, but run the next event, which in it's order will run the next event etc.
def get_next_event(event_name, user_id, timestamp, params):
    seed = datetime.now().microsecond
    rand = np.random.default_rng(seed=seed)
    
    next_event = rand.choice(a=params[event_name]['next_events'], p=params[event_name]['probabilities'], size=1)[0]
    
    if next_event == False:
        return False
    
    elif params[event_name]['condition'] or eval('users[{}].{}'.format(user_id, params[event_name]['condition'])):
        seconds = rand.choice(a=range(params[event_name]['time'][0], params[event_name]['time'][1]), size=1)[0]
        next_timestamp = timestamp + timedelta(seconds=seconds)
        return next_event, next_timestamp
    
    else:
        get_next_event(event_name, user_id, timestamp, params)

In [54]:
def create_event_data(event_name, user_id, timestamp, properties=None):
    d = {
        'event_name': event_name,
        'user_id': user_id,
        'timestamp': timestamp
    }
    
    if properties is not None:
        for p in properties.keys():
            d[p] = properties
    
    return d

In [131]:
users = dict()
items = dict()
messages = dict()

In [132]:
current_date = datetime(2021,4,17,23,10,11)

In [133]:
class Item:
    def __init__(self, item_id, lister_id, listing_date):
        self.item_id = item_id
        self.lister_id = lister_id
        self.listing_date = listing_date
        self.status = 'active'
        

        

class Message:
    def __init__(self, sender_id, recepient_id, message_id, timestamp):
        self.sender_id = sender_id
        self.recepient_id = recepient_id
        self.message_id = message_id
        self.timestamp = timestamp
        



class User:
    def __init__(self, name, user_id):
        self.name = name
        self.user_id = user_id
        self.registered = False
        self.irritation = 0

    
    
    satisfaction_impact = {
        'registration': 10,
        'message_sent': 1,
        'message_read': 1,
        'list_item': 10,
        'purchase': 20,
        'sale': 20,
        'delete_item': -20,
        'days_listed': -1,
        'search': -1,
        'item_view': -1
    }
    
    
    
    @property
    def satisfaction(self, satisfaction_impact):
        """Calculate user satisfaction level.
        """
        satisfaction = 0
        
        if self.registered:
            satisfaction += satisfaction_impact['registration']
        
        if hasattr(self, 'messages_sent'):
            satisfaction += self.messages_sent * satisfaction_impact['message_sent']

        if hasattr(self, 'messages_read'):
            satisfaction += self.messages_read * satisfaction_impact['message_read']

        if hasattr(self, 'n_listed_items'):
            satisfaction += self.n_listed_items * satisfaction_impact['list_item']           
        
        if hasattr(self, 'n_purchases'):
            satisfaction += self.n_purchases * satisfaction_impact['purchase'] 
        
        if hasattr(self, 'n_sales'):
            satisfaction += self.n_sales * satisfaction_impact['sale'] 

        if hasattr(self, 'item_views'):
            satisfaction += self.item_views * satisfaction_impact['item_view'] 
            
        if hasattr(self, 'searches'):
            satisfaction += self.searches * satisfaction_impact['search'] 
        
        if hasattr(self, 'n_deleted_items'):
            satisfaction += self.n_deleted_items * satisfaction_impact['delete_item'] 
            
        if hasattr(self, 'active_items'):
            for item_id in self.active_items:
                satisfaction += (current_date - items[item_id].listing_date).days * satisfaction_impact['days_listed'] 
            
        return satisfaction


    
    @property
    def churn(self):
        """Indicate churn depends on satisfaction.
        """
        if self.satisfaction < -10:
            self.churned = True

            
    
    @property
    def items_in_basket(self):
        """Calculate items in basket
        """
        return len(self.basket) if hasattr(self, 'basket') else 0
    
    
    
    @property
    def n_listed_items(self):
        """Calculate number of listed items
        """
        return len(self.listed_items) if hasattr(self, 'listed_items') else 0 
    
    
    
    @property
    def n_active_items(self):
        """Calculate number of active items
        """
        return len(self.active_items) if hasattr(self, 'active_items') else 0 

    
    
    @property
    def n_deleted_items(self):
        """Calculate number of deleted items
        """
        return len(self.deleted_items) if hasattr(self, 'deleted_items') else 0 
        
 

    @property
    def n_sales(self):
        """Calculate number of sold items
        """
        return len(self.sold_items) if hasattr(self, 'sold_items') else 0 
        
 

    @property
    def n_purchases(self):
        """Calculate number of purchased items
        """
        return len(self.purchased_items) if hasattr(self, 'purchased_items') else 0 


    
    @property
    def n_messages_sent(self):
        """Calculate number of sent messages
        """
        return len(self.messages_sent) if hasattr(self, 'messages_sent') else 0     


    
    @property
    def n_messages_received(self):
        """Calculate number of received messages
        """
        return len(self.messages_received) if hasattr(self, 'messages_received') else 0    


    
    @property
    def n_unread_messages(self):
        """Calculate number of received messages
        """
        return len(self.unread_messages) if hasattr(self, 'unread_messages') else 0   

    
    
    @property
    def n_read_messages(self):
        """Calculate number of read messages
        """
        return len(self.messages_read) if hasattr(self, 'messages_read') else 0  
    

    
    def visit(self, platform, country, timestamp):
        """User visit event. 
        It's the first touch with the app within a session.
        Event creates / updates user attributes:
            visits: number of visits.
            last_visit: time of the last visit.
            last_activity: time of the last activity.
            last_properties: properties like platform and country.
        
        Parameters:
            timestamp: time of the event.
            platform: platform of the visit: 'ios', 'android', 'web'.
            country: country code of the visit: 'US', 'DE', 'GB' etc.
        """
        self.active_session = True
        self.last_event = 'visit'
        self.last_activity = timestamp
        self.visits = self.visits + 1 if hasattr(self, 'visits') else 1
        self.last_visit = timestamp
        
        self.last_properties = {
            'platform': platform,
            'country': country
        }
        
        print(self.last_event, timestamp)
    

    
    def create_account(self, timestamp):
        """User creates an account. 
        Parameters:
            timestamp: time of the event.
        """
        self.last_event = 'create_account'
        self.last_activity = timestamp
        self.registered = True
        self.registration_date = timestamp
        
        print(self.last_event, timestamp)
    
  
    
    def send_message(self, timestamp):
        """User sends message to another user. 
        Parameters:
            recepient_id: id of the user who receives the message.
            timestamp: time of the event.
        """
        self.last_event = 'send_message'
        self.last_activity = timestamp
        
        # create message id
        recepient_id = items[self.open_item].lister_id
        message_id = hash(str(self.user_id) + str(recepient_id) + str(timestamp))
        
        # add messages to user attributes
        if hasattr(self, 'messages_sent'):
            self.messages_sent.append(message_id)
        else:
            self.messages_sent = [message_id]
        
        # store data to messages dict
        messages[message_id] = Message(sender_id=self.user_id, 
                                       recepient_id=recepient_id, 
                                       message_id=message_id, 
                                       timestamp=timestamp)      
        
        # update recepient attributes
        if hasattr(users[recepient_id], 'messages_received'):
            users[recepient_id].messages_received.append(message_id)
        else: 
            users[recepient_id].messages_received = [message_id]
        
        if hasattr(users[recepient_id], 'unread_messages'):
            users[recepient_id].unread_messages.append(message_id)
        else:
            users[recepient_id].unread_messages = [message_id]        
            
        print(self.last_event, timestamp)
        
    
    
    def read_message(self, timestamp):
        """User reads message from another user. 
        Parameters:
            message_id: id of the message.
            timestamp: time of the event.
        """
        self.last_event = 'read_message'
        self.last_activity = timestamp
        
        rand = np.random.default_rng(seed=abs(hash(timestamp)))
        message_id = rand.choice(a=self.unread_messages)
        self.unread_messages.remove(message_id)
        
        # store message to user's read messages
        if hasattr(self, 'read_messages'):
            self.read_messages.append(message_id)
        else:
            self.read_messages = [message_id]
        
        print(self.last_event, timestamp)



    def list_item(self, timestamp):
        """User lists an item. 
        Parameters:
            timestamp: time of the event.
        """
        self.last_event = 'list_item'
        self.last_activity = timestamp
        
        item_id = hash(str(self.user_id) + str(timestamp))
        
        if hasattr(self, 'listed_items'):
            self.listed_items.append(item_id)
        else:
            self.listed_items = [item_id]

        if hasattr(self, 'active_items'):
            self.active_items.append(item_id)
        else:
            self.active_items = [item_id]

        items[item_id] = Item(item_id=item_id, 
                              lister_id=self.user_id, 
                              listing_date=timestamp)
        
        print(self.last_event, timestamp)
        
    
    
    def search(self, timestamp):
        """User performs a search. 
        Parameters:
            timestamp: time of the event.
        """
        self.last_event = 'search'
        self.searches = self.searches + 1 if hasattr(self, 'searches') else 1
        self.last_activity = timestamp
        
        rand = np.random.default_rng(seed=abs(hash(timestamp)))
        self.available_items = rand.choice(a=list(items.keys()), size=20 if len(items.keys())>=20 else len(items.keys()), replace=False)

        print(self.last_event, timestamp)
        
    
        
    def view_item(self, timestamp):
        """User views an item. 
        Parameters:
            timestamp: time of the event.
        """  
        self.last_event = 'view_item'
        self.last_activity = timestamp
        self.item_views = self.item_views + 1 if hasattr(self, 'item_views') else 1
        
        rand = np.random.default_rng(seed=abs(hash(timestamp)))
        item_id = rand.choice(a=self.available_items)
        self.open_item = item_id
        items[item_id].views = items[item_id].views + 1 if hasattr(items[item_id], 'views') else 1
        
        print(self.last_event, timestamp)
    
    
    
    def add_to_basket(self, timestamp):
        """User adds an item to the basket. 
        Parameters:
            timestamp: time of the event.
        """
        self.last_event = 'add_to_basket'
        self.last_activity = timestamp
        
        if hasattr(self, 'basket'):
            self.basket.append(self.open_item)
        else:
            self.basket = [self.open_item]
            
        print(self.last_event, timestamp)

        
    
    def remove_from_basket(self, timestamp):
        """User removes an item to the basket. 
        Parameters:
            timestamp: time of the event.
        """
        self.last_event = 'remove_from_basket'
        self.last_activity = timestamp
        
        rand = np.random.default_rng(seed=abs(hash(timestamp)))
        item_id = rand.choice(a=self.basket)
        self.basket.remove(item_id)
        
        print(self.last_event, timestamp)
        
    
    
    def pay(self, timestamp):
        """User pays for item / set of items. 
        Parameters:
            item_id: id of the item user views.
            timestamp: time of the event.
        """
        self.last_event = 'pay'
        self.last_activity = timestamp
        
        for item_id in self.basket:  
            # updateitems attributes
            items[item_id].status = 'sold'
            items[item_id].buyer = self.user_id
            items[item_id].date_sold = timestamp
        
            # update lister's attributes
            lister_id = items[item_id].lister_id
            users[lister_id].active_items.remove(item_id)
            
            if hasattr(users[lister_id], 'sold_items'):
                users[lister_id].sold_items.append(item_id)
            else:
                users[lister_id].sold_items = [item_id]
        
        # update buyer's attributes
        if hasattr(self, 'purchased_items'):
            self.purchased_items.extend(self.basket)
        else:
            self.purchased_items = self.basket
        
        # empy basket
        self.basket = []
        
        print(self.last_event, timestamp)



    def delete_items(self, item_id, timestamp):
        """User removes an item. 
        Parameters:
            item_id: id of the item user views.
            timestamp: time of the event.
        """
        self.last_event = 'delete_items'
        self.last_activity = timestamp
        self.active_items.remove(item_id)
        items[item_id].status = 'deleted'
        items[item_id].date_deleted = timestamp
        
        if hasattr(self, 'deleted_items'):
            self.deleted_items.append(item_id)
        else:
            self.deleted_items = [item_id]

        
    
    def do_nothing(self, timestamp):
        self.active_session = False

In [134]:
def session(user_id, timestamp):
    if user_id not in users.keys():
        users[user_id] = User(name=str(user_id), user_id='user_id')
    
    users[user_id].visit(timestamp=timestamp, platform='ios', country='DE')
    
    # number of the event
    n = 0
    
    while users[user_id].active_session:
        last_event = users[user_id].last_event
        
        next_events = events[last_event]['next_events'].copy()
        probabilities = events[last_event]['probabilities'].copy()
        
        # adjust registration probability
        if users[user_id].registered == False:
            # add registration as potential event
            next_events.append('create_account')
            probabilities = [prob * 0.8 for prob in probabilities]
            probabilities.append(0.2)
            
            # remove all events that not possible visout registration
            for event in next_events:
                if 'conditions' in events[event].keys() and 'registered' in events[event]['conditions']:
                    # remove event
                    index = next_events.index(event)
                    next_events.remove(event)
                    probabilities.pop(index)
        
        
        # with every event probability of do nothing grows
        if 'do_nothing' in next_events:
            index = next_events.index('do_nothing')
            probabilities[index] = probabilities[index] * (1 + n/100)
            
        # normalize probabilities
        total_p = sum(probabilities)
        probabilities = [p/total_p for p in probabilities]
        probabilities[0] = probabilities[0] + 1-sum(probabilities)
                     
        rand = np.random.default_rng(seed=timestamp.second+int(str(user_id)[-1]))
        next_event = rand.choice(a=next_events, p=probabilities)
        

        
        time_delta = int(rand.integers(low=events[last_event]['time'][0], high=events[last_event]['time'][1]))
        timestamp = timestamp + timedelta(seconds=time_delta)
        args = {'next_event': next_event, 'next_timestamp': timestamp}
        
        eval("users[user_id].{next_event}(timestamp='{next_timestamp}')".format(**args))
        n += 1

In [135]:
# create initial set of items
users[1] = User(name='first user', user_id=1)

start_date = datetime.now() - timedelta(seconds=300)
users[1].create_account(timestamp=start_date)

for i in range(100):
    users[1].list_item(timestamp=start_date + timedelta(seconds=i+1))

create_account 2021-04-17 19:41:29.481573
list_item 2021-04-17 19:41:30.481573
list_item 2021-04-17 19:41:31.481573
list_item 2021-04-17 19:41:32.481573
list_item 2021-04-17 19:41:33.481573
list_item 2021-04-17 19:41:34.481573
list_item 2021-04-17 19:41:35.481573
list_item 2021-04-17 19:41:36.481573
list_item 2021-04-17 19:41:37.481573
list_item 2021-04-17 19:41:38.481573
list_item 2021-04-17 19:41:39.481573
list_item 2021-04-17 19:41:40.481573
list_item 2021-04-17 19:41:41.481573
list_item 2021-04-17 19:41:42.481573
list_item 2021-04-17 19:41:43.481573
list_item 2021-04-17 19:41:44.481573
list_item 2021-04-17 19:41:45.481573
list_item 2021-04-17 19:41:46.481573
list_item 2021-04-17 19:41:47.481573
list_item 2021-04-17 19:41:48.481573
list_item 2021-04-17 19:41:49.481573
list_item 2021-04-17 19:41:50.481573
list_item 2021-04-17 19:41:51.481573
list_item 2021-04-17 19:41:52.481573
list_item 2021-04-17 19:41:53.481573
list_item 2021-04-17 19:41:54.481573
list_item 2021-04-17 19:41:55.481

In [136]:
# create events for the first users
for i in range(2,101):
    print('\nUSER: {}'.format(i))
    users[i] = User(name='{} user'.format(i), user_id=i)
    session(user_id=i, timestamp=start_date + timedelta(seconds=150+i))


USER: 2
visit 2021-04-17 19:44:01.481573
search 2021-04-17 19:44:04.481573
view_item 2021-04-17 19:45:10.481573
view_item 2021-04-17 19:45:39.481573
create_account 2021-04-17 19:46:05.481573
search 2021-04-17 19:47:57.481573
search 2021-04-17 19:49:29.481573

USER: 3
visit 2021-04-17 19:44:02.481573
create_account 2021-04-17 19:44:02.481573

USER: 4
visit 2021-04-17 19:44:03.481573

USER: 5
visit 2021-04-17 19:44:04.481573
create_account 2021-04-17 19:44:23.481573

USER: 6
visit 2021-04-17 19:44:05.481573
search 2021-04-17 19:44:20.481573
view_item 2021-04-17 19:45:30.481573
view_item 2021-04-17 19:45:48.481573
view_item 2021-04-17 19:46:04.481573
create_account 2021-04-17 19:46:19.481573
search 2021-04-17 19:48:31.481573
view_item 2021-04-17 19:49:50.481573
search 2021-04-17 19:50:07.481573

USER: 7
visit 2021-04-17 19:44:06.481573
create_account 2021-04-17 19:44:22.481573
search 2021-04-17 19:46:03.481573

USER: 8
visit 2021-04-17 19:44:07.481573

USER: 9
visit 2021-04-17 19:44:08.4

## Writing data to bigquery

In [27]:
from google.cloud import storage
from google.cloud import bigquery

import sys
import os

In [44]:
bigquery_client = bigquery.Client.from_service_account_json('../../credentials/data-analysis-sql-309220-6ce084250abd.json')

In [41]:
countries = ['UK', 'DE', 'AT']
countries_probs = [0.5, 0.4, 0.1]

agents = ['android', 'ios', 'web']
agents_probs = [0.4, 0.3, 0.3]

rand = np.random.default_rng(seed=1)

objects = []
for i in range(1000):
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    object = {
        'timestamp': timestamp,
        'id': str(hash(timestamp)),
        'nested': {
            'os': rand.choice(a=agents, p=agents_probs),
            'country': rand.choice(a=countries, p=countries_probs)
        }
    }
    
    objects.append(object)
    
    time.sleep(0.01)

In [45]:
bq_error = bigquery_client.insert_rows_json('data-analysis-sql-309220.synthetic.nested_test', objects)
if bq_error != []:
    print(bq_error) 