In [1]:
import sqlalchemy as sa
from dotenv import load_dotenv
import pandas as pd
import os
import sys

# Make the parent directory of the notebook importable so that project
# packages (e.g. `schemas`) can be imported by later cells.
root_path = os.path.abspath('..')
if root_path not in sys.path:
    sys.path.append(root_path)


# Load DB_* settings from a .env file into the process environment.
load_dotenv()

# Build the URL from its components instead of formatting a string:
# URL.create escapes special characters (such as '@', '/' or ':') in the
# credentials, and an unset variable is passed as None rather than being
# rendered as the literal text "None".
db_port = os.getenv('DB_PORT')
conn_url = sa.engine.URL.create(
    'postgresql',
    username=os.getenv('DB_USER'),
    password=os.getenv('DB_PASSWORD'),
    host=os.getenv('DB_HOST'),
    port=int(db_port) if db_port else None,
    database='cosmetic',
)
# Kept as a plain string for any later cell that still expects `conn_string`.
conn_string = conn_url.render_as_string(hide_password=False)
engine = sa.create_engine(conn_url)

In [3]:
import datetime
from sqlalchemy import orm
from collections import defaultdict
import csv

from schemas.data_by_week import DataByWeek
from schemas.parameter import Parameter
from schemas.product import Product


# Number of preceding weeks whose activity is appended to every record as lag
# columns. os.environ[...] (rather than os.getenv) makes a missing variable
# fail with a KeyError naming PREV_WEEKS instead of a TypeError from int(None).
prev_weeks = int(os.environ['PREV_WEEKS'])

# Per-week activity counters that are standardised and reused as lag columns.
# The order here fixes the column order of the generated CSV files.
ACTIVITY_FIELDS = ('view', 'cart', 'remove_from_cart', 'purchase')
OUTPUT_DIR = '../datasets'
ONE_WEEK = datetime.timedelta(weeks=1)


with orm.Session(engine) as session, session.begin():
    # Product lookup keyed by (product_id, category_id).
    products: dict[tuple[int, int], Product] = {
        (product.product_id, product.category_id): product
        for product in session.scalars(sa.select(Product)).all()
    }

    # Normalisation constants, stored as `mean_<name>` / `std_<name>` entries.
    parameters: dict[str, float] = {
        parameter.name: parameter.value
        for parameter in session.scalars(sa.select(Parameter)).all()
    }

    def standardize(value, name):
        """Return (value - mean_<name>) / std_<name> using the loaded parameters."""
        return (value - parameters[f'mean_{name}']) / parameters[f'std_{name}']

    def zero_activity():
        """Return the standardised activity of a product with no counts.

        Used as the default when a product has no entry for an earlier week;
        each value is -mean/std, i.e. the standardised form of a zero count.
        """
        return {
            field: -parameters[f'mean_{field}'] / parameters[f'std_{field}']
            for field in ACTIVITY_FIELDS
        }

    # memorized[week][(product_id, category_id)] -> standardised activity.
    # Both levels are defaultdicts, so looking up a missing week or product
    # yields (and stores) the zero-activity values.
    memorized = defaultdict(lambda: defaultdict(zero_activity))

    # The original code assumed this directory already exists.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    d = datetime.date(2019, 9, 30)
    end = datetime.date(2020, 3, 1)
    while d < end:
        records: list[dict[str, float]] = []
        print(d)
        weekly_rows = session.scalars(
            sa.select(DataByWeek)
            .where(DataByWeek.date == d)
            .order_by(DataByWeek.rank.asc())
        )
        for row in weekly_rows:
            key = (row.product_id, row.category_id)
            activity = {
                field: standardize(getattr(row, field), field)
                for field in ACTIVITY_FIELDS
            }
            # Always remember this week's activity, even for rows that are
            # filtered out below, so later weeks can use it as a lag feature.
            memorized[d][key] = activity

            product = products[key]
            # Only rows dated strictly after the product's release date are
            # exported; skip the rest before building the full record.
            if d <= product.release_date:
                continue

            record = {
                **activity,
                'days_on_shelf': standardize((d - product.release_date).days, 'days_on_shelf'),
                'rank': standardize(row.rank, 'rank'),
                'rank_in_category': standardize(row.rank_in_category, 'rank_in_category'),
                # All three price columns share the same mean_price/std_price.
                'max_price': standardize(row.max_price, 'price'),
                'min_price': standardize(row.min_price, 'price'),
                'avg_price': standardize(row.avg_price, 'price'),
            }
            for i in range(1, prev_weeks + 1):
                lagged = memorized[d - i * ONE_WEEK][key]
                for field in ACTIVITY_FIELDS:
                    record[f'{field}_prev{i}'] = lagged[field]

            records.append(record)

        # The oldest cached week is no longer needed by any later iteration.
        # pop(..., None) instead of `del`: deleting from a defaultdict does not
        # create missing keys, so `del` raises KeyError when that week was
        # never touched (e.g. the first weeks returned no rows).
        memorized.pop(d - prev_weeks * ONE_WEEK, None)

        name = d.strftime('%Y-%m-%d')
        path = f'{OUTPUT_DIR}/{name}.csv'
        if records:
            with open(path, 'w', newline='') as output_file:
                writer = csv.DictWriter(output_file, records[0].keys())
                writer.writeheader()
                writer.writerows(records)

        d += ONE_WEEK



2019-09-30
2019-10-07
2019-10-14
2019-10-21
2019-10-28
2019-11-04
2019-11-11
2019-11-18
2019-11-25
2019-12-02
2019-12-09
2019-12-16
2019-12-23
2019-12-30
2020-01-06
2020-01-13
2020-01-20
2020-01-27
2020-02-03
2020-02-10
2020-02-17
2020-02-24
