In [None]:
%load_ext autoreload
%autoreload 2

from dotenv import load_dotenv
import pandas as pd
import numpy as np
import os
import sys
import sqlalchemy as sa

# Put the notebook's parent directory on the import path (once), so modules
# that live one level up can be imported by later cells -- presumably the
# `schemas` and `training` packages used below.
root_path = os.path.abspath('..')
already_importable = root_path in sys.path
if not already_importable:
    sys.path += [root_path]
    

# Load environment variables via python-dotenv (presumably from a local .env
# file). Later cells read DB_USER, DB_PASSWORD, DB_HOST, DB_PORT and PREV_DAYS
# with os.getenv, so this has to run before them.
load_dotenv()

In [None]:
from keras.models import load_model

# Load the four saved Keras models, one per event type, in a fixed order.
# Each is later asked to predict its event's value from the same feature matrix.
(
    model_view,
    model_cart,
    model_remove_from_cart,
    model_purchase,
) = map(load_model, (
    '../model/d-view.keras',
    '../model/d-cart.keras',
    '../model/d-remove_from_cart.keras',
    '../model/d-purchase.keras',
))

In [None]:
# Build the connection URL from its components instead of string formatting:
# URL.create escapes special characters in the user name / password (an '@',
# ':' or '/' in the password would otherwise corrupt the URL), and an unset
# variable becomes "not specified" rather than the literal text "None".
db_port = os.getenv('DB_PORT')
db_url = sa.engine.URL.create(
    drivername='postgresql',
    username=os.getenv('DB_USER'),
    password=os.getenv('DB_PASSWORD'),
    host=os.getenv('DB_HOST'),
    port=int(db_port) if db_port else None,
    database='cosmetic',
)

# Kept as a plain string for any code that still refers to `conn_string`.
# NOTE: it contains the password in clear text -- do not print or display it.
conn_string = db_url.render_as_string(hide_password=False)

# create_engine is lazy: no connection is opened until the engine is first used.
engine = sa.create_engine(db_url)

In [None]:
def df_to_X_y(df: pd.DataFrame):
    """Split a frame into a feature matrix and a target matrix by column position.

    The split is purely positional: columns 0-1 are dropped, columns 2-5 become
    the targets and every column from index 6 onwards becomes a feature, so the
    caller is responsible for the column order of ``df``.

    Args:
        df: Frame whose columns are laid out as described above.

    Returns:
        A ``(X, y)`` tuple of NumPy arrays: ``X`` holds columns 6 and later,
        ``y`` holds columns 2 through 5.
    """
    target_columns = slice(2, 6)
    feature_columns = slice(6, None)
    values = df.to_numpy()
    return values[:, feature_columns], values[:, target_columns]


In [None]:
from collections import defaultdict
import datetime
from sqlalchemy import orm

from schemas.product import Product
from schemas.data_by_date import DataByDate

# Number of preceding days whose metrics are fed to the models as lag features.
# Fail with an explicit message instead of the opaque TypeError of int(None).
prev_days_env = os.getenv('PREV_DAYS')
if prev_days_env is None:
    raise RuntimeError('PREV_DAYS is not set; define it in the environment or in .env')
prev_days = int(prev_days_env)

# Event types, in the order their columns must appear. The order is
# load-bearing: df_to_X_y slices targets/features by column position, and the
# lag features are emitted in this order for every previous day.
METRICS = ('view', 'cart', 'remove_from_cart', 'purchase')

# One model per event type; every model predicts from the same feature matrix.
MODELS = {
    'view': model_view,
    'cart': model_cart,
    'remove_from_cart': model_remove_from_cart,
    'purchase': model_purchase,
}

# Products on the shelf for fewer days than this are not predicted; their
# observed metrics are carried forward instead.
MIN_DAYS_ON_SHELF = 7

OUTPUT_DIR = '../.data/predict'


def _actual_metrics(row):
    """Return the observed value of every metric of a DataByDate row as a dict."""
    return {metric: getattr(row, metric) for metric in METRICS}


def _rows_for(session, day):
    """Iterate over the DataByDate rows of `day`, ordered by ascending rank."""
    query = (
        sa.select(DataByDate)
        .where(DataByDate.date == day)
        .order_by(DataByDate.rank.asc())
    )
    return session.scalars(query)


# to_csv does not create missing directories.
os.makedirs(OUTPUT_DIR, exist_ok=True)

with orm.Session(engine) as session, session.begin():
    # Product lookup keyed by (product_id, category_id); used for release_date.
    products: dict[tuple[int, int], Product] = {
        (product.product_id, product.category_id): product
        for product in session.scalars(sa.select(Product)).all()
    }

    # memorized[date][(product_id, category_id)] -> {metric: value}.
    # Unknown dates/products read as all-zero metrics.
    memorized = defaultdict(lambda: defaultdict(lambda: dict.fromkeys(METRICS, 0)))

    # Seed the history with the observed metrics of the `prev_days` days that
    # precede the first forecast day.
    forecast_start = datetime.date(2020, 2, 15)
    for days_back in range(prev_days, 0, -1):
        d = forecast_start - datetime.timedelta(days=days_back)
        for row in _rows_for(session, d):
            memorized[d][(row.product_id, row.category_id)] = _actual_metrics(row)
    d = forecast_start

    # Forecast one day at a time up to (excluding) `end`. Each day's
    # predictions are stored in `memorized`, so later days use predicted --
    # not observed -- values as their lag features.
    end = datetime.date(2020, 3, 1)
    while d < end:
        records: list[dict] = []
        for row in _rows_for(session, d):
            key = (row.product_id, row.category_id)
            days_on_shelf = (d - products[key].release_date).days

            if days_on_shelf < MIN_DAYS_ON_SHELF:
                # Too new to predict: remember what was actually observed.
                memorized[d][key] = _actual_metrics(row)
                continue

            # Key order defines the DataFrame column order, which df_to_X_y
            # relies on: 2 id columns, 4 targets, then the features.
            record = {
                'product_id': row.product_id,
                'category_id': row.category_id,
                'view': row.view,
                'cart': row.cart,
                'remove_from_cart': row.remove_from_cart,
                'purchase': row.purchase,
                'rank': row.rank,
                'rank_in_category': row.rank_in_category,
                'days_on_shelf': days_on_shelf,
                'price': row.max_price,
            }
            for i in range(1, prev_days + 1):
                lagged = memorized[d - datetime.timedelta(days=i)][key]
                for metric in METRICS:
                    record[f'{metric}_prev{i}'] = lagged[metric]
            records.append(record)

        # The oldest day falls out of the lag window. pop() with a default
        # tolerates a day that was never populated, where `del` would raise.
        memorized.pop(d - datetime.timedelta(days=prev_days), None)

        # A day without predictable rows yields an empty frame, on which the
        # column selection below would fail; skip straight to the next day.
        if records:
            df = pd.DataFrame.from_records(records)
            X, y = df_to_X_y(df)  # y (the observed targets) is not used for inference
            for metric, model in MODELS.items():
                df[f'predicted_{metric}'] = model.predict(X)[:, 0]

            result_df = df[[
                'product_id',
                'category_id',
                'view',
                'predicted_view',
                'cart',
                'predicted_cart',
                'remove_from_cart',
                'predicted_remove_from_cart',
                'purchase',
                'predicted_purchase',
            ]]
            name = d.strftime('%Y-%m-%d')
            # The index is written as the first, unnamed CSV column.
            result_df.to_csv(f'{OUTPUT_DIR}/d-{name}.csv')

            # Feed the predictions back as this day's history.
            results = result_df.to_dict('records')
            for result in results:
                key = (result['product_id'], result['category_id'])
                memorized[d][key] = {
                    metric: result[f'predicted_{metric}'] for metric in METRICS
                }

        d += datetime.timedelta(days=1)

In [None]:
from training.utils import plot_accuracy

# The daily prediction files are written by DataFrame.to_csv with the index as
# their first, unnamed column. Read that column back as the index so it does
# not show up as a spurious 'Unnamed: 0' data column.
result_df = pd.read_csv('../.data/predict/d-2020-02-29.csv', index_col=0)

# Compare predicted vs. observed purchases for the last forecast day.
# NOTE(review): (0, 300) is presumably the plotted value range -- confirm
# against training.utils.plot_accuracy.
plot_accuracy(result_df['predicted_purchase'], result_df['purchase'], 'purchase', (0, 300))
result_df