# End-to-end local jaffle_shop in Python

For funsies.

## Imports

In [None]:
# tracking
import mlflow

# pydata/ml
import sklearn as sklearn

import numpy as np
import pandas as pd
import lightgbm as lgb

from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

# viz
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

# snowflake
import yaml
import snowflake.snowpark

from snowflake.snowpark import types
from snowflake.snowpark import Session
from snowflake.snowpark.functions import col, udf, sql_expr

## Setup

In [None]:
# Plot defaults for the whole notebook.
# One `set_theme` call replaces the previous `set_theme()` + `set(rc=...)` +
# `set_style("darkgrid")` sequence: `sns.set` is only an alias of `set_theme`
# (flagged for possible removal), and "darkgrid" is the style being applied.
sns.set_theme(style="darkgrid", rc={"figure.figsize": (16, 18)})
# Applied last so matplotlib's dark palette overrides the seaborn colours.
plt.style.use(["dark_background"])

## Raw data (sources)

In [None]:
# Source: customers seed file, loaded as-is.
raw_customers = pd.read_csv(filepath_or_buffer="seeds/raw_customers.csv")
raw_customers.head(n=5)

In [None]:
# Source: orders seed file, loaded as-is.
raw_orders = pd.read_csv(filepath_or_buffer="seeds/raw_orders.csv")
raw_orders.head(n=5)

In [None]:
# Source: payments seed file, loaded as-is.
raw_payments = pd.read_csv(filepath_or_buffer="seeds/raw_payments.csv")
raw_payments.head(n=5)

## Staging data

In [None]:
# Staging: give the customer key an explicit name.
customers_rename = dict(id="customer_id")

stg_customers = raw_customers.rename(customers_rename, axis="columns")
stg_customers.head(n=5)

In [None]:
# Staging: name the order key explicitly and expose `user_id` as `customer_id`.
orders_rename = dict(id="order_id", user_id="customer_id")

stg_orders = raw_orders.rename(orders_rename, axis="columns")
stg_orders.head(n=5)

In [None]:
# Staging: name the payment key explicitly and convert `amount` to dollars.
payments_rename = dict(id="payment_id")

stg_payments = raw_payments.rename(payments_rename, axis="columns")
# cents -> dollars
stg_payments = stg_payments.assign(amount=stg_payments["amount"] / 100)

stg_payments.head(n=5)

## Final models

In [None]:
# Per-customer order summary: first/last order date and order count.
# (Originally drafted with some help from copilot.)
customer_orders = stg_orders.groupby("customer_id").agg(
    first_order=pd.NamedAgg(column="order_date", aggfunc="min"),
    most_recent_order=pd.NamedAgg(column="order_date", aggfunc="max"),
    number_of_orders=pd.NamedAgg(column="order_id", aggfunc="count"),
)
# bring `customer_id` back as a regular column
customer_orders = customer_orders.reset_index()

customer_orders.head(n=5)

In [None]:
# Per-customer payment total: attach each payment to its order (to get the
# customer_id), then sum the amounts per customer.
customer_payments = pd.merge(stg_payments, stg_orders, on="order_id", how="left")
customer_payments = (
    customer_payments.groupby("customer_id")
    .agg(total_amount=pd.NamedAgg(column="amount", aggfunc="sum"))
    .reset_index()
)

customer_payments.head(n=5)

In [None]:
# NOTE: this rebinds `customers_rename`, which the staging cell also uses for
# its own mapping; re-run the staging cell's assignment before re-running its
# rename.
customers_rename = dict(total_amount="customer_lifetime_value")

# Final customers model: staged customers enriched with order and payment
# summaries. Left joins keep customers that have no orders or payments.
# (copilot wrote the merge itself; the renaming was added by hand.)
customers = pd.merge(stg_customers, customer_orders, on="customer_id", how="left")
customers = pd.merge(customers, customer_payments, on="customer_id", how="left")
customers = customers.rename(customers_rename, axis="columns")

customers.head(n=5)

In [None]:
payment_methods = ["credit_card", "coupon", "bank_transfer", "gift_card"]

# Maps each pivoted payment-method column to "<method>_amount".
order_payments_renames = {method: f"{method}_amount" for method in payment_methods}

# Total paid per order, indexed by order_id.
order_payments_totals = stg_payments.groupby("order_id").agg(
    total_amount=pd.NamedAgg(column="amount", aggfunc="sum")
)

# Step 1: amount paid per (order, payment method), in long form.
order_payments = (
    stg_payments.groupby(["order_id", "payment_method"])
    .agg(payment_method_amount=pd.NamedAgg(column="amount", aggfunc="sum"))
    .reset_index()
)
# Step 2: one row per order, one column per payment method.
# Methods an order did not use are left as NaN by the pivot.
order_payments = order_payments.pivot(
    index="order_id", columns="payment_method", values="payment_method_amount"
)
order_payments = order_payments.rename(order_payments_renames, axis="columns")
# Step 3: attach the per-order total and restore order_id as a column.
order_payments = order_payments.merge(
    order_payments_totals, on="order_id", how="left"
).reset_index()

order_payments.head(n=5)

In [None]:
# Final orders model: staged orders plus the per-order payment breakdown.
# The left join keeps orders that have no payments.
orders_renames = dict(total_amount="amount")

orders = pd.merge(stg_orders, order_payments, on="order_id", how="left")
orders = orders.rename(orders_renames, axis="columns")

orders.head(n=5)