# Create Initial Dataset from Greenway Data

The goal here is to build the first dataset that ties together Accounts, Dealer Stock Units, Customer Equipment, Stores, and Users. This dataset will allow us to identify customer buying cycles.

In [1]:
# Load the python-dotenv IPython extension and read environment variables
# from the local .env file.
%load_ext dotenv
%dotenv
# Autoreload mode 2: re-import all modules before each cell executes, so edits
# to the project's `src` package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import json

import polars as pl
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from src.transformation.translate import (
    translate_csv_to_common_model,
)

## Pull in Data and Translate

In [46]:
# Every Greenway export is translated with the same dealer key and the same
# semantic-layer file; only the CSV path and the target entity change.
GREENWAY_DEALER = "greenway"
SEMANTIC_LAYER_PATH = "./src/transformation/semantic_layer.json"


def _greenway_to_common_model(csv_path, entity):
    """Translate one Greenway CSV export into the common model for `entity`."""
    return translate_csv_to_common_model(
        csv_path,
        GREENWAY_DEALER,
        SEMANTIC_LAYER_PATH,
        entity,
    )


account_df = _greenway_to_common_model(
    "data/dealers/greenway/account.csv", "account"
)
customer_equipment_df = _greenway_to_common_model(
    "data/dealers/greenway/customer-equipment.csv", "customer_equipment"
)
stock_unit_df = _greenway_to_common_model(
    "data/dealers/greenway/dealer-stock-unit.csv", "dealer_stock_unit"
)
purchase_orders_df = _greenway_to_common_model(
    "data/dealers/greenway/purchase-order.csv", "purchase_order"
)
service_requests_df = _greenway_to_common_model(
    "data/dealers/greenway/service-requests.csv", "service_requests"
)
user_df = _greenway_to_common_model(
    "data/dealers/greenway/user.csv", "user"
)
store_df = _greenway_to_common_model(
    "data/dealers/greenway/store.csv", "store"
)

In [None]:
# Preview the first rows of the translated account data.
account_df.head()

In [None]:
# Count how many accounts fall into each `customer_business_class` value.
account_df["customer_business_class"].value_counts()

In [None]:
# Preview the first rows of the translated customer-equipment data.
customer_equipment_df.head()

In [None]:
# Show the last rows of customer equipment whose `ce_category` is exactly "ALLD".
customer_equipment_df.filter(pl.col("ce_category") == "ALLD").tail()

In [None]:
from src.transformation.category import CleanMakeModelData

In [None]:
# Instantiate the project's make/model cleaner with its default arguments.
clean_make_model = CleanMakeModelData()

In [None]:
# Display the make/model reference data held by the cleaner.
clean_make_model.make_model_data

In [16]:
# Feed the dealer stock units to the cleaner's aggregation step.
# NOTE(review): presumably this populates `clean_make_model.aggregated_data`,
# which later cells inspect — confirm against CleanMakeModelData.
clean_make_model.create_aggregated_data(stock_unit_df)

In [None]:
# Count the reference rows for a John Deere 3400, comparing make and model
# case-insensitively.
ref_make_is_jd = pl.col("make").str.to_lowercase() == "JOHN DEERE".lower()
ref_model_is_3400 = pl.col("model").str.to_lowercase() == "3400".lower()
clean_make_model.make_model_data.filter(ref_make_is_jd & ref_model_is_3400).shape[0]

In [None]:
# Display the translated dealer stock unit data.
stock_unit_df

In [None]:
# Inspect John Deere 8270R stock units (case-insensitive match) that have a
# non-null `dsu_group`.
dsu_make_is_jd = pl.col("dsu_make").str.to_lowercase() == "JOHN DEERE".lower()
dsu_model_is_8270r = pl.col("dsu_model").str.to_lowercase() == "8270R".lower()
jd_8270r_units = stock_unit_df.filter(dsu_make_is_jd & dsu_model_is_8270r)
jd_8270r_units.drop_nulls("dsu_group")
# Disabled follow-up: count rows per group value, most frequent first.
# .group_by(group_col)
# .agg(pl.count(group_col).alias("count"))
# .sort("count", descending=True)
# .to_numpy()

In [None]:
# Preview the first rows of the cleaner's aggregated data.
clean_make_model.aggregated_data.head()

In [None]:
# Size of the aggregated data (row count, assuming it is a DataFrame).
len(clean_make_model.aggregated_data)

In [None]:
# Keep only the aggregated rows whose `group` is not the empty string.
clean_make_model.aggregated_data.filter(pl.col("group") != "")

In [None]:
# Spot-check the cleaner on one make/model pair, with no group supplied.
# NOTE(review): this passes an upper-case make, while the match-rate loop in
# this notebook lower-cases its inputs — confirm the lookup is case-insensitive.
clean_make_model.clean_make_model_data("FAIR OAKS", "10")

In [None]:
# Measure how many dealer stock units the make/model cleaner can classify.
# `matched_cats` / `matched_subcats` count rows whose cleaned category /
# subcategory is anything other than "Unknown"; progress is printed every
# 1000 rows.
matched_cats = 0
matched_subcats = 0
idx = 1
for row in stock_unit_df.iter_rows(named=True):
    # Missing values become "" so they can be lower-cased safely and are
    # falsy in the check below.
    make = row["dsu_make"].lower() if row["dsu_make"] else ""
    model = row["dsu_model"].lower() if row["dsu_model"] else ""
    group = row["dsu_group"].lower() if row["dsu_group"] else ""
    if make and model:
        result = clean_make_model.clean_make_model_data(make, model, group=group)
        # Count only inside this branch. Previously these checks ran for every
        # row, so a row without make/model silently reused the previous row's
        # `result` (inflating the counts) or raised NameError if it came first.
        if result["category"] != "Unknown":
            matched_cats += 1
        if result["subcategory"] != "Unknown":
            matched_subcats += 1

    if idx % 1000 == 0:
        print(f"Matched Cats: {matched_cats}, Matched Subcats: {matched_subcats}")
        print(f"Total of {idx} rows processed")

    idx += 1