In [0]:
import sys
import os

# Put the notebook's parent directory at the front of the import path
# (presumably so the local `customer_churn` checkout is importable -- confirm).
# The path is made absolute at insertion time so a later change of working
# directory cannot silently break imports, and any previous occurrence is
# removed first so re-running this cell does not pile up duplicate entries.
_parent_dir = os.path.abspath('../')
if _parent_dir in sys.path:
    sys.path.remove(_parent_dir)
sys.path.insert(0, _parent_dir)

In [0]:
import time


In [0]:
import logging
import pandas as pd
import numpy as np
from pathlib import Path
from functools import reduce


from lmkgroup_ds_utils.db.connector import DB
from lmkgroup_ds_utils.constants import Company
from customer_churn.paths import SQL_DIR

# Configure the root logger so that records at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)
# Logger for this notebook, named after the current module.
logger = logging.getLogger(__name__)

In [0]:
# Company identifier handed to every dataset constructor in this notebook.
company_id = Company.GL

# Two local connections: one to "analytics_db" and one to "postgres_db".
db = DB(local=True, db_name="analytics_db")
postgres_db = DB(local=True, db_name="postgres_db")

# Flag forwarded unchanged to each dataset constructor.
model_training = True

# Daily snapshot dates covering the window below (both endpoints included).
start_date_var = "2024-02-29"
end_date_var = "2024-03-07"
snapshot_dates = pd.date_range(start=start_date_var, end=end_date_var)

In [0]:
from customer_churn.dataset.bisnode import Bisnode
from customer_churn.dataset.complaints import Complaints
from customer_churn.dataset.crm_segments import CRMSegments
from customer_churn.dataset.customers import Customers
from customer_churn.dataset.events import Events
from customer_churn.dataset.orders import Orders

In [0]:
### Bisnode data
# Build the Bisnode dataset on the analytics connection, call load(), then
# display its `df` attribute (presumably populated by load() -- confirm).
bisnode = Bisnode(company_id, db, model_training)
bisnode.load()
bisnode.df

In [0]:
### Complaints data
# Build the Complaints dataset on the analytics connection, call load(), then
# display its `df` attribute (presumably populated by load() -- confirm).
complaints = Complaints(company_id, db, model_training)
complaints.load()
complaints.df

In [0]:
### CRMSegment
# Build the CRMSegments dataset on the analytics connection, call load(), then
# display its `df` attribute (presumably populated by load() -- confirm).
crm_segment = CRMSegments(company_id, db, model_training)
crm_segment.load()
crm_segment.df

In [0]:
### Customers data
# Build the Customers dataset on the analytics connection, call load(), then
# display its `df` attribute (presumably populated by load() -- confirm).
customers = Customers(company_id, db, model_training)
customers.load()
customers.df

In [0]:
# Display the `conf_db` attribute of the "analytics_db" connection.
# NOTE(review): if conf_db holds credentials or hostnames, clear this cell's
# output before sharing or committing the notebook -- confirm.
db.conf_db

In [0]:
# Display the `conf_db` attribute of the "postgres_db" connection.
# NOTE(review): if conf_db holds credentials or hostnames, clear this cell's
# output before sharing or committing the notebook -- confirm.
postgres_db.conf_db

In [0]:
### Events data
# The Events dataset is built on the "postgres_db" connection (postgres_db),
# not on the analytics connection (db).
events = Events(company_id, postgres_db, model_training)

# Time only the load() call. perf_counter() is a monotonic, high-resolution
# clock, so the reported duration cannot be distorted by system clock
# adjustments the way a time.time() difference can.
start_time = time.perf_counter()
events.load()
print(f"--- {time.perf_counter() - start_time} seconds ---")
events.df


In [0]:
### Orders data
# Build the Orders dataset on the analytics connection, call load(), then
# display its `df` attribute (presumably populated by load() -- confirm).
orders = Orders(company_id, db, model_training)
orders.load()
orders.df

## Get features for snapshot date

In [0]:
# Work with the first date of the snapshot range.
snapshot_date = snapshot_dates[0]

# Names of features of interest; the list is intentionally partial.
features = [
    "customer_since_weeks",
    "weeks_since_last_delivery",
    "number_of_forecast_orders",
    "number_of_total_orders",
    "snapshot_status",
] # and much more

# Presumably the name of the target column for the churn model -- confirm.
label = "forecast_status"

In [0]:
# Show the selected snapshot date.
snapshot_date

In [0]:
# Ask the Bisnode dataset for its features at the selected snapshot date and
# preview the first rows of the result.
bisnode_features = bisnode.get_features_for_snapshot(snapshot_date)
bisnode_features.head()

In [0]:
# Ask the Complaints dataset for its features at the selected snapshot date
# and preview the first rows of the result.
complaints_features = complaints.get_features_for_snapshot(snapshot_date)
complaints_features.head()

In [0]:
# Collapse the complaints frame to one row per agreement:
#   total_complaints - number of complaint rows for the agreement
#   last_complaint   - largest delivery_date among those rows
#   category         - last non-null category in row order
# NOTE(review): "last" follows the frame's row order, which is not necessarily
# the complaint with the latest delivery_date -- confirm the frame is sorted.
df = complaints.df.groupby("agreement_id").agg(
    total_complaints=("agreement_id", "count"),
    last_complaint=("delivery_date", "max"),
    category=("category", "last"),
)

In [0]:
df["weeks_since_last_complaint"] = (snapshot_date - df["last_complaint"])

In [0]:
df["weeks_since_last_complaint"].dt.days // 7