In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, date
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import dash
import dash_mantine_components as dmc
from dash import dcc, html, Input, Output, callback
import plotly.io as pio
import os
from tqdm import tqdm

#### Preliminary Cell

In [8]:
# Column-name constants. Each name is written upper-case here and
# lower-cased before use as a dataframe column key.
(
    CUST_ID_COL,
    START_DATE_COL,
    CLOSE_DATE_COL,
    OPENING_AMOUNT_COL,
    PRODUCT_TYPE,
    DATE_DIFF,
) = map(str.lower, (
    'CFK_CIF_NBR',
    'S_OPENING_DT',
    'S_CLOSE_DATE',
    'S_OPENING_AMT',
    'S_ACCT_TYPE',
    'DATE_DIFF',
))

# Input parquet file and the directory that receives the exported PDF charts.
DS_PATH = 'positive_label_data/' + 'mock_agreements.pq'
PDF_PATH = 'positive_label_pdfs/'

#### Data Format Transform and sampling

In [5]:
# Load the agreements dataset, normalise column dtypes, de-duplicate and
# down-sample it so the downstream charts stay tractable.
df = pd.read_parquet(DS_PATH)

# Convert date columns to datetime format (unparseable values become NaT)
for date_col in (START_DATE_COL, CLOSE_DATE_COL):
    df[date_col] = pd.to_datetime(df[date_col], errors='coerce')

# Convert amount column to float format (unparseable values become NaN)
df[OPENING_AMOUNT_COL] = pd.to_numeric(df[OPENING_AMOUNT_COL], errors='coerce').astype(float)

# Drop fully duplicated rows; this is a no-op when there are none, so no
# separate duplicate check is needed.
df = df.drop_duplicates()
print('Ready to go')

# Random sample of at most 1M rows. DataFrame.sample raises ValueError when n
# exceeds the number of rows, so cap n at the frame length. A fixed seed keeps
# the sample (and every chart derived from it) reproducible across re-runs.
MAX_SAMPLE_ROWS = 1_000_000
SAMPLE_SEED = 42
df = df.sample(n=min(len(df), MAX_SAMPLE_ROWS), random_state=SAMPLE_SEED)

Ready to go


#### Graph Repository

In [10]:
# Graph Repository
# Create the PDF output directory if it is missing. exist_ok=True makes this
# safe to re-run and avoids the check-then-create race of a separate
# os.path.exists() test.
os.makedirs(PDF_PATH, exist_ok=True)

#### Date and Event Value Input

In [11]:
# Filter Date
# Expected input: two comma-separated dates, or the literal text None to use
# the full range of opening dates found in df.
date_range = input('Please enter date range, for example: 03-21-2021, 12-31-2023. Input None for default')
if date_range.strip() == 'None':
    starting_date, ending_date = df[START_DATE_COL].min(), df[START_DATE_COL].max()
else:
    # Strip the whitespace that follows the comma in inputs like "a, b".
    starting_date, ending_date = (part.strip() for part in date_range.split(','))

# Filter Event Value
# Expected input: two comma-separated numbers, or the literal text None to use
# the full range of opening amounts found in df.
event_value_range = input('Please enter event value range, for example: 500, 35000. Input None for default')
# The default must be decided by the event-value answer itself, not by the
# date-range answer.
if event_value_range.strip() == 'None':
    event_value_min, event_value_max = df[OPENING_AMOUNT_COL].min(), df[OPENING_AMOUNT_COL].max()
else:
    # float() accepts both whole and decimal amounts and tolerates spaces.
    event_value_min, event_value_max = map(float, event_value_range.split(','))

# Filtered DataFrame
# Keep rows whose opening amount and opening date both fall inside the
# requested inclusive bounds. Rows with NaN amounts or NaT dates fail the
# comparisons and are dropped.
amount_in_range = (
    (df[OPENING_AMOUNT_COL] >= event_value_min) &
    (df[OPENING_AMOUNT_COL] <= event_value_max)
)
date_in_range = (
    (df[START_DATE_COL] >= pd.to_datetime(starting_date)) &
    (df[START_DATE_COL] <= pd.to_datetime(ending_date))
)
filtered_df = df[amount_in_range & date_in_range].copy()

#### General Stats and Event Count

In [None]:
# Overall Event Value Distribution
# Single violin (with embedded box and mean line) over every opening amount
# in the unfiltered frame, exported as a PDF.
overall_distr_fig = go.Figure(
    data=go.Violin(
        y=df[OPENING_AMOUNT_COL],
        x0='Overall',
        name='Overall',
        box_visible=True,
        meanline_visible=True,
    )
)
overall_distr_fig.update_layout(
    title='Overall Event Value Distribution',
    title_x=0.5,
    xaxis_title='Product Type',
    yaxis_title='Event Value',
)
file_path = os.path.join(PDF_PATH, "Overall_Event_Value_Distribution.pdf")
pio.write_image(overall_distr_fig, file_path)

""" # Event Value Distribution by Individual Product
for product in tqdm(sorted(df[PRODUCT_TYPE].unique())):
    overall_distr_fig = go.Figure(data=go.Violin(y=df[df[PRODUCT_TYPE]==product][OPENING_AMOUNT_COL], box_visible=True,
                                                    meanline_visible=True, x0=str(product), name=str(product)))
    overall_distr_fig.update_layout(
            title='Overall Event Value Distribution by Product', 
            title_x=0.5,
            xaxis_title='Product Type', 
            yaxis_title='Event Value')
    file_path = os.path.join(PDF_PATH, f"Overall_Event_Value_Distribution_Product_{product}.pdf")
    pio.write_image(overall_distr_fig, file_path) """
    
# Event Count by Month
# Stacked bars: number of agreements opened per calendar month, one series
# per product type.
# Group key ('YYYY-MM' of the opening date) is the same for every product,
# so compute it once outside the loop.
month_key = filtered_df[START_DATE_COL].dt.strftime('%Y-%m')
event_count_fig = go.Figure()
# Iterate over the products present in filtered_df (as the other charts do),
# not the unfiltered df, so products removed by the filters do not add empty
# legend entries.
for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    event_counts = filtered_df[filtered_df[PRODUCT_TYPE] == product].groupby(month_key).size()
    event_count_fig.add_trace(go.Bar(x=event_counts.index, y=event_counts.values, name=str(product)))

event_count_fig.update_layout(
    xaxis_title='Month',
    yaxis_title='Event Count',
    title='Event Count vs Time',
    title_x=0.5,
    barmode='stack'
)
event_count_fig.update_xaxes(dtick="M1", tickformat="%b\n%Y")
file_path = os.path.join(PDF_PATH, "Event_Count_by_Month.pdf")
pio.write_image(event_count_fig, file_path)

# Event Count by Year
# Stacked bars: number of agreements opened per calendar year, one series
# per product type.
# Group key (integer year of the opening date) is loop-invariant.
year_key = filtered_df[START_DATE_COL].dt.year
event_count_fig = go.Figure()
for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    event_counts = filtered_df[filtered_df[PRODUCT_TYPE] == product].groupby(year_key).size()
    event_count_fig.add_trace(go.Bar(x=event_counts.index, y=event_counts.values, name=str(product)))

event_count_fig.update_layout(
    xaxis_title='Year',
    yaxis_title='Event Count',
    title='Event Count vs Time',
    title_x=0.5,
    barmode='stack'
)
# x holds integer years, so ask for one tick per year printed as a plain
# integer. "Y1" is not a valid dtick and "%b\n%Y" is a date format.
event_count_fig.update_xaxes(dtick=1, tickformat="d")
file_path = os.path.join(PDF_PATH, "Event_Count_by_Year.pdf")
pio.write_image(event_count_fig, file_path)

# Create the unique customers chart by Month
# Stacked bars: distinct customer ids per calendar month ('YYYY-MM' of the
# opening date), one series per product type.
month_key = filtered_df[START_DATE_COL].dt.strftime('%Y-%m')
unique_customer_fig = go.Figure()
for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    product_rows = filtered_df.loc[filtered_df[PRODUCT_TYPE] == product]
    monthly_customers = product_rows.groupby(month_key)[CUST_ID_COL].nunique()
    unique_customer_fig.add_trace(
        go.Bar(x=monthly_customers.index, y=monthly_customers.values, name=str(product))
    )

unique_customer_fig.update_layout(
    title='Unique Customer vs Time',
    title_x=0.5,
    xaxis_title='Month',
    yaxis_title='Unique Customer',
    barmode='stack',
)
unique_customer_fig.update_xaxes(dtick="M1", tickformat="%b\n%Y")
file_path = os.path.join(PDF_PATH, "Unique_Customer_by_Month.pdf")
pio.write_image(unique_customer_fig, file_path)

# Create the unique customers chart by Year
# Stacked bars: distinct customer ids per calendar year of the opening date,
# one series per product type.
year_key = filtered_df[START_DATE_COL].dt.year
unique_customer_fig = go.Figure()
for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    unique_customers = filtered_df[filtered_df[PRODUCT_TYPE] == product].groupby(year_key)[CUST_ID_COL].nunique()
    unique_customer_fig.add_trace(go.Bar(x=unique_customers.index, y=unique_customers.values, name=str(product)))

unique_customer_fig.update_layout(
    xaxis_title='Year',
    yaxis_title='Unique Customer',
    title='Unique Customer vs Time',
    title_x=0.5,
    barmode='stack'
)
# x holds integer years, so ask for one tick per year printed as a plain
# integer. "Y1" is not a valid dtick and "%b\n%Y" is a date format.
unique_customer_fig.update_xaxes(dtick=1, tickformat="d")
file_path = os.path.join(PDF_PATH, "Unique_Customer_by_Year.pdf")
pio.write_image(unique_customer_fig, file_path)

# Event count separated into first, second, third...times
# Bar chart: for each "number of agreements" value, how many customers have
# exactly that many rows in filtered_df.
# Only the per-customer row count is needed here, so the sort and the
# per-customer rank (never read by this chart) are not computed.
temp = filtered_df.groupby(CUST_ID_COL).size().value_counts()
event_count_client_fig = go.Figure(data=go.Bar(x=temp.index, y=temp.values))
event_count_client_fig.update_layout(
    # Force one labelled tick per observed agreement count.
    xaxis=dict(tickmode='array', tickvals=temp.index, ticktext=temp.index),
    xaxis_title='Number of Agreements',
    yaxis_title='Number of Customers',
    title='How many agreements did each customer open?',
    title_x=0.5,
)
file_path = os.path.join(PDF_PATH, "Total_Number_of_Agreement.pdf")
pio.write_image(event_count_client_fig, file_path)
    
# Event count separated into Month
# Rank each customer's agreements by opening date (1 = earliest; ties are
# broken by row order after the sort), then stack monthly event counts with
# one series per rank.
filtered_df_cust = filtered_df.sort_values(by=[CUST_ID_COL, START_DATE_COL]).copy()
filtered_df_cust['rank'] = (
    filtered_df_cust.groupby(CUST_ID_COL)[START_DATE_COL].rank(ascending=True, method='first')
)
month_key = filtered_df_cust[START_DATE_COL].dt.strftime('%Y-%m')

event_count_client_fig = go.Figure()
for group in tqdm(sorted(filtered_df_cust['rank'].unique())):
    rank_rows = filtered_df_cust.loc[filtered_df_cust['rank'] == group]
    monthly_counts = rank_rows.groupby(month_key).size()
    event_count_client_fig.add_trace(
        go.Bar(x=monthly_counts.index, y=monthly_counts.values, name=str(int(group)) + 'Agreement')
    )

event_count_client_fig.update_layout(
    title='Out of the Event Counts, how many is the first, second and third agreements?',
    title_x=0.5,
    yaxis_title='Event Count',
    barmode='stack',
)
event_count_client_fig.update_xaxes(dtick='M1', tickformat="%b\n%Y")
file_path = os.path.join(PDF_PATH, "Total_Number_of_Agreement_by_Month.pdf")
pio.write_image(event_count_client_fig, file_path)

# Event count separated into Year
# Rank each customer's agreements by opening date (1 = earliest; ties are
# broken by row order after the sort), then stack yearly event counts with
# one series per rank.
event_count_client_fig = go.Figure()
filtered_df_cust = filtered_df.sort_values(by=[CUST_ID_COL, START_DATE_COL]).copy()
filtered_df_cust['rank'] = filtered_df_cust.groupby(CUST_ID_COL)[START_DATE_COL].rank(ascending=True, method='first')
# Group key (integer year of the opening date) is loop-invariant.
year_key = filtered_df_cust[START_DATE_COL].dt.year
for group in tqdm(sorted(filtered_df_cust['rank'].unique())):
    temp = filtered_df_cust[filtered_df_cust['rank'] == group].groupby(year_key).size()
    event_count_client_fig.add_trace(
        go.Bar(x=temp.index, y=temp.values, name=str(int(group)) + 'Agreement')
    )

event_count_client_fig.update_layout(
    title='Out of the Event Counts, how many is the first, second and third agreements?',
    yaxis_title='Event Count',
    title_x=0.5,
    barmode='stack'
)
# x holds integer years, so ask for one tick per year printed as a plain
# integer. 'Y1' is not a valid dtick and "%b\n%Y" is a date format.
event_count_client_fig.update_xaxes(dtick=1, tickformat="d")
file_path = os.path.join(PDF_PATH, "Total_Number_of_Agreement_by_Year.pdf")
pio.write_image(event_count_client_fig, file_path)

""" # Event count separated into first, second, third...times by Product Type
for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    filtered_df_new= filtered_df[filtered_df[PRODUCT_TYPE]==product]
    filtered_df_cust = filtered_df_new.sort_values(by=[CUST_ID_COL, START_DATE_COL]).copy()
    filtered_df_cust['rank'] = filtered_df_cust.groupby(CUST_ID_COL)[START_DATE_COL].rank(ascending=True, method='first')
    temp = filtered_df_cust.groupby(CUST_ID_COL).size().value_counts()
    event_count_client_fig = go.Figure(data=go.Bar(x=temp.index, y=temp.values))
    event_count_client_fig.update_layout(xaxis=dict(tickmode='array', tickvals=temp.index, ticktext=temp.index),
                                                xaxis_title='Number of Agreements', yaxis_title='Number of Customers',
                                                title='How many agreements did each customer open?',
                                                title_x=0.5)
    file_path = os.path.join(PDF_PATH, f"Number_of_Agreement_Product_{product}.pdf")
    pio.write_image(event_count_client_fig, file_path)

# Event count by Product separated into Month
for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]
    filtered_df_cust = filtered_df_new.sort_values(by=[CUST_ID_COL, START_DATE_COL]).copy()
    filtered_df_cust['rank'] = filtered_df_cust.groupby(CUST_ID_COL)[START_DATE_COL].rank(ascending=True, method='first')

    # Check if the filtered data for a specific product is not empty
    if not filtered_df_cust.empty:
        # Initialize the plotly figure
        event_count_client_fig = go.Figure()

        for group in sorted(filtered_df_cust['rank'].unique()):
            temp = filtered_df_cust[filtered_df_cust['rank'] == group].groupby(filtered_df_cust[START_DATE_COL].dt.strftime('%Y-%m')).size()
            x = temp.index
            y = temp.values
            name = str(int(group)) + 'Agreement'
            event_count_client_fig.add_trace(go.Bar(x=x, y=y, name=name))

        event_count_client_fig.update_layout(
            title=f'Out of the Event Counts for Product {product}, how many are the first, second, and third agreements?',
            yaxis_title='Event Count',
            title_x=0.5,
            barmode='stack'
        )
        event_count_client_fig.update_xaxes(dtick='M1', tickformat="%b\n%Y")

        # Save the figure for each product
        file_path = os.path.join(PDF_PATH, f"Number_of_Agreement_Product_{product}_by_Month.pdf")
        pio.write_image(event_count_client_fig, file_path)

        # Clear the figure after each product to start fresh for the next product
        event_count_client_fig = None

# Event count by Product separated into Year
for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]
    filtered_df_cust = filtered_df_new.sort_values(by=[CUST_ID_COL, START_DATE_COL]).copy()
    filtered_df_cust['rank'] = filtered_df_cust.groupby(CUST_ID_COL)[START_DATE_COL].rank(ascending=True, method='first')

    # Check if the filtered data for a specific product is not empty
    if not filtered_df_cust.empty:
        # Initialize the plotly figure
        event_count_client_fig = go.Figure()

        for group in sorted(filtered_df_cust['rank'].unique()):
            temp = filtered_df_cust[filtered_df_cust['rank'] == group].groupby(filtered_df_cust[START_DATE_COL].dt.year).size()
            x = temp.index
            y = temp.values
            name = str(int(group)) + 'Agreement'
            event_count_client_fig.add_trace(go.Bar(x=x, y=y, name=name))

        event_count_client_fig.update_layout(
            title=f'Out of the Event Counts for Product {product}, how many are the first, second, and third agreements?',
            yaxis_title='Event Count',
            title_x=0.5,
            barmode='stack'
        )
        event_count_client_fig.update_xaxes(dtick='Y1', tickformat="%b\n%Y")

        # Save the figure for each product
        file_path = os.path.join(PDF_PATH, f"Number_of_Agreement_Product_{product}_by_Year.pdf")
        pio.write_image(event_count_client_fig, file_path)

        # Clear the figure after each product to start fresh for the next product
        event_count_client_fig = None """
        
# Multiple Agreement by Month
# Step 1: count agreements per (month, customer).
# Step 2: count customers per (month, agreements-in-that-month).
# The result is stacked with one series per agreements-in-month value.
filtered_df_cust = filtered_df.sort_values(by=[CUST_ID_COL, START_DATE_COL]).copy()
filtered_df_cust['rank'] = (
    filtered_df_cust.groupby(CUST_ID_COL)[START_DATE_COL].rank(ascending=True, method='first')
)

month_key = filtered_df_cust[START_DATE_COL].dt.strftime('%Y-%m')
per_customer_month = (
    filtered_df_cust
    .groupby([month_key, filtered_df_cust[CUST_ID_COL]])
    .size()
    .reset_index(name='count')
)
# The month column already holds 'YYYY-MM' strings, so it can be used as the
# group key directly and converted to timestamps once at the end.
temp = (
    per_customer_month
    .groupby([START_DATE_COL, 'count'])
    .size()
    .reset_index(name='cust_count')
)
temp[START_DATE_COL] = pd.to_datetime(temp[START_DATE_COL])

multiple_event_client_fig = go.Figure()
for group in tqdm(sorted(temp['count'].unique())):
    group_rows = temp.loc[temp['count'] == group]
    multiple_event_client_fig.add_trace(
        go.Bar(
            x=group_rows[START_DATE_COL],
            y=group_rows['cust_count'],
            name=str(int(group)) + 'Agreement',
        )
    )

multiple_event_client_fig.update_layout(
    title='How many customers have multiple agreements in Each Month',
    title_x=0.5,
    yaxis_title='Number of Customers',
    barmode='stack',
)
multiple_event_client_fig.update_xaxes(dtick='M1', tickformat="%b\n%Y")
file_path = os.path.join(PDF_PATH, "Total_Number_of_Multiple_Agreement_by_Month.pdf")
pio.write_image(multiple_event_client_fig, file_path)

# Multiple Agreement by Year
# Step 1: count agreements per (year, customer).
# Step 2: count customers per (year, agreements-in-that-year).
# Uses filtered_df_cust as built by the preceding "by Month" section.
temp = filtered_df_cust.groupby([filtered_df_cust[START_DATE_COL].dt.year, filtered_df_cust[CUST_ID_COL]]).size().reset_index(name='count')
temp = temp.groupby([START_DATE_COL, 'count']).size().reset_index(name='cust_count')

multiple_event_client_fig = go.Figure()
# Sort the groups so the stack/legend order is 1, 2, 3, ... as in the monthly
# chart, instead of the order in which counts happen to appear.
for group in sorted(temp['count'].unique()):
    group_rows = temp[temp['count'] == group]
    multiple_event_client_fig.add_trace(
        go.Bar(x=group_rows[START_DATE_COL], y=group_rows['cust_count'], name=str(int(group)) + 'Agreement')
    )

multiple_event_client_fig.update_layout(
    title='How many customers have multiple agreements in Each Year',
    yaxis_title='Number of Customers',
    title_x=0.5,
    barmode='stack'
)
# x holds integer years, so ask for one tick per year printed as a plain
# integer. 'Y1' is not a valid dtick and "%b\n%Y" is a date format.
multiple_event_client_fig.update_xaxes(dtick=1, tickformat="d")
file_path = os.path.join(PDF_PATH, "Total_Number_of_Multiple_Agreement_by_Year.pdf")
pio.write_image(multiple_event_client_fig, file_path)

""" # Multiple Agreement by Month (Specific Product)
for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]
    filtered_df_cust = filtered_df_new.sort_values(by=[CUST_ID_COL, START_DATE_COL]).copy()
    filtered_df_cust['rank'] = filtered_df_cust.groupby(CUST_ID_COL)[START_DATE_COL].rank(ascending=True, method='first')
    temp = filtered_df_cust.groupby([filtered_df_cust[START_DATE_COL].dt.strftime('%Y-%m'), filtered_df_cust[CUST_ID_COL]]).size().reset_index(name='count')
    temp[START_DATE_COL] = pd.to_datetime(temp[START_DATE_COL])
    temp = temp.groupby([temp[START_DATE_COL].dt.strftime('%Y-%m'), temp['count']]).size().reset_index(name='cust_count')
    temp[START_DATE_COL] = pd.to_datetime(temp[START_DATE_COL])
            
    multiple_event_client_fig = go.Figure()
    for group in sorted(temp['count'].unique()):
        x = temp[temp['count']==group][START_DATE_COL]
        y = temp[temp['count']==group]['cust_count']
        name = str(int(group)) + 'Agreement'
        multiple_event_client_fig.add_trace(go.Bar(x=x, y=y, name=name))
    
    multiple_event_client_fig.update_layout(
    title=f'How many customers have multiple agreements in Product {product} in Each Month',
    yaxis_title='Number of Customers',
    title_x=0.5,
    barmode='stack')
    multiple_event_client_fig.update_xaxes(dtick='M1', tickformat="%b\n%Y")
    file_path = os.path.join(PDF_PATH, f"Number_of_Multiple_Agreement_Product_{product}_by_Month.pdf")
    pio.write_image(multiple_event_client_fig, file_path)

# Multiple Agreement by Year (Specific Product)
for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]
    filtered_df_cust = filtered_df_new.sort_values(by=[CUST_ID_COL, START_DATE_COL]).copy()
    filtered_df_cust['rank'] = filtered_df_cust.groupby(CUST_ID_COL)[START_DATE_COL].rank(ascending=True, method='first')
    temp = filtered_df_cust.groupby([filtered_df_cust[START_DATE_COL].dt.year, filtered_df_cust[CUST_ID_COL]]).size().reset_index(name='count')
    temp = temp.groupby([START_DATE_COL, 'count']).size().reset_index(name='cust_count')
            
    multiple_event_client_fig = go.Figure()
    for group in temp['count'].unique():
        x = temp[temp['count']==group][START_DATE_COL]
        y = temp[temp['count']==group]['cust_count']
        name = str(int(group)) + 'Agreement'
        multiple_event_client_fig.add_trace(go.Bar(x=x, y=y, name=name))
    
    multiple_event_client_fig.update_layout(
    title=f'How many customers have multiple agreements in Product {product} in Each Year',
    yaxis_title='Number of Customers',
    title_x=0.5,
    barmode='stack')
    multiple_event_client_fig.update_xaxes(dtick='Y1', tickformat="%b\n%Y")
    file_path = os.path.join(PDF_PATH, f"Number_of_Multiple_Agreement_Product_{product}_by_Year.pdf")
    pio.write_image(multiple_event_client_fig, file_path) """

#### Average Event Value and Distribution

In [7]:
# Box Chart by Month
# One box per calendar month ('YYYY-MM' of the opening date) over opening
# amounts, with the monthly mean overlaid as a line.
month_labels = filtered_df[START_DATE_COL].dt.strftime('%Y-%m')
monthly_mean = filtered_df.groupby(month_labels)[OPENING_AMOUNT_COL].mean()

box_chart = go.Figure(
    data=go.Box(x=month_labels, y=filtered_df[OPENING_AMOUNT_COL], boxmean=True, name='Bar Plot')
)
box_chart.update_layout(
    title='Average Event Value vs Time',
    title_x=0.5,
    xaxis_title='Month',
    yaxis_title='Event Value',
)
box_chart.update_xaxes(dtick="M1", tickformat="%b\n%Y")
box_chart.add_trace(go.Scatter(x=monthly_mean.index, y=monthly_mean.values, name='Mean'))
file_path = os.path.join(PDF_PATH, "Average_Event_Value_by_Month.pdf")
pio.write_image(box_chart, file_path)

# Box Chart by Year
# One box per calendar year of the opening date over opening amounts, with
# the yearly mean overlaid as a line.
year_labels = filtered_df[START_DATE_COL].dt.year
# Compute the yearly mean once and reuse it for both x and y of the overlay.
yearly_mean = filtered_df.groupby(year_labels)[OPENING_AMOUNT_COL].mean()

box_chart = go.Figure(data=go.Box(x=year_labels, y=filtered_df[OPENING_AMOUNT_COL], boxmean=True, name='Bar Plot'))
box_chart.update_layout(
    xaxis_title='Year',
    yaxis_title='Event Value',
    title='Average Event Value vs Time',
    title_x=0.5
)
# x holds integer years, so ask for one tick per year printed as a plain
# integer. "Y1" is not a valid dtick and "%b\n%Y" is a date format.
box_chart.update_xaxes(dtick=1, tickformat="d")
box_chart.add_trace(go.Scatter(x=yearly_mean.index, y=yearly_mean.values, name='Mean'))
file_path = os.path.join(PDF_PATH, "Average_Event_Value_by_Year.pdf")
pio.write_image(box_chart, file_path)

# Box Chart by Week
# One box per '%Y-%W' label (year plus week-of-year number) over opening
# amounts, with the weekly mean overlaid as a line.
# The frame is sorted by opening date; presumably so the category axis lists
# the weeks chronologically (verify against plotly's category ordering).
week_df = filtered_df.sort_values(START_DATE_COL)
week_labels = week_df[START_DATE_COL].dt.strftime('%Y-%W')
weekly_mean = week_df.groupby(week_labels)[OPENING_AMOUNT_COL].mean()

# x and y must come from the SAME sorted frame: plotly pairs them by position,
# not by index, so mixing week_df labels with filtered_df amounts would put
# amounts in the wrong weeks.
box_chart = go.Figure(data=go.Box(x=week_labels, y=week_df[OPENING_AMOUNT_COL], boxmean=True, name='Bar Plot'))
box_chart.update_layout(
    xaxis_title='Week',
    yaxis_title='Event Value',
    title='Average Event Value vs Time',
    title_x=0.5,
    xaxis={'type': 'category'}
)
box_chart.add_trace(go.Scatter(x=weekly_mean.index, y=weekly_mean.values, name='Mean'))
file_path = os.path.join(PDF_PATH, "Average_Event_Value_by_Week.pdf")
pio.write_image(box_chart, file_path)

# Box Chart by Day of Week
# Two exports keyed by weekday name of the opening date: a box plot of
# opening amounts, then a bar chart of event counts. Both use the
# Monday-to-Sunday category order.
day_sequence = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday','Sunday']
weekday_names = filtered_df[START_DATE_COL].dt.day_name()

fig_day_week = go.Figure(
    data=go.Box(x=weekday_names, y=filtered_df[OPENING_AMOUNT_COL], boxmean=True)
)
fig_day_week.update_layout(
    title='Average Event Value by Day of Week',
    title_x=0.5,
    xaxis_title='Day of Week',
    yaxis_title='Event Value',
    xaxis={'categoryorder': 'array', 'categoryarray': day_sequence},
)
file_path = os.path.join(PDF_PATH, "Average_Event_Value_by_Day_of_Week.pdf")
pio.write_image(fig_day_week, file_path)

# Weekdays with no events are kept with a count of 0.
events_count_day_week = weekday_names.value_counts().reindex(day_sequence, fill_value=0)
fig_day_week = go.Figure(
    data=go.Bar(x=events_count_day_week.index, y=events_count_day_week.values)
)
fig_day_week.update_layout(
    title='Event Count by Day of Week',
    title_x=0.5,
    xaxis_title='Day of Week',
    yaxis_title='Event Count',
    xaxis={'categoryorder': 'array', 'categoryarray': day_sequence},
)
file_path = os.path.join(PDF_PATH, "Event_Count_by_Day_of_Week.pdf")
pio.write_image(fig_day_week, file_path)

# Box Chart by Day of Month
# Two exports keyed by day-of-month (1-31) of the opening date: a box plot
# of opening amounts, then a bar chart of event counts.
# NOTE(review): x values are integers while categoryarray holds strings; the
# category ordering may therefore have no effect on these axes - confirm.
day_of_month = filtered_df[START_DATE_COL].dt.day

fig_day_month = go.Figure(data=go.Box(x=day_of_month, y=filtered_df[OPENING_AMOUNT_COL], boxmean=True))
fig_day_month.update_layout(
    title='Average Event Value by Day of Month',
    xaxis_title='Day of Month',
    yaxis_title='Event Value',
    title_x=0.5,
    xaxis={'categoryorder': 'array', 'categoryarray': [str(i) for i in range(1, 32)]}
)
file_path = os.path.join(PDF_PATH, "Average_Event_Value_by_Day_of_Month.pdf")
pio.write_image(fig_day_month, file_path)

# Reindex against the explicit 1..31 range: this orders the counts by day and
# gives days with no events a count of 0. A reindex() call without labels
# does neither.
events_count_day_month = day_of_month.value_counts().reindex(range(1, 32), fill_value=0)
fig_day_month = go.Figure(data=go.Bar(x=events_count_day_month.index, y=events_count_day_month.values))
fig_day_month.update_layout(
    title='Event Count by Day of Month',
    xaxis_title='Day of Month',
    yaxis_title='Event Count',
    title_x=0.5,
    xaxis={'categoryorder': 'array', 'categoryarray': [str(i) for i in range(1, 32)]}
)
file_path = os.path.join(PDF_PATH, "Event_Count_by_Day_of_Month.pdf")
pio.write_image(fig_day_month, file_path)

# Box Chart by Month of Year
# Two exports keyed by month name of the opening date: a box plot of opening
# amounts, then a bar chart of event counts. Both use the January-to-December
# category order.
month_sequence = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
month_names = filtered_df[START_DATE_COL].dt.month_name()

fig_month_year = go.Figure(
    data=go.Box(x=month_names, y=filtered_df[OPENING_AMOUNT_COL], boxmean=True)
)
fig_month_year.update_layout(
    title='Average Event Value by Month of Year',
    title_x=0.5,
    xaxis_title='Month',
    yaxis_title='Event Value',
    xaxis={'categoryorder': 'array', 'categoryarray': month_sequence},
)
file_path = os.path.join(PDF_PATH, "Average_Event_Value_by_Month_of_Year.pdf")
pio.write_image(fig_month_year, file_path)

# Months with no events are kept with a count of 0.
events_count_month_year = month_names.value_counts().reindex(month_sequence, fill_value=0)
fig_month_year = go.Figure(
    data=go.Bar(x=events_count_month_year.index, y=events_count_month_year.values)
)
fig_month_year.update_layout(
    title='Event Count by Month of Year',
    title_x=0.5,
    xaxis_title='Month',
    yaxis_title='Event Count',
    xaxis={'categoryorder': 'array', 'categoryarray': month_sequence},
)
file_path = os.path.join(PDF_PATH, "Event_Count_by_Month_of_Year.pdf")
pio.write_image(fig_month_year, file_path)

# Box Chart by Quarter of Year
# Two exports keyed by quarter (1-4) of the opening date: a box plot of
# opening amounts, then a bar chart of event counts.
quarter_sequence = [1, 2, 3, 4]
quarter_of_year = filtered_df[START_DATE_COL].dt.quarter

fig_quarter_year = go.Figure(data=go.Box(x=quarter_of_year, y=filtered_df[OPENING_AMOUNT_COL], boxmean=True))
fig_quarter_year.update_layout(
    title='Average Event Value by Quarter of Year',
    xaxis_title='Quarter',
    yaxis_title='Event Value',
    title_x=0.5,
    xaxis={'categoryorder': 'array', 'categoryarray': quarter_sequence}
)
file_path = os.path.join(PDF_PATH, "Average_Event_Value_by_Quarter_of_Year.pdf")
pio.write_image(fig_quarter_year, file_path)

# Reindex against the explicit quarter list: this orders the counts 1..4 and
# gives quarters with no events a count of 0. A reindex() call without labels
# does neither.
events_count_quarter_year = quarter_of_year.value_counts().reindex(quarter_sequence, fill_value=0)
fig_quarter_year = go.Figure(data=go.Bar(x=events_count_quarter_year.index, y=events_count_quarter_year.values))
fig_quarter_year.update_layout(
    title='Event Count by Quarter of Year',
    xaxis_title='Quarter',
    yaxis_title='Event Count',
    title_x=0.5,
    xaxis={'categoryorder': 'array', 'categoryarray': quarter_sequence}
)
file_path = os.path.join(PDF_PATH, "Event_Count_by_Quarter_of_Year.pdf")
pio.write_image(fig_quarter_year, file_path)

# Avg Event Value by Quantile
# Mean opening amount per customer, cut into up to 20 quantile bins
# (duplicate bin edges are dropped), then a bar of customers per bin.
temp = filtered_df.groupby(CUST_ID_COL)[OPENING_AMOUNT_COL].mean()
temp_df = temp.to_frame()
temp_df['Bin'] = pd.qcut(temp_df[OPENING_AMOUNT_COL], q=20, duplicates='drop')

# sort_index puts the bins in ascending interval order.
bin_count = temp_df['Bin'].value_counts().sort_index()
# Bin labels are rendered as text for the x axis.
x_ticktext = list(map(str, bin_count.index))

avg_value_client_fig = go.Figure(data=[go.Bar(x=x_ticktext, y=bin_count.values)])
avg_value_client_fig.update_layout(
    title='Average Event Value for Each Client by Quantile',
    title_x=0.5,
    xaxis_title='Average Event Value (Bin)',
    yaxis_title='Number of Counts',
)
file_path = os.path.join(PDF_PATH, "Average_Event_Value_by_Bins_of_Quantile.pdf")
pio.write_image(avg_value_client_fig, file_path)

# Event Distribution by Bin of Quantile
# Every agreement gets (a) its per-customer rank by opening date and (b) a
# quantile bin of its opening amount (up to 20 bins, duplicate edges dropped).
# The chart stacks event counts per bin with one series per rank.
cust_filtered_df = filtered_df.sort_values(by=[CUST_ID_COL, START_DATE_COL]).copy()
cust_filtered_df['rank'] = (
    cust_filtered_df.groupby(CUST_ID_COL)[START_DATE_COL].rank(ascending=True, method='first')
)
cust_filtered_df['Bin'] = pd.qcut(cust_filtered_df[OPENING_AMOUNT_COL], q=20, duplicates='drop')

fig_distribution = go.Figure()
for group in sorted(cust_filtered_df['rank'].unique()):
    rank_bins = cust_filtered_df.loc[cust_filtered_df['rank'] == group, 'Bin']
    bin_count = rank_bins.value_counts().sort_index()
    fig_distribution.add_trace(
        go.Bar(
            x=list(map(str, bin_count.index)),
            y=bin_count.values,
            name=str(int(group)) + 'Agreement',
        )
    )

fig_distribution.update_layout(
    title='Event Value Distribution by Quantile',
    title_x=0.5,
    xaxis_title='Event Value (Bin)',
    yaxis_title='Event Count',
    barmode='stack',
)
file_path = os.path.join(PDF_PATH, "Event_Value_Distribution_by_Bins_of_Quantile.pdf")
pio.write_image(fig_distribution, file_path)

# Cumulative Event Value
# Sum opening amounts per distinct opening date, then take the running total
# across dates in ascending order.
filtered_df_total_event = filtered_df.sort_values(START_DATE_COL)
daily_total = filtered_df_total_event.groupby(START_DATE_COL)[OPENING_AMOUNT_COL].sum()
rolling_cumsum = daily_total.expanding().sum()

# x: the distinct opening dates in ascending order, taken from the frame
# already sorted above.
distinct_dates = filtered_df_total_event[START_DATE_COL].unique()
fig_total_event_value = go.Figure(data=go.Bar(x=distinct_dates, y=rolling_cumsum))
fig_total_event_value.update_layout(
    title='Cumulative Event Value',
    title_x=0.5,
    xaxis_title='Date',
    yaxis_title='Cumulative Event Value',
)
file_path = os.path.join(PDF_PATH, "Cumulative_Event_Value.pdf")
pio.write_image(fig_total_event_value, file_path)

""" # Box Chart by Month By Specific Product
for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]
    box_chart = go.Figure(data=go.Box(x=filtered_df_new[START_DATE_COL].dt.strftime('%Y-%m'), y=filtered_df_new[OPENING_AMOUNT_COL], boxmean=True, name='Bar Plot'))
    box_chart.update_layout(
        xaxis_title='Month',
        yaxis_title='Event Value',
        title=f'Average Event Value of Product {product} vs Time',
        title_x=0.5
    )
    box_chart.update_xaxes(dtick="M1", tickformat="%b\n%Y")
    box_chart.add_trace(go.Scatter(x=filtered_df_new.groupby(filtered_df_new[START_DATE_COL].dt.strftime('%Y-%m'))[OPENING_AMOUNT_COL].mean().index, 
                                        y=filtered_df_new.groupby(filtered_df_new[START_DATE_COL].dt.strftime('%Y-%m'))[OPENING_AMOUNT_COL].mean().values,
                                        name='Mean'))
    file_path = os.path.join(PDF_PATH, f"Average_Event_Value_Product_{product}_by_Month.pdf")
    pio.write_image(box_chart, file_path)

# Box Chart by Year By Specific Product
for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]
    box_chart = go.Figure(data=go.Box(x=filtered_df_new[START_DATE_COL].dt.year, y=filtered_df_new[OPENING_AMOUNT_COL], boxmean=True, name='Bar Plot'))
    box_chart.update_layout(
        xaxis_title='Year',
        yaxis_title='Event Value',
        title=f'Average Event Value of Product {product} vs Time',
        title_x=0.5
    )
    box_chart.update_xaxes(dtick="Y1", tickformat="%b\n%Y")
    box_chart.add_trace(go.Scatter(x=filtered_df_new.groupby(filtered_df_new[START_DATE_COL].dt.year)[OPENING_AMOUNT_COL].mean().index, 
                                        y=filtered_df_new.groupby(filtered_df_new[START_DATE_COL].dt.year)[OPENING_AMOUNT_COL].mean().values,
                                        name='Mean'))
    file_path = os.path.join(PDF_PATH, f"Average_Event_Value_Product_{product}_by_Year.pdf")
    pio.write_image(box_chart, file_path)

# Box Chart by Week By Specific Product
for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]
    week_df = filtered_df_new.sort_values(START_DATE_COL)
    box_chart = go.Figure(data=go.Box(x=week_df[START_DATE_COL].dt.strftime('%Y-%W'), y=filtered_df_new[OPENING_AMOUNT_COL], boxmean=True, name='Bar Plot'))
    box_chart.update_layout(
        xaxis_title='Week',
        yaxis_title='Event Value',
        title=f'Average Event Value of Product {product} vs Time',
        title_x=0.5,
        xaxis={'type': 'category'}
    )
    box_chart.update_xaxes(dtick=None, tickformat=None)
    box_chart.add_trace(go.Scatter(x=filtered_df_new.groupby(week_df[START_DATE_COL].dt.strftime('%Y-%W'))[OPENING_AMOUNT_COL].mean().index, 
                                        y=filtered_df_new.groupby(week_df[START_DATE_COL].dt.strftime('%Y-%W'))[OPENING_AMOUNT_COL].mean().values,
                                        name='Mean'))
    file_path = os.path.join(PDF_PATH, f"Average_Event_Value_Product_{product}_by_Week.pdf")
    pio.write_image(box_chart, file_path)

# Box Chart by Day of Week By Specific Product
day_sequence = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday','Sunday']
for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]
    fig_day_week = go.Figure(data=go.Box(x=filtered_df_new[START_DATE_COL].dt.day_name(), y=filtered_df_new[OPENING_AMOUNT_COL], boxmean=True))
    fig_day_week.update_layout(
        title=f'Average Event Value of Product {product} by Day of Week',
        xaxis_title='Day of Week',
        yaxis_title='Event Value',
        title_x=0.5,
        xaxis={'categoryorder': 'array', 'categoryarray': day_sequence}
    )
    file_path = os.path.join(PDF_PATH, f"Average_Event_Value_Product_{product}_by_Day_of_Week.pdf")
    pio.write_image(fig_day_week, file_path)

for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]
    events_count_day_week = filtered_df_new[START_DATE_COL].dt.day_name().value_counts().reindex(day_sequence, fill_value=0)
    fig_day_week = go.Figure(data=go.Bar(x=events_count_day_week.index, y= events_count_day_week.values))
    fig_day_week.update_layout(
        title=f'Event Count of Product {product} by Day of Week',
        xaxis_title='Day of Week',
        yaxis_title='Event Count',
        title_x=0.5,
        xaxis={'categoryorder': 'array', 'categoryarray': day_sequence}
    )
    file_path = os.path.join(PDF_PATH, f"Event_Count_Product_{product}_by_Day_of_Week.pdf")
    pio.write_image(fig_day_week, file_path)

# Box Chart by Day of Month By Specific Product
for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]
    fig_day_month = go.Figure(data=go.Box(x=filtered_df_new[START_DATE_COL].dt.day, y=filtered_df_new[OPENING_AMOUNT_COL], boxmean=True))
    fig_day_month.update_layout(
        title=f'Average Event Value of Product {product} by Day of Month',
        xaxis_title='Day of Month',
        yaxis_title='Event Value',
        title_x=0.5,
        xaxis={'categoryorder': 'array', 'categoryarray': [str(i) for i in range(1, 32)]}
    )
    file_path = os.path.join(PDF_PATH, f"Average_Event_Value_Product_{product}_by_Day_of_Month.pdf")
    pio.write_image(fig_day_month, file_path)

for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]
    events_count_day_month = filtered_df[START_DATE_COL].dt.day.value_counts().reindex(fill_value=0)
    fig_day_month = go.Figure(data=go.Bar(x=events_count_day_month.index, y= events_count_day_month.values))
    fig_day_month.update_layout(
        title=f'Event Count of Product {product} by Day of Month',
        xaxis_title='Day of Month',
        yaxis_title='Event Count',
        title_x=0.5,
        xaxis={'categoryorder': 'array', 'categoryarray': [str(i) for i in range(1, 32)]}
    )
    file_path = os.path.join(PDF_PATH, f"Event_Count_Product_{product}_by_Day_of_Month.pdf")
    pio.write_image(fig_day_month, file_path)

# Box Chart by Month of Year By Specific Product
month_sequence = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]
    fig_month_year = go.Figure(data=go.Box(x=filtered_df_new[START_DATE_COL].dt.month_name(), y=filtered_df_new[OPENING_AMOUNT_COL], boxmean=True))
    fig_month_year.update_layout(
        title=f'Average Event Value of Product {product} by Month of Year',
        xaxis_title='Month',
        yaxis_title='Event Value',
        title_x=0.5,
        xaxis={'categoryorder': 'array', 'categoryarray': month_sequence}
    )
    file_path = os.path.join(PDF_PATH, f"Average_Event_Value_Product_{product}_by_Month_of_Year.pdf")
    pio.write_image(fig_month_year, file_path)

for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]
    events_count_month_year = filtered_df_new[START_DATE_COL].dt.month_name().value_counts().reindex(month_sequence, fill_value=0)
    fig_month_year = go.Figure(data=go.Bar(x=events_count_month_year.index, y= events_count_month_year.values))
    fig_month_year.update_layout(
        title=f'Event Count of Product {product} by Month of Year',
        xaxis_title='Month',
        yaxis_title='Event Count',
        title_x=0.5,
        xaxis={'categoryorder': 'array', 'categoryarray': month_sequence}
    )
    file_path = os.path.join(PDF_PATH, f"Event_Count_Product_{product}_by_Month_of_Year.pdf")
    pio.write_image(fig_month_year, file_path)

# Box Chart by Quarter of Year By Specific Product
for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]
    fig_quarter_year = go.Figure(data=go.Box(x=filtered_df_new[START_DATE_COL].dt.quarter, y=filtered_df_new[OPENING_AMOUNT_COL], boxmean=True))
    fig_quarter_year.update_layout(
        title=f'Average Event Value of Product {product} by Quarter of Year',
        xaxis_title='Quarter',
        yaxis_title='Event Value',
        title_x=0.5,
        xaxis={'categoryorder': 'array', 'categoryarray': [1, 2, 3, 4]}
    )
    file_path = os.path.join(PDF_PATH, f"Average_Event_Value_Product_{product}_by_Quarter_of_Year.pdf")
    pio.write_image(fig_quarter_year, file_path)

for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]
    events_count_quarter_year = filtered_df_new[START_DATE_COL].dt.quarter.value_counts().reindex(fill_value=0)
    fig_quarter_year = go.Figure(data=go.Bar(x=events_count_quarter_year.index, y= events_count_quarter_year.values))
    fig_quarter_year.update_layout(
        title=f'Event Count of Product {product} by Quarter of Year',
        xaxis_title='Quarter',
        yaxis_title='Event Count',
        title_x=0.5,
        xaxis={'categoryorder': 'array', 'categoryarray': [1, 2, 3, 4]}
    )
    file_path = os.path.join(PDF_PATH, f"Event_Count_Product_{product}_by_Quarter_of_Year.pdf")
    pio.write_image(fig_quarter_year, file_path)

# Avg Event Value by Quantile By Specific Product
for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]
    temp = filtered_df_new.groupby(CUST_ID_COL)[OPENING_AMOUNT_COL].mean()
    temp_df = pd.DataFrame(temp)
    temp_df['Bin'] = pd.qcut(temp_df[OPENING_AMOUNT_COL], q=20, duplicates='drop')
    bin_count = temp_df['Bin'].value_counts().sort_index()
    x = bin_count.index
    y = bin_count.values

    x_ticktext = [str(interval) for interval in x]

    avg_value_client_fig = go.Figure(data=[go.Bar(x=x_ticktext, y=y)])
    avg_value_client_fig.update_layout(
        title=f'Average Event Value of Product {product} for Each Client by Quantile',
        xaxis_title='Average Event Value (Bin)',
        yaxis_title='Number of Counts',
        title_x=0.5
    )
    file_path = os.path.join(PDF_PATH, f"Average_Event_Value_Product_{product}_by_Bins_of_Quantile.pdf")
    pio.write_image(avg_value_client_fig, file_path)

# Event Distribution by Bin of Quantile By Specific Product
for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]
    cust_filtered_df = filtered_df_new.sort_values(by=[CUST_ID_COL, START_DATE_COL]).copy()
    cust_filtered_df['rank'] = cust_filtered_df.groupby(CUST_ID_COL)[START_DATE_COL].rank(ascending=True, method='first')
    cust_filtered_df['Bin'] = pd.qcut(cust_filtered_df[OPENING_AMOUNT_COL], q=20, duplicates='drop') 

    fig_distribution = go.Figure()
    for group in sorted(cust_filtered_df['rank'].unique()):
        bin_count = cust_filtered_df[cust_filtered_df['rank']==group]['Bin'].value_counts().sort_index()
        x = bin_count.index
        x_ticktext = [str(interval) for interval in x]
        y = bin_count.values
        name = str(int(group)) + 'Agreement'
        fig_distribution.add_trace(go.Bar(x=x_ticktext, y=y, name=name))

    fig_distribution.update_layout(
        title=f'Event Value Distribution of Product {product} by Quantile',
        xaxis_title='Event Value (Bin)',
        yaxis_title='Event Count',
        title_x=0.5,
        barmode='stack')
    file_path = os.path.join(PDF_PATH, f"Event_Value_Distribution_Product_{product}_by_Bins_of_Quantile.pdf")
    pio.write_image(fig_distribution, file_path)

# Cumulative Event Value By Specific Product
for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]
    filtered_df_total_event = filtered_df_new.sort_values(START_DATE_COL)
    rolling_cumsum = filtered_df_total_event.groupby(START_DATE_COL)[OPENING_AMOUNT_COL].sum().expanding().sum()

    fig_total_event_value = go.Figure(data=go.Bar(x=filtered_df.sort_values(START_DATE_COL)[START_DATE_COL].unique(), y=rolling_cumsum))
    fig_total_event_value.update_layout(
        title=f'Cumulative Event Value of Product {product}',
        xaxis_title='Date',
        yaxis_title='Cumulative Event Value',
        title_x=0.5
    )
    file_path = os.path.join(PDF_PATH, f"Cumulative_Event_Value_Product_{product}.pdf")
    pio.write_image(fig_total_event_value, file_path) """

' # Box Chart by Month By Specific Product\nfor product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):\n    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]\n    box_chart = go.Figure(data=go.Box(x=filtered_df_new[START_DATE_COL].dt.strftime(\'%Y-%m\'), y=filtered_df_new[OPENING_AMOUNT_COL], boxmean=True, name=\'Bar Plot\'))\n    box_chart.update_layout(\n        xaxis_title=\'Month\',\n        yaxis_title=\'Event Value\',\n        title=f\'Average Event Value of Product {product} vs Time\',\n        title_x=0.5\n    )\n    box_chart.update_xaxes(dtick="M1", tickformat="%b\n%Y")\n    box_chart.add_trace(go.Scatter(x=filtered_df_new.groupby(filtered_df_new[START_DATE_COL].dt.strftime(\'%Y-%m\'))[OPENING_AMOUNT_COL].mean().index, \n                                        y=filtered_df_new.groupby(filtered_df_new[START_DATE_COL].dt.strftime(\'%Y-%m\'))[OPENING_AMOUNT_COL].mean().values,\n                                        name=\'Mean\'))\n    file_path = os.p

#### Time Difference between Events

In [8]:
def _export_time_diff_histogram(values, nan_count, trace_name, title, file_name):
    """Plot a histogram of day gaps next to a NaN-count bar and save it as a PDF.

    Args:
        values: Day differences to bin (NaN entries are simply not binned).
        nan_count: Height of the companion 'NaN Count' bar.
        trace_name: Legend label of the histogram trace.
        title: Figure title.
        file_name: Name of the PDF written inside ``PDF_PATH``.

    Returns:
        The ``go.Figure`` that was written to disk.
    """
    fig = go.Figure(data=[
        go.Histogram(x=values, name=trace_name),
        # Hidden by default (legend toggle) so it does not distort the histogram's scale.
        go.Bar(x=['-1'], y=[nan_count], name='NaN Count', visible='legendonly'),
    ])
    fig.update_layout(
        title=title,
        xaxis_title='Day Difference',
        yaxis_title='Number of Counts',
        title_x=0.5
    )
    pio.write_image(fig, os.path.join(PDF_PATH, file_name))
    return fig


# Order each customer's events chronologically so that diff() yields the gap
# between consecutive events of the same customer. sort_values already returns
# a new frame, so no extra copy of filtered_df is needed.
filtered_df_cust = filtered_df.sort_values([CUST_ID_COL, START_DATE_COL], ascending=[True, True])
# Gap to the customer's previous event, expressed in (fractional) days;
# the first event of every customer has no predecessor and stays NaN.
filtered_df_cust['time_diff'] = (
    filtered_df_cust.groupby(CUST_ID_COL)[START_DATE_COL].diff() / np.timedelta64(1, "D")
)
# keep=False removes every customer that appears more than once, so this
# counts only customers with a single event (whose only time_diff is NaN).
nan_count = filtered_df_cust.drop_duplicates(subset=CUST_ID_COL, keep=False)['time_diff'].isna().sum()

time_diff_by_client = filtered_df_cust.groupby(CUST_ID_COL)['time_diff']

# Average time diff between events for each client.
# The per-client mean is plotted (not every raw gap) so the figure matches its
# title and is aggregated the same way as the Max/Min figures below.
client_avg_time_fig = _export_time_diff_histogram(
    time_diff_by_client.mean(),
    nan_count,
    'Average Time Difference',
    'Average Time Difference Between Events for Each Client',
    'Average_Time_Difference_Between_Events.pdf',
)

# Max Time Difference
client_max_time_fig = _export_time_diff_histogram(
    time_diff_by_client.max(),
    nan_count,
    'Max Time Difference',
    'Max Time Difference Between Events for Each Client',
    'Max_Time_Difference_Between_Events.pdf',
)

# Min Time Difference
client_min_time_fig = _export_time_diff_histogram(
    time_diff_by_client.min(),
    nan_count,
    'Min Time Difference',
    'Min Time Difference Between Events for Each Client',
    'Min_Time_Difference_Between_Events.pdf',
)

# Backward compatibility: this name used to be reused for both figures and
# ended up bound to the Min figure.
client_maxmin_time_fig = client_min_time_fig

# Disabled per-product variant of the time-difference plots: the code below is
# wrapped in a bare string literal, so none of it executes.
# For every value of PRODUCT_TYPE it would recompute 'time_diff' on that
# product's rows and write three PDFs (average / max / min) into PDF_PATH.
# NOTE(review): if re-enabled as-is it rebinds `filtered_df_cust` and
# `nan_count`, replacing the all-product values computed earlier in this cell.
# NOTE(review): as the last expression of the cell, this string appears to be
# echoed as the cell output; consider deleting it or turning it into a function.
""" for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):
    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]
    filtered_df_cust = filtered_df_new.copy().sort_values([CUST_ID_COL, START_DATE_COL], ascending=[True, True])
    filtered_df_cust['time_diff'] = filtered_df_cust.groupby(CUST_ID_COL)[START_DATE_COL].diff()
    filtered_df_cust['time_diff'] = filtered_df_cust['time_diff'] /np.timedelta64(1,"D")
    nan_count = filtered_df_cust.drop_duplicates(subset=CUST_ID_COL, keep=False)['time_diff'].isna().sum() #  count only customers with one CD

    # Average time diff between events for each client
    histogram_trace = go.Histogram(x=filtered_df_cust['time_diff'], name='Time Difference')
    nan_count_trace = go.Bar(x=['-1'], y=[nan_count], name='NaN Count', visible='legendonly')
    client_avg_time_fig = go.Figure(data=[histogram_trace, nan_count_trace])

    client_avg_time_fig.update_layout(
        title=f'Average Time Difference Between Events of Product {product} for Each Client',
        xaxis_title='Day Difference',
        yaxis_title='Number of Counts',
        title_x=0.5
    )
    file_path = os.path.join(PDF_PATH, f"Average_Time_Difference_Between_Events_Product_{product}.pdf")
    pio.write_image(client_avg_time_fig, file_path)

    # Max Time Difference
    histogram_trace = go.Histogram(x=filtered_df_cust.groupby(CUST_ID_COL)['time_diff'].max(), name='Max Time Difference')
    nan_count_trace = go.Bar(x=['-1'], y=[nan_count], name='NaN Count', visible='legendonly')
    client_maxmin_time_fig = go.Figure(data=[histogram_trace, nan_count_trace])
    client_maxmin_time_fig.update_layout(
    title=f'Max Time Difference Between Events of Product {product} for Each Client',
    xaxis_title='Day Difference',
    yaxis_title='Number of Counts',
    title_x=0.5)
    file_path = os.path.join(PDF_PATH, f"Max_Time_Difference_Between_Events_Product_{product}.pdf")
    pio.write_image(client_maxmin_time_fig, file_path)

    # Min Time Difference
    histogram_trace = go.Histogram(x=filtered_df_cust.groupby(CUST_ID_COL)['time_diff'].min(), name='Min Time Difference')
    nan_count_trace = go.Bar(x=['-1'], y=[nan_count], name='NaN Count', visible='legendonly')
    client_maxmin_time_fig = go.Figure(data=[histogram_trace, nan_count_trace])
    client_maxmin_time_fig.update_layout(
    title=f'Min Time Difference Between Events of Product {product} for Each Client',
    xaxis_title='Day Difference',
    yaxis_title='Number of Counts',
    title_x=0.5)
    file_path = os.path.join(PDF_PATH, f"Min_Time_Difference_Between_Events_Product_{product}.pdf")
    pio.write_image(client_maxmin_time_fig, file_path) """

' for product in tqdm(sorted(filtered_df[PRODUCT_TYPE].unique())):\n    filtered_df_new = filtered_df[filtered_df[PRODUCT_TYPE] == product]\n    filtered_df_cust = filtered_df_new.copy().sort_values([CUST_ID_COL, START_DATE_COL], ascending=[True, True])\n    filtered_df_cust[\'time_diff\'] = filtered_df_cust.groupby(CUST_ID_COL)[START_DATE_COL].diff()\n    filtered_df_cust[\'time_diff\'] = filtered_df_cust[\'time_diff\'] /np.timedelta64(1,"D")\n    nan_count = filtered_df_cust.drop_duplicates(subset=CUST_ID_COL, keep=False)[\'time_diff\'].isna().sum() #  count only customers with one CD\n\n    # Average time diff between events for each client\n    histogram_trace = go.Histogram(x=filtered_df_cust[\'time_diff\'], name=\'Time Difference\')\n    nan_count_trace = go.Bar(x=[\'-1\'], y=[nan_count], name=\'NaN Count\', visible=\'legendonly\')\n    client_avg_time_fig = go.Figure(data=[histogram_trace, nan_count_trace])\n\n    client_avg_time_fig.update_layout(\n        title=f\'Average Time