In [19]:
import pandas as pd
import numpy as np
import datetime
import os
import matplotlib.pyplot as plt

#════════════════════════════════════════════════════════════════════════╗
#                          __CHANGE_HERE__                               ║
model_input_directory = 'C:/Users/aaron.eades/OneDrive - Liquid Environmental Solutions/Documents/Data Analysis' # ACCOUNT # and NSD
actual_input_directory = 'C:/Users/aaron.eades/OneDrive - Liquid Environmental Solutions/Documents/Data Analysis' 
model_file_name = 'TALOS021925DALOKC'
actual_file_name = 'TALOS021925DALOKC_output'
minimum_frequency = 6
#                                                                        ║
#════════════════════════════════════════════════════════════════════════╝

In [20]:
def monday_column(df, date_column):
    def get_monday(date):
        if pd.isna(date):
            return np.nan  
        date = pd.to_datetime(date)
        return date - datetime.timedelta(days=date.weekday())    
    df['Monday_Date'] = df[date_column].apply(get_monday)
    return df

def calculate_adjusted_model_nsd_date(df, model_nsd_date_col, frequency_col):
    current_date = pd.to_datetime(datetime.datetime.now().strftime("%Y-%m-%d"))
    def adjust_date(row):
        model_nsd_date = pd.to_datetime(row[model_nsd_date_col])
        frequency = row[frequency_col]
        if pd.isna(frequency):
            frequency = 0 
        if model_nsd_date > current_date:
            return model_nsd_date         
        days_to_add = frequency * 7 
        adjusted_date = model_nsd_date + pd.Timedelta(days=days_to_add)
        return adjusted_date    
    df['adjusted_model_nsd_date'] = df.apply(adjust_date, axis=1)
    return df

def talos(df):
    
    for index, row in df.iterrows():
        frequency = row['FREQ']
        adjusted_model_nsd_date = pd.to_datetime(row['adjusted_model_nsd_date'], errors='coerce')
        actual_nsd_monday = pd.to_datetime(row['Monday_Date'], errors='coerce')
        actual_nsd = pd.to_datetime(row['NSD'], errors='coerce')
        if frequency >= minimum_frequency:
            monday_difference = (actual_nsd_monday - adjusted_model_nsd_date).days
            decay_in_days = (actual_nsd - adjusted_model_nsd_date).days
            df.at[index, 'monday_difference'] = monday_difference
            df.at[index, 'decay_in_days'] = decay_in_days             
            if -14 <= monday_difference <= 14:
                df.at[index, 'stabilizing_nsd'] = row['adjusted_model_nsd_date'] 
    return df

def decay_summary(df):
    summary = df.groupby(['Site', 'LOB']).agg(
        Total=pd.NamedAgg(column='ACCOUNT #', aggfunc='count'),
        Controlled=pd.NamedAgg(column='decay_in_days', aggfunc=lambda x: (x.notna()).sum()),
        Beyond_Control=pd.NamedAgg(column='stabilizing_nsd', aggfunc=lambda x: x.isna().sum()),
    ).reset_index()    
    summary['% Controlled'] = ((summary['Total'] - summary['Beyond_Control']) / summary['Total'] * 100).round(2).astype(str) + ' %'
    summary['% Beyond Control'] = (summary['Beyond_Control'] / summary['Total'] * 100).round(2).astype(str) + ' %'    
    return summary

def controlled_decay(result):
    decay_control_df = result[result['stabilizing_nsd'].notna()]
    decay_counts = decay_control_df.groupby(['Site', 'LOB']).agg(
        **{
            "-2": pd.NamedAgg(column='decay_in_days', aggfunc=lambda x: (x == -2).sum()),
            "-1": pd.NamedAgg(column='decay_in_days', aggfunc=lambda x: (x == -1).sum()),
            "0": pd.NamedAgg(column='decay_in_days', aggfunc=lambda x: (x == 0).sum()),
            "1": pd.NamedAgg(column='decay_in_days', aggfunc=lambda x: (x == 1).sum()),
            "2": pd.NamedAgg(column='decay_in_days', aggfunc=lambda x: (x == 2).sum()),
        }
    ).reset_index()

    decay_counts['Total'] = decay_counts[["-2", "-1", "0", "1", "2"]].sum(axis=1)
    grand_total = decay_counts[["-2", "-1", "0", "1", "2"]].sum().sum()
    decay_counts['Total (%)'] = ((decay_counts[["-2", "-1", "0", "1", "2"]].sum(axis=1) / grand_total) * 100).round(2).astype(str) + ' %'

    grand_total_row = pd.DataFrame({
        'Site': ['Grand Total'],
        'LOB': [''],
        '-2': [decay_counts['-2'].sum()],
        '-1': [decay_counts['-1'].sum()],
        '0': [decay_counts['0'].sum()],
        '1': [decay_counts['1'].sum()],
        '2': [decay_counts['2'].sum()],
        'Total': [decay_counts['Total'].sum()],
        'Total (%)': [100.00]
    })

    decay_counts = pd.concat([decay_counts, grand_total_row], ignore_index=True)
    return decay_counts

In [21]:
model_df = pd.read_excel(os.path.join(model_input_directory, f'{model_file_name}.xlsx'))
actual_df = pd.read_excel(os.path.join(actual_input_directory, f'{actual_file_name}.xlsx'))
model_df['ACCOUNT #'] = model_df['ACCOUNT #'].astype(str)
actual_df['ACCOUNT #'] = actual_df['ACCOUNT #'].astype(str)
model_df.set_index('ACCOUNT #', inplace=True)
actual_df.set_index('ACCOUNT #', inplace=True)
df = actual_df.reset_index().merge(model_df.reset_index(), on='ACCOUNT #', how='left')
df = df.drop([col for col in df.columns if col.endswith('_x')], axis=1, errors='ignore')
df.columns = df.columns.str.replace('_y', '')


df = monday_column(df, 'NSD')
df = calculate_adjusted_model_nsd_date(df, 'Model_NSD_Date', 'FREQ')
df['Model_NSD_Date'] = pd.to_datetime(df['Model_NSD_Date'], errors='coerce')
df['Model_NSD_Date'] = df['Model_NSD_Date'].dt.strftime("%Y-%m-%d")  

result = talos(df)
result['stabilizing_nsd'] = pd.to_datetime(result['stabilizing_nsd'], errors='coerce')
result['stabilizing_nsd'] = result['stabilizing_nsd'].dt.strftime("%Y-%m-%d")  
result['LOB'] = result['LOB'].replace({1: 'GRTS', 2: 'OWS', 3: 'GRIT', 4: 'UCO'})

columns_to_keep = ['ACCOUNT #', 
                   'Cluster ID', 
                   'FREQ', 
                   'Site',
                   'LOB',
                   'Model_NSD_Date',  
                   'adjusted_model_nsd_date', 
                   'NSD',
                   'Monday_Date',
                   'monday_difference',
                   'decay_in_days',
                   'stabilizing_nsd',
                   'LATITUDE', 
                   'LONGITUDE']
result = result[columns_to_keep]

decay_control = result[(result['stabilizing_nsd'].notna()) & (result['monday_difference'] != 0)]
decay_beyond_control = result[(result['monday_difference'].notna()) & (result['stabilizing_nsd'].isna())]

PermissionError: [Errno 13] Permission denied: 'C:/Users/aaron.eades/OneDrive - Liquid Environmental Solutions/Documents/Data Analysis\\TALOS021925DALOKC.xlsx'

In [None]:
decay_summary = decay_summary(result)
print("Decay Analysis :")
print(decay_summary.to_string(index=False))

controlled_decay_analysis = controlled_decay(result)
print("\nControlled Decay Analysis :")
print(controlled_decay_analysis.to_string(index=False))

Decay Analysis :
Site  LOB  Total  Controlled  Beyond_Control % Controlled % Beyond Control
 HOU GRTS   4017        3701             949      76.38 %          23.62 %

Controlled Decay Analysis :
       Site  LOB  -2  -1  0   1   2  Total Total (%)
        HOU GRTS  26   5 90 112 107    340   100.0 %
Grand Total       26   5 90 112 107    340     100.0


In [None]:
# output_file = os.path.join(model_input_directory, f'output_{'decay_control'}.csv')
# decay_control.to_csv(output_file, index=False)
# print(f"File saved to: {output_file}")

# output_file = os.path.join(model_input_directory, f'output_{'decay_beyond_control'}.csv')
# decay_beyond_control.to_csv(output_file, index=False)
# print(f"File saved to: {output_file}")