In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import openpyxl
from openpyxl.drawing.image import Image
from openpyxl import Workbook
import seaborn as sns
import xlsxwriter
from io import BytesIO
from tabulate import tabulate
import datetime as dt
import os

In [2]:
data = pd.read_excel('Weekly Compliance_CPR 00-00-Item.xlsx')
LLE = pd.read_excel('LLE.xlsx')
SAP =pd.read_csv('SAP_DATA_LOOKUP_202408271113.csv')
df_cleaned = data
active_SAP = SAP[SAP['TEXT_EMP_STATUS'] == 'Active']

In [3]:
columns_to_strip = ['LLE', 'E1', 'E2', 'E3', 'E4']
# Stripping trailing and leading spaces from the specified columns
LLE[columns_to_strip] = LLE[columns_to_strip].apply(lambda col: col.str.strip())

active_SAP['Full Name'] = active_SAP.apply(
    lambda row: f"{row['LAST_NAME']}, {row['FIRST_NAME']}" + 
                (f" {row['MIDDLE_NAME'][0]}" if pd.notna(row['MIDDLE_NAME']) else ""),
    axis=1
).str.strip()  # Removes any trailing spaces


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  active_SAP['Full Name'] = active_SAP.apply(


In [4]:
df_cleaned['Perner'] = df_cleaned['Perner'].astype(float).astype(int)
LLE['CUST_PERNER'] = LLE['CUST_PERNER'].astype(float).astype(int)
SAP['PERNR'] = SAP['PERNR'].astype(float).astype(int)

In [5]:
merged_1st_step = df_cleaned.merge(LLE, left_on='Perner', right_on='CUST_PERNER', how='left')

In [6]:
merged_2nd_step = merged_1st_step.merge(
    active_SAP[['INTERNET_MAIL_ADDRESS02', 'Full Name']].rename(
        columns={'INTERNET_MAIL_ADDRESS02': 'LLE_EMAIL', 'Full Name': 'LLE_FULL_NAME'}
    ),
    left_on='LLE', 
    right_on='LLE_FULL_NAME', 
    how='left'
)

In [7]:
merged_final = merged_2nd_step.merge(
    active_SAP[['INTERNET_MAIL_ADDRESS02', 'Full Name']].rename(
        columns={'INTERNET_MAIL_ADDRESS02': 'E2_EMAIL', 'Full Name': 'E2_FULL_NAME'}
    ),
    left_on='E2', 
    right_on='E2_FULL_NAME', 
    how='left'
)

In [8]:
#merged_final['Required By Date'] = pd.to_datetime(merged_final['Required By Date'])
#merged_final['Completion Date'] = pd.to_datetime(merged_final['Completion Date'])

In [9]:
def sanitize_sheet_name(name):
    """Truncate the sheet name to be 31 characters or fewer."""
    return name[:31]

def reorder_status(data):
    """Reorder the index of a series or dataframe based on the desired order of Learner Item Status."""
    status_order = ['Current', 'Coming Due', 'Overdue']
    
    if isinstance(data, pd.Series):
        return data.reindex(status_order, fill_value=0)
    elif isinstance(data, pd.DataFrame):
        return data.reindex(columns=status_order)
    return data

def create_combined_chart(bar_data, pie_data, title, color_palette, breakdown_table):
    """Create a combined chart with a bar plot, pie chart, and breakdown table."""
    bar_data = reorder_status(bar_data)
    pie_data = reorder_status(pie_data)
    
    # Reorder the breakdown table without filling missing categories with zeros
    breakdown_table = breakdown_table.set_index('Learner Item Status').reindex(['Current', 'Coming Due', 'Overdue']).fillna(0).reset_index()

    fig, ax = plt.subplots(1, 2, figsize=(14, 7))
    
    # Bar Chart
    sns.barplot(x=bar_data.index, y=bar_data.values, palette=color_palette, ax=ax[0])
    ax[0].set_title(title, fontsize=16)
    ax[0].set_ylabel('Count')
    ax[0].set_xlabel('Learner Item Status')
    ax[0].set_ylim(0, max(bar_data.values) * 1.2)  # Normalize the bar height
    for i, v in enumerate(bar_data.values):
        ax[0].text(i, v + 0.05 * max(bar_data.values), str(v), ha='center')

    # Pie Chart
    def custom_autopct(pct):
        return ('%1.1f%%' % pct) if pct < 100 else ''  # Don't show label for 100%

    wedges, texts, autotexts = ax[1].pie(pie_data.values, labels=pie_data.index, autopct=custom_autopct, colors=color_palette)
    for text in texts + autotexts:
        text.set_fontsize(10)
    ax[1].set_title('Percentage Breakdown', fontsize=16)

    # Add Breakdown Table using tabulate
    table_str = tabulate(breakdown_table, headers='keys', tablefmt='grid', showindex=False)
    fig.text(0.5, -0.25, table_str, ha='center', fontsize=12, wrap=True)

    # Save the combined chart to a BytesIO object
    chart_stream = BytesIO()
    plt.savefig(chart_stream, format='png', bbox_inches='tight')
    plt.close()
    chart_stream.seek(0)
    return chart_stream

def create_org_breakdown_chart(data, title, color_palette):
    """Create a side-by-side bar chart for Org Breakdown with E2 on the x-axis and percentage breakdown."""
    if data.empty:
        return None  # Skip if data is empty

    data = reorder_status(data)
    
    # Calculate the percentages for each status
    total_counts = data.sum(axis=1)
    percentages = data.divide(total_counts, axis=0) * 100

    num_vps = len(data.index)
    
    # Dynamically increase figure size to accommodate more VPs
    fig_width = max(12, num_vps * 0.7)  # Dynamic figure width
    fig, ax = plt.subplots(figsize=(fig_width, 8))  # Adjust figure width dynamically
    
    # Plot the bar chart
    bar_width = 0.8  # Keep the bar width consistent
    data.plot(kind='bar', color=color_palette, ax=ax, width=bar_width)
    
    ax.set_title(title, fontsize=16)
    ax.set_ylabel('Count')
    ax.set_xlabel('E2 (VPs)')

    # Rotate x-axis labels to prevent overlap
    plt.xticks(rotation=45, ha='right', fontsize=10)

    # Add only percentages on the bars (reduce font size, remove decimal points)
    for i, container in enumerate(ax.containers):
        for j, bar in enumerate(container):
            percent = percentages.iloc[j, i]
            if bar.get_height() > 0:  # Only add labels for non-zero bars
                # Adjust label positioning based on the height of the bar
                ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.03 * total_counts.max(), f'{int(percent)}%', ha='center', va='bottom', fontsize=8)

    ax.legend(title='Learner Item Status', bbox_to_anchor=(1.05, 1), loc='upper left')
    
    # Ensure the chart stays within the bounds
    ax.set_ylim(0, total_counts.max() * 1.2)

    # Save the org breakdown chart to a BytesIO object
    chart_stream = BytesIO()
    plt.savefig(chart_stream, format='png', bbox_inches='tight')
    plt.close()
    chart_stream.seek(0)
    return chart_stream

def format_excel_sheet(workbook, worksheet, df, start_row=0, start_col=0):
    """Format an Excel sheet with pandas DataFrame, handling NaT and NaN."""
    header_format = workbook.add_format({'bold': True, 'bg_color': '#FFC0CB', 'border': 1})
    cell_format = workbook.add_format({'border': 1})
    
    # Convert all datetime columns to strings
    for col in df.columns:
        if pd.api.types.is_datetime64_any_dtype(df[col]):
            df[col] = df[col].astype(str).replace('NaT', '')

    # Replace any remaining NaN values with empty strings
    df = df.fillna('')

    for col_num, value in enumerate(df.columns.values):
        worksheet.write(start_row, start_col + col_num, value, header_format)
        
    for row_num, row_data in enumerate(df.values):
        for col_num, cell_value in enumerate(row_data):
            worksheet.write(start_row + 1 + row_num, start_col + col_num, cell_value, cell_format)

def generate_reports(merged_final):
    # List of unique E2 executives
    executives = merged_final['E2_FULL_NAME'].unique()
    
    # Define the final color palette
    color_palette = ['#4CAF50', '#FFC107', '#F44336']  # Current, Coming Due, Overdue

    # Report Path Directory
    save_directory = r'J:\Data\RMSA Analysis\0 - Team Working Files\Reg Compliance W VP Breakdown'
    
    # Ensure the directory exists
    if not os.path.exists(save_directory):
        os.makedirs(save_directory)
    
    # Iterate over each executive to create their specific report
    for exec_name in executives:
        exec_data = merged_final[merged_final['E2_FULL_NAME'] == exec_name]
        
        # Skip if exec_data is empty
        if exec_data.empty:
            print(f"No data found for executive: {exec_name}. Skipping report generation.")
            continue
        
        ba_text = exec_data['BA Text'].iloc[0]  # Assuming each exec belongs to one BA
        e3_name = exec_data['E3'].iloc[0]  # Using the correct column name for E3
        
        # Define the path to save the Excel file
        save_path = os.path.join(save_directory, f'{exec_name}_Report.xlsx')
        
        # Create an Excel file for each executive
        with pd.ExcelWriter(save_path, engine='xlsxwriter') as writer:
            workbook = writer.book
            
            # Tab 1: BA Overview
            ba_data = merged_final[merged_final['BA Text'] == ba_text]
            ba_summary = ba_data['Learner Item Status'].value_counts()
            ba_pie_data = ba_summary / ba_summary.sum()
            ba_breakdown_table = pd.DataFrame({
                'Learner Item Status': ba_summary.index,
                'Count': ba_summary.values,
                'Percentage': (ba_summary.values / ba_summary.sum() * 100).round(1)
            })
            ba_chart = create_combined_chart(ba_summary, ba_pie_data, f'{ba_text} Overview', color_palette, ba_breakdown_table)
            
            # Create the BA Overview tab
            sheet_name = sanitize_sheet_name(f'{ba_text} Overview')
            worksheet = workbook.add_worksheet(sheet_name)
            worksheet.insert_image('A1', '', {'image_data': ba_chart})
            
            # Tab 2: Executive-specific overview
            exec_summary = exec_data['Learner Item Status'].value_counts()
            exec_pie_data = exec_summary / exec_summary.sum()
            exec_breakdown_table = pd.DataFrame({
                'Learner Item Status': exec_summary.index,
                'Count': exec_summary.values,
                'Percentage': (exec_summary.values / exec_summary.sum() * 100).round(1)
            })
            exec_chart = create_combined_chart(exec_summary, exec_pie_data, f'{exec_name} Overview', color_palette, exec_breakdown_table)
            
            # Create the Executive Overview tab
            exec_overview_tab = sanitize_sheet_name(f'{exec_name} Overview')
            worksheet = workbook.add_worksheet(exec_overview_tab)
            worksheet.insert_image('A1', '', {'image_data': exec_chart})
            
            # Tab 3: VP Breakdown (new tab)
            vp_data = merged_final[merged_final['E3'] == e3_name].groupby(['E2_FULL_NAME', 'Learner Item Status']).size().unstack(fill_value=0)
            
            # Check if there is data for the E3 (VPs under the E3)
            if not vp_data.empty:
                vp_chart = create_org_breakdown_chart(vp_data, 'VP Breakdown', color_palette)
                
                # Only create the worksheet if the chart exists (i.e., if vp_chart is not None)
                if vp_chart:
                    sheet_name = sanitize_sheet_name('VP Breakdown')
                    worksheet = workbook.add_worksheet(sheet_name)
                    worksheet.insert_image('A1', '', {'image_data': vp_chart})
                    format_excel_sheet(workbook, worksheet, vp_data.reset_index(), start_row=20)
            
            # Tab 4: Org Breakdown
            lle_summary = exec_data.groupby(['LLE', 'Learner Item Status']).size().unstack(fill_value=0)
            lle_chart = create_org_breakdown_chart(lle_summary, f'{exec_name} Org Breakdown', color_palette)
            
            sheet_name = sanitize_sheet_name('Org Breakdown')
            worksheet = workbook.add_worksheet(sheet_name)
            worksheet.insert_image('A1', '', {'image_data': lle_chart})
            format_excel_sheet(workbook, worksheet, lle_summary.reset_index(), start_row=20)
            
            # Tab 5: Org_Overdue
            overdue_data = exec_data[exec_data['Learner Item Status'] == 'Overdue']
            if not overdue_data.empty:
                sheet_name = sanitize_sheet_name('Org Overdue')
                overdue_data.to_excel(writer, sheet_name=sheet_name, index=False)
            
            # Tab 6: Org_Coming_Due
            coming_due_data = exec_data[exec_data['Learner Item Status'] == 'Coming Due']
            if not coming_due_data.empty:
                sheet_name = sanitize_sheet_name('Org Coming Due')
                coming_due_data.to_excel(writer, sheet_name=sheet_name, index=False)
            
            # Tab 7: Org_Current
            current_data = exec_data[exec_data['Learner Item Status'] == 'Current']
            if not current_data.empty:
                sheet_name = sanitize_sheet_name('Org Current')
                current_data.to_excel(writer, sheet_name=sheet_name, index=False)

    print(f"Reports generated successfully and saved to {save_directory}.")


In [10]:
generate_reports(merged_final)

No data found for executive: nan. Skipping report generation.
Reports generated successfully and saved to J:\Data\RMSA Analysis\0 - Team Working Files\Reg Compliance W VP Breakdown.


In [11]:
merged_final

Unnamed: 0,Last Name,Legal Name,Perner,REQ TYPE,CONTENT TYPE,Item Type,Item ID,Item Title,Required By Date,Completion Date,...,CUST_PERNER,LLE,E1,E2,E3,E4,LLE_EMAIL,LLE_FULL_NAME,E1_EMAIL,E2_FULL_NAME
0,Aaron,Richard,888876,SAFETY,BLEED,COURSE,4080000,WDW_SAFETY_LIFE BLEEDING CONTROL,2025-12-10,2023-12-11,...,888876.0,"Arrington, LeRoy","Arrington, LeRoy","Ball, Charles A","Bisienere, Maribeth N",,LEROY.ARRINGTON@DISNEY.COM,"Arrington, LeRoy",Adam.Ball@disney.com,"Ball, Charles A"
1,Aaron,Richard,888876,SAFETY,CPR/AED,COURSE,4620979,WDW_REG-SAFETY GLOBAL_LIFE CPR,2025-12-10,2023-12-11,...,888876.0,"Arrington, LeRoy","Arrington, LeRoy","Ball, Charles A","Bisienere, Maribeth N",,LEROY.ARRINGTON@DISNEY.COM,"Arrington, LeRoy",Adam.Ball@disney.com,"Ball, Charles A"
2,Abair,Gary,1363105,SAFETY,BLEED,COURSE,4080000,WDW_SAFETY_LIFE BLEEDING CONTROL,2026-03-20,2024-03-20,...,1363105.0,"Geeslin, Robert C","Geeslin, Robert C","Reid, Linda J",,,BOB.GEESLIN@DISNEY.COM,"Geeslin, Robert C",LINDA.REID@DISNEY.COM,"Reid, Linda J"
3,Abair,Gary,1363105,SAFETY,CPR/AED,COURSE,4620979,WDW_REG-SAFETY GLOBAL_LIFE CPR,2026-03-20,2024-03-20,...,1363105.0,"Geeslin, Robert C","Geeslin, Robert C","Reid, Linda J",,,BOB.GEESLIN@DISNEY.COM,"Geeslin, Robert C",LINDA.REID@DISNEY.COM,"Reid, Linda J"
4,Abalos-Moreo,Velvetann,92042863,SAFETY,BLEED,COURSE,4080000,WDW_SAFETY_LIFE BLEEDING CONTROL,2024-12-14,2022-12-15,...,92042863.0,"Armor, Alison E",,"Armor, Alison E","Bisienere, Maribeth N",,Alison.Armor@disney.com,"Armor, Alison E",Alison.Armor@disney.com,"Armor, Alison E"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9242,Zumwalt,Kayla,1440210,SAFETY,CPR/AED,COURSE,4620979,WDW_REG-SAFETY GLOBAL_LIFE CPR,2024-12-29,2022-12-30,...,1440210.0,"Dubiel, Susan R","Dubiel, Susan R","Riles, Sarah F","Kirk, Jason T",,SUSAN.R.DUBIEL@DISNEY.COM,"Dubiel, Susan R",SARAH.RILES@DISNEY.COM,"Riles, Sarah F"
9243,Zuniga,Marisa,92087890,SAFETY,BLEED,COURSE,4080000,WDW_SAFETY_LIFE BLEEDING CONTROL,2024-11-16,2022-11-17,...,92087890.0,"Geeslin, Robert C","Geeslin, Robert C","Reid, Linda J",,,BOB.GEESLIN@DISNEY.COM,"Geeslin, Robert C",LINDA.REID@DISNEY.COM,"Reid, Linda J"
9244,Zuniga,Marisa,92087890,SAFETY,CPR/AED,COURSE,4620979,WDW_REG-SAFETY GLOBAL_LIFE CPR,2024-11-16,2022-11-17,...,92087890.0,"Geeslin, Robert C","Geeslin, Robert C","Reid, Linda J",,,BOB.GEESLIN@DISNEY.COM,"Geeslin, Robert C",LINDA.REID@DISNEY.COM,"Reid, Linda J"
9245,Zwickl,Jason,1529199,SAFETY,BLEED,COURSE,4080000,WDW_SAFETY_LIFE BLEEDING CONTROL,2023-09-01,NaT,...,1529199.0,"Reid, Linda J",,"Reid, Linda J",,,LINDA.REID@DISNEY.COM,"Reid, Linda J",LINDA.REID@DISNEY.COM,"Reid, Linda J"
