In [1]:
import pandas as pd
import openpyxl
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.table import Table, TableStyleInfo
from openpyxl.styles import Font
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
import re
import os

In [2]:
import warnings
warnings.filterwarnings('ignore', category=UserWarning, module='openpyxl')


## Define helper functions


In [3]:
def clean_column_names(df):
    return df.rename(columns=lambda x: x.strip() if isinstance(x, str) else x)

def auto_adjust_column_width(worksheet):
    for column in worksheet.columns:
        max_length = 0
        column_letter = get_column_letter(column[0].column)
        for cell in column:
            try:
                if len(str(cell.value)) > max_length:
                    max_length = len(cell.value)
            except:
                pass
        adjusted_width = (max_length + 2) * 1.2
        worksheet.column_dimensions[column_letter].width = adjusted_width

def format_as_table(worksheet, data_range, style='TableStyleMedium9'):
    table_name = re.sub(r'\W+', '', worksheet.title)
    table_name = f"Table_{table_name}"
    
    table = Table(displayName=table_name, ref=data_range)
    style = TableStyleInfo(name=style, showFirstColumn=False,
                           showLastColumn=False, showRowStripes=True, showColumnStripes=False)
    table.tableStyleInfo = style
    
    worksheet.add_table(table)

def add_total_row(worksheet):
    last_row = worksheet.max_row
    last_col = worksheet.max_column
    
    # Find the 'Amount' column
    amount_col = None
    for col in range(1, last_col + 1):
        if worksheet.cell(row=1, column=col).value == 'Amount':
            amount_col = col
            break
    
    if amount_col is not None:
        # Add 'Total' in the first column of the last row
        worksheet.cell(row=last_row + 1, column=1, value='Total')
        
        # Add sum formula in the 'Amount' column
        sum_formula = f'=SUM({get_column_letter(amount_col)}2:{get_column_letter(amount_col)}{last_row})'
        worksheet.cell(row=last_row + 1, column=amount_col, value=sum_formula)
        
        # Make the total row bold
        for col in range(1, last_col + 1):
            worksheet.cell(row=last_row + 1, column=col).font = Font(bold=True)


In [4]:
# Read the Excel files
file1_path = os.path.join("AR_Files", "AR_updated.xlsx")
file2_path = os.path.join("AR_Files", "PC_Overview_AR.xlsx")
new_file_path = os.path.join("AR_Files", "AR_Analysis.xlsx")

# Read PO Amount By Category sheet
df1 = pd.read_excel(file1_path, sheet_name="last_updated")

# Read AR & AP Real 0804 sheet
df2 = pd.read_excel(file2_path, sheet_name="AR & AP Real 0804")

df2 = clean_column_names(df2)

In [5]:
# Convert 'PO #' to string in both dataframes
df1['PO #'] = df1['PO #'].astype(str)
df2['tsmc PO #'] = df2['tsmc PO #'].astype(str)

# Rename columns in df2 to match df1
df2 = df2.rename(columns={
    'tsmc PO #': 'PO #',
    'System': 'Main Page',  # Note the space at the end to match df1
    'PO Amount': 'Total Contract $'  # New mapping
})

# Filter df2 to include only 'Base Build' type
df2_filtered = df2[df2['TSMC Depart'] == '新工']

# Identify new PO numbers
new_pos = set(df2_filtered['PO #']) - set(df1['PO #'])

# Create a new dataframe for updated data
df_updated = df1.copy()

# Create a dataframe for new entries
df_new_entries = pd.DataFrame(columns=df_updated.columns)

# Update existing entries and collect new ones
for index, row in df2_filtered.iterrows():
    if row['PO #'] in df_updated['PO #'].values:
        # Update existing entry
        mask = df_updated['PO #'] == row['PO #']
        for col in ['Main Page', 'Project #', 'Project Name', 'Total Contract $']:
            if col in df2_filtered.columns and col in df_updated.columns:
                if pd.notna(row[col]) and (pd.isna(df_updated.loc[mask, col]).any() or df_updated.loc[mask, col].iloc[0] == ''):
                    df_updated.loc[mask, col] = row[col]
    elif row['PO #'] in new_pos:
        # Collect new entry
        new_row = pd.DataFrame([row[['PO #', 'Main Page', 'Project #', 'Project Name', 'Total Contract $']]])
        df_new_entries = pd.concat([df_new_entries, new_row], ignore_index=True)

# Sort only the new entries by PO #
df_new_entries = df_new_entries.sort_values('PO #')

# Concatenate the original (updated) dataframe with the sorted new entries
df_updated = pd.concat([df_updated, df_new_entries], ignore_index=True)

# Write all sheets to the new Excel file
with pd.ExcelWriter(new_file_path, engine='openpyxl') as writer:
    df1.to_excel(writer, sheet_name="PO Amount By Category", index=False)
    df2.to_excel(writer, sheet_name="AR & AP Real 0804", index=False)
    df_updated.to_excel(writer, sheet_name="Updated PO Data", index=False)

    # Get the workbook to apply formatting
    workbook = writer.book

    for sheet_name in workbook.sheetnames:
        worksheet = workbook[sheet_name]
        auto_adjust_column_width(worksheet)
        format_as_table(worksheet, f"A1:{get_column_letter(worksheet.max_column)}{worksheet.max_row}")
        if sheet_name == "Updated PO Data":
            add_total_row(worksheet)

print(f"Updated file saved: {new_file_path}")
print(f"Number of new PO numbers added: {len(new_pos)}")

# Print the columns of df_updated to verify
print("\nColumns in Updated PO Data:")
print(df_updated.columns.tolist())

  df_updated = pd.concat([df_updated, df_new_entries], ignore_index=True)


Updated file saved: AR_Files/AR_Analysis.xlsx
Number of new PO numbers added: 0

Columns in Updated PO Data:
['Type', 'Project #', 'Project Name', 'PO #', 'Total Contract $', 'Main Page', 'CO/Added', 'Amy PO ']


## Add Updated PO Data to AR_updated

In [6]:
def update_ar_sheet():
    # File paths
    source_file = os.path.join("AR_Files", "AR_Analysis.xlsx")
    dest_file = os.path.join("AR_Files", "AR_updated.xlsx")
    
    # Read both files
    updated_po_data = pd.read_excel(source_file, sheet_name='Updated PO Data')
    ar_data = pd.read_excel(dest_file, sheet_name='last_updated')
    
    # Convert PO numbers to string in both dataframes
    updated_po_data['PO #'] = updated_po_data['PO #'].astype(str)
    ar_data['PO #'] = ar_data['PO #'].astype(str)
    
    # Find new rows
    new_rows = updated_po_data[~updated_po_data['PO #'].isin(ar_data['PO #'])]
    
    # Show information and get confirmation
    print(f"\nFound {len(new_rows)} new rows to add to last_updated sheet.")
    if len(new_rows) > 0:
        print("\nSample PO numbers to be added:")
        sample_size = min(5, len(new_rows))  # Show up to 5 samples
        for po in new_rows['PO #'].head(sample_size):
            print(f"- {po}")
        
        confirm = input("\nDo you want to proceed with updating the last_updated sheet? (yes/no): ").lower()
        
        if confirm != 'yes':
            print("Update cancelled.")
            return
        
        # Proceed with update if confirmed
        with pd.ExcelWriter(dest_file, engine='openpyxl', mode='a', if_sheet_exists='overlay') as writer:
            # Append new rows to existing data
            updated_ar = pd.concat([ar_data, new_rows], ignore_index=True)
            
            # Write back to last_updated sheet
            updated_ar.to_excel(writer, sheet_name='last_updated', index=False)
            
            print(f"\nSuccessfully added {len(new_rows)} new rows to last_updated sheet")
    else:
        print("No new rows found to add.")

if __name__ == "__main__":
    print("Starting AR sheet update process...")
    update_ar_sheet()

Starting AR sheet update process...

Found 0 new rows to add to last_updated sheet.
No new rows found to add.
