In [1]:
import openpyxl
import xlwings as xw
import pandas as pd
import shutil
import io
import os
import pandas as pd
import time

In [2]:
user_profile = os.getenv('USERPROFILE') ## Retrieving user profile directory from 'USERPROFILE' environment variable.

In [3]:
# Constructing the base directory path using the user profile directory.
base_dir = os.path.join(
    user_profile, 
    'California Department of Transportation',
    'DOT HQ PMP Cal B C Update - General',
    'Testbed'
)

In [4]:
# CalB/C Available Models 
at_model = os.path.join(base_dir, 'Input', 'CalBC Models', 'AT_model.xlsm')
sketch_model = os.path.join(base_dir, 'Input', 'CalBC Models', 'Sketch_model.xlsm')
if_model = os.path.join(base_dir, 'Input', 'CalBC Models', 'Intermodal_freight_model.xlsm')
pr_model = os.path.join(base_dir, 'Input', 'CalBC Models', 'Park_&_ride_model.xlsm')
corridor_model = os.path.join(base_dir, 'Input', 'CalBC Models', 'Corridor_model.xlsm')

In [5]:
# Function to get both model and template paths
def get_model_and_template(model_file):
    model_name = os.path.splitext(os.path.basename(model_file))[0]
    template_file = os.path.join(base_dir, 'Input', 'Templates', f"{model_name}_template.xlsx")
    return model_file, template_file

In [6]:
# Constructing file directory for output and template files.
output_dir = os.path.join(base_dir, 'Output')

In [7]:
# Function to identify input cells using cell background 
def identify_green_cells(main_file, sheet_names):
    """Identify green-highlighted cells in the specified sheet names and return their cell addresses."""
    green_cells_addresses = {}
    
    with xw.App(visible=False) as app:
        main_wb = app.books.open(main_file)
        print(main_wb.sheet_names)
        
        # Loop through the specified sheet names
        for sheet_name in sheet_names:
            if sheet_name in main_wb.sheet_names:
                sheet = main_wb.sheets[sheet_name]
                green_cells_addresses[sheet_name] = []
                
                # Loop through all the rows in the used range
                for row in sheet.used_range.rows:  
                    for cell in row:  
                        # Check if the cell is highlighted in green (color: #CCFFCC)
                        if cell.color == (204, 255, 204):  

                            # Add the cell's address to the list
                            green_cells_addresses[sheet_name].append(cell.address)
        
        main_wb.close()
    
    return green_cells_addresses



In [8]:
# Specify which model to use
main_file, template_file = get_model_and_template(corridor_model)

# Specify which sheets have inputs for the given model 
sheet_names = ["1) Project Information"] 

In [9]:
#Run the function 
green_cells_addresses = identify_green_cells(main_file, sheet_names)
print(green_cells_addresses)


['Title', 'Instructions', '1) Project Information', '2) Model Inputs', '3) Results', 'Travel Time', 'Consumer Surplus', 'Reliability', 'Vehicle Operating Costs', 'Accident Costs', 'Emissions', 'Final Calculations', 'PARAMETERS']
{'1) Project Information': ['$E$2', '$E$4', '$F$4', '$G$4', '$H$4', '$I$4', '$J$4', '$K$4', '$L$4', '$M$4', '$N$11', '$H$12', '$N$14', '$W$15', '$X$15', '$Y$15', '$AB$15', '$AC$15', '$F$16', '$W$16', '$X$16', '$Y$16', '$AB$16', '$AC$16', '$W$17', '$X$17', '$Y$17', '$AB$17', '$AC$17', '$F$18', '$W$18', '$X$18', '$Y$18', '$AB$18', '$AC$18', '$W$19', '$X$19', '$Y$19', '$AB$19', '$AC$19', '$F$20', '$W$20', '$X$20', '$Y$20', '$AB$20', '$AC$20', '$W$21', '$X$21', '$Y$21', '$AB$21', '$AC$21', '$W$22', '$X$22', '$Y$22', '$AB$22', '$AC$22', '$Z$24', '$AA$24', '$AB$24', '$Z$25', '$AA$25', '$AB$25', '$Z$26', '$AA$26', '$AB$26', '$Z$27', '$AA$27', '$AB$27', '$Z$28', '$AA$28', '$AB$28', '$Z$29', '$AA$29', '$AB$29', '$Z$30', '$AA$30', '$AB$30', '$Z$31', '$AA$31', '$AB$31', '

In [10]:
def populate_green_cells(main_file, template_file, green_cells_addresses, output_dir):
    """Populate green-highlighted cells in the main file from the template file for each version."""
    with xw.App(visible=False) as app:
        # Open the main file and the template file
        main_wb = app.books.open(main_file)
        template_wb = app.books.open(template_file)
        
        # Loop through each version
        for version in ['1.0', '2.0']:
            print(f"Processing version {version}")
            
            # Loop through the sheets in the main file and the green_cells_addresses dictionary
            for sheet_name, green_cells in green_cells_addresses.items():
                if sheet_name in main_wb.sheet_names:
                    sheet = main_wb.sheets[sheet_name]
                    
                    # Check for the versioned sheet name in the template file
                    template_sheet_name = f"{sheet_name}_{version}"  
                    
                    
                    if template_sheet_name in template_wb.sheet_names:
                        template_sheet = template_wb.sheets[template_sheet_name]

                        
                        # Loop through the green-highlighted cells and copy data from the template
                        for address in green_cells:
                            cell = sheet[address]
                            template_cell = template_sheet[address]

                            # Copy the data from the template to the main file
                            cell.value = template_cell.value
                    
            # Recalculate all formulas in the main file
            main_wb.app.calculate()

            # Save the updated main file with the version-specific suffix
            output_filename = f"{os.path.splitext(os.path.basename(main_file))[0]}_v{version}.xlsx"
            output_filepath = os.path.join(output_dir, output_filename)
            main_wb.save(output_filepath)
            print(f"Saved {output_filepath}")
        
        main_wb.close()
        template_wb.close()

In [None]:
# Running the function 
populate_green_cells(main_file, template_file, green_cells_addresses, output_dir)

In [None]:
# Summary data table 
def extract_summary_df(output_dir, main_file, named_ranges):
    """Extract summary data for the selected model from the output folder and create a DataFrame."""
    # Extract the model name from the main file (based on file name)
    model_name = os.path.splitext(os.path.basename(main_file))[0]

    summary_data = []
    
    # Filter the files from the output folder related to the selected model
    model_output_files = [f for f in os.listdir(output_dir) if model_name in f and f.endswith('.xlsx')]
    
    # Loop through each of the filtered output files
    for model_file in model_output_files:
        version = model_file.split('_v')[-1].split('.')[0]  # Extract version from filename
        file_path = os.path.join(output_dir, model_file)
        
        # Open the workbook using xlwings
        with xw.App(visible=False) as app:
            wb = app.books.open(file_path)
            
            # Extract data from each named range
            row_data = {'Project Version': version}
            for named_range in named_ranges:
                try:
                    # Attempt to access the named range and get its value
                    if named_range in wb.names:
                        range_value = wb.names[named_range].refers_to_range.value
                        row_data[named_range] = range_value
                    else:
                        row_data[named_range] = None  # Named range doesn't exist
                        print(f"Named range '{named_range}' not found in {file_path}")
                except Exception as e:
                    row_data[named_range] = None  # If something goes wrong, set to None
                    print(f"Error accessing '{named_range}' in {file_path}: {e}")
            
            # Append row data to the summary list
            summary_data.append(row_data)
            wb.close()

    # Convert the summary data to a DataFrame
    summary_df = pd.DataFrame(summary_data)
    
    # Return the summary DataFrame
    return summary_df

In [None]:
named_ranges = ['BeneCostRatio', 'ReturnOnInvest', 'Payback']
summary_df = extract_summary_df(output_dir, main_file, named_ranges)

In [23]:
print(summary_df)

  Project Version BeneCostRatio ReturnOnInvest Payback
0               1          None            N/A     N/A
1               2          None            N/A     N/A
