In [1]:
import os
import pandas as pd
import re

def _normalize_bd_ba(value):
    """Rewrite one BD/BA cell as '<leading number>/1.0'.

    Null cells are returned unchanged. A cell whose text starts with digits
    keeps that leading number; any other cell becomes '1/1.0'.
    """
    if pd.isna(value):
        return value
    # str() so a non-string cell in a mixed column is parsed instead of
    # making re.match raise TypeError.
    leading_number = re.match(r'\d+', str(value))
    if leading_number:
        return f'{leading_number.group(0)}/1.0'
    return '1/1.0'


def process_file(input_file, output_file):
    """Clean one rent-roll CSV and write the result to ``output_file``.

    The input must contain the columns 'Unit', 'BD/BA', 'Tenant' and 'Status'.
    Steps, in order:

    1. Drop rows whose 'BD/BA' contains 'Parking'.
    2. Blank out '--VACANT--' / 'VACANT' in 'Tenant'.
    3. Put '<n> Units' in the last row of 'Unit'.
    4. Normalise 'BD/BA' with ``_normalize_bd_ba``.
    5. Strip every non-digit character from 'Unit' for rows at positions
       2 .. len-2 (third row through the row before the last).
    6. Put '<pct>% Occupied' in the last row of 'Status', where pct is the
       share of non-null 'Status' cells containing 'Current'.

    NOTE(review): the positional offsets assume the first two rows are
    non-unit header rows and the last row is a totals row - confirm against
    the source export.

    Args:
        input_file: Path of the CSV to read.
        output_file: Path the processed CSV is written to (without the index).
    """
    df = pd.read_csv(input_file)

    # 1. Remove rows containing the word 'Parking' in column B.
    # The index is rebuilt afterwards so that labels equal row positions again:
    # step 5 slices by row number through .loc, and with the gaps left by the
    # dropped rows that slice stopped short of the trailing unit rows.
    is_parking = df['BD/BA'].str.contains('Parking', na=False)
    df = df[~is_parking].reset_index(drop=True)

    # 2. Replace "--VACANT--" or "VACANT" with a blank cell in column C (Tenant)
    df['Tenant'] = df['Tenant'].replace(['--VACANT--', 'VACANT'], '')

    # 3. Change the last row in column A to display the number of units followed by 'Units'.
    # iloc[3:] spans the fourth row through the last row, i.e. len(df) - 3 rows,
    # which is the same number of rows that step 5 cleans (positions 2 .. len-2).
    total_units = df.iloc[3:].shape[0]
    df.loc[df.index[-1], 'Unit'] = f'{total_units} Units'

    # 4. Modify column B (BD/BA)
    df['BD/BA'] = df['BD/BA'].apply(_normalize_bd_ba)

    # 5. Remove letters and symbols in column A, from the third row up to and
    # excluding the last row. .loc slices include both endpoints, so the upper
    # bound len(df) - 2 is the row just before the last one.
    unit_rows = slice(2, len(df) - 2)
    df.loc[unit_rows, 'Unit'] = df.loc[unit_rows, 'Unit'].apply(
        lambda unit: re.sub(r'\D', '', str(unit))
    )

    # 6. Calculate occupancy percentage ("Current" values in the "Status" column)
    total_status = len(df['Status'].dropna())  # non-null cells in "Status"
    current_occupied = df['Status'].str.contains('Current', na=False).sum()

    if total_status > 0:
        occupancy_percentage = (current_occupied / total_status) * 100
    else:
        # No status values at all: report 0 rather than dividing by zero.
        occupancy_percentage = 0

    # 7. Add "X% Occupied" in the same row as "Total Units" (last row)
    df.loc[df.index[-1], 'Status'] = f'{occupancy_percentage:.2f}% Occupied'

    # Save the processed file
    df.to_csv(output_file, index=False)

# Source folder holding the raw rent-roll CSVs, and the destination folder
# that receives the processed copies.
input_folder = r'C:\Users\Meu Computador\OneDrive - Thirty Capital\01_Live Clients\09_Century Housing\Boston Post\Rent Roll - File Conversion\10.21'
output_folder = r'C:\Users\Meu Computador\OneDrive - Thirty Capital\01_Live Clients\09_Century Housing\Boston Post\Rent Roll - File Ingestion\modified_files'

# Make sure the destination exists before anything is written into it.
os.makedirs(output_folder, exist_ok=True)

# Pick out the entries whose name ends in '.csv' (case-sensitive), keeping the
# order in which os.listdir returned them.
csv_filenames = [entry for entry in os.listdir(input_folder) if entry.endswith('.csv')]

# Each file is processed under its original name in the output folder.
for filename in csv_filenames:
    input_file = os.path.join(input_folder, filename)
    output_file = os.path.join(output_folder, filename)
    process_file(input_file, output_file)

print("Processing completed. Modified files are in the 'modified_files' folder.")


Processing completed. Modified files are in the 'modified_files' folder.
