In [5]:
#all imports
import pandas as pd
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt
%matplotlib inline


df = pd.read_csv('all_parking_data.csv')
df2 = pd.read_csv('recent_parking_data.csv')

In [7]:
# === Map 2025 column names to match 2020–2023 ===
column_map_2025 = {
    'Day of the Week': 'Day',
    'Lot 2 Prem Occ': 'LOT 2 Premium Occupied',
    'Lot 2 Prem Cap': 'LOT 2 Premium Spaces',
    'Lot 2 Prem%': 'LOT 2 Premium % Capacity',
    'Lot 2 Gen Occ': 'LOT 2 General Occupied',
    'Lot 2 Gen Capacity': 'LOT 2 General Spaces',
    'Lot 2 %': 'LOT 2 General % Capacity',
    'Lot 2 Val Occ': 'LOT 2 Valet Occupied',
    'Lot 2 Val Cap': 'LOT 2 Valet Spaces',
    'Lot 2 Val%': 'LOT 2 Valet % Capacity',
    'Lot 3 Occ': 'LOT 3 Occupied',
    'Lot 3 Cap': 'LOT 3 Spaces',
    'Lot 3%': 'LOT 3 % Capacity',
    'Lot 4 Prem Occ': 'LOT 4 Premium Occupied',
    'Lot 4 Prem Cap': 'LOT 4 Premium Spaces',
    'Lot 4 Prem %': 'LOT 4 Premium % Capacity',
    'Lot 4 Gen Occ': 'LOT 4 General Occupied',
    'Lot 4 Gen Cap': 'LOT 4 General Spaces',
    'Lot 4 Gen%': 'LOT 4 General % Capacity',
    'Lot 5 Occ': 'LOT 5 Occupied',
    'Lot 5 Cap': 'LOT 5 Spaces',
    'Lot 5%': 'LOT 5 % Capacity',
    'Lot 5 Val Occ': 'LOT 5 Valet Occupied',
    'Lot 5 Val Cap': 'LOT 5 Valet Spaces',
    'Lot 5 Val%': 'LOT 5 Valet % Capacity',
    'Lot 6 Occ': 'LOT 6 Occupied',
    'Lot 6 Cap': 'LOT 6 Spaces',
    'Lot 6%': 'LOT 6 % Capacity',
    'Total Facility Occ': 'Total Facility Occupied',
    'Total Facility Cap': 'Total Facility Spaces',
    'Total Facility%': 'Total Facility % Capacity'
}

# === Apply mapping to df2 ===
df2 = df2.rename(columns=column_map_2025)

# === Add missing columns to df (old data) with default 0s for Valet lots ===
missing_cols = [
    'LOT 2 Valet Spaces', 'LOT 2 Valet Occupied', 'LOT 2 Valet % Capacity',
    'LOT 5 Valet Spaces', 'LOT 5 Valet Occupied', 'LOT 5 Valet % Capacity',
    'Total Facility Occupied', 'Total Facility Spaces', 'Total Facility % Capacity'
]

for col in missing_cols:
    if col not in df.columns:
        df[col] = 0

# === Align columns between both dataframes ===
all_cols = sorted(set(df.columns).union(df2.columns))
df = df.reindex(columns=all_cols)
df2 = df2.reindex(columns=all_cols)

# === Combine ===
combined_df = pd.concat([df, df2], ignore_index=True)

# === Clean up ===
combined_df = combined_df.sort_values(by="Date").reset_index(drop=True)

print(f"✅ Combined DataFrame shape: {combined_df.shape}")
print("✅ Columns aligned and merged successfully.")


✅ Combined DataFrame shape: (1215, 42)
✅ Columns aligned and merged successfully.


In [11]:
combined_df.tail()

Unnamed: 0,Date,Day,LOT 2 General % Capacity,LOT 2 General Occupied,LOT 2 General Spaces,LOT 2 General Vacancies,LOT 2 Premium % Capacity,LOT 2 Premium Occupied,LOT 2 Premium Spaces,LOT 2 Premium Vacancies,...,LOT 6 % Capacity,LOT 6 Occupied,LOT 6 Spaces,LOT 6 Vacancies,Total CC Revenue,Total Cash Revenue,Total Facility % Capacity,Total Facility Occupied,Total Facility Spaces,Total Revenue
1210,2025-09-26 00:00:00,Friday,0.635914,772.0,1214,,0.930435,321.0,345,,...,0.626028,837.0,1337,,,,0.683773,5596,8184,
1211,2025-09-27 00:00:00,Saturday,0.665568,808.0,1214,,0.924638,319.0,345,,...,0.620045,829.0,1337,,,,0.682063,5582,8184,
1212,2025-09-28 00:00:00,Sunday,0.610379,741.0,1214,,0.794203,274.0,345,,...,0.54899,734.0,1337,,,,0.615225,5035,8184,
1213,2025-09-29 00:00:00,Monday,0.536244,651.0,1214,,0.953623,329.0,345,,...,0.510097,682.0,1337,,,,0.577102,4723,8184,
1214,2025-09-30 00:00:00,Tuesday,0.543657,660.0,1214,,0.93913,324.0,345,,...,0.47644,637.0,1337,,,,0.567571,4645,8184,


In [19]:
combined_df.columns

Index(['Date', 'Day', 'LOT 2 General % Capacity', 'LOT 2 General Occupied',
       'LOT 2 General Spaces', 'LOT 2 General Vacancies',
       'LOT 2 Premium % Capacity', 'LOT 2 Premium Occupied',
       'LOT 2 Premium Spaces', 'LOT 2 Premium Vacancies',
       'LOT 2 Value % Capacity', 'LOT 2 Value Occupied', 'LOT 2 Value Spaces',
       'LOT 3 % Capacity', 'LOT 3 Occupied', 'LOT 3 Spaces', 'LOT 3 Vacancies',
       'LOT 4 General % Capacity', 'LOT 4 General Occupied',
       'LOT 4 General Spaces', 'LOT 4 General Vacancies',
       'LOT 4 Premium % Capacity', 'LOT 4 Premium Occupied',
       'LOT 4 Premium Spaces', 'LOT 4 Premium Vacancies', 'LOT 5 % Capacity',
       'LOT 5 Occupied', 'LOT 5 Spaces', 'LOT 5 Vacancies',
       'LOT 5 Value % Capacity', 'LOT 5 Value Occupied', 'LOT 5 Value Spaces',
       'LOT 6 % Capacity', 'LOT 6 Occupied', 'LOT 6 Spaces', 'LOT 6 Vacancies',
       'Total Facility % Capacity', 'Total Facility Occupied',
       'Total Facility Spaces'],
      dtype='ob

In [15]:
# === Drop all revenue columns ===
revenue_cols = [col for col in combined_df.columns if 'Revenue' in col]
combined_df = combined_df.drop(columns=revenue_cols, errors='ignore')

# === Rename "Valet" → "Value" in all column names ===
combined_df.columns = combined_df.columns.str.replace('Valet', 'Value', regex=False)

# === Verify ===
print("✅ Cleaned columns:")
print(combined_df.columns.tolist())

✅ Cleaned columns:
['Date', 'Day', 'LOT 2 General % Capacity', 'LOT 2 General Occupied', 'LOT 2 General Spaces', 'LOT 2 General Vacancies', 'LOT 2 Premium % Capacity', 'LOT 2 Premium Occupied', 'LOT 2 Premium Spaces', 'LOT 2 Premium Vacancies', 'LOT 2 Value % Capacity', 'LOT 2 Value Occupied', 'LOT 2 Value Spaces', 'LOT 3 % Capacity', 'LOT 3 Occupied', 'LOT 3 Spaces', 'LOT 3 Vacancies', 'LOT 4 General % Capacity', 'LOT 4 General Occupied', 'LOT 4 General Spaces', 'LOT 4 General Vacancies', 'LOT 4 Premium % Capacity', 'LOT 4 Premium Occupied', 'LOT 4 Premium Spaces', 'LOT 4 Premium Vacancies', 'LOT 5 % Capacity', 'LOT 5 Occupied', 'LOT 5 Spaces', 'LOT 5 Vacancies', 'LOT 5 Value % Capacity', 'LOT 5 Value Occupied', 'LOT 5 Value Spaces', 'LOT 6 % Capacity', 'LOT 6 Occupied', 'LOT 6 Spaces', 'LOT 6 Vacancies', 'Total Facility % Capacity', 'Total Facility Occupied', 'Total Facility Spaces']


In [33]:
combined_df.shape

(1215, 39)

In [37]:
combined_df[80:90]

Unnamed: 0,Date,Day,LOT 2 General % Capacity,LOT 2 General Occupied,LOT 2 General Spaces,LOT 2 General Vacancies,LOT 2 Premium % Capacity,LOT 2 Premium Occupied,LOT 2 Premium Spaces,LOT 2 Premium Vacancies,...,LOT 5 Value % Capacity,LOT 5 Value Occupied,LOT 5 Value Spaces,LOT 6 % Capacity,LOT 6 Occupied,LOT 6 Spaces,LOT 6 Vacancies,Total Facility % Capacity,Total Facility Occupied,Total Facility Spaces
80,2020-07-23,Thursday,0.191961,234.0,1219,985.0,0.098765,32.0,324,292.0,...,0.0,0.0,0,-1.0,-1.0,-1,-1.0,0.0,0,0
81,2020-07-24,Friday,0.224774,274.0,1219,945.0,0.101852,33.0,324,291.0,...,0.0,0.0,0,-1.0,-1.0,-1,-1.0,0.0,0,0
82,2020-07-25,Saturday,0.241181,294.0,1219,925.0,0.101852,33.0,324,291.0,...,0.0,0.0,0,-1.0,-1.0,-1,-1.0,0.0,0,0
83,2020-07-26,Sunday,0.193601,236.0,1219,983.0,0.070988,23.0,324,301.0,...,0.0,0.0,0,-1.0,-1.0,-1,-1.0,0.0,0,0
84,2020-07-27,Monday,0.173093,211.0,1219,1008.0,0.074074,24.0,324,300.0,...,0.0,0.0,0,-1.0,-1.0,-1,-1.0,0.0,0,0
85,2020-07-28,Tuesday,0.153404,187.0,1219,1032.0,0.07716,25.0,324,299.0,...,0.0,0.0,0,-1.0,-1.0,-1,-1.0,0.0,0,0
86,2020-07-29,Wednesday,0.169811,207.0,1219,1012.0,0.089506,29.0,324,295.0,...,0.0,0.0,0,-1.0,-1.0,-1,-1.0,0.0,0,0
87,2020-07-30,Thursday,0.169811,207.0,1219,1012.0,0.089506,29.0,324,295.0,...,0.0,0.0,0,-1.0,-1.0,-1,-1.0,0.0,0,0
88,2020-07-31,Friday,0.219852,268.0,1219,951.0,0.101852,33.0,324,291.0,...,0.0,0.0,0,-1.0,-1.0,-1,-1.0,0.0,0,0
89,2020-08-01,Saturday,0.210829,257.0,1219,962.0,0.089506,29.0,324,295.0,...,0.0,0.0,0,-1.0,-1.0,-1,-1.0,0.0,0,0


In [47]:
import numpy as np
#redo totals

# Work only on first 1062 rows
df_subset = df.iloc[:1062].copy()

# --- Identify the relevant columns ---
lot_cols = {
    2: {
        'occ': ['LOT 2 General Occupied', 'LOT 2 Premium Occupied'],
        'spaces': ['LOT 2 General Spaces', 'LOT 2 Premium Spaces']
    },
    3: {
        'occ': ['LOT 3 Occupied'],
        'spaces': ['LOT 3 Spaces']
    },
    4: {
        'occ': ['LOT 4 General Occupied', 'LOT 4 Premium Occupied'],
        'spaces': ['LOT 4 General Spaces', 'LOT 4 Premium Spaces']
    },
    5: {
        'occ': ['LOT 5 Occupied'],
        'spaces': ['LOT 5 Spaces']
    },
    6: {
        'occ': ['LOT 6 Occupied'],
        'spaces': ['LOT 6 Spaces']
    }
}

# --- Sum all lots except Lot 6 first ---
total_occ = pd.Series(0, index=df_subset.index)
total_spaces = pd.Series(0, index=df_subset.index)

for lot in [2, 3, 4, 5]:
    occ_cols = [c for c in lot_cols[lot]['occ'] if c in df_subset.columns]
    space_cols = [c for c in lot_cols[lot]['spaces'] if c in df_subset.columns]
    total_occ += df_subset[occ_cols].fillna(0).sum(axis=1)
    total_spaces += df_subset[space_cols].fillna(0).sum(axis=1)

# --- Handle Lot 6 separately: ignore if -1 ---
if all(c in df_subset.columns for c in ['LOT 6 Occupied', 'LOT 6 Spaces']):
    lot6_occ = df_subset['LOT 6 Occupied']
    lot6_spaces = df_subset['LOT 6 Spaces']

    mask_valid = (lot6_occ != -1) & (lot6_spaces != -1)
    total_occ += lot6_occ.where(mask_valid, 0)
    total_spaces += lot6_spaces.where(mask_valid, 0)

# --- Compute Total Facility % Capacity ---
df_subset['Total Facility Occupied'] = total_occ
df_subset['Total Facility Spaces'] = total_spaces
df_subset['Total Facility % Capacity'] = np.where(
    total_spaces > 0,
    (total_occ / total_spaces) * 100,
    np.nan
).round(2)

# --- Preview ---
print(df_subset[['Date', 'Total Facility Occupied', 'Total Facility Spaces', 'Total Facility % Capacity']][80:90])


          Date  Total Facility Occupied  Total Facility Spaces  \
80  2020-07-23                   1311.0                   6717   
81  2020-07-24                   1382.0                   6717   
82  2020-07-25                   1489.0                   6717   
83  2020-07-26                   1131.0                   6717   
84  2020-07-27                   1051.0                   6717   
85  2020-07-28                   1156.0                   6717   
86  2020-07-29                   1203.0                   6717   
87  2020-07-30                   1200.0                   6717   
88  2020-07-31                   1400.0                   6717   
89  2020-08-01                   1403.0                   6717   

    Total Facility % Capacity  
80                      19.52  
81                      20.57  
82                      22.17  
83                      16.84  
84                      15.65  
85                      17.21  
86                      17.91  
87               

In [49]:
combined_df.tail()

Unnamed: 0,Date,Day,LOT 2 General % Capacity,LOT 2 General Occupied,LOT 2 General Spaces,LOT 2 General Vacancies,LOT 2 Premium % Capacity,LOT 2 Premium Occupied,LOT 2 Premium Spaces,LOT 2 Premium Vacancies,...,LOT 5 Value % Capacity,LOT 5 Value Occupied,LOT 5 Value Spaces,LOT 6 % Capacity,LOT 6 Occupied,LOT 6 Spaces,LOT 6 Vacancies,Total Facility % Capacity,Total Facility Occupied,Total Facility Spaces
1210,2025-09-26 00:00:00,Friday,0.635914,772.0,1214,,0.930435,321.0,345,,...,1.15,23.0,20,0.626028,837.0,1337,,0.683773,5596,8184
1211,2025-09-27 00:00:00,Saturday,0.665568,808.0,1214,,0.924638,319.0,345,,...,1.05,21.0,20,0.620045,829.0,1337,,0.682063,5582,8184
1212,2025-09-28 00:00:00,Sunday,0.610379,741.0,1214,,0.794203,274.0,345,,...,0.8,16.0,20,0.54899,734.0,1337,,0.615225,5035,8184
1213,2025-09-29 00:00:00,Monday,0.536244,651.0,1214,,0.953623,329.0,345,,...,0.9,18.0,20,0.510097,682.0,1337,,0.577102,4723,8184
1214,2025-09-30 00:00:00,Tuesday,0.543657,660.0,1214,,0.93913,324.0,345,,...,1.05,21.0,20,0.47644,637.0,1337,,0.567571,4645,8184


In [51]:
# Drop all columns containing 'Vacancies' in combined_df
combined_df = combined_df.drop(
    columns=[col for col in combined_df.columns if 'Vacancies' in col],
    errors='ignore'
)

# Verify
print("✅ Columns after dropping 'Vacancies':")
print(combined_df.columns.tolist())

✅ Columns after dropping 'Vacancies':
['Date', 'Day', 'LOT 2 General % Capacity', 'LOT 2 General Occupied', 'LOT 2 General Spaces', 'LOT 2 Premium % Capacity', 'LOT 2 Premium Occupied', 'LOT 2 Premium Spaces', 'LOT 2 Value % Capacity', 'LOT 2 Value Occupied', 'LOT 2 Value Spaces', 'LOT 3 % Capacity', 'LOT 3 Occupied', 'LOT 3 Spaces', 'LOT 4 General % Capacity', 'LOT 4 General Occupied', 'LOT 4 General Spaces', 'LOT 4 Premium % Capacity', 'LOT 4 Premium Occupied', 'LOT 4 Premium Spaces', 'LOT 5 % Capacity', 'LOT 5 Occupied', 'LOT 5 Spaces', 'LOT 5 Value % Capacity', 'LOT 5 Value Occupied', 'LOT 5 Value Spaces', 'LOT 6 % Capacity', 'LOT 6 Occupied', 'LOT 6 Spaces', 'Total Facility % Capacity', 'Total Facility Occupied', 'Total Facility Spaces']


In [57]:
combined_df.tail()

Unnamed: 0,Date,Day,LOT 2 General % Capacity,LOT 2 General Occupied,LOT 2 General Spaces,LOT 2 Premium % Capacity,LOT 2 Premium Occupied,LOT 2 Premium Spaces,LOT 2 Value % Capacity,LOT 2 Value Occupied,...,LOT 5 Spaces,LOT 5 Value % Capacity,LOT 5 Value Occupied,LOT 5 Value Spaces,LOT 6 % Capacity,LOT 6 Occupied,LOT 6 Spaces,Total Facility % Capacity,Total Facility Occupied,Total Facility Spaces
1210,2025-09-26 00:00:00,Friday,0.635914,772.0,1214,0.930435,321.0,345,1.35,27.0,...,2296,1.15,23.0,20,0.626028,837.0,1337,0.683773,5596,8184
1211,2025-09-27 00:00:00,Saturday,0.665568,808.0,1214,0.924638,319.0,345,1.55,31.0,...,2296,1.05,21.0,20,0.620045,829.0,1337,0.682063,5582,8184
1212,2025-09-28 00:00:00,Sunday,0.610379,741.0,1214,0.794203,274.0,345,1.0,20.0,...,2296,0.8,16.0,20,0.54899,734.0,1337,0.615225,5035,8184
1213,2025-09-29 00:00:00,Monday,0.536244,651.0,1214,0.953623,329.0,345,0.3,6.0,...,2296,0.9,18.0,20,0.510097,682.0,1337,0.577102,4723,8184
1214,2025-09-30 00:00:00,Tuesday,0.543657,660.0,1214,0.93913,324.0,345,0.25,5.0,...,2296,1.05,21.0,20,0.47644,637.0,1337,0.567571,4645,8184
