In [103]:
import pandas as pd
import numpy as np

file_path = "CEOData/ONT Parking Occupancy May-July 2025.xlsx"

try:
    # Load all sheets (no header yet)
    all_sheets = pd.read_excel(file_path, sheet_name=None, header=None)
    print(f"✅ Loaded {len(all_sheets)} total sheets.")

    # Include only sheets that contain "Counts" in their name
    sheet_names = [name for name in all_sheets.keys() if "counts" in str(name).lower()]
    print("Sheets included:", sheet_names)

    # Combine sheets, skipping first 3 rows from each
    all_data = []
    for name in sheet_names:
        print(f"➡️ Reading sheet: {name}")
        sheet_df = pd.read_excel(file_path, sheet_name=name, header=None, skiprows=3)
        #sheet_df["Source Sheet"] = name  # Optional: track source
        all_data.append(sheet_df)

    # Combine all sheets into a single DataFrame
    combined_df = pd.concat(all_data, ignore_index=True)

    print("\n✅ Combined DataFrame created successfully!")
    print("Shape:", combined_df.shape)

except Exception as e:
    print("❌ Failed to read Excel file:")
    print(e)


✅ Loaded 10 total sheets.
Sheets included: ['May Counts', 'June Counts', 'July Counts', 'Aug Counts', 'Sep Counts']
➡️ Reading sheet: May Counts
➡️ Reading sheet: June Counts
➡️ Reading sheet: July Counts
➡️ Reading sheet: Aug Counts
➡️ Reading sheet: Sep Counts

✅ Combined DataFrame created successfully!
Shape: (158, 32)


In [85]:
combined_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,,NaT,Lot 2 Prem Occ,Lot 2 Prem Cap,Lot 2 Prem%,Lot 2 Gen Occ,Lot 2 Gen Capacity,Lot 2 %,Lot 2 Val Occ,Lot 2 Val Cap,...,Lot 5%,Lot 5 Val Occ,Lot 5 Val Cap,Lot 5 Val%,Lot 6 Occ,Lot 6 Cap,Lot 6%,Total Facility Occ,Total Facility Cap,Total Facility%
1,Thursday,2025-05-01,282,345,0.817391,881,1214,0.7257,2,20,...,0.450348,12,20,0.6,719,1337,0.537771,5242,8184,0.640518
2,Friday,2025-05-02,260,345,0.753623,996,1214,0.820428,5,20,...,0.507404,13,20,0.65,739,1337,0.55273,5323,8184,0.650415
3,Saturday,2025-05-03,252,345,0.730435,1033,1214,0.850906,4,20,...,0.518293,13,20,0.65,719,1337,0.537771,5144,8184,0.628543
4,Sunday,2025-05-04,249,345,0.721739,926,1214,0.762768,4,20,...,0.484321,8,20,0.4,686,1337,0.513089,4805,8184,0.587121


In [105]:
# Fix column names for combined_df
combined_df.iloc[0, 0] = 'Day of the Week'
combined_df.iloc[0, 1] = 'Date'

# Set first row as header
combined_df.columns = combined_df.iloc[0]
combined_df = combined_df.drop(index=0).reset_index(drop=True)


  combined_df.iloc[0, 1] = 'Date'


In [106]:
combined_df[combined_df['Lot 2 Prem Occ'] == 'Lot 2 Prem Occ']

Unnamed: 0,Day of the Week,Date,Lot 2 Prem Occ,Lot 2 Prem Cap,Lot 2 Prem%,Lot 2 Gen Occ,Lot 2 Gen Capacity,Lot 2 %,Lot 2 Val Occ,Lot 2 Val Cap,...,Lot 5%,Lot 5 Val Occ,Lot 5 Val Cap,Lot 5 Val%,Lot 6 Occ,Lot 6 Cap,Lot 6%,Total Facility Occ,Total Facility Cap,Total Facility%
31,,NaT,Lot 2 Prem Occ,Lot 2 Prem Cap,Lot 2 Prem%,Lot 2 Gen Occ,Lot 2 Gen Capacity,Lot 2 %,Lot 2 Val Occ,Lot 2 Val Cap,...,Lot 5%,Lot 5 Val Occ,Lot 5 Val Cap,Lot 5 Val%,Lot 6 Occ,Lot 6 Cap,Lot 6%,Total Facility Occ,Total Facility Cap,Total Facility%
62,,NaT,Lot 2 Prem Occ,Lot 2 Prem Cap,Lot 2 Prem%,Lot 2 Gen Occ,Lot 2 Gen Capacity,Lot 2 %,Lot 2 Val Occ,Lot 2 Val Cap,...,Lot 5%,Lot 5 Val Occ,Lot 5 Val Cap,Lot 5 Val%,Lot 6 Occ,Lot 6 Cap,Lot 6%,Total Facility Occ,Total Facility Cap,Total Facility%
94,,NaT,Lot 2 Prem Occ,Lot 2 Prem Cap,Lot 2 Prem%,Lot 2 Gen Occ,Lot 2 Gen Capacity,Lot 2 %,Lot 2 Val Occ,Lot 2 Val Cap,...,Lot 5%,Lot 5 Val Occ,Lot 5 Val Cap,Lot 5 Val%,Lot 6 Occ,Lot 6 Cap,Lot 6%,Total Facility Occ,Total Facility Cap,Total Facility%
126,,NaT,Lot 2 Prem Occ,Lot 2 Prem Cap,Lot 2 Prem%,Lot 2 Gen Occ,Lot 2 Gen Capacity,Lot 2 %,Lot 2 Val Occ,Lot 2 Val Cap,...,Lot 5%,Lot 5 Val Occ,Lot 5 Val Cap,Lot 5 Val%,Lot 6 Occ,Lot 6 Cap,Lot 6%,Total Facility Occ,Total Facility Cap,Total Facility%


In [109]:
combined_df.drop(index=[31, 62, 94, 126], inplace=True)

In [111]:
combined_df.nunique()

0
Day of the Week         7
Date                  153
Lot 2 Prem Occ         78
Lot 2 Prem Cap          1
Lot 2 Prem%            79
Lot 2 Gen Occ         126
Lot 2 Gen Capacity      1
Lot 2 %               127
Lot 2 Val Occ          29
Lot 2 Val Cap           1
Lot 2 Val%             30
Lot 3 Occ             123
Lot 3 Cap               1
Lot 3%                124
Lot 4 Prem Occ         37
Lot 4 Prem Cap          1
Lot 4 Prem %           38
Lot 4 Gen Occ         139
Lot 4 Gen Cap           1
Lot 4 Gen%            140
Lot 5 Occ             133
Lot 5 Cap               1
Lot 5%                134
Lot 5 Val Occ          25
Lot 5 Val Cap           1
Lot 5 Val%             26
Lot 6 Occ             110
Lot 6 Cap               1
Lot 6%                111
Total Facility Occ    143
Total Facility Cap      1
Total Facility%       143
dtype: int64

In [113]:
combined_df.head()

Unnamed: 0,Day of the Week,Date,Lot 2 Prem Occ,Lot 2 Prem Cap,Lot 2 Prem%,Lot 2 Gen Occ,Lot 2 Gen Capacity,Lot 2 %,Lot 2 Val Occ,Lot 2 Val Cap,...,Lot 5%,Lot 5 Val Occ,Lot 5 Val Cap,Lot 5 Val%,Lot 6 Occ,Lot 6 Cap,Lot 6%,Total Facility Occ,Total Facility Cap,Total Facility%
0,Thursday,2025-05-01 00:00:00,282,345,0.817391,881,1214,0.7257,2,20,...,0.450348,12,20,0.6,719,1337,0.537771,5242,8184,0.640518
1,Friday,2025-05-02 00:00:00,260,345,0.753623,996,1214,0.820428,5,20,...,0.507404,13,20,0.65,739,1337,0.55273,5323,8184,0.650415
2,Saturday,2025-05-03 00:00:00,252,345,0.730435,1033,1214,0.850906,4,20,...,0.518293,13,20,0.65,719,1337,0.537771,5144,8184,0.628543
3,Sunday,2025-05-04 00:00:00,249,345,0.721739,926,1214,0.762768,4,20,...,0.484321,8,20,0.4,686,1337,0.513089,4805,8184,0.587121
4,Monday,2025-05-05 00:00:00,287,345,0.831884,821,1214,0.676277,5,20,...,0.419425,10,20,0.5,602,1337,0.450262,4766,8184,0.582356


In [115]:
combined_df.to_csv("recent_parking_data.csv", index=False)

In [123]:
import pandas as pd

# Identify columns to keep
id_vars = ['Day of the Week', 'Date']

# All other columns are lot-related
value_vars = [col for col in combined_df.columns if col not in id_vars]

# Melt into long format
long_df = combined_df.melt(id_vars=id_vars, value_vars=value_vars, 
                           var_name='Lot_Metric', value_name='Value')

# Split 'Lot_Metric' into Lot number, Category (Prem/Gen/Val), and Metric (Occ/Cap/%)
long_df[['Lot', 'Category', 'Metric']] = long_df['Lot_Metric'].str.split(' ', n=2, expand=True)

# Pivot metrics to columns
long_df = long_df.pivot_table(index=['Day of the Week', 'Date', 'Lot', 'Category'], 
                              columns='Metric', values='Value').reset_index()

# Clean column names
long_df.columns.name = None

print(long_df.head())


  Day of the Week       Date  Lot Category         %     Cap Gen Cap  \
0          Friday 2025-05-02  Lot        2  0.820428     NaN     NaN   
1          Friday 2025-05-02  Lot        3       NaN  1192.0     NaN   
2          Friday 2025-05-02  Lot        4       NaN     NaN  1430.0   
3          Friday 2025-05-02  Lot        5       NaN  2296.0     NaN   
4          Friday 2025-05-02  Lot        6       NaN  1337.0     NaN   

  Gen Capacity Gen Occ      Gen%     Occ    Prem % Prem Cap Prem Occ  \
0       1214.0   996.0       NaN     NaN       NaN    345.0    260.0   
1          NaN     NaN       NaN   711.0       NaN      NaN      NaN   
2          NaN  1104.0  0.772028     NaN  0.957143    350.0    335.0   
3          NaN     NaN       NaN  1165.0       NaN      NaN      NaN   
4          NaN     NaN       NaN   739.0       NaN      NaN      NaN   

      Prem% Val Cap Val Occ  Val%  
0  0.753623    20.0     5.0  0.25  
1       NaN     NaN     NaN   NaN  
2       NaN     NaN     Na

In [127]:
combined_df.columns

Index(['Day of the Week', 'Date', 'Lot 2 Prem Occ', 'Lot 2 Prem Cap',
       'Lot 2 Prem%', 'Lot 2 Gen Occ', 'Lot 2 Gen Capacity', 'Lot 2 %',
       'Lot 2 Val Occ', 'Lot 2 Val Cap', 'Lot 2 Val%', 'Lot 3 Occ',
       'Lot 3 Cap', 'Lot 3%', 'Lot 4 Prem Occ', 'Lot 4 Prem Cap',
       'Lot 4 Prem %', 'Lot 4 Gen Occ', 'Lot 4 Gen Cap', 'Lot 4 Gen%',
       'Lot 5 Occ', 'Lot 5 Cap', 'Lot 5%', 'Lot 5 Val Occ', 'Lot 5 Val Cap',
       'Lot 5 Val%', 'Lot 6 Occ', 'Lot 6 Cap', 'Lot 6%', 'Total Facility Occ',
       'Total Facility Cap', 'Total Facility%'],
      dtype='object', name=0)