In [222]:
#all imports
import pandas as pd
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt
%matplotlib inline


df = pd.read_csv('all_parking_data.csv')
df2 = pd.read_csv('recent_parking_data.csv')

In [224]:
# === Map 2025 column names to match 2020–2023 ===
column_map_2025 = {
    'Day of the Week': 'Day',
    'Lot 2 Prem Occ': 'LOT 2 Premium Occupied',
    'Lot 2 Prem Cap': 'LOT 2 Premium Spaces',
    'Lot 2 Prem%': 'LOT 2 Premium % Capacity',
    'Lot 2 Gen Occ': 'LOT 2 General Occupied',
    'Lot 2 Gen Capacity': 'LOT 2 General Spaces',
    'Lot 2 %': 'LOT 2 General % Capacity',
    'Lot 2 Val Occ': 'LOT 2 Valet Occupied',
    'Lot 2 Val Cap': 'LOT 2 Valet Spaces',
    'Lot 2 Val%': 'LOT 2 Valet % Capacity',
    'Lot 3 Occ': 'LOT 3 Occupied',
    'Lot 3 Cap': 'LOT 3 Spaces',
    'Lot 3%': 'LOT 3 % Capacity',
    'Lot 4 Prem Occ': 'LOT 4 Premium Occupied',
    'Lot 4 Prem Cap': 'LOT 4 Premium Spaces',
    'Lot 4 Prem %': 'LOT 4 Premium % Capacity',
    'Lot 4 Gen Occ': 'LOT 4 General Occupied',
    'Lot 4 Gen Cap': 'LOT 4 General Spaces',
    'Lot 4 Gen%': 'LOT 4 General % Capacity',
    'Lot 5 Occ': 'LOT 5 Occupied',
    'Lot 5 Cap': 'LOT 5 Spaces',
    'Lot 5%': 'LOT 5 % Capacity',
    'Lot 5 Val Occ': 'LOT 5 Valet Occupied',
    'Lot 5 Val Cap': 'LOT 5 Valet Spaces',
    'Lot 5 Val%': 'LOT 5 Valet % Capacity',
    'Lot 6 Occ': 'LOT 6 Occupied',
    'Lot 6 Cap': 'LOT 6 Spaces',
    'Lot 6%': 'LOT 6 % Capacity',
    'Total Facility Occ': 'Total Facility Occupied',
    'Total Facility Cap': 'Total Facility Spaces',
    'Total Facility%': 'Total Facility % Capacity'
}

# === Apply mapping to df2 ===
df2 = df2.rename(columns=column_map_2025)

# === Add missing columns to df (old data) with default 0s for Valet lots ===
missing_cols = [
    'LOT 2 Valet Spaces', 'LOT 2 Valet Occupied', 'LOT 2 Valet % Capacity',
    'LOT 5 Valet Spaces', 'LOT 5 Valet Occupied', 'LOT 5 Valet % Capacity',
    'Total Facility Occupied', 'Total Facility Spaces', 'Total Facility % Capacity'
]

for col in missing_cols:
    if col not in df.columns:
        df[col] = 0

# === Align columns between both dataframes ===
all_cols = sorted(set(df.columns).union(df2.columns))
df = df.reindex(columns=all_cols)
df2 = df2.reindex(columns=all_cols)

# === Combine ===
combined_df = pd.concat([df, df2], ignore_index=True)

# === Clean up ===
combined_df = combined_df.sort_values(by="Date").reset_index(drop=True)

print(f"✅ Combined DataFrame shape: {combined_df.shape}")
print("✅ Columns aligned and merged successfully.")


✅ Combined DataFrame shape: (1215, 42)
✅ Columns aligned and merged successfully.


In [226]:
combined_df.tail()

Unnamed: 0,Date,Day,LOT 2 General % Capacity,LOT 2 General Occupied,LOT 2 General Spaces,LOT 2 General Vacancies,LOT 2 Premium % Capacity,LOT 2 Premium Occupied,LOT 2 Premium Spaces,LOT 2 Premium Vacancies,LOT 2 Valet % Capacity,LOT 2 Valet Occupied,LOT 2 Valet Spaces,LOT 3 % Capacity,LOT 3 Occupied,LOT 3 Spaces,LOT 3 Vacancies,LOT 4 General % Capacity,LOT 4 General Occupied,LOT 4 General Spaces,LOT 4 General Vacancies,LOT 4 Premium % Capacity,LOT 4 Premium Occupied,LOT 4 Premium Spaces,LOT 4 Premium Vacancies,LOT 5 % Capacity,LOT 5 Occupied,LOT 5 Spaces,LOT 5 Vacancies,LOT 5 Valet % Capacity,LOT 5 Valet Occupied,LOT 5 Valet Spaces,LOT 6 % Capacity,LOT 6 Occupied,LOT 6 Spaces,LOT 6 Vacancies,Total CC Revenue,Total Cash Revenue,Total Facility % Capacity,Total Facility Occupied,Total Facility Spaces,Total Revenue
1210,2025-09-26 00:00:00,Friday,0.635914,772.0,1214,,0.930435,321.0,345,,1.35,27.0,20,0.78104,931.0,1192,,0.677622,969.0,1430,,0.931429,326.0,350,,0.61716,1417.0,2296,,1.15,23.0,20,0.626028,837.0,1337,,,,0.683773,5596,8184,
1211,2025-09-27 00:00:00,Saturday,0.665568,808.0,1214,,0.924638,319.0,345,,1.55,31.0,20,0.797819,951.0,1192,,0.572727,819.0,1430,,0.954286,334.0,350,,0.653746,1501.0,2296,,1.05,21.0,20,0.620045,829.0,1337,,,,0.682063,5582,8184,
1212,2025-09-28 00:00:00,Sunday,0.610379,741.0,1214,,0.794203,274.0,345,,1.0,20.0,20,0.706376,842.0,1192,,0.538462,770.0,1430,,0.925714,324.0,350,,0.58101,1334.0,2296,,0.8,16.0,20,0.54899,734.0,1337,,,,0.615225,5035,8184,
1213,2025-09-29 00:00:00,Monday,0.536244,651.0,1214,,0.953623,329.0,345,,0.3,6.0,20,0.552852,659.0,1192,,0.676923,968.0,1430,,0.957143,335.0,350,,0.470819,1081.0,2296,,0.9,18.0,20,0.510097,682.0,1337,,,,0.577102,4723,8184,
1214,2025-09-30 00:00:00,Tuesday,0.543657,660.0,1214,,0.93913,324.0,345,,0.25,5.0,20,0.521812,622.0,1192,,0.803497,1149.0,1430,,0.931429,326.0,350,,0.394599,906.0,2296,,1.05,21.0,20,0.47644,637.0,1337,,,,0.567571,4645,8184,


In [227]:
combined_df.columns

Index(['Date', 'Day', 'LOT 2 General % Capacity', 'LOT 2 General Occupied',
       'LOT 2 General Spaces', 'LOT 2 General Vacancies',
       'LOT 2 Premium % Capacity', 'LOT 2 Premium Occupied',
       'LOT 2 Premium Spaces', 'LOT 2 Premium Vacancies',
       'LOT 2 Valet % Capacity', 'LOT 2 Valet Occupied', 'LOT 2 Valet Spaces',
       'LOT 3 % Capacity', 'LOT 3 Occupied', 'LOT 3 Spaces', 'LOT 3 Vacancies',
       'LOT 4 General % Capacity', 'LOT 4 General Occupied',
       'LOT 4 General Spaces', 'LOT 4 General Vacancies',
       'LOT 4 Premium % Capacity', 'LOT 4 Premium Occupied',
       'LOT 4 Premium Spaces', 'LOT 4 Premium Vacancies', 'LOT 5 % Capacity',
       'LOT 5 Occupied', 'LOT 5 Spaces', 'LOT 5 Vacancies',
       'LOT 5 Valet % Capacity', 'LOT 5 Valet Occupied', 'LOT 5 Valet Spaces',
       'LOT 6 % Capacity', 'LOT 6 Occupied', 'LOT 6 Spaces', 'LOT 6 Vacancies',
       'Total CC Revenue', 'Total Cash Revenue', 'Total Facility % Capacity',
       'Total Facility Occupied',

In [230]:
# === Drop all revenue columns ===
revenue_cols = [col for col in combined_df.columns if 'Revenue' in col]
combined_df = combined_df.drop(columns=revenue_cols, errors='ignore')

# === Rename "Valet" → "Value" in all column names ===
combined_df.columns = combined_df.columns.str.replace('Valet', 'Value', regex=False)

# === Verify ===
print("✅ Cleaned columns:")
print(combined_df.columns.tolist())

✅ Cleaned columns:
['Date', 'Day', 'LOT 2 General % Capacity', 'LOT 2 General Occupied', 'LOT 2 General Spaces', 'LOT 2 General Vacancies', 'LOT 2 Premium % Capacity', 'LOT 2 Premium Occupied', 'LOT 2 Premium Spaces', 'LOT 2 Premium Vacancies', 'LOT 2 Value % Capacity', 'LOT 2 Value Occupied', 'LOT 2 Value Spaces', 'LOT 3 % Capacity', 'LOT 3 Occupied', 'LOT 3 Spaces', 'LOT 3 Vacancies', 'LOT 4 General % Capacity', 'LOT 4 General Occupied', 'LOT 4 General Spaces', 'LOT 4 General Vacancies', 'LOT 4 Premium % Capacity', 'LOT 4 Premium Occupied', 'LOT 4 Premium Spaces', 'LOT 4 Premium Vacancies', 'LOT 5 % Capacity', 'LOT 5 Occupied', 'LOT 5 Spaces', 'LOT 5 Vacancies', 'LOT 5 Value % Capacity', 'LOT 5 Value Occupied', 'LOT 5 Value Spaces', 'LOT 6 % Capacity', 'LOT 6 Occupied', 'LOT 6 Spaces', 'LOT 6 Vacancies', 'Total Facility % Capacity', 'Total Facility Occupied', 'Total Facility Spaces']


In [231]:
combined_df.shape

(1215, 39)

In [233]:
combined_df[80:90]

Unnamed: 0,Date,Day,LOT 2 General % Capacity,LOT 2 General Occupied,LOT 2 General Spaces,LOT 2 General Vacancies,LOT 2 Premium % Capacity,LOT 2 Premium Occupied,LOT 2 Premium Spaces,LOT 2 Premium Vacancies,LOT 2 Value % Capacity,LOT 2 Value Occupied,LOT 2 Value Spaces,LOT 3 % Capacity,LOT 3 Occupied,LOT 3 Spaces,LOT 3 Vacancies,LOT 4 General % Capacity,LOT 4 General Occupied,LOT 4 General Spaces,LOT 4 General Vacancies,LOT 4 Premium % Capacity,LOT 4 Premium Occupied,LOT 4 Premium Spaces,LOT 4 Premium Vacancies,LOT 5 % Capacity,LOT 5 Occupied,LOT 5 Spaces,LOT 5 Vacancies,LOT 5 Value % Capacity,LOT 5 Value Occupied,LOT 5 Value Spaces,LOT 6 % Capacity,LOT 6 Occupied,LOT 6 Spaces,LOT 6 Vacancies,Total Facility % Capacity,Total Facility Occupied,Total Facility Spaces
80,2020-07-23,Thursday,0.191961,234.0,1219,985.0,0.098765,32.0,324,292.0,0.0,0.0,0,0.258389,308.0,1192,884.0,0.408392,584.0,1430,846.0,0.434659,153.0,352,199.0,0.0,0.0,2200,2200.0,0.0,0.0,0,-1.0,-1.0,-1,-1.0,0.0,0,0
81,2020-07-24,Friday,0.224774,274.0,1219,945.0,0.101852,33.0,324,291.0,0.0,0.0,0,0.236577,282.0,1192,910.0,0.427273,611.0,1430,819.0,0.517045,182.0,352,170.0,0.0,0.0,2200,2200.0,0.0,0.0,0,-1.0,-1.0,-1,-1.0,0.0,0,0
82,2020-07-25,Saturday,0.241181,294.0,1219,925.0,0.101852,33.0,324,291.0,0.0,0.0,0,0.282718,337.0,1192,855.0,0.446853,639.0,1430,791.0,0.528409,186.0,352,166.0,0.0,0.0,2200,2200.0,0.0,0.0,0,-1.0,-1.0,-1,-1.0,0.0,0,0
83,2020-07-26,Sunday,0.193601,236.0,1219,983.0,0.070988,23.0,324,301.0,0.0,0.0,0,0.199664,238.0,1192,954.0,0.355245,508.0,1430,922.0,0.357955,126.0,352,226.0,0.0,0.0,2200,2200.0,0.0,0.0,0,-1.0,-1.0,-1,-1.0,0.0,0,0
84,2020-07-27,Monday,0.173093,211.0,1219,1008.0,0.074074,24.0,324,300.0,0.0,0.0,0,0.185403,221.0,1192,971.0,0.317483,454.0,1430,976.0,0.400568,141.0,352,211.0,0.0,0.0,2200,2200.0,0.0,0.0,0,-1.0,-1.0,-1,-1.0,0.0,0,0
85,2020-07-28,Tuesday,0.153404,187.0,1219,1032.0,0.07716,25.0,324,299.0,0.0,0.0,0,0.165268,197.0,1192,995.0,0.414685,593.0,1430,837.0,0.4375,154.0,352,198.0,0.0,0.0,2200,2200.0,0.0,0.0,0,-1.0,-1.0,-1,-1.0,0.0,0,0
86,2020-07-29,Wednesday,0.169811,207.0,1219,1012.0,0.089506,29.0,324,295.0,0.0,0.0,0,0.197148,235.0,1192,957.0,0.41049,587.0,1430,843.0,0.411932,145.0,352,207.0,0.0,0.0,2200,2200.0,0.0,0.0,0,-1.0,-1.0,-1,-1.0,0.0,0,0
87,2020-07-30,Thursday,0.169811,207.0,1219,1012.0,0.089506,29.0,324,295.0,0.0,0.0,0,0.197148,235.0,1192,957.0,0.408392,584.0,1430,846.0,0.411932,145.0,352,207.0,0.0,0.0,2200,2200.0,0.0,0.0,0,-1.0,-1.0,-1,-1.0,0.0,0,0
88,2020-07-31,Friday,0.219852,268.0,1219,951.0,0.101852,33.0,324,291.0,0.0,0.0,0,0.246644,294.0,1192,898.0,0.455245,651.0,1430,779.0,0.4375,154.0,352,198.0,0.0,0.0,2200,2200.0,0.0,0.0,0,-1.0,-1.0,-1,-1.0,0.0,0,0
89,2020-08-01,Saturday,0.210829,257.0,1219,962.0,0.089506,29.0,324,295.0,0.0,0.0,0,0.288591,344.0,1192,848.0,0.428671,613.0,1430,817.0,0.454545,160.0,352,192.0,0.0,0.0,2200,2200.0,0.0,0.0,0,-1.0,-1.0,-1,-1.0,0.0,0,0


In [236]:
import numpy as np
#redo totals

# Work only on first 1062 rows
df_subset = df.iloc[:1062].copy()

# --- Identify the relevant columns ---
lot_cols = {
    2: {
        'occ': ['LOT 2 General Occupied', 'LOT 2 Premium Occupied'],
        'spaces': ['LOT 2 General Spaces', 'LOT 2 Premium Spaces']
    },
    3: {
        'occ': ['LOT 3 Occupied'],
        'spaces': ['LOT 3 Spaces']
    },
    4: {
        'occ': ['LOT 4 General Occupied', 'LOT 4 Premium Occupied'],
        'spaces': ['LOT 4 General Spaces', 'LOT 4 Premium Spaces']
    },
    5: {
        'occ': ['LOT 5 Occupied'],
        'spaces': ['LOT 5 Spaces']
    },
    6: {
        'occ': ['LOT 6 Occupied'],
        'spaces': ['LOT 6 Spaces']
    }
}

# --- Sum all lots except Lot 6 first ---
total_occ = pd.Series(0, index=df_subset.index)
total_spaces = pd.Series(0, index=df_subset.index)

for lot in [2, 3, 4, 5]:
    occ_cols = [c for c in lot_cols[lot]['occ'] if c in df_subset.columns]
    space_cols = [c for c in lot_cols[lot]['spaces'] if c in df_subset.columns]
    total_occ += df_subset[occ_cols].fillna(0).sum(axis=1)
    total_spaces += df_subset[space_cols].fillna(0).sum(axis=1)

# --- Handle Lot 6 separately: ignore if -1 ---
if all(c in df_subset.columns for c in ['LOT 6 Occupied', 'LOT 6 Spaces']):
    lot6_occ = df_subset['LOT 6 Occupied']
    lot6_spaces = df_subset['LOT 6 Spaces']

    mask_valid = (lot6_occ != -1) & (lot6_spaces != -1)
    total_occ += lot6_occ.where(mask_valid, 0)
    total_spaces += lot6_spaces.where(mask_valid, 0)

# --- Compute Total Facility % Capacity ---
df_subset['Total Facility Occupied'] = total_occ
df_subset['Total Facility Spaces'] = total_spaces
df_subset['Total Facility % Capacity'] = np.where(
    total_spaces > 0,
    (total_occ / total_spaces) * 100,
    np.nan
).round(2)

# --- Preview ---
print(df_subset[['Date', 'Total Facility Occupied', 'Total Facility Spaces', 'Total Facility % Capacity']][80:90])


          Date  Total Facility Occupied  Total Facility Spaces  \
80  2020-07-23                   1311.0                   6717   
81  2020-07-24                   1382.0                   6717   
82  2020-07-25                   1489.0                   6717   
83  2020-07-26                   1131.0                   6717   
84  2020-07-27                   1051.0                   6717   
85  2020-07-28                   1156.0                   6717   
86  2020-07-29                   1203.0                   6717   
87  2020-07-30                   1200.0                   6717   
88  2020-07-31                   1400.0                   6717   
89  2020-08-01                   1403.0                   6717   

    Total Facility % Capacity  
80                      19.52  
81                      20.57  
82                      22.17  
83                      16.84  
84                      15.65  
85                      17.21  
86                      17.91  
87               

In [238]:
# --- Update the original combined_df with new totals ---
combined_df.loc[:1061, 'Total Facility Occupied'] = df_subset['Total Facility Occupied'].values
combined_df.loc[:1061, 'Total Facility Spaces'] = df_subset['Total Facility Spaces'].values
combined_df.loc[:1061, 'Total Facility % Capacity'] = df_subset['Total Facility % Capacity'].values


In [240]:
# Drop all columns containing 'Vacancies' in combined_df
combined_df = combined_df.drop(
    columns=[col for col in combined_df.columns if 'Vacancies' in col],
    errors='ignore'
)

# Verify
print("✅ Columns after dropping 'Vacancies':")
print(combined_df.columns.tolist())

✅ Columns after dropping 'Vacancies':
['Date', 'Day', 'LOT 2 General % Capacity', 'LOT 2 General Occupied', 'LOT 2 General Spaces', 'LOT 2 Premium % Capacity', 'LOT 2 Premium Occupied', 'LOT 2 Premium Spaces', 'LOT 2 Value % Capacity', 'LOT 2 Value Occupied', 'LOT 2 Value Spaces', 'LOT 3 % Capacity', 'LOT 3 Occupied', 'LOT 3 Spaces', 'LOT 4 General % Capacity', 'LOT 4 General Occupied', 'LOT 4 General Spaces', 'LOT 4 Premium % Capacity', 'LOT 4 Premium Occupied', 'LOT 4 Premium Spaces', 'LOT 5 % Capacity', 'LOT 5 Occupied', 'LOT 5 Spaces', 'LOT 5 Value % Capacity', 'LOT 5 Value Occupied', 'LOT 5 Value Spaces', 'LOT 6 % Capacity', 'LOT 6 Occupied', 'LOT 6 Spaces', 'Total Facility % Capacity', 'Total Facility Occupied', 'Total Facility Spaces']


In [242]:
combined_df[80:90]

Unnamed: 0,Date,Day,LOT 2 General % Capacity,LOT 2 General Occupied,LOT 2 General Spaces,LOT 2 Premium % Capacity,LOT 2 Premium Occupied,LOT 2 Premium Spaces,LOT 2 Value % Capacity,LOT 2 Value Occupied,LOT 2 Value Spaces,LOT 3 % Capacity,LOT 3 Occupied,LOT 3 Spaces,LOT 4 General % Capacity,LOT 4 General Occupied,LOT 4 General Spaces,LOT 4 Premium % Capacity,LOT 4 Premium Occupied,LOT 4 Premium Spaces,LOT 5 % Capacity,LOT 5 Occupied,LOT 5 Spaces,LOT 5 Value % Capacity,LOT 5 Value Occupied,LOT 5 Value Spaces,LOT 6 % Capacity,LOT 6 Occupied,LOT 6 Spaces,Total Facility % Capacity,Total Facility Occupied,Total Facility Spaces
80,2020-07-23,Thursday,0.191961,234.0,1219,0.098765,32.0,324,0.0,0.0,0,0.258389,308.0,1192,0.408392,584.0,1430,0.434659,153.0,352,0.0,0.0,2200,0.0,0.0,0,-1.0,-1.0,-1,19.52,1311,6717
81,2020-07-24,Friday,0.224774,274.0,1219,0.101852,33.0,324,0.0,0.0,0,0.236577,282.0,1192,0.427273,611.0,1430,0.517045,182.0,352,0.0,0.0,2200,0.0,0.0,0,-1.0,-1.0,-1,20.57,1382,6717
82,2020-07-25,Saturday,0.241181,294.0,1219,0.101852,33.0,324,0.0,0.0,0,0.282718,337.0,1192,0.446853,639.0,1430,0.528409,186.0,352,0.0,0.0,2200,0.0,0.0,0,-1.0,-1.0,-1,22.17,1489,6717
83,2020-07-26,Sunday,0.193601,236.0,1219,0.070988,23.0,324,0.0,0.0,0,0.199664,238.0,1192,0.355245,508.0,1430,0.357955,126.0,352,0.0,0.0,2200,0.0,0.0,0,-1.0,-1.0,-1,16.84,1131,6717
84,2020-07-27,Monday,0.173093,211.0,1219,0.074074,24.0,324,0.0,0.0,0,0.185403,221.0,1192,0.317483,454.0,1430,0.400568,141.0,352,0.0,0.0,2200,0.0,0.0,0,-1.0,-1.0,-1,15.65,1051,6717
85,2020-07-28,Tuesday,0.153404,187.0,1219,0.07716,25.0,324,0.0,0.0,0,0.165268,197.0,1192,0.414685,593.0,1430,0.4375,154.0,352,0.0,0.0,2200,0.0,0.0,0,-1.0,-1.0,-1,17.21,1156,6717
86,2020-07-29,Wednesday,0.169811,207.0,1219,0.089506,29.0,324,0.0,0.0,0,0.197148,235.0,1192,0.41049,587.0,1430,0.411932,145.0,352,0.0,0.0,2200,0.0,0.0,0,-1.0,-1.0,-1,17.91,1203,6717
87,2020-07-30,Thursday,0.169811,207.0,1219,0.089506,29.0,324,0.0,0.0,0,0.197148,235.0,1192,0.408392,584.0,1430,0.411932,145.0,352,0.0,0.0,2200,0.0,0.0,0,-1.0,-1.0,-1,17.87,1200,6717
88,2020-07-31,Friday,0.219852,268.0,1219,0.101852,33.0,324,0.0,0.0,0,0.246644,294.0,1192,0.455245,651.0,1430,0.4375,154.0,352,0.0,0.0,2200,0.0,0.0,0,-1.0,-1.0,-1,20.84,1400,6717
89,2020-08-01,Saturday,0.210829,257.0,1219,0.089506,29.0,324,0.0,0.0,0,0.288591,344.0,1192,0.428671,613.0,1430,0.454545,160.0,352,0.0,0.0,2200,0.0,0.0,0,-1.0,-1.0,-1,20.89,1403,6717


In [258]:
combined_df[1200:1210]

Unnamed: 0,Date,Day,LOT 2 General % Capacity,LOT 2 General Occupied,LOT 2 General Spaces,LOT 2 Premium % Capacity,LOT 2 Premium Occupied,LOT 2 Premium Spaces,LOT 2 Value % Capacity,LOT 2 Value Occupied,LOT 2 Value Spaces,LOT 3 % Capacity,LOT 3 Occupied,LOT 3 Spaces,LOT 4 General % Capacity,LOT 4 General Occupied,LOT 4 General Spaces,LOT 4 Premium % Capacity,LOT 4 Premium Occupied,LOT 4 Premium Spaces,LOT 5 % Capacity,LOT 5 Occupied,LOT 5 Spaces,LOT 5 Value % Capacity,LOT 5 Value Occupied,LOT 5 Value Spaces,LOT 6 % Capacity,LOT 6 Occupied,LOT 6 Spaces,Total Facility % Capacity,Total Facility Occupied,Total Facility Spaces
1200,2025-09-16 00:00:00,Tuesday,0.651565,791.0,1214,0.915942,316.0,345,0.5,10.0,20,0.58557,698.0,1192,0.854545,1222.0,1430,0.94,329.0,350,0.456882,1049.0,2296,0.9,18.0,20,0.474944,635.0,1337,0.618035,5058,8184
1201,2025-09-17 00:00:00,Wednesday,0.632619,768.0,1214,0.904348,312.0,345,0.5,10.0,20,0.616611,735.0,1192,0.862937,1234.0,1430,0.951429,333.0,350,0.454704,1044.0,2296,0.8,16.0,20,0.474196,634.0,1337,0.620235,5076,8184
1202,2025-09-18 00:00:00,Thursday,0.649918,789.0,1214,0.907246,313.0,345,1.0,20.0,20,0.669463,798.0,1192,0.817483,1169.0,1430,0.965714,338.0,350,0.514373,1181.0,2296,1.1,22.0,20,0.525804,703.0,1337,0.649194,5313,8184
1203,2025-09-19 00:00:00,Friday,0.663097,805.0,1214,0.837681,289.0,345,1.1,22.0,20,0.717282,855.0,1192,0.636364,910.0,1430,0.94,329.0,350,0.573171,1316.0,2296,1.35,27.0,20,0.569185,761.0,1337,0.646628,5292,8184
1204,2025-09-20 00:00:00,Saturday,0.669687,813.0,1214,0.875362,302.0,345,1.4,28.0,20,0.760067,906.0,1192,0.567133,811.0,1430,0.954286,334.0,350,0.603659,1386.0,2296,1.15,23.0,20,0.569933,762.0,1337,0.652126,5337,8184
1205,2025-09-21 00:00:00,Sunday,0.607084,737.0,1214,0.785507,271.0,345,1.2,24.0,20,0.658557,785.0,1192,0.520979,745.0,1430,0.871429,305.0,350,0.549216,1261.0,2296,0.9,18.0,20,0.540763,723.0,1337,0.592009,4845,8184
1206,2025-09-22 00:00:00,Monday,0.595552,723.0,1214,0.875362,302.0,345,0.85,17.0,20,0.557047,664.0,1192,0.654545,936.0,1430,0.96,336.0,350,0.466899,1072.0,2296,0.65,13.0,20,0.492147,658.0,1337,0.57478,4704,8184
1207,2025-09-23 00:00:00,Tuesday,0.616145,748.0,1214,0.898551,310.0,345,0.7,14.0,20,0.525168,626.0,1192,0.781119,1117.0,1430,0.957143,335.0,350,0.44338,1018.0,2296,0.85,17.0,20,0.466717,624.0,1337,0.585899,4795,8184
1208,2025-09-24 00:00:00,Wednesday,0.64827,787.0,1214,0.921739,318.0,345,0.8,16.0,20,0.594799,709.0,1192,0.820979,1174.0,1430,0.968571,339.0,350,0.462979,1063.0,2296,1.05,21.0,20,0.537771,719.0,1337,0.626833,5130,8184
1209,2025-09-25 00:00:00,Thursday,0.603789,733.0,1214,0.907246,313.0,345,0.85,17.0,20,0.698826,833.0,1192,0.804895,1151.0,1430,0.96,336.0,350,0.519164,1192.0,2296,1.2,24.0,20,0.57442,768.0,1337,0.653715,5350,8184


# Export data before EDA

In [87]:
combined_df.to_csv("COMBINEDparkingdata.csv", index=False)

In [246]:
combined_df['LOT 2 General Spaces'].unique()

array([1219, 1214], dtype=int64)

In [248]:
import numpy as np

# --- Select occupancy columns ---
occ_cols = [col for col in combined_df.columns if "Occupied" in col]

# --- Create a clean copy ---
df_clean = combined_df[occ_cols].copy()

# --- Mask out invalid rows ---
for occ_col in occ_cols:
    # Corresponding total capacity column
    space_col = occ_col.replace("Occupied", "Spaces")
    
    if space_col in combined_df.columns:
        # Keep only rows where occupancy != -1 AND capacity != 0
        df_clean[occ_col] = df_clean[occ_col].where(
            (combined_df[occ_col] != -1) & (combined_df[space_col] != 0)
        )

# --- Calculate averages ignoring NaNs ---
average_occupancies = df_clean.mean()

# --- Display results ---
print("Average Occupancies per Lot in combined_df (ignoring -1 and 0 capacity rows):")
for lot, avg in average_occupancies.items():
    print(f"{lot}: {avg:.2f} spaces")


Average Occupancies per Lot in combined_df (ignoring -1 and 0 capacity rows):
LOT 2 General Occupied: 579.91 spaces
LOT 2 Premium Occupied: 135.79 spaces
LOT 2 Value Occupied: 13.58 spaces
LOT 3 Occupied: 635.17 spaces
LOT 4 General Occupied: 763.11 spaces
LOT 4 Premium Occupied: 265.38 spaces
LOT 5 Occupied: 700.97 spaces
LOT 5 Value Occupied: 15.95 spaces
LOT 6 Occupied: 668.97 spaces
Total Facility Occupied: 3246.62 spaces


In [250]:
import pandas as pd
import numpy as np

# --- Select occupancy columns ---
occ_cols = [col for col in combined_df.columns if "Occupied" in col]

# --- Create a clean copy for occupancy and percentage ---
df_clean = combined_df[occ_cols].copy()
df_percent = pd.DataFrame()  # To store percentages

# --- Mask out invalid rows and calculate percentage ---
for occ_col in occ_cols:
    space_col = occ_col.replace("Occupied", "Spaces")
    
    if space_col in combined_df.columns:
        # Mask invalid rows
        valid_mask = (combined_df[occ_col] != -1) & (combined_df[space_col] != 0)
        df_clean[occ_col] = combined_df[occ_col].where(valid_mask)
        
        # Calculate percentage occupancy
        df_percent[occ_col.replace("Occupied", "%")] = df_clean[occ_col] / combined_df[space_col] * 100

# --- Calculate averages ignoring NaNs ---
average_occupancies = df_clean.mean()
average_percentages = df_percent.mean()

# --- Display results ---
print("Average Occupancies and Percentages per Lot:")
for occ_col in occ_cols:
    space_col = occ_col.replace("Occupied", "Spaces")
    percent_col = occ_col.replace("Occupied", "%")
    
    avg_occ = average_occupancies[occ_col]
    avg_pct = average_percentages[percent_col]
    total_spaces = combined_df[space_col].replace(0, np.nan).mean()  # Avoid 0 capacity affecting average
    
    print(f"{occ_col}: {avg_occ:.2f} spaces, {total_spaces:.2f} spaces total, {avg_pct:.2f}% occupancy")


Average Occupancies and Percentages per Lot:
LOT 2 General Occupied: 579.91 spaces, 1218.37 spaces total, 47.61% occupancy
LOT 2 Premium Occupied: 135.79 spaces, 326.64 spaces total, 41.25% occupancy
LOT 2 Value Occupied: 13.58 spaces, 20.00 spaces total, 67.89% occupancy
LOT 3 Occupied: 635.17 spaces, 1192.00 spaces total, 53.29% occupancy
LOT 4 General Occupied: 763.11 spaces, 1430.00 spaces total, 53.36% occupancy
LOT 4 Premium Occupied: 265.38 spaces, 351.75 spaces total, 75.46% occupancy
LOT 5 Occupied: 700.97 spaces, 2212.09 spaces total, 31.59% occupancy
LOT 5 Value Occupied: 15.95 spaces, 20.00 spaces total, 79.77% occupancy
LOT 6 Occupied: 668.97 spaces, 333.78 spaces total, 50.04% occupancy
Total Facility Occupied: 3246.62 spaces, 7067.90 spaces total, 45.42% occupancy


In [274]:
import pandas as pd
import numpy as np

df = combined_df.copy()

# --- Convert Date column robustly ---
def parse_date(x):
    try:
        # Convert floats, ints, or strings to datetime
        return pd.to_datetime(str(x).strip())
    except:
        return pd.NaT

df['Date'] = df['Date'].apply(parse_date)

# --- Check that 2025 rows are parsed ---
print(df[df['Date'].dt.year == 2025].head())

# --- Filter rows after May 1, 2025 ---
df_filtered = df[df['Date'] > pd.Timestamp('2025-05-01')]
print(f"Rows after May 1, 2025: {len(df_filtered)}")

# --- Occupancy calculations ---
occ_cols = [col for col in df_filtered.columns if "Occupied" in col]
summary = []

for occ_col in occ_cols:
    space_col = occ_col.replace("Occupied", "Spaces")
    if space_col not in df_filtered.columns:
        continue

    valid_rows = df_filtered[(df_filtered[occ_col] != -1) & (df_filtered[space_col] > 0)]

    if valid_rows.empty:
        summary.append({
            "Lot": occ_col,
            "Average Occupancy": np.nan,
            "Average Spaces": np.nan,
            "Average %": np.nan
        })
        continue

    avg_occ = valid_rows[occ_col].mean()
    avg_spaces = valid_rows[space_col].mean()
    avg_pct = (valid_rows[occ_col] / valid_rows[space_col] * 100).mean()

    summary.append({
        "Lot": occ_col,
        "Average Occupancy": avg_occ,
        "Average Spaces": avg_spaces,
        "Average %": avg_pct
    })

summary_df = pd.DataFrame(summary)
print(summary_df)


           Date       Day  LOT 2 General % Capacity  LOT 2 General Occupied  \
1062 2025-05-01  Thursday                  0.725700                   881.0   
1063 2025-05-02    Friday                  0.820428                   996.0   
1064 2025-05-03  Saturday                  0.850906                  1033.0   
1065 2025-05-04    Sunday                  0.762768                   926.0   
1066 2025-05-05    Monday                  0.676277                   821.0   

      LOT 2 General Spaces  LOT 2 Premium % Capacity  LOT 2 Premium Occupied  \
1062                  1214                  0.817391                   282.0   
1063                  1214                  0.753623                   260.0   
1064                  1214                  0.730435                   252.0   
1065                  1214                  0.721739                   249.0   
1066                  1214                  0.831884                   287.0   

      LOT 2 Premium Spaces  LOT 2 Value % Ca

In [391]:
import pandas as pd

# Assume your dataframe is called combined_df
# Keep only relevant columns
cols_to_keep = [
    'Date', 'Day',
    'LOT 2 General % Capacity','LOT 2 General Occupied','LOT 2 General Spaces',
    'LOT 2 Premium % Capacity','LOT 2 Premium Occupied','LOT 2 Premium Spaces',
    'LOT 2 Value % Capacity','LOT 2 Value Occupied','LOT 2 Value Spaces',
    'LOT 3 % Capacity','LOT 3 Occupied','LOT 3 Spaces',
    'LOT 4 General % Capacity','LOT 4 General Occupied','LOT 4 General Spaces',
    'LOT 4 Premium % Capacity','LOT 4 Premium Occupied','LOT 4 Premium Spaces',
    'LOT 5 % Capacity','LOT 5 Occupied','LOT 5 Spaces',
    'LOT 5 Value % Capacity','LOT 5 Value Occupied','LOT 5 Value Spaces',
    'LOT 6 % Capacity','LOT 6 Occupied','LOT 6 Spaces'
]

df = combined_df[cols_to_keep]

# Define a mapping of lots to their three related columns
lots_mapping = {
    'LOT 2 General': ['LOT 2 General % Capacity', 'LOT 2 General Occupied', 'LOT 2 General Spaces'],
    'LOT 2 Premium': ['LOT 2 Premium % Capacity', 'LOT 2 Premium Occupied', 'LOT 2 Premium Spaces'],
    'LOT 2 Value': ['LOT 2 Value % Capacity', 'LOT 2 Value Occupied', 'LOT 2 Value Spaces'],
    'LOT 3': ['LOT 3 % Capacity', 'LOT 3 Occupied', 'LOT 3 Spaces'],
    'LOT 4 General': ['LOT 4 General % Capacity', 'LOT 4 General Occupied', 'LOT 4 General Spaces'],
    'LOT 4 Premium': ['LOT 4 Premium % Capacity', 'LOT 4 Premium Occupied', 'LOT 4 Premium Spaces'],
    'LOT 5': ['LOT 5 % Capacity', 'LOT 5 Occupied', 'LOT 5 Spaces'],
    'LOT 5 Value': ['LOT 5 Value % Capacity', 'LOT 5 Value Occupied', 'LOT 5 Value Spaces'],
    'LOT 6': ['LOT 6 % Capacity', 'LOT 6 Occupied', 'LOT 6 Spaces']
}

# Create a list to store the new rows
long_data = []

# Loop through each row and each lot to reshape
for _, row in df.iterrows():
    for lot_name, columns in lots_mapping.items():
        long_data.append({
            'Date': row['Date'],
            'Day': row['Day'],
            'Lot': lot_name,
            '% Capacity': row[columns[0]],
            'Occupied': row[columns[1]],
            'Spaces': row[columns[2]]
        })

# Create new long dataframe
long_df = pd.DataFrame(long_data)

# Optional: save to CSV for Tableau
long_df.to_csv('lot_long_format.csv', index=False)

print(long_df[9558:9568])


long_df.to_csv('past5parkingdatalong.csv', index=False)

            Date       Day            Lot  % Capacity  Occupied  Spaces
9558  2025-05-01  Thursday  LOT 2 General    0.725700     881.0    1214
9559  2025-05-01  Thursday  LOT 2 Premium    0.817391     282.0     345
9560  2025-05-01  Thursday    LOT 2 Value    0.100000       2.0      20
9561  2025-05-01  Thursday          LOT 3    0.557886     665.0    1192
9562  2025-05-01  Thursday  LOT 4 General    0.920280    1316.0    1430
9563  2025-05-01  Thursday  LOT 4 Premium    0.951429     333.0     350
9564  2025-05-01  Thursday          LOT 5    0.450348    1034.0    2296
9565  2025-05-01  Thursday    LOT 5 Value    0.600000      12.0      20
9566  2025-05-01  Thursday          LOT 6    0.537771     719.0    1337
9567  2025-05-02    Friday  LOT 2 General    0.820428     996.0    1214
