# Analyze Lot Mapping Duplicates

**Purpose:** Understand why lot_mapping_enhanced.csv has 276 rows but only 191 unique lot numbers.

**Key Question:** Should we keep duplicate lots (one lot serving multiple zones) or consolidate them?

In [2]:
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
print("Libraries loaded successfully")

Libraries loaded successfully


## Load Enhanced Lot Mapping

In [3]:
enhanced = pd.read_csv('../data/lot_mapping_enhanced.csv')

print(f"Total rows: {len(enhanced)}")
print(f"Unique lot numbers: {enhanced['Lot_number'].nunique()}")
print(f"\nColumn info:")
print(enhanced.info())

Total rows: 187
Unique lot numbers: 186

Column info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 187 entries, 0 to 186
Data columns (total 14 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Lot_number                        187 non-null    int64  
 1   Zone_Name                         187 non-null    object 
 2   zone_type                         187 non-null    object 
 3   capacity                          160 non-null    float64
 4   location_description              187 non-null    object 
 5   is_dorm_parking                   187 non-null    int64  
 6   alternative_location_description  49 non-null     object 
 7   Unnamed: 7                        0 non-null      float64
 8   Unnamed: 8                        0 non-null      float64
 9   Unnamed: 9                        0 non-null      float64
 10  Unnamed: 10                       0 non-null      float64
 11  Unnamed: 11      

## Identify Duplicate Lots

In [4]:
# Find lots that appear multiple times
lot_counts = enhanced['Lot_number'].value_counts()
duplicate_lots = lot_counts[lot_counts > 1]

print(f"Lots appearing multiple times: {len(duplicate_lots)}")
print(f"\nTop 20 lots by number of zone assignments:")
print(duplicate_lots.head(20))

# Show examples of duplicate lots
print("\n" + "="*70)
print("EXAMPLES OF LOTS WITH MULTIPLE ZONE ASSIGNMENTS")
print("="*70)

for lot_num in duplicate_lots.head(10).index:
    print(f"\nLOT {lot_num}:")
    lot_data = enhanced[enhanced['Lot_number'] == lot_num]
    print(lot_data[['Lot_number', 'Zone_Name', 'zone_type', 'location_description']].to_string(index=False))

Lots appearing multiple times: 1

Top 20 lots by number of zone assignments:
Lot_number
186    2
Name: count, dtype: int64

EXAMPLES OF LOTS WITH MULTIPLE ZONE ASSIGNMENTS

LOT 186:
 Lot_number Zone_Name zone_type location_description
        186   Green 1    Permit      MCEACHERN NORTH
        186     Red 5    Permit      MCEACHERN NORTH


## Analyze Zone Patterns

In [5]:
# What zone combinations exist?
print("\n" + "="*70)
print("ZONE TYPE DISTRIBUTION")
print("="*70)

print("\nZone types in duplicate lots:")
duplicate_lot_data = enhanced[enhanced['Lot_number'].isin(duplicate_lots.index)]
print(duplicate_lot_data['zone_type'].value_counts())

print("\nZone names in duplicate lots:")
print(duplicate_lot_data['Zone_Name'].value_counts().head(20))


ZONE TYPE DISTRIBUTION

Zone types in duplicate lots:
zone_type
Permit    2
Name: count, dtype: int64

Zone names in duplicate lots:
Zone_Name
Green 1    1
Red 5      1
Name: count, dtype: int64


## Data Quality Issues to Address

In [6]:
print("\n" + "="*70)
print("DATA QUALITY ANALYSIS")
print("="*70)

# Check for lots with no zone assigned
no_zone = enhanced[enhanced['Zone_Name'].isna() | (enhanced['Zone_Name'] == '')]
print(f"\nLots with no Zone_Name assigned: {len(no_zone)}")
print("These are the 128 NEW lots from ticket data that need zone assignment.")

# Check for lots with zone but missing other info
has_zone = enhanced[enhanced['Zone_Name'].notna() & (enhanced['Zone_Name'] != '')]
print(f"\nLots with Zone_Name assigned: {len(has_zone)}")
print(f"  With zone_type: {has_zone['zone_type'].notna().sum()}")
print(f"  With capacity: {has_zone['capacity'].notna().sum()}")
print(f"  With location: {has_zone['location_description'].notna().sum()}")


DATA QUALITY ANALYSIS

Lots with no Zone_Name assigned: 0
These are the 128 NEW lots from ticket data that need zone assignment.

Lots with Zone_Name assigned: 187
  With zone_type: 187
  With capacity: 160
  With location: 187


## Create Primary Zone Mapping 

In [7]:
# For each lot, pick the zone with the most complete data
def select_primary_zone(group):
    """
    When a lot has multiple zone assignments, select the primary one based on:
    1. Has zone_type filled in
    2. Not 'Unknown'
    3. First alphabetically (for consistency)
    """
    if len(group) == 1:
        return group.iloc[0]
    
    # Prefer rows with zone_type
    with_type = group[group['zone_type'].notna() & (group['zone_type'] != '') & (group['zone_type'] != 'Unknown')]
    if len(with_type) > 0:
        # If multiple, pick first alphabetically for consistency
        return with_type.sort_values('Zone_Name').iloc[0]
    
    # Otherwise, just take first
    return group.iloc[0]

# Create consolidated mapping
consolidated = enhanced.groupby('Lot_number', as_index=False).apply(select_primary_zone)
consolidated = consolidated.reset_index(drop=True)

print(f"\n" + "="*70)
print("CONSOLIDATED LOT MAPPING CREATED")
print("="*70)
print(f"\nOriginal rows: {len(enhanced)}")
print(f"Consolidated rows: {len(consolidated)}")
print(f"Unique lot numbers: {consolidated['Lot_number'].nunique()}")

# Show what changed
print(f"\nExample consolidations:")
for lot_num in duplicate_lots.head(5).index:
    print(f"\nLOT {lot_num}:")
    print("  Original:")
    orig = enhanced[enhanced['Lot_number'] == lot_num][['Zone_Name', 'zone_type']]
    print("  " + orig.to_string(index=False, header=False).replace('\n', '\n  '))
    print("  Selected:")
    selected = consolidated[consolidated['Lot_number'] == lot_num][['Zone_Name', 'zone_type']]
    print("  " + selected.to_string(index=False, header=False))


CONSOLIDATED LOT MAPPING CREATED

Original rows: 187
Consolidated rows: 186
Unique lot numbers: 186

Example consolidations:

LOT 186:
  Original:
  Green 1 Permit
    Red 5 Permit
  Selected:
  Green 1 Permit


  consolidated = enhanced.groupby('Lot_number', as_index=False).apply(select_primary_zone)


## Save Consolidated Mapping

In [8]:
# Save consolidated version
consolidated.to_csv('../data/lot_mapping_consolidated.csv', index=False)

print(f"\nSaved: data/lot_mapping_consolidated.csv")
print(f"  {len(consolidated)} unique lots")
print(f"  {consolidated['Zone_Name'].notna().sum()} with zone assignments")
print(f"  {(consolidated['Zone_Name'].isna() | (consolidated['Zone_Name'] == '')).sum()} still need zone assignment")


Saved: data/lot_mapping_consolidated.csv
  186 unique lots
  186 with zone assignments
  0 still need zone assignment
