# Noise Complaints Associated With Areas That Have Liquor Licenses

This notebook analyzes the relationship between liquor licenses and noise complaints in NYC.

## 1. Setup and Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Pandas display limits: show up to 100 rows and never truncate columns
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)

# Plot defaults: seaborn white-grid theme with 12x6 inch figures
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

## 2. Load Data

In [None]:
# Locations of the raw input files
data_dir = Path('../data/raw')
liquor_license_file = data_dir.joinpath('sla_active.csv')
noise_complaints_file = data_dir.joinpath('311_noise.csv')

# The liquor license extract is read unconditionally
print("Loading liquor license data...")
liquor_df = pd.read_csv(liquor_license_file)
print(f"Loaded {len(liquor_df)} liquor license records")

# The 311 extract is optional: noise_df is left as None when the file has not
# been downloaded, and later cells test for that before using it
if not noise_complaints_file.exists():
    print("\nNote: 311_noise.csv not found. Please download NYC 311 noise complaint data.")
    print("Data source: https://data.cityofnewyork.us/Social-Services/311-Service-Requests-from-2010-to-Present/erm2-nwe9")
    noise_df = None
else:
    print("\nLoading noise complaint data...")
    noise_df = pd.read_csv(noise_complaints_file)
    print(f"Loaded {len(noise_df)} noise complaint records")

## 3. Explore Liquor License Data

In [None]:
# Show the column layout of the liquor license table, then preview its first rows
print("Liquor License Data Structure:")
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
liquor_df.info()
print("\nFirst few records:")
liquor_df.head()

In [None]:
# Null count per column, restricted to the columns that actually contain nulls
print("Missing Values:")
missing = liquor_df.isna().sum()
print(missing.loc[missing.gt(0)])

In [None]:
# Summary statistics
# NOTE(review): describe() is called with its defaults; on a frame that mixes
# numeric and text columns pandas presumably summarizes only the numeric ones —
# confirm whether include='all' is wanted here.
print("Summary Statistics:")
liquor_df.describe()

## 4. Data Preprocessing

In [None]:
# Parse georeference data to extract latitude and longitude
def extract_coordinates(georeference):
    """Extract longitude and latitude from a ``POINT (lon lat)`` string.

    Parameters
    ----------
    georeference : str or missing value
        Text such as ``'POINT (-74.15 40.58)'``. The first number is read as
        the longitude and the second as the latitude.

    Returns
    -------
    pandas.Series
        Series indexed by ``'longitude'`` and ``'latitude'``. Both entries are
        floats when parsing succeeds, and both are ``None`` when the value is
        missing, is not a string, or does not contain two parseable numbers.
    """
    missing = pd.Series({'longitude': None, 'latitude': None})

    # NaN/None/pd.NA and any other non-text value cannot hold a POINT string.
    if not isinstance(georeference, str):
        return missing

    try:
        # Remove 'POINT (' and ')' and split on whitespace
        coords = georeference.replace('POINT (', '').replace(')', '').split()
        return pd.Series({'longitude': float(coords[0]), 'latitude': float(coords[1])})
    except (IndexError, ValueError):
        # Too few tokens or non-numeric tokens. Only these parse failures are
        # swallowed, so KeyboardInterrupt and genuine bugs still propagate.
        return missing

# Parse every Georeference value; the two-column result becomes the new
# longitude / latitude columns of the license table
parsed_points = liquor_df['Georeference'].apply(extract_coordinates)
liquor_df[['longitude', 'latitude']] = parsed_points

# Coerce the license date fields to datetime; unparseable values become NaT
date_columns = ['Original Issue Date', 'Last Issue Date', 'Effective Date', 'Expiration Date']
for col in date_columns:
    liquor_df[col] = liquor_df[col].pipe(pd.to_datetime, errors='coerce')

# Report how many rows ended up with both coordinates populated
print("Preprocessed liquor license data:")
has_both_coords = liquor_df[['longitude', 'latitude']].notna().all(axis=1)
print(f"Records with valid coordinates: {has_both_coords.sum()}")

## 5. Analyze Liquor License Distribution

In [None]:
# Number of licenses per county
print("Liquor Licenses by County:")
county_counts = liquor_df['Premises County'].value_counts()
print(county_counts)

# Bar chart of the per-county counts
fig, ax = plt.subplots(figsize=(10, 6))
county_counts.plot.bar(ax=ax)
ax.set_title('Distribution of Liquor Licenses by County')
ax.set_xlabel('County')
ax.set_ylabel('Number of Licenses')
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Number of licenses per license description
print("\nLiquor Licenses by Type:")
type_counts = liquor_df['Description'].value_counts()
print(type_counts)

# Horizontal bar chart of the ten most frequent descriptions
fig, ax = plt.subplots(figsize=(12, 6))
type_counts.head(10).plot.barh(ax=ax)
ax.set_title('Top 10 License Types')
ax.set_xlabel('Number of Licenses')
ax.set_ylabel('License Type')
fig.tight_layout()
plt.show()

In [None]:
# Staten Island only: keep the Richmond County rows, then those with both coordinates
richmond_df = liquor_df.loc[liquor_df['Premises County'].eq('Richmond')].copy()
richmond_valid = richmond_df.dropna(subset=['longitude', 'latitude'])

# Nothing is drawn or printed when no Richmond row has usable coordinates
if not richmond_valid.empty:
    fig, ax = plt.subplots(figsize=(12, 10))
    ax.scatter(
        richmond_valid['longitude'],
        richmond_valid['latitude'],
        alpha=0.5,
        s=20,
        c='red',
    )
    ax.set_title('Geographic Distribution of Liquor Licenses in Richmond County (Staten Island)')
    ax.set_xlabel('Longitude')
    ax.set_ylabel('Latitude')
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    plt.show()

    print(f"\nRichmond County: {len(richmond_valid)} licenses with valid coordinates")

## 6. Analyze Noise Complaint Data (if available)

In [None]:
# Runs only when the optional 311 extract was loaded (noise_df is None otherwise)
if noise_df is not None:
    print("Noise Complaint Data Structure:")
    # DataFrame.info() writes its report to stdout itself and returns None, so it
    # is called directly; wrapping it in print() would append a stray "None" line.
    noise_df.info()
    print("\nFirst few records:")
    display(noise_df.head())

    # Keep only rows whose 'Complaint Type' mentions "noise" (case-insensitive;
    # rows with a missing type are excluded). The filter is skipped when the
    # column is absent, in which case noise_only is not defined.
    if 'Complaint Type' in noise_df.columns:
        noise_only = noise_df[noise_df['Complaint Type'].str.contains('Noise', case=False, na=False)]
        print(f"\nFiltered to {len(noise_only)} noise-related complaints")
else:
    print("Noise complaint data not available. Please download from NYC Open Data.")

## 7. Spatial Analysis (if noise data available)

In [None]:
if noise_df is None:
    print("Spatial analysis requires noise complaint data.")
else:
    # Placeholder: no spatial analysis is implemented yet. The planned work is
    # distance calculations between complaints and licenses, overlap heat maps,
    # and statistical correlation analysis; for now the cell only lists it.
    print(
        "Spatial analysis code would go here",
        "This would include:",
        "- Distance calculations between complaints and licenses",
        "- Clustering analysis",
        "- Heat map visualizations",
        "- Statistical correlation tests",
        sep="\n",
    )

## 8. Summary and Conclusions

In [None]:
# Headline figures for the liquor license table
print("Analysis Summary:")
print(f"\nTotal liquor licenses analyzed: {len(liquor_df)}")

county_total = liquor_df['Premises County'].nunique()
print(f"Counties covered: {county_total}")

license_type_total = liquor_df['Description'].nunique()
print(f"License types: {license_type_total}")

# Rows where both longitude and latitude are populated
rows_with_coords = liquor_df[['longitude', 'latitude']].notna().all(axis=1).sum()
print(f"Records with valid geographic coordinates: {rows_with_coords}")

# The noise figures are reported only when the optional 311 extract was loaded
if noise_df is None:
    print("\nNote: Complete the analysis by adding noise complaint data.")
else:
    print(f"\nNoise complaints analyzed: {len(noise_df)}")

## Next Steps

1. Download NYC 311 noise complaint data from [NYC Open Data](https://data.cityofnewyork.us/Social-Services/311-Service-Requests-from-2010-to-Present/erm2-nwe9)
2. Filter for noise-related complaints
3. Perform spatial join to find noise complaints near liquor licenses
4. Analyze temporal patterns (time of day, day of week)
5. Create visualizations showing correlations
6. Generate statistical reports