In [1]:
!pip install contextily
!pip install geopandas
!pip install shapely

Collecting contextily
  Downloading contextily-1.6.2-py3-none-any.whl.metadata (2.9 kB)
Collecting mercantile (from contextily)
  Downloading mercantile-1.2.1-py3-none-any.whl.metadata (4.8 kB)
Collecting rasterio (from contextily)
  Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting affine (from rasterio->contextily)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting cligj>=0.5 (from rasterio->contextily)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Collecting click-plugins (from rasterio->contextily)
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl.metadata (6.4 kB)
Downloading contextily-1.6.2-py3-none-any.whl (17 kB)
Downloading mercantile-1.2.1-py3-none-any.whl (14 kB)
Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m26.5 MB/s[0m eta [36m0:00:0

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import contextily as ctx
import geopandas as gpd
from shapely.geometry import Point
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from prophet import Prophet

In [3]:
file_paths = {
    2020: '311_2020.csv',
    2021: '311_2021.csv',
    2022: '311_2022.csv',
    2023: '311_2023.csv',
    2024: '311_2024.csv'
}

# --- Load and filter data ---
all_animal_complaints = pd.DataFrame()

for year, file_path in file_paths.items():
    print(f"\nLoading data for {year}...")
    try:
        data = pd.read_csv(file_path)
        # Filter for Animal Control complaints
        animal_complaints = data[data['subject'] == 'Animal Control'].copy()
        animal_complaints['Year'] = pd.to_datetime(animal_complaints['open_dt']).dt.year
        animal_complaints['Month'] = pd.to_datetime(animal_complaints['open_dt']).dt.month
        animal_complaints['year'] = year  # for consistency

        all_animal_complaints = pd.concat([all_animal_complaints, animal_complaints], ignore_index=True)
        print(f"Animal complaints in {year}: {len(animal_complaints)}")
    except Exception as e:
        print(f"Error processing file for {year}: {e}")

if all_animal_complaints.empty:
    print("No animal complaint data found. Exiting.")
    exit()


Loading data for 2020...


  data = pd.read_csv(file_path)


Animal complaints in 2020: 1142

Loading data for 2021...


  data = pd.read_csv(file_path)


Animal complaints in 2021: 991

Loading data for 2022...


  data = pd.read_csv(file_path)


Animal complaints in 2022: 838

Loading data for 2023...
Animal complaints in 2023: 1277

Loading data for 2024...
Animal complaints in 2024: 1245


In [None]:
%pip install prophet



In [None]:
# --- Drop rows with missing coordinates ---
all_animal_complaints = all_animal_complaints.dropna(subset=['latitude', 'longitude'])

In [None]:
# --- KMeans clustering for spatial hotspots ---
coordinates = all_animal_complaints[['latitude', 'longitude']].values
scaler = StandardScaler()
scaled_coordinates = scaler.fit_transform(coordinates)

optimal_k = 5
kmeans = KMeans(n_clusters=optimal_k, random_state=42, n_init=10)
all_animal_complaints['cluster'] = kmeans.fit_predict(scaled_coordinates)

In [None]:
# --- Hotspot definition by complaint growth ---
past_years = [2020, 2021, 2022]
future_years = [2023, 2024]

cluster_year_counts = all_animal_complaints.groupby(['cluster', 'Year']).size().reset_index(name='count')
counts_pivot = cluster_year_counts.pivot(index='cluster', columns='Year', values='count').fillna(0)
counts_pivot['past_total'] = counts_pivot[past_years].sum(axis=1)
counts_pivot['future_total'] = counts_pivot[future_years].sum(axis=1)
counts_pivot['growth'] = counts_pivot['future_total'] - counts_pivot['past_total']

INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:n_changepoints greater than number of observations. Using 3.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpmfbcgsfp/xfki4cs_.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpmfbcgsfp/r5g8xus8.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=4758', 'data', 'file=/tmp/tmpmfbcgsfp/xfki4cs_.json', 'init=/tmp/tmpmfbcgsfp/r5g8xus8.json', 'output', 'file=/tmp/tmpmfbcgsfp/prophet_model626fztz5/prophet_model-20250430225429.csv', 'method=optimize', 'algorithm=newton', 'iter=10000']
22:54:29 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
22:54:30 - cmdstanpy - INFO - Chain [1] done processing
INFO:cm

In [None]:

# Label as hotspot: top 1 cluster by growth
hot_clusters = counts_pivot.sort_values('growth', ascending=False).head(1).index.tolist()
all_animal_complaints['is_hotspot'] = all_animal_complaints['cluster'].isin(hot_clusters).astype(int)

INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:prophet:n_changepoints greater than number of observations. Using 3.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpmfbcgsfp/v3633p3t.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpmfbcgsfp/so3_76it.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.11/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=78867', 'data', 'file=/tmp/tmpmfbcgsfp/v3633p3t.json', 'init=/tmp/tmpmfbcgsfp/so3_76it.json', 'output', 'file=/tmp/tmpmfbcgsfp/prophet_modelun5df9yw/prophet_model-20250430225442.csv', 'method=optimize', 'algorithm=newton', 'iter=10000']
22:54:42 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
22:54:42 - cmdstanpy - INFO - Chain [1] done processing
INFO:c

In [None]:
# --- Prepare ML data (leakage-free) ---
train_data = all_animal_complaints[all_animal_complaints['Year'].isin(past_years)]
test_data = all_animal_complaints[all_animal_complaints['Year'].isin(future_years)]

features = ['latitude', 'longitude', 'Year']
X_train = train_data[features]
y_train = train_data['is_hotspot']
X_test = test_data[features]
y_test = test_data['is_hotspot']

In [None]:

# --- Logistic Regression for hotspot prediction ---
lr = LogisticRegression(max_iter=200, random_state=42)
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

print("\nClassification Report on Future-Year Hotspot Prediction (Logistic Regression):")
print(classification_report(y_test, y_pred))

In [None]:
# --- Seasonal analysis: volume by month ---
monthly_counts = all_animal_complaints.groupby('Month').size()
plt.figure(figsize=(10, 6))
monthly_counts.plot(kind='line', marker='o')
plt.title('Seasonal Trends in Animal Complaints (2020-2024)')
plt.xlabel('Month')
plt.ylabel('Number of Complaints')
plt.xticks(range(1, 13), ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
plt.grid(True)
plt.tight_layout()
plt.savefig('seasonal_trends_animal.png')
plt.close()


Classification Report on Future-Year Hotspot Prediction:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    544987

    accuracy                           1.00    544987
   macro avg       1.00      1.00      1.00    544987
weighted avg       1.00      1.00      1.00    544987



In [None]:
# --- Time series forecasting of animal complaint volume by season ---
monthly_totals = all_animal_complaints.groupby(['Year', 'Month']).size().reset_index(name='y')
monthly_totals['ds'] = pd.to_datetime(monthly_totals['Year'].astype(str) + '-' + monthly_totals['Month'].astype(str) + '-01')

model = Prophet(yearly_seasonality=True, seasonality_mode='multiplicative')
model.fit(monthly_totals[['ds', 'y']])
future = model.make_future_dataframe(periods=12, freq='M')
forecast = model.predict(future)

fig = model.plot(forecast)
plt.title('Forecast of Monthly Animal Complaint Volume')
plt.savefig('forecast_animal_complaints.png')
plt.close()

In [None]:
print("Train hotspots:", y_train.sum())
print("Test hotspots:", y_test.sum())


Train hotspots: 4
Test hotspots: 0
