# 🤖 Notebook: Machine Learning Modeling

This notebook trains machine learning models to predict ride demand for Capital Bikeshare stations.

## ✅ Goals:
- Load processed dataset with distance-based features
- Define target and features
- Split the data into train and test sets
- Train machine learning models
- Evaluate model performance
- Analyze feature importance


In [2]:
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import BallTree
from scipy.spatial import cKDTree

# 📦 Standard Library
import sys
from pathlib import Path
import json
from collections import Counter

# 📊 Data Analysis & Visualization
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# 🌍 Geospatial Libraries
import geopandas as gpd
import geopy
from shapely.geometry import Point, shape
from shapely.ops import nearest_points
from shapely.wkt import loads 
from geopy import distance

# 🗺️ Folium Mapping
import folium

# 🛠️ Project-Specific Modules
# Make the project's src/ directory importable. Path() is the current working
# directory, so this expects the notebook to run from a folder next to src/.
# The membership check keeps re-running this cell from adding duplicates.
_src_dir = str(Path().resolve().parent / "src")
if _src_dir not in sys.path:
    sys.path.append(_src_dir)

from paths import  INTERIM_DIR, PROCESSED_DIR
# from helpers_folium import load_geojson_as_gdf, load_bikeshare_data, create_centered_map

## Prepare the data

In [3]:
# Read prince_george.parquet from PROCESSED_DIR into a DataFrame.
prince_george_path = PROCESSED_DIR / "prince_george.parquet"
prince_george_df = pd.read_parquet(prince_george_path)