# **🚖 Taxi Fare Prediction App in Colab (Streamlit + ngrok)**

In [24]:
# 🚨 STEP 1: Install dependencies
!pip install streamlit ngrok scikit-learn==1.6.1 xgboost==3.0.3 pandas==2.2.2 cloudpickle pyngrok -q

In [11]:
# 🚨 STEP 2: Upload your saved model (taxi_fare_predictor.pkl)
from google.colab import files

# File name the Streamlit app opens; the upload has to end up under this name.
MODEL_FILENAME = "taxi_fare_predictor.pkl"

print("📤 Upload your 'taxi_fare_predictor.pkl' model file")
# files.upload() returns a dict mapping each uploaded file name to its bytes.
uploaded = files.upload()

# Fail here, with a readable message, instead of later inside the Streamlit
# process where the error would not be visible in the notebook.
if MODEL_FILENAME not in uploaded:
    if len(uploaded) != 1:
        raise FileNotFoundError(
            f"Expected an upload named {MODEL_FILENAME!r}, got: {sorted(uploaded)}"
        )
    # Exactly one file came in under another name (for example a re-upload that
    # was stored with a suffix such as "name (1).pkl"): save it under the
    # expected name so the app picks up this upload rather than a stale file.
    (uploaded_name, model_bytes), = uploaded.items()
    with open(MODEL_FILENAME, "wb") as model_file:
        model_file.write(model_bytes)
    print(f"Saved '{uploaded_name}' as '{MODEL_FILENAME}'")

📤 Upload your 'taxi_fare_predictor.pkl' model file


In [25]:
# 🚨 STEP 3: Create Streamlit app script
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import cloudpickle
import datetime

# Load model
def load_model():
    with open("taxi_fare_predictor.pkl", "rb") as f:
        return cloudpickle.load(f)

model = load_model()

st.set_page_config(page_title="Taxi Fare Predictor", page_icon="🚖")
st.title("🚖 NYC Taxi Fare Estimator")

# Input UI
pickup_date = st.date_input("Pickup Date", datetime.date.today())
pickup_time = st.time_input("Pickup Time", datetime.time(12, 0))
pickup_lat = st.number_input("Pickup Latitude", value=40.768)
pickup_lon = st.number_input("Pickup Longitude", value=-73.982)
dropoff_lat = st.number_input("Dropoff Latitude", value=40.730)
dropoff_lon = st.number_input("Dropoff Longitude", value=-73.980)
passenger_count = st.slider("Passengers", 1, 6, 1)

EARTH_RADIUS_KM = 6371
KM_TO_MILES = 0.621371
# Flat average speed used to turn the straight-line distance into a rough
# trip-duration estimate (minutes).
ASSUMED_SPEED_MPH = 15


def haversine(lon1, lat1, lon2, lat2):
    """Return the great-circle distance in miles between two points.

    Args:
        lon1, lat1: longitude / latitude of the first point, in degrees.
        lon2, lat2: longitude / latitude of the second point, in degrees.
    """
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])
    dlon, dlat = lon2 - lon1, lat2 - lat1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # near-antipodal points); clamp so sqrt/arcsin cannot return NaN.
    a = np.clip(a, 0.0, 1.0)
    return EARTH_RADIUS_KM * 2 * np.arcsin(np.sqrt(a)) * KM_TO_MILES  # miles


def build_features(pickup_datetime, pickup_lon, pickup_lat,
                   dropoff_lon, dropoff_lat, passenger_count):
    """Build the single-row DataFrame that is passed to model.predict.

    Column names and their order are kept exactly as before; presumably they
    have to match what the model was trained on (verify against the training
    pipeline before changing them).
    """
    pickup_hour = pickup_datetime.hour
    pickup_day_of_week = pickup_datetime.weekday()  # Monday == 0 ... Sunday == 6
    is_weekend = 1 if pickup_day_of_week >= 5 else 0
    is_night = 1 if pickup_hour <= 6 or pickup_hour >= 22 else 0

    distance = haversine(pickup_lon, pickup_lat, dropoff_lon, dropoff_lat)
    duration = (distance / ASSUMED_SPEED_MPH) * 60 if distance > 0 else 0

    return pd.DataFrame([{
        "passenger_count": passenger_count,
        "pickup_longitude": pickup_lon,
        "pickup_latitude": pickup_lat,
        "dropoff_longitude": dropoff_lon,
        "dropoff_latitude": dropoff_lat,
        # Fixed values, not collected from the user.
        # NOTE(review): presumably the codes for standard rate / card payment;
        # confirm against the training data.
        "RatecodeID": 1,
        "payment_type": 1,
        "pickup_hour": pickup_hour,
        "pickup_day_of_week": pickup_day_of_week,
        "is_weekend": is_weekend,
        "is_night": is_night,
        "trip_distance_log": np.log1p(distance),
        "trip_duration_log": np.log1p(duration)
    }])


if st.button("Predict Fare"):
    # Feature engineering
    pickup_datetime = datetime.datetime.combine(pickup_date, pickup_time)
    input_df = build_features(
        pickup_datetime, pickup_lon, pickup_lat,
        dropoff_lon, dropoff_lat, passenger_count,
    )

    prediction = model.predict(input_df)[0]
    st.success(f"Estimated Fare: ${prediction:.2f}")


Overwriting app.py


In [29]:
# 🚨 STEP 4: Run Streamlit app via ngrok
import socket
import subprocess
import time

from google.colab import userdata
from pyngrok import ngrok

STREAMLIT_PORT = 8501
STREAMLIT_LOG = "streamlit.log"   # server output goes here instead of /dev/null
STARTUP_TIMEOUT_S = 30

# Kill previous tunnels, and any Streamlit server left over from an earlier
# run of this cell, so the port below is free and serves the current app.py.
ngrok.kill()
subprocess.run(["pkill", "-f", "streamlit run"], check=False)

# Get the authtoken from Colab secrets
NGROK_AUTH_TOKEN = userdata.get('NGROK_AUTH_TOKEN')

# Configure pyngrok with the authtoken
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Run streamlit first, on a fixed port, keeping its output in a log file so
# startup errors (e.g. a model that fails to load) can be inspected.
with open(STREAMLIT_LOG, "w") as log_file:
    streamlit_proc = subprocess.Popen(
        [
            "streamlit", "run", "app.py",
            "--server.port", str(STREAMLIT_PORT),
            "--server.headless", "true",
        ],
        stdout=log_file,
        stderr=subprocess.STDOUT,
    )

# Wait until the server actually accepts connections (replaces a fixed sleep).
deadline = time.monotonic() + STARTUP_TIMEOUT_S
while True:
    if streamlit_proc.poll() is not None:
        raise RuntimeError(
            f"Streamlit exited with code {streamlit_proc.returncode}; see {STREAMLIT_LOG}"
        )
    try:
        with socket.create_connection(("localhost", STREAMLIT_PORT), timeout=1):
            break
    except OSError:
        if time.monotonic() > deadline:
            raise TimeoutError(
                f"Streamlit did not start within {STARTUP_TIMEOUT_S}s; see {STREAMLIT_LOG}"
            )
        time.sleep(0.5)

# Start ngrok tunnel only once the app is reachable, so the printed URL works
# as soon as it is shown.
public_url = ngrok.connect(STREAMLIT_PORT)
# Print the URL itself rather than the NgrokTunnel object's repr.
print("🚀 Your app is live at:", public_url.public_url)

🚀 Your app is live at: NgrokTunnel: "https://11c5c2318269.ngrok-free.app" -> "http://localhost:8501"
