In [1]:
import pandas as pd
import numpy as np
import requests
import json
import os
from dotenv import load_dotenv
import folium
from datetime import datetime
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
from deap import base, creator, tools, algorithms
import random
from geopy.distance import geodesic
import warnings
# Suppress all library warnings to keep the notebook output readable.
# NOTE(review): this also hides deprecation and pandas assignment warnings;
# consider narrowing the filter once the notebook is stable.
warnings.filterwarnings('ignore')

# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()

# API credentials; os.getenv returns None when a variable is not set.
MAPMYINDIA_API_KEY = os.getenv('MAPMYINDIA_API_KEY')
WEATHER_API_KEY = os.getenv('WEATHER_API_KEY')
GOOGLE_MAPS_API_KEY = os.getenv('GOOGLE_MAPS_API_KEY')

# Report unset keys up front: the API helpers in this notebook fall back to
# default or estimated values when a request fails, so a missing key would
# otherwise go unnoticed.
_missing_keys = [
    name
    for name, value in (
        ('MAPMYINDIA_API_KEY', MAPMYINDIA_API_KEY),
        ('WEATHER_API_KEY', WEATHER_API_KEY),
        ('GOOGLE_MAPS_API_KEY', GOOGLE_MAPS_API_KEY),
    )
    if not value
]

if _missing_keys:
    print(f"Warning: missing environment variables: {', '.join(_missing_keys)}")
else:
    print("Environment loaded successfully!")


Environment loaded successfully!


In [None]:
# Read the store list from the Excel workbook shipped with the project.
df = pd.read_excel('../data/Starbucks_Mumbai_Dataset.xlsx')

# Quick summary: shape first, then the column names, then a header for the
# row preview rendered by the final expression.
print(f"Dataset shape: {df.shape}")
print("\nDataset columns:", df.columns.tolist(), "\nFirst 5 rows:", sep="\n")
df.head()


Dataset shape: (32, 13)

Dataset columns:
['Brand', 'Store Number', 'Store Name', 'Ownership Type', 'Street Address', 'City', 'State', 'Country', 'Pincode', 'Phone Number', 'Timezone', 'Longitude', 'Latitude']

First 5 rows:


Unnamed: 0,Brand,Store Number,Store Name,Ownership Type,Street Address,City,State,Country,Pincode,Phone Number,Timezone,Longitude,Latitude
0,Starbucks,22456-221251,Prabhadevi,Joint Venture,"Upper Ground Floor, Century Bhavan, Century Ba...",Mumbai,MH,IN,400030,,GMT+05:30 Asia/New_Delhi,72.82,19.01
1,Starbucks,19524-202205,Malad West - Infiniti Mall,Joint Venture,"Upper Ground Floor, Infiniti Mall, Link Road, ...",Mumbai,MH,IN,400064,,GMT+05:30 Asia/New_Delhi,72.83,19.18
2,Starbucks,21450-211905,Santacruz West,Joint Venture,"Grd Flr Red Chillies Entertainment Bldg, Jct o...",Mumbai,MH,IN,400054,,GMT+05:30 Asia/New_Delhi,72.83,19.08
3,Starbucks,22539-222210,Nariman Point,Joint Venture,"Ground Floor, Shree Nirmal Building, Behind Tr...",Mumbai,MH,IN,400021,,GMT+05:30 Asia/New_Delhi,72.82,18.93
4,Starbucks,24458-238127,Andheri West - Four Bunglows,Joint Venture,"Kokilaben Dhirubhai Ambani Hospital, Four Bung...",Mumbai,MH,IN,400053,,GMT+05:30 Asia/New_Delhi,72.82,19.13


In [4]:
# Keep only stores that have both coordinates, then renumber the rows 0..n-1.
# Without the reset, dropping a row would leave gaps in the index, and
# store_id would no longer match the row position used by positional (.iloc)
# lookups such as those in generate_training_data.
df = df.dropna(subset=['Latitude', 'Longitude']).reset_index(drop=True)

# store_id is the zero-based row position of each store.
df['store_id'] = df.index

# Independent copy holding only the columns used downstream.
df_clean = df[['store_id', 'Store Name', 'Latitude', 'Longitude',
               'Street Address', 'City', 'State']].copy()

print(f"Cleaned dataset shape: {df_clean.shape}")
df_clean.head()


Cleaned dataset shape: (32, 7)


Unnamed: 0,store_id,Store Name,Latitude,Longitude,Street Address,City,State
0,0,Prabhadevi,19.01,72.82,"Upper Ground Floor, Century Bhavan, Century Ba...",Mumbai,MH
1,1,Malad West - Infiniti Mall,19.18,72.83,"Upper Ground Floor, Infiniti Mall, Link Road, ...",Mumbai,MH
2,2,Santacruz West,19.08,72.83,"Grd Flr Red Chillies Entertainment Bldg, Jct o...",Mumbai,MH
3,3,Nariman Point,18.93,72.82,"Ground Floor, Shree Nirmal Building, Behind Tr...",Mumbai,MH
4,4,Andheri West - Four Bunglows,19.13,72.82,"Kokilaben Dhirubhai Ambani Hospital, Four Bung...",Mumbai,MH


In [5]:
def mapmyindia_geocode(address, timeout=10):
    """
    Geocode a free-text address with the MapMyIndia Geocoding API.

    Args:
        address: Address string to look up.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict with keys 'lat' and 'lng' (floats) and 'formatted_address'
        (str), or None when the address is empty, the API key is not
        configured, the request fails, or the response holds no result.
    """
    # Nothing to look up; do not spend an API call on an empty query.
    if address is None or not str(address).strip():
        return None

    # Without a key the URL would contain the literal text "None".
    if not MAPMYINDIA_API_KEY:
        print("Geocoding error: MAPMYINDIA_API_KEY is not set")
        return None

    base_url = "https://apis.mapmyindia.com/advancedmaps/v1"
    endpoint = f"{base_url}/{MAPMYINDIA_API_KEY}/geocode"

    params = {
        'address': address,
        'region': 'IND'
    }

    try:
        # The timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(endpoint, params=params, timeout=timeout)
        if response.status_code != 200:
            print(f"Geocoding error: HTTP {response.status_code}")
            return None

        results = response.json().get('copResults')
        # NOTE(review): copResults is handled both as a list of matches and as
        # a single match object; confirm the actual shape against the API docs.
        if isinstance(results, dict):
            result = results
        elif results:
            result = results[0]
        else:
            return None

        return {
            'lat': float(result['latitude']),
            'lng': float(result['longitude']),
            'formatted_address': result.get('formattedAddress', '')
        }
    except Exception as e:
        # Best-effort lookup: report the problem and let the caller carry on.
        print(f"Geocoding error: {e}")

    return None

def validate_coordinates(df):
    """
    Fill in missing coordinates by geocoding each store's address.

    Every row whose 'Latitude' or 'Longitude' is missing is geocoded with
    mapmyindia_geocode; when a result comes back, both coordinates are
    written into the frame.

    Args:
        df: DataFrame with 'Store Name', 'Street Address', 'City',
            'Latitude', 'Longitude' and a state column named 'State'
            (or 'State/Province').

    Returns:
        The same DataFrame object, updated in place.
    """
    # The dataset loaded in this notebook names the column 'State';
    # 'State/Province' is kept as a fallback for frames that use that header.
    state_col = 'State' if 'State' in df.columns else 'State/Province'

    for idx, row in df.iterrows():
        if pd.isna(row['Latitude']) or pd.isna(row['Longitude']):
            address = f"{row['Street Address']}, {row['City']}, {row[state_col]}"
            result = mapmyindia_geocode(address)
            if result:
                df.at[idx, 'Latitude'] = result['lat']
                df.at[idx, 'Longitude'] = result['lng']
                print(f"Updated coordinates for {row['Store Name']}")
    return df


In [6]:
def get_weather_data(lat, lon, timeout=10):
    """
    Get current weather data from weatherapi.com.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict with keys 'temperature', 'humidity', 'wind_speed',
        'condition', 'visibility' and 'precipitation', read from the
        temp_c, humidity, wind_kph, condition.text, vis_km and precip_mm
        fields of the response. If the request fails or the response cannot
        be parsed, a fixed set of default values is returned instead.
    """
    # HTTPS so that the API key in the query string is not sent in clear text.
    base_url = "https://api.weatherapi.com/v1/current.json"

    params = {
        'key': WEATHER_API_KEY,
        'q': f"{lat},{lon}",
        'aqi': 'yes'
    }

    try:
        # The timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(base_url, params=params, timeout=timeout)
        if response.status_code == 200:
            current = response.json()['current']
            return {
                'temperature': current['temp_c'],
                'humidity': current['humidity'],
                'wind_speed': current['wind_kph'],
                'condition': current['condition']['text'],
                'visibility': current['vis_km'],
                'precipitation': current['precip_mm']
            }
        # Make the fallback visible instead of silently returning defaults.
        print(f"Weather API error: HTTP {response.status_code}")
    except Exception as e:
        # Best-effort lookup: report the problem and use the defaults below.
        print(f"Weather API error: {e}")

    # Default values used whenever live data is unavailable.
    return {
        'temperature': 25.0,
        'humidity': 70.0,
        'wind_speed': 10.0,
        'condition': 'Clear',
        'visibility': 10.0,
        'precipitation': 0.0
    }


In [None]:
def get_travel_time_distance(origin_lat, origin_lon, dest_lat, dest_lon, departure_time='now', timeout=10):
    """
    Get travel time and distance using the Google Distance Matrix API.

    Args:
        origin_lat, origin_lon: Coordinates of the starting point.
        dest_lat, dest_lon: Coordinates of the destination.
        departure_time: Value sent as the API's departure_time parameter.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict with keys 'distance', 'duration' and 'duration_in_traffic'.
        From the API, 'distance' is the reported value divided by 1000 and
        the durations are the reported values divided by 60 (kilometres and
        minutes, given that the API reports metres and seconds).
        'duration_in_traffic' falls back to the plain duration when the
        response omits it. If the API cannot be used, all three values are
        estimated from the geodesic distance.
    """
    base_url = "https://maps.googleapis.com/maps/api/distancematrix/json"

    params = {
        'origins': f"{origin_lat},{origin_lon}",
        'destinations': f"{dest_lat},{dest_lon}",
        'mode': 'driving',
        'departure_time': departure_time,
        'traffic_model': 'best_guess',
        'key': GOOGLE_MAPS_API_KEY
    }

    try:
        # The timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(base_url, params=params, timeout=timeout)
        if response.status_code == 200:
            data = response.json()
            if data.get('status') == 'OK':
                element = data['rows'][0]['elements'][0]
                if element.get('status') == 'OK':
                    duration_value = element['duration']['value']
                    traffic_value = element.get('duration_in_traffic', {}).get('value', duration_value)
                    return {
                        'distance': element['distance']['value'] / 1000,
                        'duration': duration_value / 60,
                        'duration_in_traffic': traffic_value / 60
                    }
                # Report why the estimate below is used instead of live data.
                print(f"Distance Matrix API element status: {element.get('status')}")
            else:
                print(f"Distance Matrix API status: {data.get('status')}")
        else:
            print(f"Distance Matrix API HTTP status: {response.status_code}")
    except Exception as e:
        # Best-effort lookup: report the problem and use the estimate below.
        print(f"Distance Matrix API error: {e}")

    # Estimate from straight-line distance: 3 minutes per km without traffic
    # (20 km/h) and 3.5 minutes per km with traffic.
    distance = geodesic((origin_lat, origin_lon), (dest_lat, dest_lon)).km
    return {
        'distance': distance,
        'duration': distance * 3,
        'duration_in_traffic': distance * 3.5
    }


In [8]:
def generate_training_data(df_locations, num_samples=1000):
    """
    Sample random origin-destination pairs of rows from df_locations.

    Args:
        df_locations: Object supporting len() and positional .iloc lookup
            (presumably the cleaned store DataFrame - verify against caller).
        num_samples: Number of sampling iterations to run.

    NOTE(review): the body shown here looks unfinished. Nothing is ever
    appended to training_data and there is no return statement, so the
    function currently returns None.
    """
    # Accumulator for the generated samples; not populated by the code below.
    training_data = []
    
    for _ in range(num_samples):
        # Draw two row positions uniformly from [0, len(df_locations)).
        origin_idx = np.random.randint(0, len(df_locations))
        dest_idx = np.random.randint(0, len(df_locations))
        
        # An iteration that draws the same row twice is skipped without a
        # retry, so fewer than num_samples distinct pairs are visited.
        if origin_idx == dest_idx:
            continue
        
        # Look up both rows by position.
        origin = df_locations.iloc[origin_idx]
        destination = df_locations.iloc[dest_idx]
        
    


In [1]:
pip install nbconvert[webpdf]

Collecting playwright (from nbconvert[webpdf])
  Downloading playwright-1.53.0-py3-none-win_amd64.whl.metadata (3.5 kB)
Collecting pyee<14,>=13 (from playwright->nbconvert[webpdf])
  Downloading pyee-13.0.0-py3-none-any.whl.metadata (2.9 kB)
Collecting greenlet<4.0.0,>=3.1.1 (from playwright->nbconvert[webpdf])
  Downloading greenlet-3.2.3-cp312-cp312-win_amd64.whl.metadata (4.2 kB)
Downloading playwright-1.53.0-py3-none-win_amd64.whl (35.4 MB)
   ---------------------------------------- 0.0/35.4 MB ? eta -:--:--
   ---------------------------------------- 0.3/35.4 MB ? eta -:--:--
    --------------------------------------- 0.8/35.4 MB 1.9 MB/s eta 0:00:19
   - -------------------------------------- 1.3/35.4 MB 2.2 MB/s eta 0:00:16
   -- ------------------------------------- 1.8/35.4 MB 2.3 MB/s eta 0:00:15
   -- ------------------------------------- 2.4/35.4 MB 2.4 MB/s eta 0:00:15
   -- ------------------------------------- 2.6/35.4 MB 2.2 MB/s eta 0:00:16
   --- -------------------

In [2]:
jupyter nbconvert --to webpdf --allow-chromium-download route_optimization.ipynb

SyntaxError: invalid syntax (3442288267.py, line 1)