In [8]:
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
import time

# === Read the master dataset from its Parquet file ===
file_path = r"C:\Users\PESU-RF\Downloads\Onions\Onions\processed_data\master_parquet\master_dataset_distinct_continuous_daily.parquet"
df = pd.read_parquet(file_path)

# === Distinct, non-null values of the 'mandi' column ===
mandis = (
    df['mandi']
    .dropna()
    .unique()
)
print(f"Found {len(mandis)} unique mandis.")

# === Geocoder setup ===
# `geolocator` is the Nominatim client; `geocode` wraps its lookup method so
# that consecutive calls are spaced at least 2 seconds apart.
geolocator = Nominatim(user_agent="mandi_geocoder")
geocode = RateLimiter(
    geolocator.geocode,
    min_delay_seconds=2,
)

# === Safe geocode function with retries ===
def safe_geocode(query, retries=3, delay=2, geocode_fn=None):
    """Look up the coordinates of ``query``, retrying when the lookup raises.

    By default the request goes through the module-level rate-limited
    ``geocode`` wrapper rather than ``geolocator.geocode`` directly, so that
    the configured minimum delay between requests is actually enforced.

    Args:
        query: Free-text place description passed to the geocoder.
        retries: Maximum number of attempts before giving up.
        delay: Seconds to sleep between two failed attempts.
        geocode_fn: Optional callable invoked as ``geocode_fn(query, timeout=10)``
            that returns an object with ``latitude``/``longitude`` attributes,
            or a falsy value when nothing matched. Defaults to the
            module-level ``geocode``.

    Returns:
        ``(latitude, longitude)`` on a match; ``(None, None)`` when the
        geocoder found nothing or every attempt raised.
    """
    if geocode_fn is None:
        # Resolved at call time so the function can be defined before the
        # rate limiter exists and so tests can inject a stub.
        geocode_fn = geocode

    for attempt in range(retries):
        try:
            location = geocode_fn(query, timeout=10)
            if location:
                return location.latitude, location.longitude
            # A clean "no match" is a final answer, not an error: do not retry.
            return None, None
        except Exception as e:
            # Deliberately broad: one bad lookup must not abort a long batch.
            print(f"Attempt {attempt+1} failed for {query}: {e}")
            # Back off only if another attempt will follow.
            if attempt < retries - 1:
                time.sleep(delay)
    return None, None

# === Geocode all mandis ===
# One lookup per mandi, with ", India" appended to the name in the query.
# Lookups that return nothing are kept as rows with None coordinates, so the
# output has exactly one row per mandi.
mandi_coords = []
for mandi in mandis:
    lat, lon = safe_geocode(f"{mandi}, India")
    print(f"{mandi}: {lat}, {lon}")
    mandi_coords.append({"mandi": mandi, "latitude": lat, "longitude": lon})

# === Save results ===
# Explicit columns keep the schema stable even when `mandis` is empty.
df_coords = pd.DataFrame(mandi_coords, columns=["mandi", "latitude", "longitude"])

# Both output files share one base path; only the extension differs.
output_base = r"C:\Users\PESU-RF\Downloads\Onions\Onions\processed_data\lat_long\mandi_coordinates"
df_coords.to_csv(output_base + ".csv", index=False)
df_coords.to_parquet(output_base + ".parquet", index=False)

# Report how many lookups actually produced coordinates, instead of claiming
# success for every mandi.
n_resolved = int(df_coords["latitude"].notna().sum())
n_unresolved = len(df_coords) - n_resolved
print(f"✅ Saved coordinates for {n_resolved} of {len(df_coords)} mandis ({n_unresolved} unresolved).")

Found 1629 unique mandis.
Naharlagun: 27.1308555, 93.709712
Howly: 26.4249628, 90.9712027
Gauripur: 26.0865358, 89.964662
Dibrugarh: 27.4844597, 94.9019447
P.O. Uparhali Guwahati: None, None
Dhing: 26.4449658, 92.5268228
Mapusa: 15.590853, 73.8102146
Ahmedabad(Chimanbhai Patal Market Vasana): None, None
Khambhat(Veg Yard Khambhat): None, None
Deesa(Deesa Veg Yard): None, None
Bhavnagar: 21.7718836, 72.1416449
Mahuva(Station Road): 21.0943917, 71.7568456
Dahod(Veg. Market): None, None
Jamnagar: 22.4732415, 70.0552102
Visavadar: 21.3417058, 70.7534299
Nadiyad(Piplag): None, None
Mehsana(Mehsana Veg): None, None
Morbi: 22.8003959, 70.886232
Gondal: 21.9575096, 70.8009896
Rajkot(Veg.Sub Yard): None, None
Surat: 21.1923647, 72.9551023
Padra: 22.2412214, 73.0855259
Vadodara(Sayajipura): None, None
Ambala City: 30.3780749, 76.7646449
Barara: 30.2423627, 77.0460519
Naraingarh: 30.445576, 77.1256318
Shahzadpur: 30.446413, 77.0334204
Bhiwani: 28.7931703, 76.1391283
Ch. Dadri: 28.5848609, 77.3601

In [1]:
%pip install geopy

Collecting geopy
  Downloading geopy-2.4.1-py3-none-any.whl.metadata (6.8 kB)
Collecting geographiclib<3,>=1.52 (from geopy)
  Downloading geographiclib-2.1-py3-none-any.whl.metadata (1.6 kB)
Downloading geopy-2.4.1-py3-none-any.whl (125 kB)
Downloading geographiclib-2.1-py3-none-any.whl (40 kB)
Installing collected packages: geographiclib, geopy

   -------------------- ------------------- 1/2 [geopy]
   -------------------- ------------------- 1/2 [geopy]
   -------------------- ------------------- 1/2 [geopy]
   ---------------------------------------- 2/2 [geopy]

Successfully installed geographiclib-2.1 geopy-2.4.1
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip
