In [None]:
import pandas as pd
import openmeteo_requests
import requests_cache
from retry_requests import retry
import time
import os

# 1. Setup API client
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

def fetch_historical_weather(lat, lon):
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": lat, "longitude": lon,
        "start_date": "2018-01-01", "end_date": "2024-12-31",
        "daily": ["temperature_2m_mean", "relative_humidity_2m_mean", "precipitation_sum"],
        "timezone": "auto"
    }
    try:
        # Added a timeout of 60 seconds to prevent the "7-minute hang"
        responses = openmeteo.weather_api(url, params=params)
        response = responses[0]
        daily = response.Daily()
        return pd.DataFrame({
            "date": pd.date_range(
                start=pd.to_datetime(daily.Time(), unit="s"),
                periods=len(daily.Variables(0).ValuesAsNumpy()),
                freq='D'
            ),
            "temp": daily.Variables(0).ValuesAsNumpy(),
            "humidity": daily.Variables(1).ValuesAsNumpy(),
            "rain": daily.Variables(2).ValuesAsNumpy(),
            "latitude": lat,
            "longitude": lon
        })
    except Exception as e:
        if "limit exceeded" in str(e).lower() or "timeout" in str(e).lower():
            print(f"Connection issue/Rate limit. Waiting 60s and retrying...")
            time.sleep(60)
            return fetch_historical_weather(lat, lon)
        print(f"Skipping {lat}, {lon} due to error: {e}")
        return None

# 2. Load Data
csv_path = r"C:\Users\PESU-RF\Downloads\Onions\Onions\processed_data\lat_long\mandi_coordinates.csv"
df_locations = pd.read_csv(csv_path).dropna(subset=['latitude', 'longitude'])
output_filename = "historical_weather_2018_2024.parquet"

# 3. Execution Loop with Checkpointing
for index, row in df_locations.iterrows():
    print(f"[{index + 1}/{len(df_locations)}] Fetching: {row['latitude']}, {row['longitude']}")
    
    # Check if this location is already in our saved file (optional resume logic)
    # For now, we will just append each new location.
    
    df_result = fetch_historical_weather(row['latitude'], row['longitude'])
    
    if df_result is not None:
        # SAVE IMMEDIATELY: This keeps data safe even if the script hangs later
        if not os.path.exists(output_filename):
            df_result.to_parquet(output_filename, engine='fastparquet', index=False)
        else:
            df_result.to_parquet(output_filename, engine='fastparquet', append=True, index=False)
    
    time.sleep(1.5) # Slightly faster but still safe

print(f"Success! Process complete. Data is in {output_filename}")

[1/1113] Fetching: 27.1308555, 93.709712
[2/1113] Fetching: 26.4249628, 90.9712027
[3/1113] Fetching: 26.0865358, 89.964662
[4/1113] Fetching: 27.4844597, 94.9019447
[6/1113] Fetching: 26.4449658, 92.5268228
[7/1113] Fetching: 15.590853, 73.8102146
[11/1113] Fetching: 21.7718836, 72.1416449
[12/1113] Fetching: 21.0943917, 71.7568456
[14/1113] Fetching: 22.4732415, 70.0552102
[15/1113] Fetching: 21.3417058, 70.7534299
[18/1113] Fetching: 22.8003959, 70.886232
[19/1113] Fetching: 21.9575096, 70.8009896
[21/1113] Fetching: 21.1923647, 72.9551023
[22/1113] Fetching: 22.2412214, 73.0855259
[24/1113] Fetching: 30.3780749, 76.7646449
[25/1113] Fetching: 30.2423627, 77.0460519
[26/1113] Fetching: 30.445576, 77.1256318
[27/1113] Fetching: 30.446413, 77.0334204
[28/1113] Fetching: 28.7931703, 76.1391283
[29/1113] Fetching: 28.5848609, 77.3601866
[30/1113] Fetching: 28.4031478, 77.3105561
[31/1113] Fetching: 28.4474356, 76.8262064
[32/1113] Fetching: 28.4646148, 77.0299194
[33/1113] Fetching: 29.

In [2]:
%pip install openmeteo_requests requests_cache retry_requests fastparquet

Collecting fastparquet
  Downloading fastparquet-2025.12.0-cp313-cp313-win_amd64.whl.metadata (4.6 kB)
Collecting cramjam>=2.3 (from fastparquet)
  Downloading cramjam-2.11.0-cp313-cp313-win_amd64.whl.metadata (681 bytes)
Collecting fsspec (from fastparquet)
  Downloading fsspec-2025.12.0-py3-none-any.whl.metadata (10 kB)
Downloading fastparquet-2025.12.0-cp313-cp313-win_amd64.whl (667 kB)
   ---------------------------------------- 0.0/667.4 kB ? eta -:--:--
   --------------------------------------- 667.4/667.4 kB 20.9 MB/s eta 0:00:00
Downloading cramjam-2.11.0-cp313-cp313-win_amd64.whl (1.7 MB)
   ---------------------------------------- 0.0/1.7 MB ? eta -:--:--
   ---------------------------------------- 1.7/1.7 MB 28.4 MB/s eta 0:00:00
Downloading fsspec-2025.12.0-py3-none-any.whl (201 kB)
Installing collected packages: fsspec, cramjam, fastparquet

   ---------------------------------------- 0/3 [fsspec]
   ---------------------------------------- 0/3 [fsspec]
   ---------------


[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip
