# In [None]:
import os
import time
import pandas as pd
import datetime as dt
from tqdm import tqdm
import openmeteo_requests
import requests_cache
from retry_requests import retry

# Create the output directory and its logs subdirectory; existing
# directories are left untouched (exist_ok=True).
for _dir_path in ('output', 'output/logs'):
    os.makedirs(_dir_path, exist_ok=True)

# Human-readable labels for the numeric values of the "weather_code" hourly
# variable. Codes missing from this table are reported by the caller as
# "Unknown weather code".
# NOTE(review): presumably WMO weather interpretation codes - confirm against
# the Open-Meteo documentation.
weather_code_mapping = {
    # Sky condition
    0: "Clear sky",
    1: "Mainly clear",
    2: "Partly cloudy",
    3: "Cloudy",
    # Fog
    45: "Fog",
    48: "Depositing rime fog",
    # Drizzle
    51: "Light drizzle",
    53: "Moderate drizzle",
    55: "Dense drizzle",
    56: "Light freezing drizzle",
    57: "Dense freezing drizzle",
    # Rain
    61: "Light rain",
    63: "Moderate rain",
    65: "Heavy rain",
    66: "Light freezing rain",
    67: "Heavy freezing rain",
    # Snow
    71: "Light snow",
    73: "Moderate snow",
    75: "Heavy snow",
    77: "Snow grains",
    # Showers
    80: "Showers of rain",
    81: "Heavy showers of rain",
    82: "Violent showers of rain",
    85: "Showers of snow",
    86: "Heavy showers of snow",
    # Thunderstorms
    95: "Thunderstorms",
    96: "Thunderstorms with light hail",
    99: "Thunderstorms with heavy hail",
}

def _hourly_to_frame(hourly, timezone):
    """Turn one hourly response section into a frame with one row per time step.

    Args:
        hourly: Object returned by ``response.Hourly()``; it must provide
            ``Time()``, ``TimeEnd()``, ``Interval()`` and ``Variables(i)``.
        timezone: Name of the time zone the UTC time axis is converted to.

    Returns:
        DataFrame with the columns "Timestamp", "Weather Code",
        "Weather Description" and "Temperature (°C)".

    Raises:
        ValueError: If the value arrays and the generated time axis differ
            in length (raised by the DataFrame constructor).
    """
    # Time()/TimeEnd() are epoch seconds; the end bound is excluded.
    timestamps = pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    ).tz_convert(timezone)

    # Variables are read positionally; the positions are assumed to match the
    # order of the "hourly" list sent in the request.
    temperature_2m = hourly.Variables(0).ValuesAsNumpy()
    weather_codes = hourly.Variables(1).ValuesAsNumpy()

    # tolist() yields plain Python numbers, so a value such as 3.0 finds the
    # integer key 3; NaN or unlisted codes fall through to the default label.
    descriptions = [
        weather_code_mapping.get(code, "Unknown weather code")
        for code in weather_codes.tolist()
    ]

    return pd.DataFrame({
        "Timestamp": timestamps,
        # Store both numeric columns as float64.
        "Weather Code": weather_codes.astype("float64"),
        "Weather Description": descriptions,
        "Temperature (°C)": temperature_2m.astype("float64"),
    })


def get_weather_data_from_df(
    df,
    *,
    start_date=dt.datetime(2022, 12, 25),
    end_date=dt.datetime(2025, 6, 17),
):
    """Fetch hourly temperature and weather codes for every store in ``df``.

    One request per row is sent to the Open-Meteo archive endpoint through a
    cached, retrying session. A store whose request or processing fails is
    reported on stdout and skipped as a whole, so the result never contains
    partial data for a store that was reported as failed.

    Args:
        df: DataFrame with the columns "Latitude", "Longitude", "Address",
            "Store_No" and "Operating_Hours"; one row per store.
        start_date: First day of the requested period (date or datetime).
        end_date: Last day of the requested period (date or datetime).

    Returns:
        DataFrame with one row per store and time step and the columns
        "Timestamp", "Weather Code", "Weather Description",
        "Temperature (°C)", "Latitude", "Longitude", "Location Name",
        "Store No" and "Operating Hours". Empty (columns only) when no
        store produced data.
    """
    # NOTE(review): expire_after=-1 presumably means cached responses never
    # expire - confirm against the requests-cache documentation.
    cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
    retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
    openmeteo = openmeteo_requests.Client(session=retry_session)

    url = "https://archive-api.open-meteo.com/v1/archive"
    timezone = "Asia/Singapore"
    columns = [
        "Timestamp", "Weather Code", "Weather Description", "Temperature (°C)",
        "Latitude", "Longitude", "Location Name", "Store No", "Operating Hours",
    ]

    start_date_str = start_date.strftime('%Y-%m-%d')
    end_date_str = end_date.strftime('%Y-%m-%d')

    print(f"Processing {len(df)} stores...")

    store_frames = []
    for _, row in tqdm(df.iterrows(), total=len(df)):
        latitude = row["Latitude"]
        longitude = row["Longitude"]
        location_name = row["Address"]
        store_no = row["Store_No"]

        params = {
            "latitude": [latitude],
            "longitude": [longitude],
            "start_date": start_date_str,
            "end_date": end_date_str,
            "hourly": ["temperature_2m", "weather_code"],
            "temperature_unit": "celsius",
            "timezone": timezone,
        }

        try:
            responses = openmeteo.weather_api(url, params=params)
            store_frame = _hourly_to_frame(responses[0].Hourly(), timezone)

            # Store attributes are constant per store; broadcast them.
            store_frame["Latitude"] = latitude
            store_frame["Longitude"] = longitude
            store_frame["Location Name"] = location_name
            store_frame["Store No"] = store_no
            store_frame["Operating Hours"] = row["Operating_Hours"]
        except Exception as e:
            # Best effort: report the store and carry on with the next one.
            print(f"[ERROR] Store {store_no} ({latitude}, {longitude}) failed: {e}")
            continue

        # Only a fully built frame is kept; empty ones add no rows.
        if not store_frame.empty:
            store_frames.append(store_frame[columns])

    if not store_frames:
        return pd.DataFrame([], columns=columns)
    return pd.concat(store_frames, ignore_index=True)

# Load master store list
fmstores = pd.read_csv(r" ... csv")

# Join both address parts into one string. Concatenating with a missing
# Address_2 yields NaN and would wipe the whole address, so rows without
# Address_2 keep their original Address.
combined_address = fmstores['Address'] + ' ' + fmstores['Address_2']
fmstores['Address'] = combined_address.where(
    fmstores['Address_2'].notna(), fmstores['Address']
)

# Fetch weather data
fmweather = get_weather_data_from_df(fmstores)

# Save result
output_path = r" ... csv"
fmweather.to_csv(output_path, index=False)
print(f"\n✅ Weather data saved to: {output_path}")
print(f"Total rows saved: {len(fmweather):,}")