In [1]:
import requests
import pandas as pd
import os
from datetime import datetime
import time

# OpenWeatherMap API key. Prefer supplying it through the OPENWEATHER_API_KEY
# environment variable; the literal fallback only keeps existing runs working.
# NOTE(review): the fallback key is stored in plain text in this notebook --
# rotate it and remove the literal once the environment variable is configured.
API_KEY = os.environ.get("OPENWEATHER_API_KEY") or "470d8152fada130825799f3d4f4a54ca"

# Cities queried by name on every polling cycle.
CITIES = ["Delhi", "Mumbai", "Bhopal", "Indore", "Bangalore"]

# Configured location for the accumulated CSV.
# NOTE(review): fetch_and_save does not read this constant; confirm which
# output path is actually intended.
OUTPUT_PATH = "../data/raw/weather_data.csv"

def risk_level(temp_c, wind_mps):
    """Classify a reading as "High", "Moderate" or "Low" risk.

    A tier applies when the temperature (°C) OR the wind speed (m/s) is
    strictly above that tier's limit; tiers are checked from most to least
    severe and the first match wins. Readings that exceed no limit are "Low".
    """
    # (label, temperature limit in °C, wind limit in m/s); 14 m/s ≈ 50 km/h.
    tiers = (
        ("High", 40, 14),
        ("Moderate", 35, 8),
    )
    for label, temp_limit, wind_limit in tiers:
        if temp_c > temp_limit or wind_mps > wind_limit:
            return label
    return "Low"

def _fetch_city_weather(city, timeout):
    """Return the decoded current-weather JSON payload for one city.

    Raises:
        requests.RequestException: on network failure, timeout, or a non-2xx
            response (e.g. unknown city or rejected API key).
    """
    response = requests.get(
        "https://api.openweathermap.org/data/2.5/weather",
        # Passing params lets requests URL-encode the city name safely.
        params={"q": city, "appid": API_KEY},
        # Without a timeout a stalled connection would block the poller forever.
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()


def fetch_and_save(output_path="weather_data.csv", timeout=10):
    """Fetch current weather for every city in CITIES and append it to a CSV.

    One row is written per city that could be fetched, with the columns
    city, lat, lon, humidity, wind_mps, temp_celsius, date_time, weather,
    description and risk_level (in that order). The header row is written only
    when the file does not exist yet; later calls append rows without a header.

    A city whose request fails or whose payload lacks an expected field is
    skipped with a printed warning so that one bad response does not abort the
    whole cycle. If no city succeeds, nothing is written.

    Args:
        output_path: CSV file to create or append to. The default is the
            notebook-local file this function has always written to.
        timeout: Seconds to wait for each HTTP request.
    """
    rows = []
    for city in CITIES:
        try:
            payload = _fetch_city_weather(city, timeout)
            conditions = payload["weather"][0]  # first listed weather condition
            rows.append({
                "city": payload["name"],
                "lat": payload["coord"]["lat"],
                "lon": payload["coord"]["lon"],
                "humidity": payload["main"]["humidity"],
                "wind_mps": payload["wind"]["speed"],
                "temp_raw": payload["main"]["temp"],
                "dt": payload["dt"],
                "weather": conditions["main"],
                "description": conditions["description"],
            })
        except (requests.RequestException, ValueError, LookupError) as exc:
            # ValueError: body was not valid JSON; LookupError: field missing.
            print(f"Warning: skipping {city}: {exc!r}")

    if not rows:
        print(f"Warning: no weather data fetched at {datetime.now()}; nothing written")
        return

    readings = pd.DataFrame(rows)
    # The raw temperature is converted with the Kelvin offset (273.15).
    readings["temp_celsius"] = (readings["temp_raw"] - 273.15).round(1)
    # "dt" is treated as epoch seconds.
    readings["date_time"] = pd.to_datetime(readings["dt"], unit="s")
    readings["risk_level"] = [
        risk_level(temp_c, wind)
        for temp_c, wind in zip(readings["temp_celsius"], readings["wind_mps"])
    ]

    # Fixed column order: appended rows carry no header, so they must line up
    # with the header written on the first call.
    final_df = readings[[
        "city", "lat", "lon", "humidity", "wind_mps", "temp_celsius",
        "date_time", "weather", "description", "risk_level",
    ]]

    # Append mode creates the file when missing; only then is a header needed.
    write_header = not os.path.exists(output_path)
    final_df.to_csv(output_path, mode="a", header=write_header, index=False)

    print(f"✅ Data added at {datetime.now()}")


In [None]:
# Seconds to wait between polling cycles: 1200 s = 20 minutes.
POLL_INTERVAL_SECONDS = 1200

try:
    # Poll indefinitely; interrupt the kernel to stop.
    while True:
        fetch_and_save()
        time.sleep(POLL_INTERVAL_SECONDS)
except KeyboardInterrupt:
    # Stop cleanly instead of leaving a traceback in the cell output.
    print("Polling stopped.")


✅ Data added at 2025-11-13 12:07:00.421375


In [None]:
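# NOTE: commented-out duplicate of risk_level from the first cell; the live definition there is the one in use.
# def risk_level(temp_c, wind_mps):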
#     if temp_c > 40 or wind_mps > 14:  # 14 mps ≈ 50 km/h
#         return "High"
#     elif temp_c > 35 or wind_mps > 8:
#         return "Moderate"
#     else:
#         return "Low"