<a href="https://colab.research.google.com/github/aravinddddddd/Weather-Data-Analyzer-/blob/main/%20Real-Time%20Data%20Integration%20%26%20Analytics%20Pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import requests
import pandas as pd
import os
import json
from datetime import datetime
CONFIG = {
    "api_key": "5e1b250446765426815156d433a8081c",  # üîë Replace with your OpenWeather API key
    "cities": ["Bangalore", "Hyderabad", "Mumbai", "Delhi"]
}

API_KEY = CONFIG["api_key"]
CITIES = CONFIG["cities"]
BASE_URL = "https://api.openweathermap.org/data/2.5/weather"
DATA_FILE = "weather_data.csv"

def fetch_weather(city):
    """Fetch current weather data for a city from OpenWeather API"""
    params = {"q": city, "appid": API_KEY, "units": "metric"}
    try:
        response = requests.get(BASE_URL, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
        return {
            "city": city,
            "timestamp": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
            "temperature": data["main"]["temp"],
            "humidity": data["main"]["humidity"],
            "pressure": data["main"]["pressure"],
            "weather": data["weather"][0]["main"]
        }
    except requests.exceptions.RequestException as e:
        print(f"‚ùå Failed for {city}: {e}")
        return None

def validate_data(df):
    """Remove rows with missing values"""
    return df.dropna()

def remove_duplicates(df):
    """Keep only unique city + timestamp combinations"""
    return df.drop_duplicates(subset=["city", "timestamp"], keep="last")

def append_to_csv(new_data):
    """Append validated data to CSV file"""
    new_df = pd.DataFrame(new_data)
    new_df = validate_data(new_df)
    if os.path.exists(DATA_FILE):
        old_df = pd.read_csv(DATA_FILE)
        combined_df = pd.concat([old_df, new_df])
        combined_df = remove_duplicates(combined_df)
    else:
        combined_df = new_df
    combined_df.to_csv(DATA_FILE, index=False)
    print("‚úÖ Data appended successfully to weather_data.csv")

def generate_summary():
    """Generate daily summary file"""
    if not os.path.exists(DATA_FILE):
        print("‚ö† No data found to summarize.")
        return
    df = pd.read_csv(DATA_FILE)
    df["timestamp"] = pd.to_datetime(df["timestamp"])
    df["date"] = df["timestamp"].dt.date

    summary = (
        df.groupby(["date", "city"])
        .agg({"temperature": "mean", "humidity": "mean", "pressure": "mean"})
        .reset_index()
    )
    summary.to_csv("daily_summary.csv", index=False)
    print("üìä Daily summary saved to daily_summary.csv")

def main():
    all_data = []
    for city in CITIES:
        print(f"Fetching weather for {city} ...")
        weather = fetch_weather(city)
        if weather:
            all_data.append(weather)
    if all_data:
        append_to_csv(all_data)
        generate_summary()
        print("üéâ Pipeline run complete!")
    else:
        print("‚ö† No new data fetched.")

if __name__ == "__main__":
    main()

Fetching weather for Bangalore ...
Fetching weather for Hyderabad ...


  "timestamp": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),


Fetching weather for Mumbai ...
Fetching weather for Delhi ...
‚úÖ Data appended successfully to weather_data.csv
üìä Daily summary saved to daily_summary.csv
üéâ Pipeline run complete!
