In [1]:
# Cell 1 — Imports & constants
import os
import json
import time
from datetime import datetime
import random

import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import joblib
from sklearn.svm import SVC

import requests

import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.svm import SVC

# Where history is stored
HISTORY_PATH = "history.json"

# Model filename
MODEL_PATH = "weather_model.pkl"

# OpenWeatherMap API key
API_KEY = "9f9c0c8022cb0167aa2d200823ff82aa"
OWM_BASE_WEATHER = "https://api.openweathermap.org/data/2.5/weather"
OWM_ONECALL = "https://api.openweathermap.org/data/2.5/onecall"

# image size used for training/prediction 
IMG_SIZE = (200, 200)


In [15]:
# Cell 2 — History helpers (single JSON file containing both prediction and weather histories)
def _ensure_history_file():
    if not os.path.exists(HISTORY_PATH):
        empty = {"predictions": [], "weather_searches": []}
        with open(HISTORY_PATH, "w") as f:
            json.dump(empty, f, indent=2)

def load_history():
    """Return the whole history as a dict with keys 'predictions' and 'weather_searches'."""
    _ensure_history_file()
    with open(HISTORY_PATH, "r") as f:
        return json.load(f)

def save_history(history_dict):
    """Overwrite the history json file with history_dict."""
    with open(HISTORY_PATH, "w") as f:
        json.dump(history_dict, f, indent=2, default=str)

def add_prediction_history(image_path, prediction, confidence=None, probs=None, timestamp=None):
    """Append a prediction record to history."""
    history = load_history()
    if timestamp is None:
        timestamp = datetime.now().isoformat()
    record = {
        "id": int(time.time() * 1000) + random.randint(0, 999),
        "type": "prediction",
        "timestamp": timestamp,
        "image_path": image_path,
        "prediction": str(prediction),
        "confidence": float(confidence) if confidence is not None else None,
        "probs": {str(k): float(v) for k, v in (probs.items() if isinstance(probs, dict) else (zip([], [])))}
    }
    # if probs is an iterable of labels/values, convert
    if isinstance(probs, (list, tuple, np.ndarray)):
        labels = list(clf.classes_) if 'clf' in globals() else None
        if labels is not None:
            record["probs"] = {str(l): float(p) for l, p in zip(labels, probs)}
        else:
            record["probs"] = {str(i): float(p) for i, p in enumerate(probs)}
    history["predictions"].append(record)
    save_history(history)
    return record

def add_weather_history(city, weather_info, timestamp=None):
    """Append a weather search record to history."""
    history = load_history()
    if timestamp is None:
        timestamp = datetime.now().isoformat()
    record = {
        "id": int(time.time() * 1000) + random.randint(0, 999),
        "type": "weather_search",
        "timestamp": timestamp,
        "city": city,
        "weather": weather_info  # expect a dict with detailed fields
    }
    history["weather_searches"].append(record)
    save_history(history)
    return record

def delete_history_entry(entry_id):
    """Delete a single history entry by its id."""
    history = load_history()
    before = (len(history["predictions"]) + len(history["weather_searches"]))
    history["predictions"] = [r for r in history["predictions"] if r.get("id") != entry_id]
    history["weather_searches"] = [r for r in history["weather_searches"] if r.get("id") != entry_id]
    save_history(history)
    after = (len(history["predictions"]) + len(history["weather_searches"]))
    return before - after  # number of removed entries

def clear_history():
    save_history({"predictions": [], "weather_searches": []})


In [None]:
#Training cell (SVM with probability=True).

IMG_SIZE = (200, 200)      # resize all images to same size
MODEL_PATH = "weather_model.pkl"

# Function that loads an image and flattens it into a 1D feature vector
def load_image_flat(path):
    img = Image.open(path).convert("RGB")  # convert to RGB for consistency
    img = img.resize(IMG_SIZE)             # same size for every image
    return np.array(img).flatten()         # flatten 200x200x3 → 120k features

DATA_DIR = "images"
all_paths = []    # stores file paths of all images
all_labels = []   # stores category name for each image

# Collect all image paths + labels 
for label in os.listdir(DATA_DIR):
    folder = os.path.join(DATA_DIR, label)
    if not os.path.isdir(folder):
        continue     # skip if not a folder
    
    for fname in os.listdir(folder):
        if fname.lower().endswith((".jpg", ".jpeg", ".png")):
            all_paths.append(os.path.join(folder, fname))
            all_labels.append(label)

print(f"Found {len(all_paths)} images across {len(set(all_labels))} categories: {sorted(set(all_labels))}")

# Convert all images into feature vectors 
X = [load_image_flat(p) for p in all_paths]   # list of 1D arrays
y = all_labels                                 # true labels

# Split into 80% training, 20% testing 
X_train, X_test, y_train, y_test, train_paths, test_paths = train_test_split(
    X, y, all_paths,
    test_size=0.2,
    random_state=42,
    stratify=y
)

print(f"Training on {len(X_train)} images, testing on {len(X_test)} images")

#Train the SVM model 
clf = SVC(kernel="rbf", probability=True)
clf.fit(X_train, y_train)

#Quick evaluation 
y_pred = clf.predict(X_test)
print("\nClassification report (test split):")
print(classification_report(y_test, y_pred))

#Save trained model
joblib.dump(clf, MODEL_PATH)
print(f"Model saved to {MODEL_PATH}")


Found 912 images across 3 categories: ['cloudy', 'day', 'night']
Training on 729 images, testing on 183 images

Classification report (test split):
              precision    recall  f1-score   support

      cloudy       0.89      0.85      0.87        60
         day       0.93      0.96      0.94       122
       night       0.00      0.00      0.00         1

    accuracy                           0.92       183
   macro avg       0.61      0.60      0.61       183
weighted avg       0.91      0.92      0.91       183

Model saved to weather_model.pkl


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [None]:
#Prediction function 
#load model if not already loaded
if 'clf' not in globals():
    if os.path.exists(MODEL_PATH):
        clf = joblib.load(MODEL_PATH)
    else:
        raise FileNotFoundError(f"Model file '{MODEL_PATH}' not found. Run the training cell first.")

def predict_weather_image(img_path, show_image=True):
    # safety: ensure a file (not folder)
    if os.path.isdir(img_path):
        raise ValueError("Provided path is a directory. Please provide a full image file path (e.g. images/day/img1.jpg).")
    # load and show once
    img = Image.open(img_path).convert("RGB")
    if show_image:
        plt.figure(figsize=(4,4))
        plt.imshow(img)
        plt.axis("off")
        plt.title("Uploaded Image")
        plt.show()
    # preprocess
    arr = np.array(img.resize(IMG_SIZE)).flatten()
    # predict
    pred = clf.predict([arr])[0]
    pred = str(pred)  # convert numpy type to python str
    result = {"prediction": pred}
    # probabilities
    if hasattr(clf, "predict_proba"):
        probs = clf.predict_proba([arr])[0]
        labels = [str(l) for l in clf.classes_]
        # build dict label->prob
        probs_dict = {label: float(p) for label, p in zip(labels, probs)}
        # overall confidence = highest prob
        confidence = float(max(probs)) * 100.0
        result.update({"confidence": confidence, "probs": probs_dict})
        # print clean one-line summary + confidence
        summary = f"{pred} ({confidence:.1f}% confident)"
        print(summary)
    else:
        result.update({"confidence": None, "probs": None})
        print(pred)
    # save to history
    try:
        add_prediction_history(img_path, result["prediction"], confidence=result.get("confidence"), probs=result.get("probs"))
    except Exception as e:
        print("Warning: could not save prediction history:", e)
    return result


In [None]:
#Weather API function 
def get_weather_for_city(city):
    city = city.strip()
    params = {"q": city, "appid": API_KEY, "units": "metric"}
    r = requests.get(OWM_BASE_WEATHER, params=params, timeout=10)
    data = r.json()
    if data.get("cod") != 200:
        # not found or error
        return None, data.get("message", "unknown error")
    # extract main info
    weather_desc = data["weather"][0]["description"].capitalize()
    temp = data["main"]["temp"]
    feels_like = data["main"].get("feels_like")
    humidity = data["main"].get("humidity")
    wind_speed = data.get("wind", {}).get("speed", 0.0)  # m/s
    wind_deg = data.get("wind", {}).get("deg", None)
    rain_1h = data.get("rain", {}).get("1h", 0.0)  # mm in last 1h if present

    # get coordinates and call onecall for hourly percentage of precipitation 
    coord = data.get("coord", {})
    lat = coord.get("lat"); lon = coord.get("lon")
    chance_of_rain_pct = None
    pop_max_pct = None
    hourly_summary = None
    if lat is not None and lon is not None:
        params2 = {"lat": lat, "lon": lon, "appid": API_KEY, "units": "metric", "exclude": "minutely,daily,alerts"}
        r2 = requests.get(OWM_ONECALL, params=params2, timeout=10)
        one = r2.json()
        hourly = one.get("hourly", [])
        # compute chance: average and max pop across next 12 hours if available
        pops = [h.get("pop", 0.0) for h in hourly[:12]]  # 0..1
        if pops:
            avg_pop = float(sum(pops) / len(pops))
            chance_of_rain_pct = avg_pop * 100.0
            pop_max_pct = float(max(pops) * 100.0)
            hourly_summary = {"next_12h_avg_pop_pct": chance_of_rain_pct, "next_12h_max_pop_pct": pop_max_pct}
    # wind strength textual
    wind_ms = wind_speed
    if wind_ms < 2:
        wind_desc = "calm"
    elif wind_ms < 6:
        wind_desc = "light breeze"
    elif wind_ms < 12:
        wind_desc = "moderate wind"
    elif wind_ms < 20:
        wind_desc = "strong wind"
    else:
        wind_desc = "very strong wind"

    info = {
        "city": city.title(),
        "weather": weather_desc,
        "temperature_c": temp,
        "feels_like_c": feels_like,
        "humidity_pct": humidity,
        "wind_speed_m_s": wind_ms,
        "wind_desc": wind_desc,
        "wind_deg": wind_deg,
        "rain_1h_mm": rain_1h,
        "hourly_summary": hourly_summary,
        "source_timestamp": datetime.now().isoformat()
    }

    # save to history
    try:
        add_weather_history(city.title(), info)
    except Exception as e:
        print("Warning: could not save weather history:", e)

    return info, None


In [None]:
#cell 6
def compare_cities():
    # Ask how many cities
    n = int(input("How many cities do you want to compare? ").strip())

    cities = []
    for i in range(n):
        city = input(f"Enter city #{i+1}: ").strip()
        
        # API request
        params = {"q": city, "appid": API_KEY, "units": "metric"}
        r = requests.get(OWM_BASE_WEATHER, params=params).json()

        if r.get("cod") != 200:
            print(f"❌ '{city}' not found. Skipping.")
            continue

        # Extract info
        temp = r["main"]["temp"]
        wind = r["wind"]["speed"]
        humidity = r["main"]["humidity"]

        # Rain may be missing → put N/A
        rain = r.get("rain", {}).get("1h", "N/A")

        cities.append({
            "City": city.title(),
            "Temp (°C)": temp,
            "Wind (m/s)": wind,
            "Humidity (%)": humidity,
            "Rain (mm/hr)": rain
        })

    if len(cities) == 0:
        print("No valid cities were entered.")
        return

    # Convert to DataFrame
    df = pd.DataFrame(cities)
    print("\n===== City Weather Table =====")
    print(df)

    x = range(len(df))

    plt.figure(figsize=(10, 6))
    width = 0.25

    plt.bar([i - width for i in x], df["Temp (°C)"], width=width, color="red", label="Temperature (°C)")
    plt.bar(x, df["Wind (m/s)"], width=width, color="blue", label="Wind Speed (m/s)")
    plt.bar([i + width for i in x], df["Humidity (%)"], width=width, color="green", label="Humidity (%)")

    plt.xticks(x, df["City"])
    plt.title("City Weather Comparison")
    plt.ylabel("Values")
    plt.legend()
    plt.grid(axis="y", linestyle="--", alpha=0.5)
    plt.show()

    print("\n===== Summary =====")

    # Temperature
    hottest = df.loc[df["Temp (°C)"].idxmax()]
    coldest = df.loc[df["Temp (°C)"].idxmin()]

    print(f"Hottest: {hottest['City']} ({hottest['Temp (°C)']}°C)")
    print(f"Coldest: {coldest['City']} ({coldest['Temp (°C)']}°C)")

    # Wind
    windiest = df.loc[df["Wind (m/s)"].idxmax()]
    least_windy = df.loc[df["Wind (m/s)"].idxmin()]

    print(f"Windiest: {windiest['City']} ({windiest['Wind (m/s)']} m/s)")
    print(f"Least windy: {least_windy['City']} ({least_windy['Wind (m/s)']} m/s)")

    # Rain (check for valid numeric rain values)
    valid_rain = df[df["Rain (mm/hr)"] != "N/A"]

    if len(valid_rain) == 0:
        print("Rain: N/A for all cities")
    else:
        most_rain = valid_rain.loc[valid_rain["Rain (mm/hr)"].idxmax()]
        least_rain = valid_rain.loc[valid_rain["Rain (mm/hr)"].idxmin()]

        print(f"Most rainy: {most_rain['City']} ({most_rain['Rain (mm/hr)']} mm/hr)")
        print(f"Least rainy: {least_rain['City']} ({least_rain['Rain (mm/hr)']} mm/hr)")        

    print("================================\n")



In [None]:
# small utilities to inspect and manage history
def show_history_summary(limit=20):
    h = load_history()
    print("=== PREDICTION HISTORY (most recent first) ===")
    for r in sorted(h["predictions"], key=lambda x: x["timestamp"], reverse=True)[:limit]:
        print(f"{r['id']} | {r['timestamp']} | {r['image_path']} -> {r['prediction']} ({r.get('confidence')})")
    print("\n=== WEATHER SEARCH HISTORY (most recent first) ===")
    for r in sorted(h["weather_searches"], key=lambda x: x["timestamp"], reverse=True)[:limit]:
        city = r.get("city")
        w = r.get("weather", {}).get("weather")
        t = r.get("weather", {}).get("temperature_c")
        print(f"{r['id']} | {r['timestamp']} | {city} -> {w} {t}°C")
    print("=== End history ===")

def delete_history_by_id():
    try:
        entry_id = int(input("Enter entry id to delete: ").strip())
    except:
        print("Invalid id")
        return
    removed = delete_history_entry(entry_id)
    if removed:
        print(f"Deleted {removed} entries with id {entry_id}")
    else:
        print("No entry with that id found.")

def clear_all_history_prompt():
    confirm = input("Really clear all history? Type YES to confirm: ").strip()
    if confirm == "YES":
        clear_history()
        print("History cleared.")
    else:
        print("Cancelled.")
# wrapper functions
def predict_from_user():
    img_path = input("Enter image file path: ").strip()
    result = predict_weather_image(img_path)
    return result

def weather_search_from_user():
    city = input("Enter city name: ").strip()
    info, err = get_weather_for_city(city)
    if err:
        print(f"❌ Error: {err}")
    else:
        # print a neat summary with chance of rain + wind
        print("\n===== Weather Summary =====")
        print(f"{info['city']}: {info['weather']}, {info['temperature_c']}°C (feels like {info['feels_like_c']}°C)")
        if info["hourly_summary"]:
            print(f"Chance of precipitation (next 12h) — avg: {info['hourly_summary']['next_12h_avg_pop_pct']:.1f}%  max: {info['hourly_summary']['next_12h_max_pop_pct']:.1f}%")
        if info["rain_1h_mm"]:
            print(f"Recent rain: {info['rain_1h_mm']} mm in last 1h")
        print(f"Wind: {info['wind_speed_m_s']} m/s — {info['wind_desc']}")
        print("===========================\n")
    return info


In [None]:
#Menu
def menu():
    print("\n==== WEATHER MENU ====")
    print("1. Predict image ")
    print("2. Get weather for city ")
    print("3. Compare city's weather")
    print("4. Show history summary")
    print("5. Delete history entry by id")
    print("6. Clear all history")
    print("7. Exit")
    while True:
        choice = input("Choice (1-7): ").strip()
        if choice == "1":
            predict_from_user()
        elif choice == "2":
            weather_search_from_user()
        elif choice == "3":
            compare_cities()
        elif choice == "4":
            show_history_summary()
        elif choice == "5":
            delete_history_by_id()
        elif choice == "6":
            clear_all_history_prompt()
        elif choice == "7":
            print("Exiting menu.")
            break
        else:
            print("Invalid choice — try again.")

# To run the menu, call: menu()


In [9]:
# Cell 9 – cleaner helper to show bar chart of prediction probabilities
def plot_probs(probs_dict):
    labels = list(probs_dict.keys())
    raw_values = list(probs_dict.values())

    # Convert fractions (0.0–1.0) to percentages if needed
    values = []
    for v in raw_values:
        try:
            fv = float(v)
        except:
            fv = 0.0

        if fv <= 1.0:     # assume fraction
            fv = fv * 100
        values.append(fv)

    # Plot
    plt.figure(figsize=(6, 3))
    plt.bar(labels, values)
    plt.ylabel("Probability (%)")
    plt.title("Class Probabilities")
    plt.show()


In [22]:
menu()


==== WEATHER MENU ====
1. Predict image 
2. Get weather for city 
3. Compare city's weather
4. Show history summary
5. Delete history entry by id
6. Clear all history
7. Exit
Exiting menu.
