Install Libraries

In [5]:
!pip install -q transformers faiss-cpu sentence-transformers requests gradio google-genai

Imports

In [6]:
import os
import sys
from datetime import datetime, timedelta, timezone

import faiss
import gradio as gr
import numpy as np
import requests
from google import genai
from sentence_transformers import SentenceTransformer

Keys and Configurations

In [7]:
# API credentials are read from the environment so that no secret is stored in
# (or rendered by) the notebook. Set WEATHER_API_KEY, GEMINI_API_KEY and
# GOOGLE_API_KEY before starting the kernel, or with `%env NAME=value` in a
# scratch cell that is not saved. A missing variable yields an empty string,
# so the failure surfaces when the key is first used.
WEATHER_API_KEY = os.environ.get("WEATHER_API_KEY", "")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")

# Remote endpoints.
WEATHER_BASE_URL = "https://api.openweathermap.org/data/2.5/forecast"
GOOGLE_GEOCODE_URL = "https://maps.googleapis.com/maps/api/geocode/json"

# Sentence-Transformers model used to embed the daily summaries and the queries.
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
# Number of daily summaries retrieved from the FAISS index per question.
TOP_K_CONTEXT = 3
# Forecast horizon (in days) advertised in the UI text.
FORECAST_DAYS = 5

Defining Class and Objects

In [None]:
class WeatherRAGAssistant:
    """Retrieval-augmented weather assistant.

    Downloads a forecast from ``WEATHER_BASE_URL``, condenses it into one text
    summary per local calendar day, indexes those summaries with FAISS, and
    answers questions with Gemini using the retrieved summaries as context.
    """

    def __init__(self, weather_key: str, gemini_key: str):
        """Load the embedding model and create the Gemini client.

        Args:
            weather_key: API key sent as ``appid`` with every forecast request.
            gemini_key: API key handed to ``genai.Client``.
        """
        self.embed_model = SentenceTransformer(EMBEDDING_MODEL_NAME)
        self.gemini_client = genai.Client(api_key=gemini_key)
        self.weather_key = weather_key
        # FAISS index over ``self.texts``; None until data has been indexed.
        self.index = None
        # One summary string per forecast day, sorted by date.
        self.texts = []
        # Raw JSON of the most recent forecast request.
        self.api_response = None

    def fetch_and_index_weather(self, lat: float, lon: float):
        """Fetch the forecast for the given coordinates and rebuild the index.

        Args:
            lat: Latitude in decimal degrees.
            lon: Longitude in decimal degrees.

        Returns:
            Tuple ``(api_response, texts)``: the decoded JSON payload and the
            list of per-day summary strings derived from it.

        Raises:
            requests.RequestException: on network failure or a non-2xx status.
        """
        params = {"lat": lat, "lon": lon, "appid": self.weather_key, "units": "metric"}
        response = requests.get(WEATHER_BASE_URL, params=params, timeout=10)
        response.raise_for_status()
        api_response = response.json()
        self.api_response = api_response
        self.texts = self._format_weather_data(api_response)
        self._build_faiss_index(self.texts)
        return api_response, self.texts

    def _format_weather_data(self, api_response: dict) -> list[str]:
        """Aggregate forecast entries into one summary line per local day.

        For every local calendar date the summary carries the maximum and
        minimum temperature, the summed ``rain['3h']`` amounts and the sorted
        set of condition descriptions.

        Args:
            api_response: Decoded forecast JSON. Reads ``city.timezone``
                (offset in seconds, defaults to 0) and the ``list`` entries.

        Returns:
            Summary strings sorted chronologically; empty when ``list`` is
            missing or empty.
        """
        daily_data = {}
        # `or {}` tolerates both a missing and a null 'city' object.
        tz_offset_seconds = (api_response.get('city') or {}).get('timezone', 0)

        for entry in api_response.get('list', []):
            # Timezone-aware replacement for the deprecated utcfromtimestamp().
            utc_datetime = datetime.fromtimestamp(entry['dt'], tz=timezone.utc)
            local_datetime = utc_datetime + timedelta(seconds=tz_offset_seconds)
            date_str = local_datetime.strftime('%Y-%m-%d')

            temp_max = entry['main']['temp_max']
            temp_min = entry['main']['temp_min']
            description = entry['weather'][0]['description']
            # The 'rain' object is absent from entries without precipitation.
            rain = entry.get('rain', {}).get('3h', 0)

            day = daily_data.setdefault(date_str, {
                'temp_max': temp_max, 'temp_min': temp_min,
                'description_list': set(), 'rain_total': 0.0
            })
            day['temp_max'] = max(day['temp_max'], temp_max)
            day['temp_min'] = min(day['temp_min'], temp_min)
            day['description_list'].add(description)
            day['rain_total'] += rain

        texts = []
        for date, data in daily_data.items():
            descriptions = ", ".join(sorted(data['description_list']))
            rain_total_rounded = round(data['rain_total'], 2)
            texts.append(
                f"Date: {date}, Max Temp: {round(data['temp_max'], 1)}°C, "
                f"Min Temp: {round(data['temp_min'], 1)}°C, "
                f"Total Rain: {rain_total_rounded}mm, Conditions: {descriptions}"
            )
        # Every line starts with "Date: YYYY-MM-DD", so a plain sort is chronological.
        texts.sort()
        return texts

    def _build_faiss_index(self, texts: list[str]):
        """Embed ``texts`` and store them in a fresh L2 FAISS index.

        With no texts there is nothing to embed (and no embedding dimension to
        read), so the index is reset to None instead.
        """
        if not texts:
            self.index = None
            return
        embeddings = self.embed_model.encode(texts, convert_to_numpy=True)
        embeddings = np.ascontiguousarray(embeddings.astype('float32'))
        d = embeddings.shape[1]
        self.index = faiss.IndexFlatL2(d)
        self.index.add(embeddings)

    def search_index(self, query: str) -> list[str]:
        """Return up to ``TOP_K_CONTEXT`` summaries closest to ``query``.

        Returns an empty list when nothing is indexed or when the search fails.
        """
        if self.index is None or not self.texts:
            return []
        try:
            query_vec = self.embed_model.encode([query], convert_to_numpy=True).astype('float32')
            # Never ask for more neighbours than there are indexed summaries.
            k = min(TOP_K_CONTEXT, len(self.texts))
            _, neighbours = self.index.search(query_vec, k)
            # Out-of-range ids are dropped defensively.
            return [self.texts[i] for i in neighbours[0] if 0 <= i < len(self.texts)]
        except Exception:
            # Deliberate best effort: retrieval failure means "no context".
            return []

    def generate_response(self, user_query: str, context: list[str]) -> str:
        """Ask Gemini to answer ``user_query`` using ``context`` lines.

        Args:
            user_query: The user's question.
            context: Summary lines placed in the prompt.

        Returns:
            The model's answer with surrounding whitespace removed, a fixed
            message when ``context`` is empty, or an error message when the
            request fails or yields no text.
        """
        if not context:
            return "I could not find relevant weather data."
        # Joined outside the f-string: a backslash inside an f-string
        # expression is a SyntaxError before Python 3.12.
        context_block = "\n".join(context)
        prompt = f"""
        You are a helpful weather assistant.
        Context:
        ---
        {context_block}
        ---
        Question: {user_query}
        Answer:
        """
        try:
            response = self.gemini_client.models.generate_content(
                model="gemini-2.5-flash", contents=prompt
            )
            answer = response.text
        except Exception as e:
            return f" Gemini API error: Could not generate response. Details: {e}"
        if not answer:
            # `text` can be None/empty; report it instead of failing on .strip().
            return " Gemini API error: Could not generate response. Details: empty response from model"
        return answer.strip()

Pipeline

In [None]:
# Single assistant instance shared by all UI callbacks in this notebook.
assistant = WeatherRAGAssistant(WEATHER_API_KEY, GEMINI_API_KEY)
# Per-day summaries of the currently loaded location; empty until a load succeeds.
initial_location_texts = []
# Initial text of the status box. The UI only offers the location textbox, so
# the prompt no longer mentions a "Use My Location" control.
INITIAL_PROMPT = f"Enter a location to load the {FORECAST_DAYS}-day forecast."

def reverse_geocode(lat: float, lon: float) -> str:
    """Convert coordinates to a human-readable address using Google Geocoding API.

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees.

    Returns:
        The ``formatted_address`` of the first geocoding result, or
        ``"Unknown Location"`` when there is no result or the lookup fails.
    """
    params = {"latlng": f"{lat},{lon}", "key": GOOGLE_API_KEY}
    try:
        response = requests.get(GOOGLE_GEOCODE_URL, params=params, timeout=10)
        response.raise_for_status()
        results = response.json().get("results") or []
        if results:
            return results[0]["formatted_address"]
        return "Unknown Location"
    except Exception:
        # Best effort: the name is display-only. The exception text is not
        # returned because request errors embed the URL, including the API key,
        # and callers show this value to the user as the place name.
        return "Unknown Location"

def use_my_location(lat, lon):
    """Load and index the forecast for the given coordinates.

    Updates the module-level ``initial_location_texts`` on success.

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees.

    Returns:
        A status message describing whether the weather data was loaded.
    """
    global initial_location_texts
    location_name = reverse_geocode(lat, lon)
    try:
        _, loaded_texts = assistant.fetch_and_index_weather(lat=lat, lon=lon)
    except requests.RequestException as e:
        # str(e) contains the request URL with the API key; report the type only.
        return f" Error fetching weather: {type(e).__name__}"
    except Exception as e:
        return f" Error fetching weather: {e}"

    initial_location_texts = loaded_texts or []
    if initial_location_texts:
        return f" Location detected: {location_name}. Weather data loaded!"
    return f" Failed to load weather data for: {location_name}"

def update_location_and_index(new_location: str):
    """Validate location with Google Geocoding, then fetch weather via lat/lon.

    Updates the module-level ``initial_location_texts`` on success.

    Args:
        new_location: Free-text location typed by the user.

    Returns:
        A status message for the UI (success, not found, or error).
    """
    global initial_location_texts
    clean_location = (new_location or "").strip()
    if not clean_location:
        # Nothing to geocode; avoid a pointless API round trip.
        return "Please enter a location."

    try:
        geo_params = {"address": clean_location, "key": GOOGLE_API_KEY}
        response = requests.get(GOOGLE_GEOCODE_URL, params=geo_params, timeout=10)
        response.raise_for_status()
        geo_data = response.json()
        if not geo_data.get("results"):
            return f" Could not find any location matching '{clean_location}'."

        # Coordinates and display name of the best match.
        best_match = geo_data["results"][0]
        location_info = best_match["geometry"]["location"]
        lat, lon = location_info["lat"], location_info["lng"]
        resolved_location = best_match["formatted_address"]
    except requests.RequestException as e:
        # str(e) contains the request URL with the API key; report the type only.
        return f" Error validating location: {type(e).__name__}"
    except Exception as e:
        return f" Error validating location: {e}"

    try:
        _, loaded_texts = assistant.fetch_and_index_weather(lat=lat, lon=lon)
    except requests.RequestException as e:
        # Same reason as above: the forecast URL carries the `appid` key.
        return f" Error fetching weather: {type(e).__name__}"
    except Exception as e:
        return f" Error fetching weather: {e}"

    initial_location_texts = loaded_texts or []
    if initial_location_texts:
        return f" Weather data for {resolved_location} loaded!"
    return f" Failed to load weather data for {resolved_location}."

def get_target_date_string(user_query: str) -> str | None:
    query_lower = user_query.lower()
    tz_offset = assistant.api_response['city'].get('timezone', 0) if assistant.api_response else 0
    current_time = datetime.utcnow() + timedelta(seconds=tz_offset)
    target_date = None

    day_mapping = {'today': 0, 'tomorrow': 1, 'day after tomorrow': 2}
    current_weekday = current_time.weekday()
    day_names = ['monday','tuesday','wednesday','thursday','friday','saturday','sunday']

    for i in range(7):
        target_day_index = (current_weekday + i) % 7
        if day_names[target_day_index] in query_lower:
            target_date = current_time + timedelta(days=i)
            break

    for term, days_delta in day_mapping.items():
        if term in query_lower:
            target_date = current_time + timedelta(days=days_delta)
            break

    return target_date.strftime('%Y-%m-%d') if target_date else None

def answer_weather_query(user_query: str):
    """Answer a weather question from the currently loaded forecast.

    If the question names a specific day, that day's summary is used as the
    only context; otherwise the most similar summaries are retrieved from the
    FAISS index. The prompt is then sent to Gemini.

    Args:
        user_query: The user's question.

    Returns:
        The model's answer, or a message explaining why no answer was produced.
    """
    if not initial_location_texts:
        return "Please load the weather data first."
    if not user_query or not user_query.strip():
        return "Please enter a question."

    target_date_str = get_target_date_string(user_query)

    if target_date_str:
        # First loaded summary that mentions the requested date, if any.
        target_data = next(
            (text for text in initial_location_texts if target_date_str in text),
            None,
        )
        if not target_data:
            return f"The forecast data for {target_date_str} is not available."

        final_context = f"The verified weather data for {target_date_str} is: {target_data}"
        prompt = f"""
        You are a weather expert. Provide the weather details **including the date** in your answer.

        ---
        VERIFIED WEATHER DATA FOR {target_date_str}:
        {final_context}
        ---

        QUESTION: {user_query}

        ANSWER FORMAT:
        - Date: {target_date_str}
        - Max Temp / Min Temp
        - Weather conditions
        - Recommendation
        """
    else:
        context_for_llm = assistant.search_index(user_query)
        if not context_for_llm:
            return "I found no relevant weather information."
        # Joined outside the f-string: a backslash inside an f-string
        # expression is a SyntaxError before Python 3.12.
        context_block = "\n".join(context_for_llm)
        prompt = f"""
        You are a helpful and concise weather assistant.
        Always include the **date(s)** explicitly in your answer.

        Context:
        ---
        {context_block}
        ---

        Question: {user_query}

        Answer:
        - Date(s): state clearly
        - Weather details (Max/Min temp, rain, conditions)
        - Recommendation
        """

    try:
        response = assistant.gemini_client.models.generate_content(
            model="gemini-2.5-flash",
            contents=prompt
        )
        answer = response.text
    except Exception as e:
        return f" Gemini API Error: Could not generate response. Details: {e}"
    if not answer:
        # `text` can be None/empty; report it instead of failing on .strip().
        return " Gemini API Error: Could not generate response. Details: empty response from model"
    return answer.strip()

Gradio UI

In [None]:
# Build the Gradio interface: a location box with a refresh button, a status
# line, and a question box whose answer is produced by answer_weather_query.
with gr.Blocks(title="Enhanced OpenWeatherMap Assistant") as demo:
    # The "upcoming days" count is derived from FORECAST_DAYS so the heading
    # stays correct if the configured horizon changes.
    gr.Markdown(f"""
        # Enhanced RAG Weather Assistant (OpenWeatherMap)
        Ask about the **{FORECAST_DAYS}-day forecast** (Today + {FORECAST_DAYS - 1} Upcoming Days).
    """)

    with gr.Row():
        location_input = gr.Textbox(
            label="1. Enter Location (City, Country OR Landmark OR lat,lon)",
            placeholder="e.g., Hyderabad, IN or 17.3850,78.4867",
            interactive=True,
            scale=3
        )
        refresh_button = gr.Button("Refresh Data", scale=1)

    status_output = gr.Textbox(
        label="Data Status",
        value=INITIAL_PROMPT,
        interactive=False
    )

    gr.Markdown("## Ask a Question")

    with gr.Row():
        query_input = gr.Textbox(
            label="Your Weather Question",
            placeholder="e.g., What's the highest temperature tomorrow?",
            lines=2,
            scale=4
        )
        submit_button = gr.Button("Get Answer", scale=1)

    answer_output = gr.Textbox(
        label="Assistant's Answer",
        lines=5,
        interactive=False
    )

    gr.Markdown("*(Data provided by OpenWeatherMap, RAG powered by Sentence-Transformers and Gemini-2.5-flash)*")

    # Pressing Enter in the location box and clicking "Refresh Data" both
    # reload the forecast for the typed location.
    for load_trigger in (location_input.submit, refresh_button.click):
        load_trigger(
            fn=update_location_and_index,
            inputs=[location_input],
            outputs=[status_output]
        )

    # Clicking "Get Answer" and pressing Enter in the question box both
    # answer the question.
    for ask_trigger in (submit_button.click, query_input.submit):
        ask_trigger(
            fn=answer_weather_query,
            inputs=[query_input],
            outputs=[answer_output]
        )

demo.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://978fdf6bd38cac464e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


