# Setup & Configuration  


Weather Data Functions: The `get_weather` function requires the `requests` library. The `tabulate` package is also installed so that results can be rendered as readable tables with its `tabulate` function, and the `quote` function from `urllib.parse` is used to URL-encode location names.

In [2]:
%pip install tabulate
import requests
from tabulate import tabulate
from urllib.parse import quote

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: C:\Users\waqua\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


Natural Language Processing: After `get_weather`, development moves on to natural language processing. This setup installs the libraries the parser needs to understand a natural-language question, extract what is being asked, and return a response or an error message.

In [3]:
%pip install spacy
%pip install dateparser
%pip install geopy

import spacy
import dateparser
from geopy.geocoders import Nominatim
from datetime import datetime, timedelta

# Load spaCy model
def _load_spacy_model(model_name):
    """
    Loads the named spaCy pipeline, downloading it first when spaCy reports
    (via OSError) that it cannot be loaded.
    """
    try:
        return spacy.load(model_name)
    except OSError:
        print(f"Downloading spaCy model '{model_name}'...")
        spacy.cli.download(model_name)
        return spacy.load(model_name)


nlp = _load_spacy_model("en_core_web_sm")

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: C:\Users\waqua\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: C:\Users\waqua\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: C:\Users\waqua\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.


# Weather Data Functions
The purpose of this function is to return the current weather conditions for a specific city together with a short forecast. `get_weather(location, forecast_days)` calls the wttr.in service, parses the JSON response, and returns the result as formatted tables.


In [4]:
def _safe_int_conversion(value):
    """
    Safely converts a value to an integer, returning None if conversion fails.

    Args:
        value: Anything accepted by float(), e.g. "12", "12.7", 12.7 or 12.

    Returns:
        int | None: The value truncated toward zero, or None when the value is
        missing, non-numeric, NaN or infinite.
    """
    try:
        # Go through float first so decimal strings such as "12.7" are accepted.
        return int(float(value))
    except (ValueError, TypeError, OverflowError):
        # ValueError: non-numeric text or NaN; TypeError: None and other
        # non-numeric objects; OverflowError: +/- infinity.
        return None

def _format_current_weather_table(current_weather: dict) -> str:
    """
    Formats current weather data into a human-readable table.

    Args:
        current_weather (dict): Mapping with the keys 'temperature_celsius',
            'feels_like_celsius', 'description', 'humidity_percent' and
            'wind_speed_kmph'. Any key may be missing or None.

    Returns:
        str: A two-column ("Metric", "Value") grid table. Every missing value
        is shown as "N/A" so no row ever reads like "None°C".
    """
    def _with_unit(key: str, unit: str) -> str:
        # Append the unit only when there is a value to attach it to.
        value = current_weather.get(key)
        return "N/A" if value is None else f"{value}{unit}"

    description = current_weather.get('description')
    headers = ["Metric", "Value"]
    table_data = [
        ["Temperature", _with_unit('temperature_celsius', "°C")],
        ["Feels Like", _with_unit('feels_like_celsius', "°C")],
        ["Description", "N/A" if description is None else description],
        ["Humidity", _with_unit('humidity_percent', "%")],
        ["Wind Speed", _with_unit('wind_speed_kmph', " km/h")],
    ]
    return tabulate(table_data, headers=headers, tablefmt="grid")

def _format_forecast_table(forecast_data: list) -> str:
    """
    Renders a list of per-day forecast dicts as a grid table.

    Each dict contributes one row built from its 'date', 'maxtemp_celsius',
    'mintemp_celsius' and 'description' entries (missing keys become None).
    """
    column_keys = ('date', 'maxtemp_celsius', 'mintemp_celsius', 'description')
    rows = [[entry.get(key) for key in column_keys] for entry in forecast_data]
    return tabulate(
        rows,
        headers=["Date", "Max Temp (°C)", "Min Temp (°C)", "Description"],
        tablefmt="grid",
    )

def _first_weather_description(entry: dict) -> str:
    """
    Returns entry["weatherDesc"][0]["value"], or "N/A" when any level of that
    path is missing or empty.
    """
    descriptions = entry.get("weatherDesc") or [{}]
    return descriptions[0].get("value") or "N/A"


def get_weather(location: str, forecast_days: int) -> dict:
    """
    Fetches current weather conditions and forecast data for a specified location
    using the wttr.in API.

    Args:
        location (str): The name of the city or location (e.g., "London", "New York").
        forecast_days (int): The number of forecast days to retrieve (1-5).
            NOTE(review): the service may return fewer days than requested;
            the forecast table then simply contains fewer rows.

    Returns:
        dict: On success, {"current_weather_table": str, "forecast_table": str}.
              On any failure (invalid arguments, network/HTTP error, invalid
              JSON, unknown location, unexpected payload), {"error": str}.
              This function does not raise for those cases.
    """
    # 1. Input Validation
    # Reject bad arguments up front so callers always get an error dict rather
    # than a TypeError from the comparison or from URL encoding.
    if not isinstance(forecast_days, int) or not 1 <= forecast_days <= 5:
        return {"error": "Forecast days must be between 1 and 5."}
    if not isinstance(location, str) or not location.strip():
        return {"error": "A non-empty location name is required."}

    # 2. URL Encoding for Location
    # safe="" also escapes "/", so the location cannot change the request path.
    encoded_location = quote(location.strip(), safe="")

    # 3. Constructing the API URL
    # The format=j1 parameter requests JSON output.
    # NOTE(review): the forecast parameter is passed through as in the original
    # code; the number of days is enforced locally by the slice below.
    api_url = f"https://wttr.in/{encoded_location}?format=j1&forecast={forecast_days}"

    # 4. Making the API Request
    try:
        # A timeout keeps an unresponsive server from hanging the caller forever.
        response = requests.get(api_url, timeout=10)
        response.raise_for_status()  # Raise an exception for HTTP errors (4xx or 5xx).
    except requests.exceptions.RequestException as e:
        # Handle network-related errors (e.g., connection refused, DNS error, HTTP errors).
        return {"error": f"Network error or API request failed: {e}"}

    # 5. Parsing the JSON Response
    # Decoded in its own try block: the decode error raised by requests is
    # also a RequestException, so sharing the handler above would mislabel it.
    try:
        json_data = response.json()
    except ValueError:
        return {"error": "Failed to decode JSON response from API."}

    try:
        # 6. Error Handling for Invalid Location from API
        # Presumably wttr.in signals an unknown location with an "error" key
        # (as assumed by the original code); a non-object payload is also unusable.
        if not isinstance(json_data, dict) or "error" in json_data:
            return {"error": f"Location not found or invalid: {location}"}

        # 7. Extracting Current Weather Conditions
        # "or [{}]" covers both a missing key and an empty list.
        current_condition_data = (json_data.get("current_condition") or [{}])[0]
        current_weather = {
            "temperature_celsius": _safe_int_conversion(current_condition_data.get("temp_C")),
            "description": _first_weather_description(current_condition_data),
            "feels_like_celsius": _safe_int_conversion(current_condition_data.get("FeelsLikeC")),
            "humidity_percent": _safe_int_conversion(current_condition_data.get("humidity")),
            "wind_speed_kmph": _safe_int_conversion(current_condition_data.get("windspeedKmph")),
        }

        # 8. Extracting Forecast Data
        forecast_data_list = []
        for day_data in (json_data.get("weather") or [])[:forecast_days]:
            hourly_entries = day_data.get("hourly") or []
            forecast_data_list.append({
                "date": day_data.get("date"),
                "maxtemp_celsius": _safe_int_conversion(day_data.get("maxtempC")),
                "mintemp_celsius": _safe_int_conversion(day_data.get("mintempC")),
                # The first hourly entry supplies the day's description.
                "description": _first_weather_description(hourly_entries[0]) if hourly_entries else "N/A",
            })

        # 9. Formatting data as tables and 10. returning the processed data
        return {
            "current_weather_table": _format_current_weather_table(current_weather),
            "forecast_table": _format_forecast_table(forecast_data_list),
        }
    except Exception as e:
        # Boundary catch-all: an unexpectedly shaped payload must surface as an
        # error dict, matching the documented return contract.
        return {"error": f"An unexpected error occurred: {e}"}

The output is a formatted table of the current weather conditions, followed by a forecast for the selected number of days for the specified city.

In [5]:
weather_data = get_weather("Sydney", 2)
print(weather_data['current_weather_table'])
print(weather_data['forecast_table'])

+-------------+--------------------+
| Metric      | Value              |
| Temperature | 17°C               |
+-------------+--------------------+
| Feels Like  | 17°C               |
+-------------+--------------------+
| Description | Patchy rain nearby |
+-------------+--------------------+
| Humidity    | 52%                |
+-------------+--------------------+
| Wind Speed  | 12 km/h            |
+-------------+--------------------+
+------------+-----------------+-----------------+---------------+
| Date       |   Max Temp (°C) |   Min Temp (°C) | Description   |
| 2025-09-18 |              18 |              14 | Clear         |
+------------+-----------------+-----------------+---------------+
| 2025-09-19 |              22 |              12 | Clear         |
+------------+-----------------+-----------------+---------------+


# Natural Language Processing

The purpose of this module is to parse natural language questions about weather and extract location, time, and weather attribute asked about. It uses spaCy for entity recognition, dateparser to interpret time, and geopy for locations. 
It includes user-friendly guidance, confidence scoring, fallback suggestions, and context awareness, and it returns the location, time, requested weather attributes, and a confidence score for each. Instead of raising hard errors, the code returns conversational messages.

The parser now comes with some advanced features made for easier integration with a user interface:

- User-Friendly Guidance: Gives structured messages and suggestions when parsing fails or the input is ambiguous.
- Confidence Scoring: Gives scores for the extracted parts like location, time and attributes so the UI can show uncertainty and ask user confirmation.
- Fallback Suggestions: Suggests alternative city matches when the location provided isn’t clear enough.
- Context Awareness: Keeps track of the conversation, letting follow-up questions reuse previously extracted values for smoother interactions.

In [10]:
# Conversation memory shared across parser calls: the most recently
# extracted location, time and (negated) attributes.
CONTEXT = dict(
    location=None,
    time=None,
    attributes=[],
    negated_attributes=[],
)
def _parse_time_expression(text: str):
    """
    Turns a natural-language time expression into a short label.

    The text is interpreted with dateparser. Returns "today" or "tomorrow"
    when the parsed date is the current or the following calendar day, the
    date formatted as YYYY-MM-DD otherwise, and None when dateparser cannot
    interpret the text.
    """
    parsed = dateparser.parse(text)
    if not parsed:
        return None

    current_day = datetime.now().date()
    target_day = parsed.date()
    if target_day == current_day:
        return "today"
    if target_day == current_day + timedelta(days=1):
        return "tomorrow"
    return parsed.strftime("%Y-%m-%d")
def _get_geolocator():
    """
    Returns the geocoder used to confirm place names.

    Reuses a module-level ``geolocator`` when one already exists; otherwise
    creates a Nominatim client and stores it under that name so later calls
    share the same instance.
    """
    geocoder = globals().get("geolocator")
    if geocoder is None:
        # Nominatim needs an explicit user agent identifying the application.
        geocoder = Nominatim(user_agent="weather_question_parser")
        globals()["geolocator"] = geocoder
    return geocoder


def parse_weather_question_nlp(question: str) -> dict:
    """
    Enhanced NLP-based weather question parser with:
    - User-friendly error messages and suggestions
    - Confidence scoring
    - Fallback interaction with location suggestions
    - Conversational context awareness

    Side effect: the module-level CONTEXT dict is overwritten with the values
    extracted by this call so that follow-up questions can reuse them.

    Args:
        question (str): The user's question in plain English.

    Returns:
        dict with keys:
        - location: place name as written in the question, a known city, the
          previous location from CONTEXT, or None when nothing was found
        - time: "today", "tomorrow", a YYYY-MM-DD date, or the matched phrase
          itself (e.g. "next 3 days") when it cannot be resolved to one date
        - attributes, negated_attributes: lists of normalized attribute names
        - confidence: float scores (0.0-1.0) for "location", "time", "attributes"
        - suggestions: {"location": [addresses]} when the geocoder returned
          several candidates for the place name
        - messages: user-friendly guidance/errors
        - context: copy of CONTEXT after it was updated by this call
    """
    ATTRIBUTE_KEYWORDS = {
        "rain": "rain", "precipitation": "rain",
        "temperature": "temperature", "temp": "temperature", "hot": "temperature", "cold": "temperature",
        "sun": "sun", "sunny": "sun",
        "wind": "wind", "windy": "wind",
        "humidity": "humidity", "humid": "humidity",
        "clouds": "clouds", "cloudy": "clouds",
        "conditions": "description", "weather": "description", "forecast": "description"
    }
    NEGATION_WORDS = ("not", "no", "n't", "n’t")

    doc = nlp(question)
    lower_question = question.lower()

    extracted_info = {
        "location": None,
        "time": None,
        "attributes": [],
        "negated_attributes": [],
        "confidence": {},
        "suggestions": {},
        "messages": [],
        "context": CONTEXT.copy()
    }

    # ---- 1. Extract Location ----
    location_found = False
    location_confidence = 0.0
    suggestions = []

    for ent in doc.ents:
        if ent.label_ != "GPE":
            continue
        try:
            matches = _get_geolocator().geocode(ent.text, exactly_one=False, limit=3)
        except Exception:
            # Geocoding is best-effort (it depends on an external service);
            # on failure try the next entity, then the fallbacks below.
            continue
        if matches:
            extracted_info["location"] = ent.text
            location_found = True
            # Heuristic score: NER hit confirmed by the geocoder. Always a
            # float so the UI can compare it with the other scores.
            location_confidence = 0.9
            if len(matches) > 1:
                suggestions = [match.address for match in matches]
            break

    if not location_found:
        # Fallback check in common cities (longest names first so that a
        # longer name wins over any shorter name it contains).
        common_cities = ["London", "Paris", "New York", "Berlin", "Tokyo", "Sankt Pölten", "Vienna", "Perth", "Melbourne", "Sydney"]
        for city in sorted(common_cities, key=len, reverse=True):
            if city.lower() in lower_question:
                extracted_info["location"] = city
                location_found = True
                location_confidence = 0.7
                break

    if not location_found:
        if CONTEXT["location"]:
            # If still not found, fallback to context
            extracted_info["location"] = CONTEXT["location"]
            extracted_info["messages"].append("No location specified, using last known location from context.")
            location_confidence = 0.6
        else:
            extracted_info["messages"].append(
                "Could not confidently extract a location. Please specify a city name (e.g., 'London' or 'New York')."
            )

    # Surface alternative matches whenever the geocoder produced them, so the
    # UI can ask the user which place was meant.
    if suggestions:
        extracted_info["suggestions"]["location"] = suggestions

    extracted_info["confidence"]["location"] = location_confidence

    # ---- 2. Extract Time ----
    time_confidence = 0.0

    # Longest phrases first so "the day after tomorrow" wins over "tomorrow".
    time_phrases = sorted([
        "the day after tomorrow", "next 3 days", "next 5 days", "next weekend",
        "this week", "next week", "in a week", "in two days", "next month",
        "tomorrow", "today"
    ], key=len, reverse=True)
    time_expression_text = next(
        (phrase for phrase in time_phrases if phrase in lower_question), None
    )

    if time_expression_text:
        resolved_time = _parse_time_expression(time_expression_text)
        if resolved_time:
            extracted_info["time"] = resolved_time
            time_confidence = 0.9
        else:
            # The phrase was recognized but does not resolve to a single date
            # (e.g. a range); keep it verbatim instead of reporting None.
            extracted_info["time"] = time_expression_text
            time_confidence = 0.7
    else:
        parsed_full_q_time = _parse_time_expression(question) if question.strip() else None
        if parsed_full_q_time:
            extracted_info["time"] = parsed_full_q_time
            time_confidence = 0.7
        elif CONTEXT["time"]:
            extracted_info["time"] = CONTEXT["time"]
            extracted_info["messages"].append("No explicit time found, using last known time from context.")
            time_confidence = 0.6
        else:
            extracted_info["time"] = "today"
            extracted_info["messages"].append("No time specified, defaulting to 'today'.")
            time_confidence = 0.5

    extracted_info["confidence"]["time"] = time_confidence

    # ---- 3. Extract Attributes ----
    found_attributes = set()
    found_negated_attributes = set()
    attr_confidence = 0.0

    for token in doc:
        attr = ATTRIBUTE_KEYWORDS.get(token.text.lower())
        if attr is None:
            continue
        # Negated when directly preceded by a negation word, or when
        # "without" appears two tokens earlier (e.g. "without any rain").
        is_negated = (
            (token.i > 0 and doc[token.i - 1].text.lower() in NEGATION_WORDS)
            or (token.i > 1 and doc[token.i - 2].text.lower() == "without")
        )
        if is_negated:
            found_negated_attributes.add(attr)
        else:
            found_attributes.add(attr)

    if found_attributes or found_negated_attributes:
        attr_confidence = 0.9
    elif CONTEXT["attributes"]:
        found_attributes = set(CONTEXT["attributes"])
        attr_confidence = 0.6
        extracted_info["messages"].append("No attributes specified, using last known attributes from context.")
    else:
        found_attributes.add("description")
        attr_confidence = 0.5
        extracted_info["messages"].append("No specific attributes found, defaulting to general weather description.")

    extracted_info["attributes"] = list(found_attributes)
    extracted_info["negated_attributes"] = list(found_negated_attributes)
    extracted_info["confidence"]["attributes"] = attr_confidence

    # ---- 4. Update Global Context ----
    # Store copies of the lists so a caller mutating the returned dict cannot
    # silently change the remembered context.
    CONTEXT.update({
        "location": extracted_info["location"],
        "time": extracted_info["time"],
        "attributes": list(extracted_info["attributes"]),
        "negated_attributes": list(extracted_info["negated_attributes"])
    })
    extracted_info["context"] = {
        key: list(value) if isinstance(value, list) else value
        for key, value in CONTEXT.items()
    }

    return extracted_info


The following code lets the user ask a weather question in plain language. First, it checks if the NLP parser parse_weather_question_nlp is loaded, then tries to figure out the location and time from what the user typed. It then decides how many days of forecast to fetch and calls get_weather to get the current weather and forecast. The results are printed in tables, and if anything goes wrong, like it can’t understand the question or fetch the data, it shows a friendly error message.

In [11]:
# Imported here so this cell runs on its own once the parser and
# get_weather cells have been executed.
import re
from datetime import datetime

# Prompt the user for a weather-related question
user_question = input("Ask a weather-related question: ")

# Ensure the NLP function is defined
if 'parse_weather_question_nlp' not in globals():
    print("Please run the cell that defines 'parse_weather_question_nlp' before running this cell.")
else:
    parsed_info = parse_weather_question_nlp(user_question)

    location = parsed_info.get("location")
    # The parser may leave the time unresolved; treat that as "today".
    time = parsed_info.get("time") or "today"

    if "error" in parsed_info:
        print(f"Sorry, I couldn't understand your question. {parsed_info['error']}")
    elif not location:
        # The parser reports problems through "messages" rather than "error";
        # without a location there is nothing to look up.
        guidance = " ".join(parsed_info.get("messages", []))
        print(f"Sorry, I couldn't understand your question. {guidance}".rstrip())
    else:
        # Determine forecast_days based on time expression.
        # The parser may have normalized the time to a date, so the raw
        # question is searched as well for an explicit "next N days".
        range_match = re.search(r"next\s+(\d+)\s+days", f"{time} {user_question}".lower())
        if range_match:
            forecast_days = int(range_match.group(1))
        elif time == "tomorrow":
            # The forecast list starts with the current day (see the sample
            # output), so two days are needed to include tomorrow.
            forecast_days = 2
        else:
            try:
                target_date = datetime.strptime(time, "%Y-%m-%d").date()
            except ValueError:
                forecast_days = 1  # "today" or an unresolved phrase
            else:
                # Fetch enough days to reach the requested date.
                forecast_days = (target_date - datetime.now().date()).days + 1

        # get_weather only accepts 1-5 days.
        forecast_days = max(1, min(forecast_days, 5))

        weather_result = get_weather(location, forecast_days)

        if "error" in weather_result:
            print(f"Sorry, I couldn't retrieve the weather for {location}. {weather_result['error']}")
        else:
            print(f"\nHere's the weather for {location} ({time}):\n")
            print(weather_result["current_weather_table"])
            print("\nForecast:\n")
            print(weather_result["forecast_table"])


Here's the weather for Tokyo (today):

+-------------+---------------+
| Metric      | Value         |
| Temperature | 27°C          |
+-------------+---------------+
| Feels Like  | 31°C          |
+-------------+---------------+
| Description | Partly cloudy |
+-------------+---------------+
| Humidity    | 84%           |
+-------------+---------------+
| Wind Speed  | 46 km/h       |
+-------------+---------------+

Forecast:

+------------+-----------------+-----------------+---------------+
| Date       |   Max Temp (°C) |   Min Temp (°C) | Description   |
| 2025-09-18 |              28 |              25 | Partly Cloudy |
+------------+-----------------+-----------------+---------------+


# User Interface

# Main Application Logic

# Testing and Examples