<a href="https://colab.research.google.com/github/XiaoyuMa-MOYEE/Weather_forecast_testVersion/blob/master/submit_EN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Project environment configuration

In [None]:
!pip install -U plotly




[notice] A new release of pip is available: 24.0 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import os
from getpass import getpass

# SECURITY: API keys must never be committed to source control. The key that was
# previously hard-coded here should be treated as leaked and rotated.
# Only prompt when the variable is not already provided by the environment, so a
# key configured outside the notebook is never overwritten.
if not os.environ.get('OPENWEATHER_API_KEY'):
    os.environ['OPENWEATHER_API_KEY'] = getpass('Enter your OpenWeatherMap API key: ').strip()


# Main code file

### Obtain weather data

In [None]:
# Old code for getting weather data via API
'''
## Old code using API to get data
# def test_of_data():
#     city = "Perth"
#     # Send GET request
#     # Standardized format
#     # response = requests.get(f'https://wttr.in/{city}?3&format=%l:+%c+%t+%C+%w')
#     # %l: location
#     # %c: current weather icon
#     # %t: temperature
#     # %C: weather description
#     # %w: wind speed
#     # Output response content
#
#     # Access default format
#     response = requests.get(f'https://wttr.in/{city}?format=j1')
#     data = response.json()
#     # Print full data to observe structure
#     print(data)
#     # Access weather data for the next three days
#     for day in data['weather']:
#         date = day['date']
#         maxtemp = day['maxtempC']
#         mintemp = day['mintempC']
#         description = day['hourly'][4]['weatherDesc'][0]['value']  # Midday weather
#         print(f"{date}: {description}, {mintemp}°C ~ {maxtemp}°C")
#         print(len(data['weather']))
'''
from collections import defaultdict
import requests
# Get weather data (returns JSON data)
def get_weather_forecast(city, max_day):
    """
    Fetch the OpenWeatherMap 5-day/3-hour forecast for a city and group it by day.

    Parameters:
    -----------
    city : str
        City name passed to the API as the ``q`` query parameter.

    max_day : int
        Number of days to include in the summary. The value is clamped to the
        range 0-5; it does not limit the detailed ``forecast_by_day`` data.

    Returns:
    --------
    tuple or None
        On success, a tuple ``(result, forecast_by_day)``:

        - result (dict): one representative entry per day, formatted as:
            {
                "city": <city_name>,
                "forecast": [
                    {
                        "date": <YYYY-MM-DD>,
                        "time": <YYYY-MM-DD HH:MM:SS>,
                        "temperature": <temperature>,
                        "description": <weather description>,
                        "speed": <wind speed>,
                        "humidity": <humidity>,
                        "pop": <precipitation probability>
                    },
                    ...
                ]
            }
          The representative entry is the one whose timestamp contains "12:00",
          or the first entry of the day when there is none.

        - forecast_by_day (dict): every 3-hour entry grouped by date as:
            {
                <date>: [
                    (<timestamp>, <temp>, <description>, <wind>, <humidity>, <pop>),
                    ...
                ]
            }

        On failure, a message is printed and ``None`` is returned (a single
        value, not a tuple) when:
        - the OPENWEATHER_API_KEY environment variable is missing or empty
        - the network request fails or times out
        - the API answers with an HTTP error status (e.g. unknown city)
        - the response body is not valid JSON or lacks the expected fields

    Example:
    --------
     forecast = get_weather_forecast("Perth", 3)
     if forecast is not None:
         result, forecast_by_day = forecast
         print(result["forecast"][0]["temperature"])

    Notes:
    ------
    - Requires internet access to call the external API.
    - Requests ``units=metric``; per the OpenWeatherMap documentation this
      means temperatures in °C and wind speed in m/s.
    - NOTE(review): ``dt_txt`` timestamps are presumably UTC, so the "12:00"
      entry is noon UTC rather than local noon -- confirm against the API docs.
    """
    import os

    api_key = os.environ.get("OPENWEATHER_API_KEY")
    if not api_key:
        print("API key not found. Please set the OPENWEATHER_API_KEY environment variable.")
        return None

    url = "https://api.openweathermap.org/data/2.5/forecast"
    # "en" replaces the former "zh_en", which is not a documented language code.
    params = {"q": city, "appid": api_key, "units": "metric", "lang": "en"}

    try:
        # A timeout prevents the notebook from hanging forever on a stalled connection.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.HTTPError as e:
        # The server answered, but with an error status: report the API's own
        # message instead of blaming the network connection.
        message = "Unknown error"
        try:
            message = e.response.json().get("message", message)
        except (ValueError, AttributeError):
            # Error body missing or not a JSON object; keep the generic message.
            pass
        print("Request failed:", message)
        return None
    except requests.exceptions.RequestException as e:  # Connection errors, timeouts, ...
        print(f"Network request failed: please check your connection\n{e}")
        return None
    except ValueError as e:  # Body was not valid JSON (older requests versions)
        print(f"Request failed: the server returned an invalid response\n{e}")
        return None

    try:
        # Organize the 3-hour interval entries by calendar date
        forecast_by_day = defaultdict(list)
        for item in data["list"]:
            timestamp = item["dt_txt"]
            date = timestamp.split(" ")[0]
            forecast_by_day[date].append((
                timestamp,
                item["main"]["temp"],
                item["weather"][0]["description"],
                item["wind"]["speed"],
                item["main"]["humidity"],
                item.get("pop", 0),  # precipitation probability may be absent
            ))
        city_name = data["city"]["name"]
    except (KeyError, IndexError, TypeError) as e:
        print(f"Request failed: unexpected response format ({e!r})")
        return None

    # Clamp so that negative values do not slice from the end and values above
    # five are truncated as documented.
    day_limit = max(0, min(max_day, 5))

    result = {"city": city_name, "forecast": []}
    for date, items in list(forecast_by_day.items())[:day_limit]:
        # Prefer the 12:00 entry; otherwise fall back to the first entry of the day
        mid = next((x for x in items if "12:00" in x[0]), items[0])
        result["forecast"].append({
            "date": date,
            "time": mid[0],
            "temperature": mid[1],
            "description": mid[2],
            "speed": mid[3],
            "humidity": mid[4],
            "pop": mid[5],
        })

    # result is the per-day summary, forecast_by_day is the detailed data for plotting
    return result, forecast_by_day


### Natural language matching

In [None]:
# Define response handlers
def res_pop() -> str:
    """Return the answer-type key for precipitation-probability questions."""
    return "pop"


def res_describe_weather() -> str:
    """Return the answer-type key for general weather-description questions."""
    return "describe"


def res_clothing() -> str:
    """Return the answer-type key for clothing questions (answered with temperature)."""
    return "temperature"


def res_temperature() -> str:
    """Return the answer-type key for temperature questions."""
    return "temperature"


def res_wind() -> str:
    """Return the answer-type key for wind questions."""
    return "wind"


def res_humidity() -> str:
    """Return the answer-type key for humidity questions."""
    return "humidity"


# Mapping dictionary between questions and response handlers
# Keyword -> handler lookup, built from one keyword group per handler.
# Insertion order is significant: get_weather_response() walks this dict in
# order and returns on the first keyword found in the question.
response_mapping = {
    keyword: handler
    for handler, keywords in (
        # Rain / precipitation probability
        (res_pop, (
            "umbrella",
            "rain",
            "precipitation",
            "wet",
            "showers",
            "rainy",
            "storms",
            "need an umbrella",
        )),
        # General weather description
        (res_describe_weather, (
            "weather",
            "forecast",
            "today's weather",
            "how's the weather",
            "what's the weather like",
        )),
        # Clothing advice
        (res_clothing, (
            "clothing",
            "dress",
            "wear",
            "what to wear",
            "should I wear",
            "clothes",
        )),
        # Temperature
        (res_temperature, (
            "temperature",
            "how hot",
            "how cold",
            "hot",
            "cold",
            "temperature today",
        )),
        # Wind speed
        (res_wind, (
            "wind",
            "wind speed",
            "how windy",
            "windy",
        )),
        # Humidity
        (res_humidity, (
            "humidity",
            "how humid",
            "humid",
        )),
    )
    for keyword in keywords
}


# Input processing function
def get_weather_response(question):
    """
    Map a natural language weather question to a response-type keyword.

    The keywords of ``response_mapping`` are tried in the dictionary's insertion
    order; the handler of the first keyword found in the question is called and
    its return value is returned.

    Parameters:
    -----------
    question : str
        The user's question, for example "Do I need an umbrella?",
        "How windy will it be?" or "What's the temperature today?".

    Returns:
    --------
    str
        The value returned by the matched handler:
        - "pop"         -> precipitation probability
        - "describe"    -> general weather description
        - "temperature" -> temperature (used for both clothing and temperature queries)
        - "wind"        -> wind speed
        - "humidity"    -> humidity level

        If nothing matches, or the input is empty or not a string:
        - "Sorry, I couldn't understand your question."

    Matching Details:
    -----------------
    - Matching is case-insensitive (`re.IGNORECASE`).
    - Keywords must appear on word boundaries.
    - Both singular and plural forms are accepted: a keyword may be followed by
      an optional "s", and a keyword that is itself a plural ("storms",
      "showers") also matches without its final "s". Keywords ending in "ss"
      (e.g. "dress") are not treated as plurals.

    Example:
    --------
     get_weather_response("Do I need an umbrella?")
    'pop'

     get_weather_response("Is a storm coming?")
    'pop'

     get_weather_response("How cold will it be?")
    'temperature'

     get_weather_response("Tell me a joke")
    "Sorry, I couldn't understand your question."
    """
    # Imported locally so the function works regardless of which notebook cell
    # (if any) has already imported `re`.
    import re

    fallback = "Sorry, I couldn't understand your question."

    # Guard against None / non-text / blank input, which re.search cannot handle
    if not isinstance(question, str) or not question.strip():
        return fallback

    for keyword, handler in response_mapping.items():
        # Reduce plural keywords to their stem so that "storms" also matches
        # "storm"; the optional "s" below then restores the plural form.
        if keyword.endswith("s") and not keyword.endswith("ss"):
            stem = keyword[:-1]
        else:
            stem = keyword
        if re.search(r"\b" + re.escape(stem) + r"s?\b", question, re.IGNORECASE):
            return handler()
    return fallback

# Debug test
# print(get_weather_response("perth"))


### Process the fetched weather data into list form for line charts

In [None]:
def get_line_chart_data(questions, city, max_day):  # input parameters
    """
    Build the data needed to answer a weather question, as chart points or as a text summary.

    The question is classified with `get_weather_response()`, the forecast is
    fetched with `get_weather_forecast()`, and the matching metric is extracted
    from every 3-hour entry of the days covered by the forecast summary.

    Parameters:
    -----------
    questions : str
        The user's natural language question.
        Example: "Do I need an umbrella?", "How windy is it?", "What should I wear?"

    city : str
        The name of the city, forwarded to `get_weather_forecast()`.

    max_day : int
        The number of forecast days, forwarded to `get_weather_forecast()`.
        Chart data is limited to the same days that appear in the summary.

    Returns:
    --------
    tuple
        If the question matches a known weather keyword:
            - data_list (List[Tuple[str, float|str]]): (timestamp, value) pairs for the metric.
            - index (int): 1, indicating chartable data was found.
            - answer (str): the metric key (e.g. "pop", "temperature").

        If the question doesn't match any keyword:
            - lines (List[str]): a hint followed by one summary line per day.
            - index (int): 0, indicating no chartable data.
            - answer (str): "null"

        If the forecast could not be retrieved:
            - lines (List[str]): a single explanatory message.
            - index (int): 0
            - answer (str): "null"

    Supported answer types:
    ------------------------
    - "pop"         -> Precipitation probability
    - "describe"    -> Weather description
    - "cloth"       -> Temperature (kept for compatibility; clothing questions
                       are currently classified as "temperature")
    - "temperature" -> Air temperature
    - "wind"        -> Wind speed
    - "humidity"    -> Humidity level

    Example:
    --------
    get_line_chart_data("rain", "Perth", 3)
    ([("2025-05-21 09:00:00", 0.65), ("2025-05-21 12:00:00", 0.30), ...], 1, "pop")

    get_line_chart_data("Tell me a joke", "Perth", 3)
    (["Your question is a bit unclear...", "Date: 2025-05-21, Weather: ..."], 0, "null")

    Notes:
    ------
    - This function assumes `get_weather_forecast()` and `get_weather_response()` have already been defined.
    - Intended to be used as the data backend for a plotting or UI layer, such as Plotly or ipywidgets.
    """
    forecast = get_weather_forecast(city, max_day)  # get weather data
    if forecast is None:
        # get_weather_forecast() returns None (and prints the reason) on failure;
        # report that as plain text instead of crashing on tuple unpacking.
        return (
            ["Weather data could not be retrieved. Please check the city name, "
             "your API key and your network connection."],
            0,
            "null",
        )
    result, forecast_by_day = forecast
    answer = get_weather_response(questions)  # get answer type

    # Position of each metric inside the
    # (time, temp, description, wind, humidity, pop) tuples of forecast_by_day
    column_by_answer = {
        "pop": 5,
        "describe": 2,
        "cloth": 1,
        "temperature": 1,
        "wind": 3,
        "humidity": 4,
    }
    column = column_by_answer.get(answer)

    if column is None:
        # Unrecognized question: fall back to a textual summary of every metric
        lines = ["Your question is a bit unclear. Here is a simple summary of all data. For line charts, please ask more specifically, such as 'Will it rain?' or 'How's the weather?'"]
        for day in result['forecast']:
            lines.append(
                f"Date: {day['date']}, Weather: {day['description']}, Temperature: {day['temperature']}°C,"
                f" Wind Speed: {day['speed']}, Humidity: {day['humidity']}, Precipitation Probability: {day['pop']}"
            )
        return lines, 0, "null"

    # Chart only the days the user asked for: the summary already honours
    # max_day, so reuse its dates rather than plotting the whole forecast.
    requested_dates = [day['date'] for day in result['forecast']]
    data_list = [
        (entry[0], entry[column])
        for date in requested_dates
        for entry in forecast_by_day[date]
    ]
    return data_list, 1, answer


### Generate a line graph

In [None]:
from datetime import datetime
import plotly.graph_objects as go
from IPython.display import display


def line_chart(questions, city, max_day):
    """
    Answer a weather question with an interactive Plotly line chart, or with text.

    Calls `get_line_chart_data()`. When it reports no chartable data (index 0)
    the returned lines are printed; otherwise the (timestamp, value) pairs are
    plotted and shown with the "colab" Plotly renderer.

    Parameters:
        questions (str): The user's natural language question.
        city (str): City name, forwarded to `get_line_chart_data()`.
        max_day (int): Number of forecast days, forwarded to `get_line_chart_data()`.

    Returns:
        None
    """
    data, index, answer = get_line_chart_data(questions, city, max_day)  # Get data
    if index == 0:  # No chart needed, only print output
        print("\n".join(data))
        return

    # answer type -> (chart title, wording for the status message, axis/trace label)
    chart_info = {
        "pop": ("Rainfall prediction chart", "rain", "Precipitation Probability"),
        "describe": ("Weather description map", "weather", "Weather Condition"),
        "cloth": ("Temperature prediction chart", "temperature", "Temperature (°C)"),
        "temperature": ("Temperature prediction chart", "temperature", "Temperature (°C)"),
        # The forecast is requested with units=metric, for which the API reports
        # wind speed in metres per second (not km/h).
        "wind": ("Wind speed prediction graph", "wind speed", "Wind Speed (m/s)"),
        "humidity": ("Humidity prediction chart", "humidity", "Humidity (%)"),
    }
    title_of_image, description, value_label = chart_info.get(answer, ("null", "null", "Value"))

    print(f"The question you asked is related to {description}, generating a line chart now.")
    times = [datetime.strptime(item[0], "%Y-%m-%d %H:%M:%S") for item in data]
    values = [item[1] for item in data]

    # Create interactive chart using Plotly
    fig = go.Figure()

    # Name the trace after the plotted metric (it was previously always
    # labelled as precipitation, whatever the metric).
    fig.add_trace(go.Scatter(x=times, y=values, mode='lines+markers', name=value_label))

    # Update layout
    fig.update_layout(
        title=title_of_image,
        xaxis_title="Date and Time",
        yaxis_title=value_label,
        xaxis=dict(tickformat="%Y-%m-%d %H:%M:%S", tickangle=45),
        autosize=True
    )

    # Show chart
    fig.show(renderer="colab")


### Input validation

In [None]:
import re


def validate_inputs(city, day, question):
    """
    Validate whether the inputs for city, number of days, and question are legal.

    Parameters:
        city (str): City name; must be non-blank and contain only word
            characters, whitespace, hyphens, dots, apostrophes and diacritics.
        day (int | str): Forecast days; must be a whole number >= 1, given
            either as an int or as a string of decimal digits.
        question (str): Question string; must be non-blank and must not contain
            SQL-style comment/statement symbols or SQL keywords as whole words.
    Returns:
        city, day, question, index (int)
        index = 1 means all inputs are valid, 0 means at least one is invalid.
        The first three values are returned unchanged.
    """

    def is_valid_city(city):
        # Reject non-text, empty and whitespace-only values
        if not isinstance(city, str) or not city.strip():
            return False

        # Allow letters, spaces, hyphens, apostrophes, dots, and diacritics
        pattern = r"^[\w\s\-\.'’À-ÿĀ-ž]+$"
        return bool(re.match(pattern, city, re.UNICODE))

    def is_valid_day(day):
        # bool is a subclass of int; True/False are not meaningful day counts
        if isinstance(day, bool):
            return False
        if isinstance(day, int):
            return day >= 1
        if isinstance(day, str):
            text = day.strip()
            return text.isdecimal() and int(text) >= 1
        return False

    def is_safe_question(question):
        # Reject non-text, empty and whitespace-only values
        if not isinstance(question, str) or not question.strip():
            return False

        lower_q = question.lower()

        # Symbol sequences are rejected wherever they occur
        symbol_blacklist = ['--', ';', '/*', '*/', '@@', '@']
        if any(symbol in lower_q for symbol in symbol_blacklist):
            return False

        # SQL keywords are rejected only as whole words. Plain substring
        # matching wrongly refused ordinary weather questions, e.g. "forecast"
        # (contains "cast"), "chart" ("char") or "comfortable" ("table").
        word_blacklist = ['char', 'nchar',
                          'varchar', 'nvarchar', 'alter', 'begin', 'cast',
                          'create', 'cursor', 'declare', 'delete', 'drop',
                          'exec', 'execute', 'fetch', 'insert', 'kill',
                          'select', 'sys', 'sysobjects', 'syscolumns',
                          'table', 'update']
        word_pattern = r"\b(?:" + "|".join(re.escape(word) for word in word_blacklist) + r")\b"
        return re.search(word_pattern, lower_q) is None

    # All three inputs must pass for the overall result to be valid
    if is_valid_city(city) and is_valid_day(day) and is_safe_question(question):
        return city, day, question, 1
    return city, day, question, 0


### UI

In [None]:
import ipywidgets as widgets
from IPython.display import display


def run_forecast_ui():
    """
    Display an interactive `ipywidgets` form for asking weather questions.

    Components:
    -----------
    - city (Text): Text input for the city name (pre-filled with "perth")
    - max_day (Dropdown): Selector for the number of forecast days (1-5)
    - question_input (Text): Input box for the weather question (pre-filled with "rain")
    - predict_button (Button): Submits the query
    - exit_button (Button): Ends the session
    - output (Output): Area for validation messages, summaries and charts

    Behavior:
    ---------
    - When the **Forecast** button is clicked:
        1. The output area is cleared; the inputs are read and then reset
           (text fields emptied, days back to 5).
        2. `validate_inputs()` checks the values.
        3. If validation fails, the rejected values are echoed back.
        4. Otherwise `line_chart()` is called; any exception it raises is
           reported in the output area instead of propagating.

    - When the **Exit** button is clicked:
        - The input controls are removed and an exit message is shown in the
          output area, which stays on screen.

    Dependencies:
    -------------
    Relies on the following functions defined elsewhere in this file:
    - `validate_inputs(city, day, question)`
    - `line_chart(questions, city, max_day)`

    Returns:
    --------
    None (the widget interface is displayed in the notebook)

    Notes:
    ------
    - Intended for environments that support `ipywidgets` (Jupyter Notebook,
      Google Colab); not designed for terminal use.
    """
    html_text = widgets.HTML(
        value="<h2 style='color:#66ccff'>Welcome to the interactive system</h2><p>A simple forecasting system.</p>"
    )

    city = widgets.Text(
        value="perth",
        placeholder='Please enter a city',
        description='City name:',
        disabled=False
    )

    max_day = widgets.Dropdown(
        options=[1, 2, 3, 4, 5],
        value=5,
        description='Forecast days (1-5):',
        layout=widgets.Layout(width='300px'),
        style={'description_width': '150px'}
    )

    question_input = widgets.Text(
        value="rain",
        placeholder='Please enter your question',
        description='What would you like to ask about the weather:',
        disabled=False,
        layout=widgets.Layout(width='300px'),
        style={'description_width': '150px'}
    )

    predict_button = widgets.Button(
        description='Forecast',
        button_style='info',
        tooltip='Click me',
        icon='check'
    )

    exit_button = widgets.Button(
        description='Exit',
        button_style='danger',
        tooltip='End the program',
        icon='sign-out-alt'
    )

    output = widgets.Output()

    # Button click handler
    def on_predict_click(b):
        output.clear_output()

        # Read the current values before the form is reset below
        city_name = city.value.strip()
        question = question_input.value.strip()
        day = int(max_day.value)

        # Reset the form for the next query
        city.value = ''
        question_input.value = ''
        max_day.value = 5

        _, _, _, index = validate_inputs(city_name, day, question)

        with output:
            if index == 0:
                print("❌ Invalid input, please try again:")
                print(f"City: {city_name}")
                print(f"Days: {day}")
                print(f"Question: {question}")
            else:
                print("✅ Input valid, generating chart below")

                try:
                    line_chart(question, city_name, day)
                except Exception as e:
                    # UI boundary: report any failure to the user instead of
                    # letting it disappear inside the widget callback.
                    print("🚫 line_chart execution failed:", e)

    def on_exit_click(b):
        output.clear_output()
        # Remove the input controls but keep the output area displayed.
        # Closing the whole container would also remove the output area's view,
        # so the exit message below would never be visible.
        ui_box.children = (output,)
        with output:
            print("Exited the system. Thank you for using!")

    predict_button.on_click(on_predict_click)
    exit_button.on_click(on_exit_click)

    # Single container so the whole form can be swapped out on exit
    ui_box = widgets.VBox([html_text, city, max_day, question_input,
                           widgets.HBox([predict_button, exit_button]),
                           output])
    display(ui_box)


### MAIN

In [None]:
def main():
    """Entry point: launch the interactive weather forecast UI."""
    run_forecast_ui()


# Start the UI only when this file is executed directly, not when it is imported
if __name__ == "__main__":
    main()


VBox(children=(HTML(value="<h2 style='color:#66ccff'>Welcome to the interactive system</h2><p>A simple forecas…