In [3]:
import requests
from typing import List, Dict
import xml.etree.ElementTree as ET
import pandas as pd


class WeatherData:
    """
    Fetches a location forecast as XML and flattens it into tabular rows.

    Typical use is :meth:`generate_dataframe`, which runs the three stages
    (:meth:`fetch_data`, :meth:`parse_data`, :meth:`extract_data`) in order and
    wraps the result in a DataFrame. The stages can also be called one by one,
    in that order.
    """

    # Seconds to wait on the forecast server. Without an explicit timeout
    # ``requests.get`` may block forever on an unresponsive host.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self, latitude: str, longitude: str) -> None:
        """
        Initializes the WeatherData object.

        No network access happens here; only the request URL is built.

        :param latitude: The latitude to fetch the data for.
        :param longitude: The longitude to fetch the data for.
        """
        self.source_url = f"http://metwdb-openaccess.ichec.ie/metno-wdb2ts/locationforecast?lat={latitude};long={longitude}"
        # Populated by fetch_data() and parse_data() respectively.
        self.response = None
        self.root = None

    def fetch_data(self) -> None:
        """
        Fetches data from the source_url and stores the response on the instance.

        :raises requests.HTTPError: If the server answers with a 4xx/5xx status.
        :raises requests.Timeout: If the server does not answer within
            ``REQUEST_TIMEOUT_SECONDS``.
        """
        self.response = requests.get(self.source_url, timeout=self.REQUEST_TIMEOUT_SECONDS)
        # Fail here rather than handing an HTML/plain-text error page to the XML parser.
        self.response.raise_for_status()

    def parse_data(self) -> None:
        """
        Parses XML data from the fetched response into ``self.root``.

        :raises RuntimeError: If :meth:`fetch_data` has not been called yet.
        """
        if self.response is None:
            raise RuntimeError("No response available; call fetch_data() first.")

        xml_data = self.response.text
        self.root = ET.fromstring(xml_data)

    def extract_data(self) -> List[Dict[str, str]]:
        """
        Extracts relevant information from the parsed XML data.

        One row is produced per ``<time>`` element. A row carries the attributes
        of the first ``<model>`` element whose time range contains the row's
        ``time_from``; when no model matches, the row has no model columns.

        When a ``<time>`` element has the same ``from`` value as the one
        directly before it, its values are merged on top of the previous
        element's values before the row is built.

        :return: A list of dictionaries, each containing weather information.
        :raises RuntimeError: If :meth:`parse_data` has not been called yet.
        """
        # Compare against None explicitly: an Element without children is falsy.
        if self.root is None:
            raise RuntimeError("No parsed XML available; call parse_data() first.")

        data = []
        model_elements = self.root.findall('.//model')

        previous_time_data = None
        for time_element in self.root.iter('time'):
            time_data = self._process_time_element(time_element)

            if previous_time_data and previous_time_data['time_from'] == time_data['time_from']:
                previous_time_data.update(time_data)
                time_data = previous_time_data

            # Reset for every time element so a non-matching element neither
            # crashes on an unbound name nor inherits the previous row's model.
            model_data = {}
            for model_element in model_elements:
                if self._within_model_time_range(time_data, model_element):
                    model_data = self._process_model_element(model_element)
                    break

            # time_data is applied last, so its keys win on a name clash.
            data.append({**model_data, **time_data})

            previous_time_data = time_data

        return data

    @staticmethod
    def _process_time_element(time_element: ET.Element) -> Dict[str, str]:
        """
        Process a time element to extract the needed information.

        Attributes of every child of the nested ``<location>`` element are
        flattened into keys of the form ``<child tag>_<attribute name>``.

        :param time_element: The time element to process.
        :return: A dictionary containing the processed information.
        """
        time_data = {
            'time_from': time_element.get('from'),
            'time_to': time_element.get('to'),
            'datatype': time_element.get('datatype')
        }
        location_element = time_element.find('location')

        if location_element is not None:
            time_data['altitude'] = location_element.get('altitude')
            time_data['latitude'] = location_element.get('latitude')
            time_data['longitude'] = location_element.get('longitude')

            for child in location_element:
                for attr, value in child.attrib.items():
                    time_data[f"{child.tag}_{attr}"] = value

        return time_data

    @staticmethod
    def _within_model_time_range(time_data: Dict[str, str], model_element: ET.Element) -> bool:
        """
        Checks if the time_data is within the range of the model element.

        The range is half-open: ``from <= time_from < to``. Timestamps are
        compared as plain strings, which orders them chronologically only if
        they all share one fixed-width format (assumed, not verified here).

        :param time_data: The time data to check.
        :param model_element: The model element to check against.
        :return: A boolean indicating if the time_data is within the range of the model element.
            ``False`` when any of the three timestamps is missing.
        """
        model_from = model_element.get('from')
        model_to = model_element.get('to')
        time_from = time_data.get('time_from')

        # A missing bound cannot contain anything; comparing None with str would raise TypeError.
        if model_from is None or model_to is None or time_from is None:
            return False

        return model_from <= time_from < model_to

    @staticmethod
    def _process_model_element(model_element: ET.Element) -> Dict[str, str]:
        """
        Process a model element to extract the needed information.

        A fixed set of attributes is stored under short keys (``None`` when the
        attribute is absent), and every attribute present on the element is
        additionally stored under a ``model_``-prefixed key.

        :param model_element: The model element to process.
        :return: A dictionary containing the processed information.
        """
        model_data = {
            'model_name': model_element.get('name'),
            'termin': model_element.get('termin'),
            'runended': model_element.get('runended'),
            'nextrun': model_element.get('nextrun'),
            'from': model_element.get('from'),
            'to': model_element.get('to')
        }

        for attr, value in model_element.attrib.items():
            model_data[f"model_{attr}"] = value

        return model_data

    def generate_dataframe(self) -> pd.DataFrame:
        """
        Generate a DataFrame from the fetched and processed weather data.

        Performs a network request on every call.

        :return: A DataFrame containing the weather data.
        """
        self.fetch_data()
        self.parse_data()
        data = self.extract_data()
        df = pd.DataFrame(data)
        return df





In [4]:
# Galway, Ireland: coordinates, then one forecast download into a DataFrame.
latitude_galway, longitude_galway = '53.2707', '-9.0568'
weather_data_galway = WeatherData(latitude=latitude_galway, longitude=longitude_galway)
df_galway = weather_data_galway.generate_dataframe()

# Dublin, Ireland: same steps for the second location.
latitude_dublin, longitude_dublin = '53.3498', '-6.2603'
weather_data_dublin = WeatherData(latitude=latitude_dublin, longitude=longitude_dublin)
df_dublin = weather_data_dublin.generate_dataframe()


In [5]:
# Preview the first five rows of the Galway forecast.
df_galway.head(n=5)

Unnamed: 0,model_name,termin,runended,nextrun,from,to,model_termin,model_runended,model_nextrun,model_from,...,dewpointTemperature_id,dewpointTemperature_unit,dewpointTemperature_value,precipitation_unit,precipitation_value,precipitation_minvalue,precipitation_maxvalue,precipitation_probability,symbol_id,symbol_number
0,harmonie,2023-06-07T06:00:00Z,2023-06-07T08:57:19Z,2023-06-07T16:00:00Z,2023-06-07T15:00:00Z,2023-06-09T12:00:00Z,2023-06-07T06:00:00Z,2023-06-07T08:57:19Z,2023-06-07T16:00:00Z,2023-06-07T15:00:00Z,...,TD,celsius,11.0,,,,,,,
1,harmonie,2023-06-07T06:00:00Z,2023-06-07T08:57:19Z,2023-06-07T16:00:00Z,2023-06-07T15:00:00Z,2023-06-09T12:00:00Z,2023-06-07T06:00:00Z,2023-06-07T08:57:19Z,2023-06-07T16:00:00Z,2023-06-07T15:00:00Z,...,,,,mm,0.0,0.0,0.0,0.0,PartlyCloud,3.0
2,harmonie,2023-06-07T06:00:00Z,2023-06-07T08:57:19Z,2023-06-07T16:00:00Z,2023-06-07T15:00:00Z,2023-06-09T12:00:00Z,2023-06-07T06:00:00Z,2023-06-07T08:57:19Z,2023-06-07T16:00:00Z,2023-06-07T15:00:00Z,...,TD,celsius,10.6,,,,,,,
3,harmonie,2023-06-07T06:00:00Z,2023-06-07T08:57:19Z,2023-06-07T16:00:00Z,2023-06-07T15:00:00Z,2023-06-09T12:00:00Z,2023-06-07T06:00:00Z,2023-06-07T08:57:19Z,2023-06-07T16:00:00Z,2023-06-07T15:00:00Z,...,,,,mm,0.0,0.0,0.0,0.0,PartlyCloud,3.0
4,harmonie,2023-06-07T06:00:00Z,2023-06-07T08:57:19Z,2023-06-07T16:00:00Z,2023-06-07T15:00:00Z,2023-06-09T12:00:00Z,2023-06-07T06:00:00Z,2023-06-07T08:57:19Z,2023-06-07T16:00:00Z,2023-06-07T15:00:00Z,...,TD,celsius,10.5,,,,,,,


In [6]:
# Preview the first five rows of the Dublin forecast.
df_dublin.head(n=5)

Unnamed: 0,model_name,termin,runended,nextrun,from,to,model_termin,model_runended,model_nextrun,model_from,...,dewpointTemperature_id,dewpointTemperature_unit,dewpointTemperature_value,precipitation_unit,precipitation_value,precipitation_minvalue,precipitation_maxvalue,precipitation_probability,symbol_id,symbol_number
0,harmonie,2023-06-07T06:00:00Z,2023-06-07T08:57:19Z,2023-06-07T16:00:00Z,2023-06-07T15:00:00Z,2023-06-09T12:00:00Z,2023-06-07T06:00:00Z,2023-06-07T08:57:19Z,2023-06-07T16:00:00Z,2023-06-07T15:00:00Z,...,TD,celsius,5.0,,,,,,,
1,harmonie,2023-06-07T06:00:00Z,2023-06-07T08:57:19Z,2023-06-07T16:00:00Z,2023-06-07T15:00:00Z,2023-06-09T12:00:00Z,2023-06-07T06:00:00Z,2023-06-07T08:57:19Z,2023-06-07T16:00:00Z,2023-06-07T15:00:00Z,...,,,,mm,0.0,0.0,0.0,0.0,Sun,1.0
2,harmonie,2023-06-07T06:00:00Z,2023-06-07T08:57:19Z,2023-06-07T16:00:00Z,2023-06-07T15:00:00Z,2023-06-09T12:00:00Z,2023-06-07T06:00:00Z,2023-06-07T08:57:19Z,2023-06-07T16:00:00Z,2023-06-07T15:00:00Z,...,TD,celsius,5.6,,,,,,,
3,harmonie,2023-06-07T06:00:00Z,2023-06-07T08:57:19Z,2023-06-07T16:00:00Z,2023-06-07T15:00:00Z,2023-06-09T12:00:00Z,2023-06-07T06:00:00Z,2023-06-07T08:57:19Z,2023-06-07T16:00:00Z,2023-06-07T15:00:00Z,...,,,,mm,0.0,0.0,0.0,0.0,Sun,1.0
4,harmonie,2023-06-07T06:00:00Z,2023-06-07T08:57:19Z,2023-06-07T16:00:00Z,2023-06-07T15:00:00Z,2023-06-09T12:00:00Z,2023-06-07T06:00:00Z,2023-06-07T08:57:19Z,2023-06-07T16:00:00Z,2023-06-07T15:00:00Z,...,TD,celsius,6.1,,,,,,,
