In [50]:
import xml.etree.ElementTree as ET
from pathlib import Path
import json
from statistics import mean

# converts python data structures to XML
import xmltodict

In [5]:
root = ET.Element('main')
ET.tostring(root, 'unicode')

'<main />'

In [146]:
data = []
for f in Path().rglob('../source_data/*/*.json'):
    city = f.parts[-2]
    json_data = json.loads(f.read_text())
    city_tag = ''.join(w.capitalize() for w in city.split())
    data.append({'city': city_tag, 'data': json_data})
data

[{'city': 'Oviedo',
  'data': {'lat': 43.3606,
   'lon': -5.8446,
   'timezone': 'Europe/Madrid',
   'timezone_offset': 7200,
   'current': {'dt': 1632528000,
    'sunrise': 1632550439,
    'sunset': 1632593763,
    'temp': 15.84,
    'feels_like': 16.09,
    'pressure': 1011,
    'humidity': 100,
    'dew_point': 15.84,
    'uvi': 0,
    'clouds': 46,
    'visibility': 10000,
    'wind_speed': 1.79,
    'wind_deg': 196,
    'wind_gust': 2.24,
    'weather': [{'id': 802,
      'main': 'Clouds',
      'description': 'scattered clouds',
      'icon': '03n'}]},
   'hourly': [{'dt': 1632528000,
     'temp': 15.84,
     'feels_like': 16.09,
     'pressure': 1011,
     'humidity': 100,
     'dew_point': 15.84,
     'uvi': 0,
     'clouds': 46,
     'visibility': 10000,
     'wind_speed': 1.79,
     'wind_deg': 196,
     'wind_gust': 2.24,
     'weather': [{'id': 802,
       'main': 'Clouds',
       'description': 'scattered clouds',
       'icon': '03n'}]},
    {'dt': 1632531600,
     'temp'

Is the current timestamp the same as first hour?

In [147]:
for city in data:
    w = city['data']
    assert w['current']['dt'] == d['hourly'][0]['dt']


Getting the data we need:
- Child element **summary** (parent: **weather**)
    - attribute **mean_temp**: country’s mean temperature
    - attribute **mean_wind_speed**: country’s mean wind speed
    - attribute **coldest_place**: name of the city with the lowest mean temperature value
    - attribute **warmest_place**: name of the city with the highest mean temperature value
    - attribute **windiest_place**: name of the city with the highest mean wind speed value
    

- For each city:
    - attribute **mean_temp**: city’s mean temperature
    - attribute **max_temp**: city’s max temperature
    - attribute **min_temp**: city’s min temperature
    - attribute **mean_wind_speed**: city’s mean wind speed
    - attribute **max_wind_speed**: city’s max wind speed
    - attribute **min_wind_speed**: city’s min wind speed

In [None]:
cdata = {}
for city, city_data in ((city['city'], city['data']) for city in data):
    temps = [h['temp'] for h in city_data['hourly']]
    tmin, tmax, tmean = min(temps), max(temps), round(mean(temps), 2)
    
    speeds = [h['wind_speed'] for h in city_data['hourly']]
    smin, smax, smean = min(speeds), max(speeds), round(mean(speeds), 2)
    
    cdata[city] = {
        'min_temp': tmin,
        'mean_temp': tmean,
        'max_temp': tmax,
        'min_wind_speed': smin,
        'mean_wind_speed': smean,
        'max_wind_speed': smax,

    }
cdata

{'Oviedo': {'min_temp': 14.26,
  'mean_temp': 17.0,
  'max_temp': 21.96,
  'min_wind_speed': 0.17,
  'mean_wind_speed': 1.54,
  'max_wind_speed': 6.26},
 'Barcelona': {'min_temp': 21.66,
  'mean_temp': 24.21,
  'max_temp': 27.38,
  'min_wind_speed': 0.45,
  'mean_wind_speed': 1.26,
  'max_wind_speed': 3.6},
 'Valencia': {'min_temp': 20.9,
  'mean_temp': 23.31,
  'max_temp': 27.57,
  'min_wind_speed': 0.45,
  'mean_wind_speed': 1.45,
  'max_wind_speed': 4.47},
 'Toledo': {'min_temp': 16.11,
  'mean_temp': 19.52,
  'max_temp': 23.49,
  'min_wind_speed': 1.34,
  'mean_wind_speed': 3.86,
  'max_wind_speed': 7.04},
 'Valladolid': {'min_temp': 12.56,
  'mean_temp': 15.61,
  'max_temp': 19.51,
  'min_wind_speed': 0.45,
  'mean_wind_speed': 2.22,
  'max_wind_speed': 8.23},
 'Seville': {'min_temp': 17.24,
  'mean_temp': 21.6,
  'max_temp': 27.13,
  'min_wind_speed': 0.45,
  'mean_wind_speed': 1.04,
  'max_wind_speed': 2.24},
 'Pamplona': {'min_temp': 16.01,
  'mean_temp': 20.45,
  'max_temp': 2

Computing the aggregates for all the cities (the 'summary' section)

In [175]:
temps = [c['mean_temp'] for c in cdata.values()]
tmean = round(mean(temps), 2)
speed = [c['mean_wind_speed'] for c in cdata.values()]
smean = round(mean(speeds), 2)

tmean, smean

(20.43, 1.99)

In [180]:
_, warmest_city = max([(dt['max_temp'], city) for city, dt in cdata.items()])
_, coldest_city = min([(dt['min_temp'], city) for city, dt in cdata.items()])
_, windiest_city = max([(dt['max_wind_speed'], city) for city, dt in cdata.items()])
warmest_city, coldest_city, windiest_city

('Palma', 'Valladolid', 'Pamplona')

In [221]:
obj = {'summary': {
    'mean_temp': tmean,
    'mean_wind_speed': smean,
    'coldest_place': coldest_city,
    'warmest_place': warmest_city,
    'windiest_place': windiest_city
},
    'cities': cdata}
obj

{'summary': {'mean_temp': 20.43,
  'mean_wind_speed': 1.99,
  'coldest_place': 'Valladolid',
  'warmest_place': 'Palma',
  'windiest_place': 'Pamplona'},
 'cities': {'Oviedo': {'min_temp': 14.26,
   'mean_temp': 17.0,
   'max_temp': 21.96,
   'min_wind_speed': 0.17,
   'mean_wind_speed': 1.54,
   'max_wind_speed': 6.26},
  'Barcelona': {'min_temp': 21.66,
   'mean_temp': 24.21,
   'max_temp': 27.38,
   'min_wind_speed': 0.45,
   'mean_wind_speed': 1.26,
   'max_wind_speed': 3.6},
  'Valencia': {'min_temp': 20.9,
   'mean_temp': 23.31,
   'max_temp': 27.57,
   'min_wind_speed': 0.45,
   'mean_wind_speed': 1.45,
   'max_wind_speed': 4.47},
  'Toledo': {'min_temp': 16.11,
   'mean_temp': 19.52,
   'max_temp': 23.49,
   'min_wind_speed': 1.34,
   'mean_wind_speed': 3.86,
   'max_wind_speed': 7.04},
  'Valladolid': {'min_temp': 12.56,
   'mean_temp': 15.61,
   'max_temp': 19.51,
   'min_wind_speed': 0.45,
   'mean_wind_speed': 2.22,
   'max_wind_speed': 8.23},
  'Seville': {'min_temp': 17.2

In [222]:

# https://stackoverflow.com/a/66072187

from lxml import etree
import typing

def obj_to_xml(obj: typing.Union[bool, float, int, str, dict, list], parent_element: typing.Optional[etree.Element] = None, new_element_name: str = 'root') -> etree.Element:
    """
    Recursively walk an object and return its XML representation.

    Args:
        parent_element (typing.Optional[etree.Element]): The element that will be the parent of the element that this
            function will create and return.

        new_element_name (str): The name of the root element that will be created.

        obj (typing.Union[bool, float, int, str, dict, list]): The object to return XML for.

    Returns:
        result (etree.Element): An XML element.

    """

    if parent_element is not None:
        new_element = etree.SubElement(parent_element, new_element_name)

    else:
        new_element = etree.Element(new_element_name)

    if type(obj) == dict:
        for key, value in obj.items():
            if type(value) in (dict, list):
                obj_to_xml(value, new_element, key)

            else:
                # Convert values to a string, make sure boolean values are lowercase
                new_element.attrib[key] = str(value).lower() if type(value) == bool else str(value)

    elif type(obj) == list:
        for list_item in obj:
            # List items have to have a name.  Here we borrow "li" from HTML which stands for list item.
            obj_to_xml(list_item, new_element, 'li')

    else:
        # Convert everything to a string, make sure boolean values are lowercase
        new_element.text = str(obj).lower() if type(obj) == bool else str(obj)

    return new_element

In [223]:
root = obj_to_xml(obj, new_element_name='weather')
root.attrib['country'] = 'Spain'
root.attrib['date'] = '2021-09-25'
tree = etree.ElementTree(root)
xml_str = etree.tostring(tree, encoding='unicode', pretty_print=True)
print(xml_str)

<weather country="Spain" date="2021-09-25">
  <summary mean_temp="20.43" mean_wind_speed="1.99" coldest_place="Valladolid" warmest_place="Palma" windiest_place="Pamplona"/>
  <cities>
    <Oviedo min_temp="14.26" mean_temp="17.0" max_temp="21.96" min_wind_speed="0.17" mean_wind_speed="1.54" max_wind_speed="6.26"/>
    <Barcelona min_temp="21.66" mean_temp="24.21" max_temp="27.38" min_wind_speed="0.45" mean_wind_speed="1.26" max_wind_speed="3.6"/>
    <Valencia min_temp="20.9" mean_temp="23.31" max_temp="27.57" min_wind_speed="0.45" mean_wind_speed="1.45" max_wind_speed="4.47"/>
    <Toledo min_temp="16.11" mean_temp="19.52" max_temp="23.49" min_wind_speed="1.34" mean_wind_speed="3.86" max_wind_speed="7.04"/>
    <Valladolid min_temp="12.56" mean_temp="15.61" max_temp="19.51" min_wind_speed="0.45" mean_wind_speed="2.22" max_wind_speed="8.23"/>
    <Seville min_temp="17.24" mean_temp="21.6" max_temp="27.13" min_wind_speed="0.45" mean_wind_speed="1.04" max_wind_speed="2.24"/>
    <Pamplon

In [224]:

with open('out.xml', 'w') as f:
    f.write(xml_str)