# Download data from a URL that provides data in SDMX format using Python

In [None]:
# 
# pip  requests install pandas matplotlib seaborn

## Retrieve SDMX data

In [1]:
   import requests

   # Define the URL
   #url = "https://lustat.statec.lu/rest/data/LU1,DF_E5106,1.0/.A?startPeriod=2016&dimensionAtObservation=AllDimensions"
   url = "https://lustat.statec.lu/rest/data/LU1,DF_E5106,1.0/.A?startPeriod=2016"

   # Send a GET request to the server
   response = requests.get(url)

   # Check if the request was successful
   if response.status_code == 200:
       print("Data successfully retrieved!")
       
       # Decide how you want to handle the data based on its format (SDMX XML or JSON, etc.)
       content_type = response.headers['Content-Type']
       
       if 'xml' in content_type:
           # Handle SDMX-XML
           sdmx_xml_data = response.text
           with open('data.sdmx.xml', 'w') as file:
               file.write(sdmx_xml_data)
           print("SDMX XML data saved to 'data.sdmx.xml'")
       
       elif 'json' in content_type or 'application/vnd.sdmx.structurespecific+xml' in content_type:
           # Handle SDMX-JSON or specific SDMX format
           sdmx_json_or_specific = response.text
           with open('data.sdmx.json', 'w') as file:
               file.write(sdmx_json_or_specific)
           print("SDMX JSON or Specific data saved to 'data.sdmx.json'")
       
       else:
           # For flat file format (e.g., CSV, TSV), handle accordingly
           sdmx_flatfile_data = response.text
           with open('data.flatfile.txt', 'w') as file:
               file.write(sdmx_flatfile_data)
           print("Flat file data saved to 'data.flatfile.txt'")

   else:
       print(f"Failed to retrieve data. HTTP Status Code: {response.status_code}")


Data successfully retrieved!
SDMX XML data saved to 'data.sdmx.xml'


In [2]:
   import xml.etree.ElementTree as ET
   import pandas as pd

   # Parse the XML file
   tree = ET.parse('data.sdmx.xml')
   root = tree.getroot()

   # Initialize lists to store data
   time_series_data = []

   # Define namespaces if present (common in SDMX-XML)
   ns = {'sdmx': 'http://www.SDMX.org/resources/SDMXML/schemas/v2_1/message'}  # Adjust the namespace URI as needed

   # Iterate over the structure of the XML to extract data
   for series in root.findall('.//sdmx:Series', ns):
       observation = {}
       for obs in series.findall('sdmx:Obs', ns):
           time_period = obs.get('{http://www.sdmx.org/resources/SDMXML/schemas/v2_1/data}TIME_PERIOD')
           value = float(obs.find('{http://www.sdmx.org/resources/SDMXML/schemas/v2_1/data}OBS_VALUE').text)
           
           # Collect observations
           observation[time_period] = value

       # Append to the list (assuming a single series for simplicity; adjust as necessary)
       time_series_data.append(observation)

   # Convert to DataFrame
   if time_series_data:
       df = pd.DataFrame(time_series_data).T  # Transpose to have dates as index and values as columns
       df.columns = ['Value']  # Assign column name
       df.index.name = 'Date'
       
       print(df.head())  # Display the first few rows of the DataFrame

   else:
       print("No data found in XML.")


No data found in XML.


In [2]:
   import matplotlib.pyplot as plt
   import seaborn as sns

   # Set style for seaborn (optional)
   sns.set(style="darkgrid")

   # Plot the time series
   df.plot(figsize=(12, 6))
   plt.title('Time Series Data')
   plt.ylabel('Value')
   plt.xlabel('Date')

   # Display plot
   plt.show()


ImportError: /home/tarikz/.local/lib/python3.13/site-packages/pandas/_libs/tslib.cpython-313-x86_64-linux-gnu.so: cannot read file data: Input/output error