In [1]:
import requests
import pandas as pd
import os
import tarfile
import logging
import shutil
# Configure the root logger so INFO-level messages (and above) are shown.
logging.basicConfig(level=logging.INFO)

# Boolean switch: set to True (the boolean, not the string 'True') to download
# each ODF archive before unpacking; when False the download step is skipped.
DOWNLOAD_ODFS = False

Read the CSV file downloaded from the XMM-Newton Science Archive (XSA), which lists all the Markarian 421 ObsIds.

In [2]:
# Load the XSA results table and keep only the column holding the observation IDs.
file_csv = pd.read_csv('NXSA-Results-1585319865076.csv')
ids = file_csv.loc[:, 'OBSERVATION.OBSERVATION_ID']

Define a download function using Python's `requests` library.

In [3]:
def download(url, file_name, timeout=(30, 600), chunk_size=1024 * 1024):
    """Download ``url`` and save the response body to ``file_name``.

    The body is streamed to a temporary ``<file_name>.part`` file in chunks and
    only renamed to ``file_name`` once the transfer has completed, so a failed
    or interrupted download never leaves a truncated archive behind.

    Parameters
    ----------
    url : str
        Address to fetch with an HTTP GET request.
    file_name : str
        Path of the file the response body is written to.
    timeout : float or tuple, optional
        Passed to ``requests.get``; a ``(connect, read)`` tuple in seconds.
    chunk_size : int, optional
        Number of bytes requested per chunk while streaming to disk.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    requests.RequestException
        For connection problems or timeouts.
    """
    partial_name = f'{file_name}.part'
    try:
        # stream=True avoids holding the whole archive in memory at once.
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Do not save an HTTP error page as if it were the archive.
            response.raise_for_status()
            with open(partial_name, "wb") as file:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    file.write(chunk)
        # Publish the file only after the full body has been written.
        os.replace(partial_name, file_name)
    except BaseException:
        # Drop the incomplete file, then let the caller see the original error.
        if os.path.exists(partial_name):
            os.remove(partial_name)
        raise

Define a function that creates the structure for all the folders containing the data.

In [4]:
def make_dir_structure(ObsId):
    """Ensure the folder layout for one observation exists.

    Creates the ``ObsId`` folder together with its ``rgs`` and ``odf``
    subfolders. Subfolders that are missing from an already existing ``ObsId``
    folder are created as well, so a partially built structure is repaired
    instead of being reported as complete.

    Parameters
    ----------
    ObsId : str
        Path of the observation's main folder.
    """
    subfolders = ['rgs', 'odf']
    missing = [
        folder for folder in subfolders
        if not os.path.isdir(os.path.join(ObsId, folder))
    ]

    if not missing:
        logging.info(f'Folder structure for {ObsId} already OK.')
        return

    logging.info(f'Creating the directory structure for the data {ObsId}.')
    for folder in missing:
        # makedirs also creates the ObsId main folder when it is absent.
        os.makedirs(os.path.join(ObsId, folder), exist_ok=True)
    logging.info('Structure completed!')
        

Define an unpacking function that extracts the ODF files from the tar.gz archive.

In [5]:
def _extract_tar(tar, destination):
    """Extract every member of the open archive ``tar`` into ``destination``."""
    if hasattr(tarfile, 'data_filter'):
        # Extraction filters reject members that would escape ``destination``;
        # they are only available on Python versions that ship them.
        tar.extractall(path=destination, filter='data')
    else:
        tar.extractall(path=destination)


def unpack(tar_name, odf_directory):
    """Extract an ODF ``tar.gz`` archive and the ``.TAR`` files it contains.

    The gzip-compressed archive ``tar_name`` is extracted into
    ``odf_directory`` (created if needed, including parent folders); every
    file in that directory whose name ends in ``.TAR`` is then extracted into
    the same directory.

    If anything goes wrong the error is logged and the observation folder
    derived from ``tar_name`` (the name without its ``.tar.gz`` suffix) is
    removed, so failed observations are not left half-populated. The exception
    is not re-raised.

    Parameters
    ----------
    tar_name : str
        Path of the ``tar.gz`` archive to extract.
    odf_directory : str
        Directory receiving the extracted files.
    """
    os.makedirs(odf_directory, exist_ok=True)

    try:
        # Extract the tar.gz containing the ODF.
        with tarfile.open(tar_name, "r:gz") as tar:
            _extract_tar(tar, odf_directory)

        # Extract the additional tar files that were inside the tar.gz.
        for file in os.listdir(odf_directory):
            if file.endswith('.TAR'):
                with tarfile.open(os.path.join(odf_directory, file), "r") as tar:
                    _extract_tar(tar, odf_directory)
        logging.info(f'All ODFs for {tar_name} have been extracted!')

    except Exception as e:
        # Log the cause at ERROR level so it is visible with the INFO config,
        # and do so before the cleanup in case the cleanup itself has issues.
        logging.error(f'An error has occurred with tar {tar_name}: {e}')

        # Strip the archive suffix rather than splitting on the first dot, so
        # paths such as './0099280101.tar.gz' still map to their folder.
        suffix = '.tar.gz'
        if tar_name.endswith(suffix):
            dir_name = tar_name[:-len(suffix)]
        else:
            dir_name = os.path.splitext(tar_name)[0]

        # Remove the observation whose extraction failed, if it exists.
        if dir_name and os.path.isdir(dir_name):
            shutil.rmtree(dir_name, ignore_errors=True)

Generate the URLs to download the ODF for each observation. Note that the ObsId must be exactly 10 characters long in the URL, so shorter IDs are left-padded with zeros.

In [6]:
# Turn every ObsId into a string left-padded with zeros to 10 characters.
ids_list = ids.tolist()
ids_zerofill = [str(ObsId).zfill(10) for ObsId in ids_list]

# Process the observations one by one: optional download, folder layout, extraction.
for ObsId in ids_zerofill:

    # The archive is fetched only when downloading has been switched on.
    if DOWNLOAD_ODFS:
        url = f"http://nxsa.esac.esa.int/nxsa-sl/servlet/data-action-aio?obsno={ObsId}&level=ODF"
        download(url, f"{ObsId}.tar.gz")

    # Prepare the folders, then unpack the archive into the 'odf' subfolder.
    make_dir_structure(ObsId)
    unpack(f'{ObsId}.tar.gz', os.path.join(ObsId, 'odf'))
    
    

INFO:root:Folder structure for 0099280101 already OK.
INFO:root:All ODFs for 0099280101.tar.gz have been extracted!
INFO:root:Folder structure for 0099280201 already OK.
INFO:root:All ODFs for 0099280201.tar.gz have been extracted!
INFO:root:Folder structure for 0099280301 already OK.
INFO:root:All ODFs for 0099280301.tar.gz have been extracted!
INFO:root:Folder structure for 0099280401 already OK.
INFO:root:All ODFs for 0099280401.tar.gz have been extracted!
INFO:root:Folder structure for 0099280501 already OK.
INFO:root:All ODFs for 0099280501.tar.gz have been extracted!
INFO:root:Folder structure for 0099280601 already OK.
INFO:root:All ODFs for 0099280601.tar.gz have been extracted!
INFO:root:Folder structure for 0136540101 already OK.
INFO:root:All ODFs for 0136540101.tar.gz have been extracted!
INFO:root:Folder structure for 0136540201 already OK.
INFO:root:All ODFs for 0136540201.tar.gz have been extracted!
INFO:root:Folder structure for 0136540301 already OK.
INFO:root:All ODFs

INFO:root:Folder structure for 0670920501 already OK.
INFO:root:All ODFs for 0670920501.tar.gz have been extracted!
INFO:root:Folder structure for 0658801301 already OK.
INFO:root:All ODFs for 0658801301.tar.gz have been extracted!
INFO:root:Folder structure for 0658801401 already OK.
INFO:root:All ODFs for 0658801401.tar.gz have been extracted!
INFO:root:Folder structure for 0658801501 already OK.
INFO:root:All ODFs for 0658801501.tar.gz have been extracted!
INFO:root:Folder structure for 0658801601 already OK.
INFO:root:All ODFs for 0658801601.tar.gz have been extracted!
INFO:root:Folder structure for 0658801701 already OK.
INFO:root:All ODFs for 0658801701.tar.gz have been extracted!
INFO:root:Folder structure for 0658801801 already OK.
INFO:root:All ODFs for 0658801801.tar.gz have been extracted!
INFO:root:Folder structure for 0658801901 already OK.
INFO:root:All ODFs for 0658801901.tar.gz have been extracted!
INFO:root:Folder structure for 0658802001 already OK.
INFO:root:All ODFs