In [1]:
import requests
import pandas as pd
import os
import tarfile
import logging
import shutil
# Show INFO-level messages so the progress reports emitted through
# logging.info() by the helper functions below are displayed.
logging.basicConfig(level=logging.INFO)

# Switch read by the main loop: when True each ODF archive is downloaded before
# being unpacked; when False the download is skipped and only unpacking runs.
DOWNLOAD_ODFS = True   #if you need to download the odfs, set to 'True'

Read the CSV file downloaded from the XSA, in which all the Markarian 421 ObsIds are listed.

In [2]:
# Load the results table exported from the archive. The file name belongs to
# one specific export, so it has to be adapted for every new query.
file_csv = pd.read_csv('NXSA-Results-1598860175088.csv')   #must be modified by user
# Keep only the column with the observation identifiers.
# NOTE(review): the IDs are zero-padded to 10 characters further down, presumably
# because leading zeros are lost when this column is parsed as numbers - confirm.
ids = file_csv['OBSERVATION.OBSERVATION_ID']

Define a download function using Python's `requests` module.

In [3]:
def download(url, file_name, timeout=300):
    """Download ``url`` and store the response body in ``file_name``.

    The body is streamed to a temporary ``<file_name>.part`` file and only
    renamed to ``file_name`` once the transfer has finished, so an interrupted
    or rejected download never leaves a truncated or bogus archive behind.

    Parameters
    ----------
    url : str
        Address of the resource to fetch.
    file_name : str
        Path of the file to write.
    timeout : float or tuple, optional
        Timeout in seconds handed to ``requests.get`` (connection / inactivity
        timeout, not a limit on the total transfer time).

    Returns
    -------
    bool
        True if the file was written, False if the server answered with an
        HTTP error status (the error is logged and nothing is written).

    Raises
    ------
    requests.RequestException
        On network-level failures (connection errors, timeouts).
    """
    partial_name = f"{file_name}.part"

    # stream=True avoids holding a whole archive in memory at once.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Do not save an HTTP error page under the archive name.
        if not response.ok:
            logging.error(f'Download of {url} failed with HTTP status {response.status_code}.')
            return False

        try:
            with open(partial_name, "wb") as file:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    file.write(chunk)
        except Exception:
            # Remove the incomplete file before propagating the error.
            if os.path.exists(partial_name):
                os.remove(partial_name)
            raise

    # Atomic rename: file_name either holds the full download or is untouched.
    os.replace(partial_name, file_name)
    return True

Define a function that creates the structure for all the folders containing the data.

In [4]:
def make_dir_structure(ObsId):
    """Create the folder layout used to store the data of one observation.

    The layout is ``<ObsId>/rgs`` and ``<ObsId>/odf``. Any folder that is
    missing is created, including the case where the ``ObsId`` folder already
    exists but one of its subfolders does not (e.g. after an interrupted run).

    Parameters
    ----------
    ObsId : str
        Name (or path) of the main observation folder.
    """
    #subfolders that go in the ObsId folder
    subfolders = ['rgs', 'odf']
    missing = [folder for folder in subfolders
               if not os.path.isdir(os.path.join(ObsId, folder))]

    if missing:
        logging.info(f'Creating the directory structure for the data {ObsId}.')

        #makedirs also creates the ObsId main folder (and parents) when needed
        for folder in missing:
            os.makedirs(os.path.join(ObsId, folder), exist_ok=True)
        logging.info('Structure completed!')

    else:
        logging.info(f'Folder structure for {ObsId} already OK.')
        

Define an unpacking function to extract the ODF file in the tar.gz format.

In [5]:
def unpack(tar_name, odf_directory):
    """Extract an ODF ``.tar.gz`` archive, and the ``.TAR`` files it contains.

    The archive is extracted into ``odf_directory`` (created if missing); every
    file ending in ``.TAR`` found there afterwards is extracted into the same
    directory. If anything goes wrong the error is logged and the observation
    folder named after the archive (``<name>`` for ``<name>.tar.gz``, located
    next to the archive) is removed, so that a broken observation is not left
    on disk.

    Parameters
    ----------
    tar_name : str
        Path of the ``.tar.gz`` archive.
    odf_directory : str
        Directory that receives the extracted files.
    """
    os.makedirs(odf_directory, exist_ok=True)

    # Use the 'data' extraction filter where the interpreter provides it: it
    # rejects members that would be written outside the destination directory.
    extract_kwargs = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}

    #Extract tar.gz containing the ODF
    try:
        with tarfile.open(tar_name, "r:gz") as tar:
            tar.extractall(path=odf_directory, **extract_kwargs)

        #Extract the additional tar files inside the previous tar.gz
        for file in os.listdir(odf_directory):
            if file.endswith('.TAR'):
                with tarfile.open(os.path.join(odf_directory, file), "r") as tar:
                    tar.extractall(path=odf_directory, **extract_kwargs)
        logging.info(f'All ODFs for {tar_name} have been extracted!')

    except Exception as e:
        # Report the cause at ERROR level: DEBUG messages are hidden when the
        # logger is configured at INFO.
        logging.error(f'An error has occurred with tar {tar_name}.')
        logging.error(str(e))

        # Derive the observation folder from the file name only, so that dots
        # in parent directories cannot redirect the removal to another folder.
        parent, base = os.path.split(tar_name)
        stem = base.split('.')[0]
        dir_name = os.path.join(parent, stem)

        #removes the observation whose archive could not be extracted
        if stem and os.path.isdir(dir_name):
            shutil.rmtree(dir_name, ignore_errors=True)

Generate the URLs to download the ODF for each observation. Note that the ObsId must be exactly 10 characters long in the URL, so shorter IDs are left-padded with zeros.

In [9]:
# Turn every ObsId into the zero-padded, 10-character string used in the URL.
ids_list = ids.tolist()
ids_zerofill = [str(raw_id).zfill(10) for raw_id in ids_list]

# Folder of the source that will contain all the observations; every relative
# path used below is resolved from here.
path_source = os.path.join('/home','luana','Desktop','Magistrale','Thesis', 'PKS2155-304')    #must be modified by user
os.chdir(path_source)

for ObsId in ids_zerofill:
    # The archive of each observation is stored as <ObsId>.tar.gz
    tar_name = f"{ObsId}.tar.gz"

    # Fetch the archive only when the download step is enabled.
    if DOWNLOAD_ODFS:
        url = f"http://nxsa.esac.esa.int/nxsa-sl/servlet/data-action-aio?obsno={ObsId}&level=ODF"
        download(url, tar_name)

    # Prepare the observation folders, then extract the ODF into <ObsId>/odf.
    make_dir_structure(ObsId)
    unpack(tar_name, os.path.join(ObsId, 'odf'))
    
    

INFO:root:Creating the directory structure for the data 0080940101.
INFO:root:Structure completed!
INFO:root:All ODFs for 0080940101.tar.gz have been extracted!
INFO:root:Creating the directory structure for the data 0080940301.
INFO:root:Structure completed!
INFO:root:All ODFs for 0080940301.tar.gz have been extracted!
INFO:root:Creating the directory structure for the data 0080940401.
INFO:root:Structure completed!
INFO:root:All ODFs for 0080940401.tar.gz have been extracted!
INFO:root:Creating the directory structure for the data 0080940501.
INFO:root:Structure completed!
INFO:root:All ODFs for 0080940501.tar.gz have been extracted!
INFO:root:Creating the directory structure for the data 0124930101.
INFO:root:Structure completed!
INFO:root:All ODFs for 0124930101.tar.gz have been extracted!
INFO:root:Creating the directory structure for the data 0124930201.
INFO:root:Structure completed!
INFO:root:All ODFs for 0124930201.tar.gz have been extracted!
INFO:root:Creating the directory s