# Imports

*notes:*
- Everything is defined sequentially in this file because strymread can only be run from within Jupyter notebooks -> verify whether this still applies to this part
- Keep only what is needed for this part!
- Find out how to pass in and use the parameters

In [4]:
import ast
import asyncio
import os
import shlex
import shutil
import subprocess
from datetime import datetime

import numpy as np
import pandas as pd

0.4.3


# Function definitions

## iRODS command wrappers

In [5]:
def ils():
    '''
    wrapper for iRODS ils command
    :return: list of the stripped lines printed by ``ils``, minus the first
             and last elements of the newline-split output (presumably the
             collection header and the empty string after the final newline)
    '''
    completed = subprocess.run(['ils'],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               universal_newlines=True)
    output_lines = completed.stdout.split(sep='\n')
    entries = []
    # Skip the first and the last split element, then trim each entry.
    for line in output_lines[1:-1]:
        entries.append(line.strip())
    return entries


def icd(destination):
    '''
    wrapper for iRODS icd command
    :param destination: collection to move to
    :return: the ``subprocess.CompletedProcess`` of the ``icd`` call, with
             stdout and stderr captured as text
    '''
    command = ['icd', destination]
    completed = subprocess.run(command,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               universal_newlines=True)
    return completed


def ipwd():
    '''
    wrapper for iRODS ipwd command
    :return: whitespace-stripped output of ``ipwd`` (current directory on CyVerse)
    '''
    completed = subprocess.run(['ipwd'],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               universal_newlines=True)
    # A single strip() already removes surrounding newlines as well.
    current_collection = completed.stdout.strip()
    print('pwd output is:', current_collection)
    return current_collection


## Cache handling

In [6]:
async def async_command_shell(command, verbose: bool = False):
    """Run a command line through the shell as an asyncio subprocess.

    source: https://fredrikaverpil.github.io/2017/06/20/async-and-await-with-subprocesses/

    :param command: command line handed to the shell
    :param verbose: set to True to print start / done / failed messages
    :return: decoded, stripped stdout when the return code is 0
    :raises Exception: carrying the decoded, stripped stderr otherwise
    """
    proc = await asyncio.create_subprocess_shell(
        command,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    pid_suffix = "(pid = " + str(proc.pid) + ")"
    if verbose:
        print("Started:", command, pid_suffix, flush=True)

    # communicate() waits for the process to end and collects both streams.
    out_bytes, err_bytes = await proc.communicate()
    succeeded = proc.returncode == 0

    if verbose:
        print("Done:" if succeeded else "Failed:", command, pid_suffix, flush=True)

    if not succeeded:
        raise Exception(err_bytes.decode().strip())
    return out_bytes.decode().strip()


async def iget(file_adress, destination, verbose: bool = False):
    '''
    wrapper for iRODS iget command
    async command using asyncio library
    :param file_adress: address on CyVerse fileshare
    :param destination: address to download to on the local computer
    :param verbose: forwarded to async_command_shell to print its progress messages
    :return: local address of the file (destination + '/' + remote file name)
    :raises Exception: if the command cannot be built or the download fails;
                       the original error is attached as the cause
    '''
    try:
        # The command goes through a shell: quote both paths so that spaces or
        # shell metacharacters in a name cannot split or alter the command.
        command = f'iget -T {shlex.quote(file_adress)} {shlex.quote(destination)}'
        await async_command_shell(command, verbose=verbose)
        local_address = destination + '/' + file_adress.split('/')[-1]
        return local_address
    except Exception as e:
        raise Exception(f'Error while downloading file at:'
                        f'\n\tremote: {file_adress}'
                        f'\n\tto local address: {destination}'
                        f'\n\tFailing on {e}') from e


def init_cache(local_folder):
    '''
    clears the cache if exists and initialise it

    The cache is the ``temp_cache`` folder directly under ``local_folder``.
    Everything is done in-process: a ``cd`` run through ``subprocess`` only
    changes the directory of the child process, so it cannot be used to
    select the folder in which the cache is created.

    :param local_folder: root folder for the analysis; an empty value means
                         the current working directory. Created if missing.
    :return: temporary cache address (absolute path, as a str)
    '''
    if local_folder:
        local_folder_absolute = os.path.abspath(local_folder)
    else:
        local_folder_absolute = os.getcwd()
    temp_cache_address = f'{local_folder_absolute}/temp_cache'

    # Drop whatever a previous run left behind under that name.
    if os.path.isdir(temp_cache_address) and not os.path.islink(temp_cache_address):
        shutil.rmtree(temp_cache_address)
    elif os.path.lexists(temp_cache_address):
        os.remove(temp_cache_address)

    # makedirs also creates the root folder when it does not exist yet.
    os.makedirs(temp_cache_address)
    return temp_cache_address


## Fileshare exploration

In [7]:
def findall_files(root, verbose: bool = False):
    '''
    finds all .csv files within the root directory and recursively below

    Directories still to visit are kept in a list and taken from its end.
    Entries starting with 'C-' are treated as sub-directories; those whose
    name contains 'bagfiles' or 'dashcams' are skipped.

    :param root: str, root file from which to begin the search
    :param verbose: bool, set to True to see fuller logs
    :return: List<str>, each entry being '<current iRODS directory>/<file name>'
    '''
    dir_queue = [root]
    files = []

    while dir_queue:
        current_dir = dir_queue.pop()
        icd(current_dir)
        queue = ils()
        # The working directory only changes through the icd call above, so it
        # is resolved lazily and at most once per directory instead of spawning
        # one ipwd subprocess per file.
        current_folder = None
        if verbose:
            print('---------')
            print('current queue dir: ', dir_queue)
            print('current directory is: ', current_dir)
            print('current file queue is: ', queue)

        for f in queue:
            if verbose:
                print('current file tests on: ', f, ' and test gives f[0:2]: ', f[0:2], ' and f[-4:] is: ', f[-4:])
            # avoid dashcams and bagfiles folders, only use the libpanda ones -> reduces the number of files to scan for
            if f.startswith('C-') and 'bagfiles' not in f and 'dashcams' not in f:
                # drop the first three characters (the 'C-' marker and the character after it)
                dir_queue.append(f[3:])
                if verbose:
                    print('appending dir queue; ', f)
            elif f.endswith('.csv'):
                # We also conserve the current folder to get the entire path to the file
                if current_folder is None:
                    current_folder = ipwd()
                files.append(f'{current_folder}/{f}')
                if verbose:
                    print('appending file; ', f)

        if verbose:
            print('found ', len(files), ' files')

    return files


def can_gps_coupling(files):
    '''
    links the CAN and GPS from same acquisitions

    Every address containing '_CAN_Messages.csv' yields one couple, in input
    order. The GPS address is derived by replacing the last 17 characters
    (the length of that suffix) with '_GPS_Messages.csv'; it is kept only
    when that exact address is present in ``files``.

    :param files: iterable of file adresses
    :return: List<{'can': str, 'gps': str || None}>
    '''
    can_suffix = '_CAN_Messages.csv'
    gps_suffix = '_GPS_Messages.csv'

    # Materialise once so one-shot iterables work, and look GPS files up in a
    # set instead of scanning the whole list for every CAN file.
    files = list(files)
    known_files = set(files)

    file_list = []
    for file in files:
        if can_suffix not in file:
            continue
        file_gps = file[:-len(can_suffix)] + gps_suffix
        file_list.append({
            'can': file,
            'gps': file_gps if file_gps in known_files else None,
        })

    return file_list


## File handler & cache

In [9]:
class FileHandler:
    """
    Class handling download and delete of files to be analyzed

    Typical use: build the handler, call ``explore`` once to fill the list of
    CAN/GPS couples, then await ``next`` repeatedly to download one couple at
    a time into a freshly reset cache.
    """
    # attributes
    all_files = None          # result of findall_files, set by explore(analyze=True)
    coupled_files = None      # couples from can_gps_coupling, or the 'Files' column of a previous exploration
    local_root_folder = None  # value handed to init_cache for every cache reset
    remote_addresses = None   # couple ({'can': ..., 'gps': ...}) used by the last call to next
    can_local_address = None  # local path of the last downloaded CAN file
    gps_local_address = None  # local path of the last downloaded GPS file, None when not downloaded
    index = None              # position in coupled_files of the couple next will fetch
    max_index = None          # number of couples, None until explore succeeds

    # methods
    def __init__(self, local_root_folder, start_index: int = 0):
        """
        :param local_root_folder: Local root for the download folder
        :param start_index: index of the first couple that next will download
        """
        self.local_root_folder = local_root_folder
        self.index = start_index
        print('File Handler ready for file exploration')

    def explore(self, analyze: bool = True, root: str = '', exploration_name = None,
                 previous_exploration_path = None, verbose: bool = False):
        """
        Initialises the path objects, then file handler attributes

        Failures are reported with a print and leave ``max_index`` unset; they
        are not raised to the caller.

        :param analyze: True if you want to explore files from CyVerse,
        False if you want to use a file giving the coupled files from a previous FileShare exploration
        :param root: root of the search for exploring on CyVerse
        :param exploration_name: name for the coupled file local copy
        :param previous_exploration_path: local address towards the file giving the coupled files from a previous
        FileShare exploration
        :param verbose: set to True to see fuller logs

        TODO: include a call to an iinit irods function
        """
        # case of file share exploration
        if analyze:
            try:
                self.all_files = findall_files(root, verbose)
                self.coupled_files = can_gps_coupling(self.all_files)
                self.max_index = len(self.coupled_files)
                # save the csv file
                output_filename = coupled_files_file_namer(exploration_name, root)
                df = pd.DataFrame(data={'Files': self.coupled_files})
                # the log is written under ./results: make sure the folder is there,
                # otherwise a successful exploration would be reported as failed
                os.makedirs('results', exist_ok=True)
                df.to_csv(path_or_buf=f'results/{output_filename}')
                if verbose:
                    print('exploration logged as: ', output_filename)
            except Exception as e:
                print(f'CyVerse FileShare exploration failed on: {e}')

        # case of using a file to get the coupled addresses
        else:
            try:
                df = pd.read_csv(previous_exploration_path)
                # entries read back from csv are strings; next() parses them
                self.coupled_files = df['Files']
                self.max_index = len(self.coupled_files)
            except Exception as e:
                print(f'retrieving from file at {previous_exploration_path} failed on: {e}')

    def __str__(self):
        if self.max_index is None:
            return 'FileShare exploration is not finished'
        return f'file handler with {self.max_index} couples, current index is: {self.index}'

    async def next(self, ignore_gps_file: bool = False):
        """
        clears cache & downloads the next couple of files

        The GPS download is skipped when ``ignore_gps_file`` is set or when
        the couple has no GPS file (``'gps'`` is None); in both cases the
        returned ``'gps'`` is None. The index only advances after the
        downloads succeeded.

        :param: ignore_gps_file: set to True to avoid downloading the GPS file
        :return: object with paths to the downloaded CAN and GPS file
        {'can': str, 'gps': str || None, 'remote_addresses': {'can': str, 'gps': str || None}}
        :raises Exception: every failure is re-raised as
        Exception('Downloading next file failed on <reason>'); the reason is
        'max_index' once all couples have been served, and
        'FileShare exploration is not finished' when explore has not
        succeeded yet
        """
        try:
            if self.max_index is None:
                raise Exception('FileShare exploration is not finished')
            if self.index >= self.max_index:
                raise Exception('max_index')

            cache = init_cache(self.local_root_folder)
            next_file = self.coupled_files[self.index]
            # couples loaded from a previous exploration csv are the textual
            # form of the dict and must be parsed back
            if isinstance(next_file, str):
                self.remote_addresses = ast.literal_eval(next_file)
            else:
                self.remote_addresses = next_file

            self.can_local_address = await iget(self.remote_addresses['can'], cache)

            remote_gps = self.remote_addresses.get('gps')
            if ignore_gps_file or remote_gps is None:
                # nothing to download: either not wanted or no GPS file was coupled
                self.gps_local_address = None
            else:
                self.gps_local_address = await iget(remote_gps, cache)

            self.index += 1

            return {
                'can': self.can_local_address,
                'gps': self.gps_local_address,
                'remote_addresses': self.remote_addresses
            }

        except Exception as e:
            raise Exception(f'Downloading next file failed on {e}') from e

    def clear(self):
        """Resets the cache folder through init_cache and prints a confirmation."""
        init_cache(self.local_root_folder)
        print('Cache cleared')

def coupled_files_file_namer(name, root):
    '''
    builds the csv file name under which an exploration is logged
    :param name: exploration name, inserted as is
    :param root: exploration root, inserted with every '/' replaced by '_'
    :return: str, also embedding the current date and time with the space replaced by '_'
    '''
    timestamp = str(datetime.now()).replace(" ", "_")
    flattened_root = root.replace("/", "_")
    return f'file_exploration&{name}&create_on={timestamp}&root={flattened_root}.csv'


In [None]:
# TO CALL TO PERFORM THE FILE EXPLORATION:
'''
options = {
    # 'local_root_folder': , # where to DL in local -> need to be defined globally
    # 'start_index': 0, # can be set otherwise if specified?
    # 'analyze': True, # this is because this triggers a file exploration on cyverse!
    'root': "", # root for analysis on CyVerse
    'exploration_name': "", # name to give to the file
    # -> nb: get this name from python to JS?
    # 'verbose': , # bool value - set to True for now, change later if needed
}


fh = FileHandler(local_root_folder, start_index=0)
fh.explore(analyze=True, root=options['root'], exploration_name=options['exploration_name'], verbose=False)
'''


In [None]:
# Fixed marker message emitted by the last statement of the visible file.
print('NOTE BOOK RAN')