# Process Roll and Test Performance

#### Updated: Dec 2, 2022

#  

Process one random roll and test performance. Compare performance between local processing on CSA laptop with VDI HP (CSA performant virtual machine). 

In [1]:
import pandas as pd
import os
import time
from datetime import datetime

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
dataDir = 'C:/Users/rnaidoo/Documents/Projects_data/Alouette_I/01_intake/'
outputDir = 'C:/Users/rnaidoo/Documents/Projects_data/Alouette_I/02_result/'

#  

Process by roll:

In [4]:
rollnames = []
for file in os.listdir(dataDir):
    if 'R' in file:
        rollnames.append(file)

In [5]:
if len(rollnames) == 1:
    start = time.time()
    !python scan2data/user_input.py $dataDir $outputDir
    end = time.time()
    t = end - start
    print('Runtime: ' + str(round(t/60, 1)) + ' min')

Runtime: 2.1 min


Record performance:

In [6]:
df_num = pd.read_csv(outputDir + 'num_data.csv')
df_loss = pd.read_csv(outputDir + 'loss.csv')
df_outlier = pd.read_csv(outputDir + 'outlier.csv')
n_num = len(df_num)
n_loss = len(df_loss)
n_outlier = len(df_outlier)
images_processed = n_num + n_loss + n_outlier
print('Process Rate: ' + str(round(images_processed/t, 1)) + ' images/s')

Process Rate: 0.8 images/s


In [7]:
df_result = pd.DataFrame({
    'Roll': rollnames[0],
    'Processing_Time': t,
    'Images_processed': images_processed,
    'Process_timestamp': datetime.fromtimestamp(end)
}, index=[0])

In [8]:
df_result

Unnamed: 0,Roll,Processing_Time,Images_processed,Process_timestamp
0,R014207824,125.133207,99,2022-12-02 20:16:11.227465


In [9]:
if os.path.exists(outputDir + 'process_log.csv'):
    df_log = pd.read_csv(outputDir + 'process_log.csv')
    df_update = pd.concat([df_log, df_result], axis=0, ignore_index=True)
    df_update.to_csv(outputDir, index=False)
else:
    df_result.to_csv(outputDir, index=False)

PermissionError: [Errno 13] Permission denied: 'C:/Users/rnaidoo/Documents/Projects_data/Alouette_I/02_result/'

#  

#### Development:

Organize numpy arrays into folders by roll and subdirectory:

In [None]:
for file in os.listdir(outputDir):
    if 'mapped_coords' in file:
        fn = file.replace('mapped_coords-', '')
        fn = fn.replace('.npy', '')
        fn_parts = fn.split('_')
        newDir = outputDir + fn_parts[0] + '/' + fn_parts[1] + '/'
        os.makedirs(newDir, exist_ok=True)
        os.rename(outputDir+file, newDir+file)

#  

Demonstrate how to copy file from FTP to local intake directory:

In [None]:
src = 'ftp://donnees-data.asc-csa.gc.ca/users/OpenData_DonneesOuvertes/pub/AlouetteData/Alouette Data/R014207824/3143-14A/'
shutil.copytree(src, dataDir)

In [None]:
os.listdir(src)