In [None]:
!pip install hsclient
!pip install numpy
!pip install scipy

In [124]:
import pickle

# Hosts to benchmark; each one gets its own client and its own timing list.
targets = ["www.hydroshare.org"]
meta_run_number = 1 # manually change for first run/ second
if meta_run_number == 0:
    meta_runs = [{}, {}] # pre and post
else:
    # Later meta runs start from the results stored by an earlier one.
    # NOTE: pickle.load can execute arbitrary code from the file; only load a
    # 'pickle' file that this notebook wrote itself.
    # The context manager closes the file even if loading fails.
    with open('pickle', 'rb') as dbfile:
        meta_runs = pickle.load(dbfile)

# target host -> client object used by the benchmark functions
clients = {}
# Local file that the upload benchmarks send to the server.
filename = "local-build-discover.sh"
# target host -> list of elapsed seconds for the benchmark currently running
diffs= {}

In [125]:
from hsclient import HydroShare
# Create and sign in one client per target host, then register it.
for target in targets:
    hs = HydroShare(host=target, port=443, protocol='https')
    hs.sign_in()
    # Ad-hoc attribute: the benchmark helpers append every resource they
    # create to this list so later steps can download or delete them.
    hs.resources = []
    # Fresh list that will collect the elapsed times recorded for this target.
    diffs[target] = []
    clients[target] = hs

In [126]:
from time import time
from functools import wraps
import numpy as np
import scipy
from copy import deepcopy

def timer_func(func):
    """Decorate ``func`` so every call is timed and recorded in ``diffs``.

    The wrapped function runs unchanged. Afterwards the elapsed seconds are
    appended to ``diffs[target]`` and printed. ``target`` is taken from the
    call's arguments whether it was passed by keyword or by position; when no
    target is supplied the duration is printed but not recorded.

    Args:
        func: Callable to time; it is expected to accept a ``target`` argument.

    Returns:
        A wrapper with ``func``'s metadata that returns ``func``'s result.
    """
    import inspect
    from time import perf_counter

    signature = inspect.signature(func)

    @wraps(func)
    def wrap_func(*args, **kwargs):
        # perf_counter is monotonic, so system clock adjustments cannot skew
        # (or make negative) the measured interval.
        t1 = perf_counter()
        result = func(*args, **kwargs)
        diff = perf_counter() - t1
        # Resolve the target only after the call: the same arguments were just
        # accepted by ``func``, so binding them again cannot fail, and a
        # missing target can no longer discard a result that already exists.
        bound = signature.bind_partial(*args, **kwargs).arguments
        target = kwargs.get('target', bound.get('target'))
        if target is not None:
            diffs.setdefault(target, []).append(diff)
        print(f'Function {func.__name__!r} executed in {(diff):.4f}s')
        return result
    return wrap_func


def create_res(hs, time=True, target=None):
    """Create one resource through ``hs`` and track it in ``hs.resources``.

    Args:
        hs: Client exposing ``create()`` and a ``resources`` list.
        time: When true the creation runs under ``timer_func`` so its duration
            is recorded for ``target``; when false it runs untimed.
        target: Key handed to the timer; only relevant when ``time`` is true.
    """
    def create_res_time(hs=None, target=None):
        # Create first, then track, so a failed create leaves the list alone.
        new_resource = hs.create()
        hs.resources.append(new_resource)

    # Both paths share one body; only the timed path is wrapped.
    creator = timer_func(create_res_time) if time else create_res_time
    creator(hs=hs, target=target)

@timer_func
def delete_resources(hs=None, target=None):
    """Delete every resource tracked in ``hs.resources``, timed as one batch.

    Each handle is dropped from the list as soon as its delete succeeds, so a
    later call never retries a resource that is already gone. If a delete
    raises, the exception propagates and the failing resource plus everything
    after it stays tracked.

    Args:
        hs: Client whose ``resources`` list holds the resources to delete.
        target: Key under which ``timer_func`` records the elapsed time.
    """
    while hs.resources:
        # Delete front to back to keep the original deletion order.
        hs.resources[0].delete()
        del hs.resources[0]

@timer_func
def download_all_resources(hs=None, target=None):
    """Download every resource tracked on ``hs``, timed as one batch.

    Downloads are best-effort: a failure is printed and the loop moves on to
    the next resource instead of aborting the batch.

    Args:
        hs: Client whose ``resources`` list holds the resources to download.
        target: Key under which ``timer_func`` records the elapsed time.
    """
    for tracked in hs.resources:
        try:
            tracked.download()
        except Exception as err:
            # Report and continue so one bad resource does not end the run.
            print(err)

def download_single_resource(hs=None, target=None):
    """Time the download of one freshly created resource holding one file.

    Creating the resource and uploading ``filename`` are untimed setup; only
    the ``download()`` call runs under ``timer_func``.

    Args:
        hs: Client used to create the resource.
        target: Key under which the elapsed time is recorded.
    """
    # Untimed setup: a new resource containing the benchmark file.
    create_res(hs, False)
    fresh = hs.resources[-1]
    fresh.file_upload(filename)

    def download_single_resource_inner(hs=None, target=None):
        # Here ``hs`` is the resource handed in below, not the client.
        hs.download()

    timed_download = timer_func(download_single_resource_inner)
    timed_download(hs=fresh, target=target)

def delete_single_resource(hs=None, target=None):
    """Time the deletion of one freshly created resource holding one file.

    Creating the resource and uploading ``filename`` are untimed setup; only
    the ``delete()`` call runs under ``timer_func``.

    Args:
        hs: Client used to create the resource.
        target: Key under which the elapsed time is recorded.
    """
    # Untimed setup: a new resource containing the benchmark file.
    create_res(hs, False)
    doomed = hs.resources[-1]
    doomed.file_upload(filename)

    def delete_single_resource_inner(hs=None, target=None):
        # Here ``hs`` is the resource handed in below, not the client.
        hs.delete()

    timed_delete = timer_func(delete_single_resource_inner)
    timed_delete(hs=doomed, target=target)

def upload_file(hs, target=None):
    """Time uploading ``filename`` into a freshly created resource.

    Creating the resource is untimed setup; only the ``file_upload`` call runs
    under ``timer_func``.

    Args:
        hs: Client used to create the resource.
        target: Key under which the elapsed time is recorded.
    """
    # Untimed setup: an empty resource to upload into.
    create_res(hs, False)
    destination = hs.resources[-1]

    def upload_file_inner(hs=None, target=None):
        # Here ``hs`` is the resource handed in below, not the client.
        hs.file_upload(filename)

    timed_upload = timer_func(upload_file_inner)
    timed_upload(hs=destination, target=target)


def create_dir_upload_file(hs=None, target=None):
    """Time creating a folder and uploading ``filename`` into it.

    Creating the resource is untimed setup; the folder creation and the
    upload are timed together as one step.

    Args:
        hs: Client used to create the resource.
        target: Key under which the elapsed time is recorded.
    """
    # Untimed setup: an empty resource to work in.
    create_res(hs, False)
    workspace = hs.resources[-1]

    def create_dir_upload_file_inner(hs=None, target=None):
        # Here ``hs`` is the resource handed in below, not the client.
        folder = 'New_Folder'
        hs.folder_create(folder)
        hs.file_upload(filename, destination_path=folder)

    timer_func(create_dir_upload_file_inner)(hs=workspace, target=target)

def upload_and_movefile(hs=None, target=None):
    """Time uploading ``filename`` into a folder and then renaming it.

    Creating the resource is untimed setup. The timed step creates
    ``New_Folder``, uploads the file into it, looks the uploaded file up by
    path and renames it to the bare ``filename`` (presumably moving it to the
    resource root; confirm against the hsclient API).

    Args:
        hs: Client used to create the resource.
        target: Key under which the elapsed time is recorded.
    """
    create_res(hs, False)
    res = hs.resources[-1]

    @timer_func
    def upload_and_movefile_inner(hs=res, target=None):
        res.folder_create('New_Folder')
        res.file_upload(filename, destination_path='New_Folder')
        # The upload lands at New_Folder/<filename>. A stray "]" at the end of
        # this path used to make the lookup ask for a file that was never
        # uploaded.
        file = res.file(path=f"New_Folder/{filename}")
        res.file_rename(file, filename)
    upload_and_movefile_inner(hs=res, target=target)

def download_single_file(hs=None, target=None):
    """Time downloading a single file from a freshly created resource.

    Creating the resource and uploading ``filename`` are untimed setup; the
    file lookup and its download are timed together.

    Args:
        hs: Client used to create the resource.
        target: Key under which the elapsed time is recorded.
    """
    # Untimed setup: a new resource containing the benchmark file.
    create_res(hs, False)
    source = hs.resources[-1]
    source.file_upload(filename)

    def download_single_file_inner(hs=None, target=None):
        # Here ``hs`` is the resource handed in below, not the client.
        handle = hs.file(path=filename)
        hs.file_download(handle)

    timer_func(download_single_file_inner)(hs=source, target=target)

def delete_single_file(hs=None, target=None):
    """Time deleting a single file from a freshly created resource.

    Creating the resource and uploading ``filename`` are untimed setup; the
    file lookup and its deletion are timed together.

    Args:
        hs: Client used to create the resource.
        target: Key under which the elapsed time is recorded.
    """
    # Untimed setup: a new resource containing the benchmark file.
    create_res(hs, False)
    holder = hs.resources[-1]
    holder.file_upload(filename)

    def delete_single_file_inner(hs=None, target=None):
        # Here ``hs`` is the resource handed in below, not the client.
        handle = hs.file(path=filename)
        hs.file_delete(handle)

    timer_func(delete_single_file_inner)(hs=holder, target=target)
        

def run_comparisons(functions_to_run, runs=1):
    """Run each benchmark ``runs`` times per target and store its timings.

    After all runs of one benchmark, a deep copy of ``diffs`` is stored in
    ``meta_runs[meta_run_number]`` under the benchmark's name and the
    per-target timing lists are emptied for the next benchmark.

    Args:
        functions_to_run: Benchmark callables accepting ``hs`` and ``target``
            keyword arguments.
        runs: Number of repetitions per benchmark and target.
    """
    for funct in functions_to_run:
        bench_name = funct.__name__
        print(f"********* Start {bench_name!r} *********")
        for run_index in range(runs):
            for host in targets:
                # Look the client up before announcing the run, so a missing
                # client fails before anything is printed for it.
                host_client = clients[host]
                print(f"Starting run {run_index} of {bench_name} on {host}...")
                funct(hs=host_client, target=host)
                print(diffs[host])
        print("\n\n")
        # Snapshot the timings: the live lists are emptied right below.
        meta_runs[meta_run_number][bench_name] = deepcopy(diffs)
        for host in targets:
            diffs[host].clear()

def clear_resources():
    """Reset the tracked resources and recorded timings of every target.

    Each client gets a new empty ``resources`` list (the old list object is
    left untouched), while the target's timing list in ``diffs`` is emptied
    in place.
    """
    for host in targets:
        clients[host].resources = []
        diffs[host].clear()
  

In [130]:
# Start from empty resource and timing lists, then run each listed benchmark
# three times per target.
clear_resources()
run_comparisons([download_single_resource, create_dir_upload_file, delete_single_file ], runs=3)
# Time one bulk delete of the resources still tracked on each client.
run_comparisons([delete_resources], runs=1)

********* Start 'download_single_resource' *********
Starting run 0 of download_single_resource on www.hydroshare.org...
Function 'download_single_resource_inner' executed in 27.8414s
[27.841434955596924]
Starting run 1 of download_single_resource on www.hydroshare.org...
Function 'download_single_resource_inner' executed in 27.6452s
[27.841434955596924, 27.645233154296875]
Starting run 2 of download_single_resource on www.hydroshare.org...
Function 'download_single_resource_inner' executed in 27.4951s
[27.841434955596924, 27.645233154296875, 27.495064973831177]



********* Start 'create_dir_upload_file' *********
Starting run 0 of create_dir_upload_file on www.hydroshare.org...
Function 'create_dir_upload_file_inner' executed in 17.4058s
[17.405828714370728]
Starting run 1 of create_dir_upload_file on www.hydroshare.org...
Function 'create_dir_upload_file_inner' executed in 17.4545s
[17.405828714370728, 17.454525232315063]
Starting run 2 of create_dir_upload_file on www.hydroshare.or

In [131]:
# Start from empty resource and timing lists, then time ten uploads per target.
clear_resources()
run_comparisons([upload_file], runs=10)

********* Start 'upload_file' *********
Starting run 0 of upload_file on www.hydroshare.org...
Function 'upload_file_inner' executed in 4.4619s
[4.461931943893433]
Starting run 1 of upload_file on www.hydroshare.org...
Function 'upload_file_inner' executed in 4.5401s
[4.461931943893433, 4.5400989055633545]
Starting run 2 of upload_file on www.hydroshare.org...
Function 'upload_file_inner' executed in 4.6014s
[4.461931943893433, 4.5400989055633545, 4.601425886154175]
Starting run 3 of upload_file on www.hydroshare.org...
Function 'upload_file_inner' executed in 4.6115s
[4.461931943893433, 4.5400989055633545, 4.601425886154175, 4.611505031585693]
Starting run 4 of upload_file on www.hydroshare.org...
Function 'upload_file_inner' executed in 4.6045s
[4.461931943893433, 4.5400989055633545, 4.601425886154175, 4.611505031585693, 4.604548931121826]
Starting run 5 of upload_file on www.hydroshare.org...
Function 'upload_file_inner' executed in 4.8412s
[4.461931943893433, 4.5400989055633545, 4.

In [133]:
# Start from empty resource and timing lists, then run each listed benchmark
# five times per target. download_all_resources works on the resources that
# the preceding create_res runs left in each client's list.
clear_resources()
run_comparisons([create_res, download_all_resources, delete_single_resource], runs=5)

********* Start 'create_res' *********
Starting run 0 of create_res on www.hydroshare.org...
Function 'create_res_time' executed in 22.9165s
[22.91654920578003]
Starting run 1 of create_res on www.hydroshare.org...
Function 'create_res_time' executed in 22.7322s
[22.91654920578003, 22.732215642929077]
Starting run 2 of create_res on www.hydroshare.org...
Function 'create_res_time' executed in 22.7345s
[22.91654920578003, 22.732215642929077, 22.73453402519226]
Starting run 3 of create_res on www.hydroshare.org...
Function 'create_res_time' executed in 22.7235s
[22.91654920578003, 22.732215642929077, 22.73453402519226, 22.72350788116455]
Starting run 4 of create_res on www.hydroshare.org...
Function 'create_res_time' executed in 22.6281s
[22.91654920578003, 22.732215642929077, 22.73453402519226, 22.72350788116455, 22.628090143203735]



********* Start 'download_all_resources' *********
Starting run 0 of download_all_resources on www.hydroshare.org...
Function 'download_all_resources' ex

In [141]:
import pickle
# Overwrite rather than append: the loader reads back a single pickled object,
# so a dump appended after an earlier one would never be seen again.
# The context manager closes the file even if dumping fails.
with open('pickle', 'wb') as file:
    # source, destination
    pickle.dump(meta_runs, file)

In [142]:
# Read the stored results back so they can be inspected below. This cell has
# to be live: it is the only place `meta_runs_import` gets assigned, so with
# it commented out a fresh kernel fails on the next cell with NameError.
# NOTE: only load a 'pickle' file that this notebook wrote itself.
with open('pickle', 'rb') as dbfile:
    meta_runs_import = pickle.load(dbfile)

In [143]:
# Display the results read back from the pickle file.
# NOTE(review): needs `meta_runs_import` from the pickle-reload cell; raises
# NameError if that cell has not been run in this kernel.
meta_runs_import

[{'download_single_resource': {'www.hydroshare.org': [19.69610905647278,
    19.657349824905396,
    19.383678197860718]},
  'create_dir_upload_file': {'www.hydroshare.org': [10.0984628200531,
    10.677123785018921,
    10.388859987258911]},
  'delete_single_file': {'www.hydroshare.org': [15.132337808609009,
    15.295869827270508,
    14.919397115707397]},
  'delete_resources': {'www.hydroshare.org': [129.64671778678894]},
  'upload_file': {'www.hydroshare.org': [2.82197904586792,
    2.8427178859710693,
    2.9220480918884277,
    2.825402021408081,
    2.881467819213867,
    2.8290460109710693,
    2.7914412021636963,
    2.791268825531006,
    2.818359851837158,
    2.8164680004119873]},
  'create_res': {'www.hydroshare.org': [15.444867134094238,
    13.95179295539856,
    13.65056300163269,
    14.359339952468872,
    13.765339136123657]},
  'download_all_resources': {'www.hydroshare.org': [51.34781503677368,
    12.999413967132568,
    13.336390972137451,
    13.481292009353638,

In [None]:
# TODO: these ones still seem not to work...
#clear_resources()
#run_comparisons([upload_and_movefile, download_single_file ], runs=2)

In [164]:
def run_stats():
    """Print summary statistics and t-tests for the timings in ``meta_runs``.

    For every benchmark recorded in the first meta run this prints, per meta
    run and per target, the mean and standard deviation of its timings. When
    a meta run holds exactly two targets, their samples are compared with an
    independent two-sample t-test. When there are exactly two meta runs, the
    same test compares them target by target. Missing or empty samples are
    reported and skipped instead of raising.
    """
    # Import the submodule explicitly: a bare ``import scipy`` is not
    # guaranteed to expose ``scipy.stats`` on every SciPy version.
    from scipy import stats

    for func in meta_runs[0].keys():
        for idx, meta_run in enumerate(meta_runs):
            print(f"Analyzing function {func} on meta run number {idx}")
            target_obj = meta_run.get(func)
            if target_obj is None:
                # A benchmark may exist in only one meta run; skip it here so
                # the cross-run section below can still report it.
                print(f"No timings recorded for {func} in meta run number {idx}")
                continue
            for target, runs in target_obj.items():
                if not runs:
                    # An empty sample has no mean; avoid dividing by zero.
                    print(f"No timings recorded for {func} on {target}")
                    continue
                print(f"Average time for {func} on {target}: {sum(runs)/len(runs)}")
                print(f"Devation for {func} on {target}: {np.std(runs)}")
            if len(target_obj.keys()) == 2:
                print(f"Comparing runs of {func} between targets {target_obj.keys()}")
                # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html
                # dict views cannot be indexed, so unpack the two samples.
                first_sample, second_sample = target_obj.values()
                result = stats.ttest_ind(first_sample, second_sample)
                print(result)
        if len(meta_runs) == 2:
            print("Comparing between two meta runs")
            if func in meta_runs[0] and func in meta_runs[1]:
                for target in meta_runs[0][func].keys():
                    if target not in meta_runs[1][func]:
                        print(f"{target} is missing from the second meta run for {func}")
                        continue
                    print(f"Comparing meta runs for target {target}")
                    # NOTE(review): with a single timing per meta run the
                    # test has zero degrees of freedom, so the result is
                    # presumably nan - confirm before reading anything into it.
                    result = stats.ttest_ind(meta_runs[0][func][target], meta_runs[1][func][target])
                    print(result)
            else:
                print(f"{func} values are missing from one of the meta runs")
        else:
            print("Meta run number not = 2 so can't compare between meta runs")
        print("\n\n")

In [165]:
# Print the per-run statistics and the t-test comparisons for the stored timings.
run_stats()

Analyzing function download_single_resource on meta run number 0
Average time for download_single_resource on www.hydroshare.org: 19.57904569307963
Devation for download_single_resource on www.hydroshare.org: 0.1390489437788182
Analyzing function download_single_resource on meta run number 1
Average time for download_single_resource on www.hydroshare.org: 27.660577694574993
Devation for download_single_resource on www.hydroshare.org: 0.1418206196911226
Comparing between two meta runs
Comparing meta runs for target www.hydroshare.org
TtestResult(statistic=-57.543711289881045, pvalue=5.461172675021771e-07, df=4.0)



Analyzing function create_dir_upload_file on meta run number 0
Average time for create_dir_upload_file on www.hydroshare.org: 10.388148864110311
Devation for create_dir_upload_file on www.hydroshare.org: 0.23623788486169744
Analyzing function create_dir_upload_file on meta run number 1
Average time for create_dir_upload_file on www.hydroshare.org: 17.39472730954488
Devation 

  svar = ((n1 - 1) * v1 + (n2 - 1) * v2) / df
