In [None]:
import time
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import flywheel

def get_resources(fw, series, jobs):
    """Collect resource-usage rows for the analyses that ``jobs`` wrote to.

    For each job the destination analysis is fetched through ``fw``. When the
    analysis ``info`` contains a ``'resources used'`` entry, one row is
    appended to ``series`` holding the job id and creation time, the group
    id, the project/subject/session labels (``"none"`` when the analysis has
    no subject or session parent), the analysis id, the gear name and
    version, and every key/value pair found under ``'resources used'``.

    Project, session and subject labels are fetched at most once per
    distinct container id during a call, so many jobs sharing the same
    containers do not trigger a fresh lookup each time.

    Any exception raised while processing a job is printed and that job is
    skipped; the loop carries on with the next job.

    Args:
        fw: Client object providing ``get_analysis``, ``get_project``,
            ``get_session`` and ``get_subject``.
        series: List the rows are appended to (mutated in place). Rows that
            are already in the list are kept and also end up in the frame.
        jobs: Iterable of job objects exposing ``id``, ``created`` and
            ``destination.id``.

    Returns:
        tuple: ``(series, df)`` where ``series`` is the list that was passed
        in and ``df`` is ``pd.DataFrame(series)``.
    """
    label_cache = {}

    def lookup_label(kind, fetch, container_id):
        # Failed lookups are never stored, so an unreachable container is
        # still reported for every job that refers to it.
        key = (kind, container_id)
        if key not in label_cache:
            label_cache[key] = fetch(container_id).label
        return label_cache[key]

    for job in jobs:
        print(job.id, job.destination.id)
        # time.sleep(1)  # might help for slow VPNs
        try:
            analysis = fw.get_analysis(job.destination.id)
            parents = analysis.parents
            group_id = parents.group
            project_label = lookup_label('project', fw.get_project, parents.project)
            # Analyses may hang directly off a project or subject, in which
            # case the missing levels are reported as "none".
            if parents.session:
                session_label = lookup_label('session', fw.get_session, parents.session)
            else:
                session_label = "none"
            if parents.subject:
                subject_label = lookup_label('subject', fw.get_subject, parents.subject)
            else:
                subject_label = "none"
            if 'resources used' in analysis.info:
                row = {'job_id': job.id,
                       'job_created': job.created,
                       'group_id': group_id,
                       'project_label': project_label,
                       'subject_label': subject_label,
                       'session_label': session_label,
                       'analysis_id': analysis.id,
                       'gear_name': analysis['gear_info']['name'],
                       'gear_version': analysis['gear_info']['version']}
                # Each resource metric becomes its own column in the frame.
                row.update(pd.Series(analysis.info['resources used']))
                series.append(row)
        except Exception as e:
            # Best effort: report the problem and move on to the next job.
            print("oops", str(e))
    print(f'Found {len(series)} gear runs with resource data')
    df = pd.DataFrame(series)
    return series, df

def get_job_resources(gear_name, limit=5000):
    """Return a DataFrame of resource usage for the jobs of one gear.

    Creates a Flywheel client, prints the API URL of the site it is
    connected to, searches for jobs whose ``gear_info.name`` equals
    ``gear_name`` and hands them to ``get_resources``.

    Args:
        gear_name: Gear name used in the job search filter.
        limit: Value passed as ``limit`` to ``fw.jobs.find``. Defaults to
            5000, the value that used to be hard-coded here.

    Returns:
        pandas.DataFrame: The frame built by ``get_resources``; one row per
        job whose analysis recorded resource data.
    """
    # NOTE(review): Client() is called without arguments, so it presumably
    # relies on credentials from a prior CLI login — confirm.
    fw = flywheel.Client()
    print(fw.get_config().site.api_url)
    jobs = fw.jobs.find(f'gear_info.name={gear_name}', limit=limit)
    print(f"found {len(jobs)} {gear_name} jobs")
    if limit is not None and len(jobs) >= limit:
        # Hitting the cap most likely means older/newer jobs were cut off.
        print(f"warning: job search reached limit={limit}; results may be truncated")
    _, df = get_resources(fw, [], jobs)
    return df

In [None]:
# Gather resource usage for every bids-fmriprep job and show the columns that
# matter for sizing.
df = get_job_resources("bids-fmriprep")
# reindex() instead of df[[...]]: a column that no job reported (or an empty
# result with no columns at all) shows up as NaN rather than raising KeyError.
df.reindex(columns=[
    "job_created",
    "project_label",
    "subject_label",
    "session_label",
    "Maximum resident set size (kbytes)",
    "Exit status",
    "Elapsed (wall clock) time (h:mm:ss or m:ss)",
    "File system inputs",
    "File system outputs",
    "Percent of CPU this job got",
    "Swaps",
])

In [None]:
# Save the complete table (all columns, not just the summary view) as a
# tab-separated, UTF-8 encoded file in the current working directory.
df.to_csv(
    path_or_buf='fMRIPrep_Resources.tsv',
    encoding='utf-8',
    sep='\t',
)