In [1]:
import os

In [2]:
import numpy as np

In [3]:
from shutil import copyfile

In [4]:
from pyfileindex import PyFileIndex 

In [5]:
from pyiron import Project

In [6]:
def new_job_id(job_id, job_translate_dict):
    """
    Translate an original job ID into its renumbered ID.

    Args:
        job_id (int/float/None): ID taken from a job table column. Float values such as 0.0
            are converted to integers before the lookup; NaN marks a missing reference.
            NumPy integer and floating point scalars are accepted as well.
        job_translate_dict (dict): mapping from original job ID to new job ID.

    Returns:
        int/None: the new job ID, or None when job_id is NaN or not a number at all.

    Raises:
        KeyError: if job_id is a number that is not a key of job_translate_dict.
    """
    # np.floating / np.integer are listed explicitly because scalars such as np.float32 or
    # np.int64 are not subclasses of the builtin float / int and would otherwise be dropped.
    if isinstance(job_id, (float, np.floating)) and not np.isnan(job_id):
        job_id = int(job_id)
    if isinstance(job_id, (int, np.integer)):
        return job_translate_dict[job_id]
    return None

In [7]:
def getdir(path):
    """
    Return the name of the last directory component of a path.

    A trailing path separator is ignored, so "a/b" and "a/b/" both give "b".

    Args:
        path (str): path to inspect.

    Returns:
        str: the last path component, or the parent's last component if path ends
            with a separator.
    """
    parent, name = os.path.split(path)
    if name != "":
        return name
    # path ended with a separator, so the wanted name is the tail of the parent part
    return os.path.basename(parent)

In [8]:
def update_project(directory_to_transfer, archive_directory, df):
    """
    Compute the "project" entries of the job table as they should read inside the archive.

    Args:
        directory_to_transfer (str): path of the project folder that is exported.
        archive_directory (str): path of the archive folder.
        df: job table providing a "project" column (DataFrame-like).

    Returns:
        list: one path per entry of df["project"], built as
            <archive folder name>/<transferred folder name>[/<sub path>].
    """
    pr_transfer = Project(directory_to_transfer)
    # Only the folder names enter the new paths, not the full locations of the two folders.
    archive_prefix = os.path.join(
        getdir(path=archive_directory), getdir(path=directory_to_transfer)
    )
    new_project_lst = []
    for project in df["project"].values:
        sub_path = os.path.relpath(project, pr_transfer.project_path)
        if sub_path == ".":
            # entry refers to the transferred project folder itself
            new_project_lst.append(archive_prefix)
        else:
            new_project_lst.append(os.path.join(archive_prefix, sub_path))
    return new_project_lst

In [9]:
def filter_function(file_name):
    """
    Tell whether a file name should be picked up by the file index.

    Args:
        file_name (str): name of the file to check.

    Returns:
        bool: True if the name contains the HDF5 marker '.h5' anywhere, else False.
    """
    hdf5_marker = '.h5'
    return hdf5_marker in file_name

In [10]:
def generate_list_of_directories(df_files, directory_to_transfer, archive_directory):
    """
    List the folders that have to exist in the archive to mirror the indexed files.

    Args:
        df_files: file table with a "dirname" column (DataFrame-like).
        directory_to_transfer (str): path of the project folder that is exported.
        archive_directory (str): path of the archive folder.

    Returns:
        list: for every unique entry of df_files.dirname the matching path
            <archive_directory>/<transferred folder name>[/<sub path>].
    """
    sub_path_lst = [
        os.path.relpath(source_dir, directory_to_transfer)
        for source_dir in df_files.dirname.unique()
    ]
    target_root = os.path.join(archive_directory, getdir(path=directory_to_transfer))
    target_dir_lst = []
    for sub_path in sub_path_lst:
        if sub_path == ".":
            # files located directly in the transferred folder
            target_dir_lst.append(target_root)
        else:
            target_dir_lst.append(os.path.join(target_root, sub_path))
    return target_dir_lst

In [11]:
def copy_files_to_archive(directory_to_transfer, archive_directory):
    """
    Copy the indexed files of a project folder into the archive, keeping the folder layout.

    The files are selected by filter_function (names containing '.h5') and end up below
    <archive_directory>/<transferred folder name>/.

    Args:
        directory_to_transfer (str): path of the project folder that is exported.
        archive_directory (str): path of the archive folder.
    """
    file_index = PyFileIndex(path=directory_to_transfer, filter_function=filter_function)
    # Keep only the rows describing files; directory rows are dropped.
    df_files = file_index.dataframe[~file_index.dataframe.is_directory]

    # Create the target folders first, copyfile() does not create missing parents
    target_dir_lst = generate_list_of_directories(
        df_files=df_files,
        directory_to_transfer=directory_to_transfer,
        archive_directory=archive_directory,
    )
    for target_dir in target_dir_lst:
        os.makedirs(target_dir, exist_ok=True)

    # Copy every file to the same relative location below the archive
    target_root = os.path.join(archive_directory, getdir(path=directory_to_transfer))
    for source_file in df_files.path.values:
        sub_path = os.path.relpath(source_file, directory_to_transfer)
        copyfile(source_file, os.path.join(target_root, sub_path))

In [12]:
def export_database(directory_to_transfer, archive_directory):
    """
    Create the job table that accompanies the archived project.

    The job IDs are renumbered consecutively from 0 in ascending order of the original IDs,
    the master and parent references are translated with the same mapping, the "project"
    column is rewritten to the archive layout and the "projectpath" column is removed.

    Args:
        directory_to_transfer (str): path of the project folder that is exported.
        archive_directory (str): path of the archive folder.

    Returns:
        The modified job table of the project.
    """
    df = Project(directory_to_transfer).job_table()
    job_translate_dict = {
        old_id: new_id for new_id, old_id in enumerate(sorted(df.id.values))
    }
    # 'id' first, then the two reference columns, all translated with the same mapping
    for column in ('id', 'masterid', 'parentid'):
        df[column] = [
            new_job_id(job_id=job_id, job_translate_dict=job_translate_dict)
            for job_id in df[column]
        ]
    df['project'] = update_project(
        directory_to_transfer=directory_to_transfer,
        archive_directory=archive_directory,
        df=df,
    )
    del df["projectpath"]
    return df

In [13]:
# Absolute path of the folder "murn" in the current working directory: the project to export.
directory_to_transfer = os.path.abspath("murn")

In [14]:
# Absolute path of the folder "archive" in the current working directory: the export target.
archive_directory = os.path.abspath("archive")

In [15]:
# Mirror the '.h5' files of the project folder into the archive folder.
copy_files_to_archive(directory_to_transfer=directory_to_transfer, archive_directory=archive_directory)

In [16]:
# Build the renumbered job table for the archived project and display it.
df = export_database(directory_to_transfer=directory_to_transfer, archive_directory=archive_directory)
df

Unnamed: 0,id,status,chemicalformula,job,subjob,project,timestart,timestop,totalcputime,computer,hamilton,hamversion,parentid,masterid
0,0,finished,Al,murn,/murn,archive/murn,2020-11-10 11:14:21.689916,2020-11-10 11:14:45.113006,23.0,pyiron@jupyter-jan-2djanssen-2dpyi-2dort-2dto-2dmybinder-2d2qja9tbo#1#11/11,Murnaghan,0.3.0,,
1,1,finished,Al,strain_0_9,/strain_0_9,archive/murn/murn_hdf5,2020-11-10 11:14:22.912227,2020-11-10 11:14:23.365173,0.0,pyiron@jupyter-jan-2djanssen-2dpyi-2dort-2dto-2dmybinder-2d2qja9tbo#1,Lammps,0.1,,0.0
2,2,finished,Al,strain_0_92,/strain_0_92,archive/murn/murn_hdf5,2020-11-10 11:14:24.927981,2020-11-10 11:14:25.394044,0.0,pyiron@jupyter-jan-2djanssen-2dpyi-2dort-2dto-2dmybinder-2d2qja9tbo#1,Lammps,0.1,,0.0
3,3,finished,Al,strain_0_94,/strain_0_94,archive/murn/murn_hdf5,2020-11-10 11:14:26.931778,2020-11-10 11:14:27.421954,0.0,pyiron@jupyter-jan-2djanssen-2dpyi-2dort-2dto-2dmybinder-2d2qja9tbo#1,Lammps,0.1,,0.0
4,4,finished,Al,strain_0_96,/strain_0_96,archive/murn/murn_hdf5,2020-11-10 11:14:29.007742,2020-11-10 11:14:29.431261,0.0,pyiron@jupyter-jan-2djanssen-2dpyi-2dort-2dto-2dmybinder-2d2qja9tbo#1,Lammps,0.1,,0.0
5,5,finished,Al,strain_0_98,/strain_0_98,archive/murn/murn_hdf5,2020-11-10 11:14:30.930220,2020-11-10 11:14:31.406885,0.0,pyiron@jupyter-jan-2djanssen-2dpyi-2dort-2dto-2dmybinder-2d2qja9tbo#1,Lammps,0.1,,0.0
6,6,finished,Al,strain_1_0,/strain_1_0,archive/murn/murn_hdf5,2020-11-10 11:14:32.937797,2020-11-10 11:14:33.441668,0.0,pyiron@jupyter-jan-2djanssen-2dpyi-2dort-2dto-2dmybinder-2d2qja9tbo#1,Lammps,0.1,,0.0
7,7,finished,Al,strain_1_02,/strain_1_02,archive/murn/murn_hdf5,2020-11-10 11:14:34.940175,2020-11-10 11:14:35.461496,0.0,pyiron@jupyter-jan-2djanssen-2dpyi-2dort-2dto-2dmybinder-2d2qja9tbo#1,Lammps,0.1,,0.0
8,8,finished,Al,strain_1_04,/strain_1_04,archive/murn/murn_hdf5,2020-11-10 11:14:37.038421,2020-11-10 11:14:37.536668,0.0,pyiron@jupyter-jan-2djanssen-2dpyi-2dort-2dto-2dmybinder-2d2qja9tbo#1,Lammps,0.1,,0.0
9,9,finished,Al,strain_1_06,/strain_1_06,archive/murn/murn_hdf5,2020-11-10 11:14:39.139254,2020-11-10 11:14:39.683094,0.0,pyiron@jupyter-jan-2djanssen-2dpyi-2dort-2dto-2dmybinder-2d2qja9tbo#1,Lammps,0.1,,0.0


In [17]:
# Write the exported job table to "export.csv" in the current working directory.
# NOTE(review): with pandas defaults the index is written as an unnamed first column — confirm the importing side expects that.
df.to_csv("export.csv")