# Uploading project files to the OSF. 

Install the osfclient via: 

```
> pip install osfclient
```

Now, edit the information in the **Project specific variables** section and run the notebook. 

# Utility functions

In [None]:
import osfclient 
from osfclient.models.storage import Storage
import os
from pathlib import Path
from typing import Callable, Optional


def get_storage(username: str, password: str, project_id: str, storage_provider: str = 'osfstorage') -> Storage:
    """
    Log in to the OSF and return one storage of a project.

    'username' and 'password' are handed to osfclient.OSF, 'project_id' is
    used to look up the project, and 'storage_provider' selects which of the
    project's storages is returned (default: 'osfstorage').
    """
    client = osfclient.OSF(username=username, password=password)
    return client.project(project_id).storage(provider=storage_provider)


def upload_project_files(project_root: Path, storage: Storage, is_uploadable: Callable, dry_run: Optional[bool]=True) -> None:
    """
    Walk 'project_root' and upload every file accepted by 'is_uploadable'.

    'is_uploadable' is called as is_uploadable(directory, file_name), both as
    Path objects, where 'directory' is the folder that contains the file. A
    truthy result selects the file.

    Each selected file is reported with print(). Unless 'dry_run' is truthy it
    is then opened in binary mode and passed to storage.create_file, using its
    path relative to 'project_root' as the destination path.
    """
    for dirpath, _subdirs, filenames in os.walk(project_root, topdown=True):
        # The containing directory is the same for every file of this step.
        parent = Path(dirpath)
        for filename in map(Path, filenames):
            if not is_uploadable(parent, filename):
                continue

            file_path = parent.joinpath(filename)
            # Destination mirrors the layout below project_root.
            new_file_path = parent.relative_to(project_root).joinpath(filename)

            print(f'     uploading: {file_path}\n            to: {new_file_path}\n at storage id: {storage.id}\n')

            if dry_run:
                continue

            with file_path.open(mode='rb') as handle:
                storage.create_file(path=str(new_file_path), fp=handle)

# Project specific variables 

1. Enter your OSF credentials and the project ID. The project ID is the last part of the project URL: for https://osf.io/dkz83/ the project ID is `dkz83`. Also enter the full path to the local project root directory (as a `pathlib.Path` object) below:

In [None]:
# Credentials are read from the OSF_PASSWORD / OSF_USERNAME environment
# variables when they are set, so secrets do not have to be saved inside the
# notebook. If a variable is not set, the placeholder below is used instead;
# replace it by hand only if you cannot use environment variables.
pw = os.environ.get("OSF_PASSWORD", "***")
un = os.environ.get("OSF_USERNAME", "***")
# Project ID: the last component of the project URL (https://osf.io/<pid>/).
pid = "dkz83"
# Local directory that is walked when looking for files to upload.
project_root = Path("/Users/robertarbon/Documents/Consulting/helicity_e_lang/12-Folding_2/")

2. Create a `keep_file` function that returns `True` for files you want to upload and `False` for all others. It is called with the directory containing the file and the file name, both as `pathlib.Path` objects.

In [None]:

def keep_file(path: Path, file: Path) -> bool:
    """
    Decide whether 'file' should be uploaded.

    'path' is the directory that contains 'file'; 'file' is the bare file name.
    Returns True only for files called 'prot1_capped.pdb' whose directory sits
    directly inside a '2-Run1' directory.
    """
    wanted_name = 'prot1_capped.pdb'
    # Relative patterns are matched against the end of 'path', so this needs
    # '2-Run1' to be the second-to-last component of the directory.
    wanted_location = '*/2-Run1/*'

    if not file.match(wanted_name):
        return False
    return path.match(wanted_location)



# Example inputs for trying out keep_file before any real upload.
good_path = '12-Folding_2/1-A4_K4E4_A4_K4E4_A4/2-Run1/igb1_ff98/'
good_file = 'prot1_capped.pdb'
bad_path = '12-Folding_2/1-A4_K4E4_A4_K4E4_A4/5-Reproducibility/igb1_ff98/'
bad_file = 'prot1.pdb'

# One result per line, in the order the combinations are listed.
print(
    *(
        keep_file(Path(directory), Path(name))
        for directory, name in (
            (good_path, good_file),  # matching directory and file name
            (bad_path, good_file),   # directory is not inside '2-Run1'
            (good_path, bad_file),   # file name differs
        )
    ),
    sep='\n',
)

3. Upload files (set `dry_run=False` to actually upload the files, otherwise it just prints which files go where):

In [None]:
# Connect to the project's storage using the credentials and project ID set above.
storage = get_storage(un, pw, pid)
# dry_run=True only prints which files would go where; use dry_run=False to upload.
upload_project_files(project_root=project_root, storage=storage, is_uploadable=keep_file, dry_run=True)