<a href="https://colab.research.google.com/github/fireicewolf/HFtoMS/blob/main/HFtoMS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title Install dependencies
# Install (or upgrade, -U) the two client libraries imported by the transfer
# cell: huggingface-hub and modelscope. --no-cache-dir stops pip from keeping
# downloaded wheels, and the purge drops anything already cached.
!pip install --no-cache-dir -U huggingface-hub modelscope
!pip cache purge

In [None]:
# @title Remove useless python package for more storage space.
# Optional cell: uninstalls large preinstalled packages (TensorFlow family,
# PyTorch family, OpenCV, Sphinx) to free disk space. -y answers the
# confirmation prompt automatically.
# NOTE(review): the transfer cell only imports huggingface_hub and modelscope
# directly; whether either needs one of these packages at runtime is not
# visible here, so confirm before running.
!pip uninstall tensorboard tensorboard-data-server tensorflow tensorflow-datasets tensorflow-estimator tensorflow-gcs-config tensorflow-hub tensorflow-io-gcs-filesystem tensorflow-metadata tensorflow-probability tensorstore -y
!pip uninstall torch torchaudio torchsummary torchtext torchvision triton -y
!pip uninstall opencv-python-headless opencv-python opencv-contrib-python -y
!pip uninstall Sphinx sphinxcontrib-applehelp sphinxcontrib-devhelp sphinxcontrib-htmlhelp sphinxcontrib-jsmath sphinxcontrib-qthelp sphinxcontrib-serializinghtml -y

In [None]:
# @title Cleanup workspace
# Switch to /content (the same directory the transfer cell uses as WORKSPACE)
# and delete everything the shell glob matches there.
# WARNING: this is destructive; any files previously placed in /content are
# removed. The `*` glob does not match dot-files, so hidden entries remain.
%cd /content
!rm -rf /content/*

In [None]:
#@title HF to MS
import math
import os

from huggingface_hub import HfApi,login, list_repo_tree, hf_hub_download
from huggingface_hub.hf_api import RepoFile, RepoFolder

from modelscope.hub.api import HubApi
from modelscope.hub.constants import ModelVisibility

# Directory in which the ModelScope repo is cloned and files are downloaded.
WORKSPACE = "/content"
# --- Hugging Face (source) settings: access token, repo id, recursive-listing
# flag, repo type and revision. The `# @param` markers turn them into Colab
# form fields.
HF_TOKEN = "" # @param {type:"string"}
HF_REPO_ID = "" # @param {type:"string"}
HF_RECURSIVE = False # @param {type:"boolean"}
HF_REPO_TYPE = "model" # @param {type:"string"}
HF_REVISION = "main" # @param {type:"string"}
# --- ModelScope (destination) settings: access token, repo id, visibility of
# a newly created repo, branch to push to, and the git commit message.
MS_TOKEN = "" # @param {type:"string"}
MS_REPO_ID = "" # @param {type:"string"}
MS_REPO_PRIVATE = True # @param {type:"boolean"}
MS_REVISION = "master" # @param {type:"string"}
MS_COMMIT = "Upload to ModelScope" # @param {type:"string"}

def login_hf(hf_token: str) -> None:
    """Log in to the Hugging Face Hub with the given access token.

    Thin wrapper around ``huggingface_hub.login``.

    Args:
        hf_token: Hugging Face access token, forwarded as ``token``.
    """
    login(token=hf_token)

def list_hf_repo_tree(
        hf_repo_id: str,
        hf_recursive: bool = True,
        hf_repo_type: str = "model",
        hf_revision: str = "main"
    ):
    """Return the entries of a Hugging Face repo tree as a list.

    Args:
        hf_repo_id: Id of the repo to list.
        hf_recursive: Forwarded as ``recursive`` to ``list_repo_tree``.
        hf_repo_type: Forwarded as ``repo_type``.
        hf_revision: Forwarded as ``revision``.

    Returns:
        A list holding every item yielded by ``list_repo_tree`` (the caller
        distinguishes ``RepoFile`` from ``RepoFolder`` entries).
    """
    tree_entries = list_repo_tree(
        hf_repo_id,
        recursive=hf_recursive,
        repo_type=hf_repo_type,
        revision=hf_revision,
    )
    # Materialise the iterable so callers can take len() and iterate twice.
    return [entry for entry in tree_entries]

def hf_file_download(hf_repo_id: str, hf_path: str, hf_repo_type: str, hf_revision: str,local_dir: str):
    """Download a single file from a Hugging Face repo into ``local_dir``.

    The repo-relative ``hf_path`` is split into its directory part (passed as
    ``subfolder``) and its file name, so the file is placed under the same
    relative path inside ``local_dir``.

    Args:
        hf_repo_id: Id of the source repo.
        hf_path: Path of the file inside the repo.
        hf_repo_type: Forwarded as ``repo_type``.
        hf_revision: Forwarded as ``revision``.
        local_dir: Directory the file is downloaded into.
    """
    hf_subfolder, hf_filename = os.path.split(hf_path)
    # `resume_download` is deprecated in huggingface_hub (downloads resume
    # automatically whenever possible), so it is no longer passed; this
    # notebook always installs the latest release with `pip install -U`.
    hf_hub_download(
        hf_repo_id,
        hf_filename,
        subfolder=hf_subfolder,
        repo_type=hf_repo_type,
        revision=hf_revision,
        local_dir=local_dir,
    )

def ms_create_repo(
        ms_token: str,
        ms_repo_id: str,
        ms_repo_private: bool,
    ):
    """Log in to ModelScope and make sure the target model repo exists.

    Args:
        ms_token: ModelScope access token passed to ``HubApi.login``.
        ms_repo_id: Id of the model repo to look up or create.
        ms_repo_private: When true a newly created repo is private,
            otherwise public.

    Returns:
        The first element of the ``HubApi.login`` result; the caller uses it
        as the git credential when cloning.
    """
    hub = HubApi()
    login_result = hub.login(ms_token)
    git_token = login_result[0]

    print(f'Creating repo "{ms_repo_id}" on ModelScope...')
    try:
        # Any failure of the lookup is treated as "repo does not exist yet".
        hub.get_model(ms_repo_id)
        print(f'"{ms_repo_id}" already exist on ModelScope!')
    except Exception:
        if ms_repo_private:
            visibility = ModelVisibility.PRIVATE
        else:
            visibility = ModelVisibility.PUBLIC
        hub.create_model(ms_repo_id, visibility=visibility)
        print(f'"{ms_repo_id}" created on ModelScope')

    return git_token


def ms_git_clone_repo(ms_git_token, ms_repo_id, ms_revision):
    os.chdir(WORKSPACE)
    repo_on_local_path = os.path.join(WORKSPACE, os.path.basename(ms_repo_id))
    print('Disabling Git LFS...')
    os.environ["GIT_LFS_SKIP_SMUDGE"] = "1"
    !git lfs uninstall
    if os.path.exists(repo_on_local_path):
        print(f'"{repo_on_local_path}" already exist, will delete it!!!')
        !rm -rf "{repo_on_local_path}"

    !git clone -b {ms_revision} http://outh2:{ms_git_token}@www.modelscope.cn/{ms_repo_id}.git {repo_on_local_path}
    os.chdir(repo_on_local_path)
    !git switch {ms_revision}
    print('Enabling Git LFS...')
    os.environ["GIT_LFS_SKIP_SMUDGE"] = "0"
    !git lfs install
    return repo_on_local_path

def format_file_size(size_bytes):
    """Format a byte count as a human-readable string using 1024-based units.

    The value is scaled to the largest unit among B, KB, MB, GB and TB that
    keeps it at or above 1, and rounded to two decimals. Sizes of 1024 TB or
    more stay expressed in TB instead of running past the unit table, and
    values below one byte are reported in B.

    Args:
        size_bytes: Non-negative size in bytes.

    Returns:
        ``"0B"`` for zero, otherwise ``"<value> <unit>"`` such as
        ``"1.5 KB"``.

    Raises:
        ValueError: If ``size_bytes`` is negative.
    """
    if size_bytes == 0:
        return "0B"
    if size_bytes < 0:
        raise ValueError(f"size_bytes must be non-negative, got {size_bytes}")

    size_name = ("B", "KB", "MB", "GB", "TB")
    last_unit = len(size_name) - 1

    # Pick the unit by exact comparison rather than a floating-point
    # logarithm, and never move past the largest known unit.
    i = 0
    while i < last_unit and size_bytes >= 1024 ** (i + 1):
        i += 1

    s = round(size_bytes / 1024 ** i, 2)
    return f"{s} {size_name[i]}"

def main():
    os.chdir(WORKSPACE)
    !git config --global user.email "you@example.com"
    !git config --global user.name "Your Name"
    ms_git_token = ms_create_repo(MS_TOKEN, MS_REPO_ID, MS_REPO_PRIVATE)
    repo_on_local = ms_git_clone_repo(ms_git_token, MS_REPO_ID, MS_REVISION)
    login_hf(HF_TOKEN)
    hf_repo_tree = list_hf_repo_tree(HF_REPO_ID)

    file_use_git_lfs = []
    total = len(hf_repo_tree)
    i = 0
    for file in hf_repo_tree:
        if type(file) == RepoFolder:
            path = file.path
            path_on_local = os.path.join(repo_on_local, path)
            os.makedirs(path_on_local, exist_ok=True)
        elif type(file) == RepoFile:
            path = file.path
            size = file.size
            file_on_local = os.path.join(repo_on_local, path)

            if size < 25 * 1024 * 1024:
                i+=1
                print(f'Processing {i}/{total}: {path}...')
                if os.path.exists(file_on_local):
                    print(f'"{file_on_local}" already exist, will delete it!!!')
                    !rm -rf "{file_on_local}"
                hf_file_download(HF_REPO_ID, path, HF_REPO_TYPE, HF_REVISION, repo_on_local)

            else:
                print(f'"{path}" is {format_file_size(size)},  will add it to Git LFS list.')
                file_use_git_lfs.append(path)

    os.chdir(repo_on_local)
    !git add -A .
    git_commit_cmd = f'git commit -m "{MS_COMMIT}"'
    !{git_commit_cmd}
    !git push --set-upstream origin {MS_REVISION}


    totol_non_lfs = i
    print(f'Total non Git-LFS files: {totol_non_lfs}.')

    for file in file_use_git_lfs:
        i+=1
        repo_on_local = ms_git_clone_repo(ms_git_token, MS_REPO_ID, MS_REVISION)
        file_on_local = os.path.join(repo_on_local, file)

        if os.path.exists(file_on_local):
            print(f'"{file_on_local}" already exist, will delete it!!!')
            !rm -rf "{file_on_local}"
        print(f'Processing {i}/{total}: {file}...')
        os.chdir(repo_on_local)
        hf_file_download(HF_REPO_ID, file, HF_REPO_TYPE, HF_REVISION, repo_on_local)
        if os.path.exists(os.path.join(repo_on_local, ".huggingface")):
            !rm -rf {os.path.join(repo_on_local, ".huggingface")}
        !git lfs track {file_on_local}
        !git add {file_on_local}
        !{git_commit_cmd}
        !git push --set-upstream origin {MS_REVISION}
        os.chdir(WORKSPACE)

    total_lfs = len(file_use_git_lfs)
    print(f'Total Git-LFS files: {total_lfs}.')
    print(f'Total files: {total}.')

# Run the whole transfer as soon as this notebook cell is executed.
main()