From 96c44a39f5889fdfd44c2c6e0f641758517ee943 Mon Sep 17 00:00:00 2001 From: rex <1073853456@qq.com> Date: Mon, 18 Mar 2024 10:08:58 +0800 Subject: [PATCH] add hfmf cli --- hf_mirror_fetch/cli.py | 1 + hf_mirror_fetch/mirror_download.py | 15 ++++++++++----- setup.cfg | 4 ++++ setup.py | 15 ++++++++------- 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/hf_mirror_fetch/cli.py b/hf_mirror_fetch/cli.py index 98cff60..34b2953 100644 --- a/hf_mirror_fetch/cli.py +++ b/hf_mirror_fetch/cli.py @@ -1,6 +1,7 @@ """Console script for hf_mirror_fetch.""" import sys import click +from hf_mirror_fetch.mirror_download import get_url2names, download_files @click.command() diff --git a/hf_mirror_fetch/mirror_download.py b/hf_mirror_fetch/mirror_download.py index bf0cdf7..5fc7502 100644 --- a/hf_mirror_fetch/mirror_download.py +++ b/hf_mirror_fetch/mirror_download.py @@ -20,7 +20,7 @@ import click import requests from bs4 import BeautifulSoup -from urllib.parse import unquote, quote_plus +from urllib.parse import unquote, quote_plus, quote from pathlib import Path ROOT = "https://hf-mirror.com" @@ -57,8 +57,13 @@ def get_next_page_items(soup, url): all_items = current_items download_url = url.replace('tree/main', 'resolve/main') - url2names = [(f"{download_url}/{item['path']}?download=true", item['path']) - for item in all_items if item.get('type') == 'file'] + url2names = [] + for item in all_items: + if item.get('type') == 'file': + name = item['path'] + encoded_name = quote(name, safe='') # 对文件名进行编码,确保URL有效 + _url = f"{download_url}/{encoded_name}?download=true" # 使用编码后的文件名构造URL + url2names.append((_url, name)) return url2names def get_url2names(url): @@ -92,8 +97,8 @@ def save_with_wget(url, file): os.system(f"wget -c {url} -O {file}") @click.command() -@click.option('--url', required=True, help='The URL of the model\'s page on hf-mirror.com or huggingface.co.') -@click.option('--tgt_folder', default=None, type=str, help='The target folder to save the downloaded files.') +@click.option('-u', '--url', required=True, help='The URL of the model\'s page on hf-mirror.com or huggingface.co.') +@click.option('-f', '--tgt_folder', default=None, type=str, help='The target folder to save the downloaded files.') @click.option('--update', is_flag=True, help='Update existing files except for weights.') def download_from_mirror_page(url, tgt_folder, update): """Downloads models from hf-mirror.com, supporting file resumption.""" diff --git a/setup.cfg b/setup.cfg index 1fa1592..920503d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -18,3 +18,7 @@ universal = 1 exclude = docs [tool:pytest] addopts = --ignore=setup.py + +[options.entry_points] +console_scripts = + hfmf = hf_mirror_fetch.mirror_download:download_from_mirror_page \ No newline at end of file diff --git a/setup.py b/setup.py index 8615201..b858ad1 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,3 @@ -#!/usr/bin/env python - -"""The setup script.""" - from setuptools import setup, find_packages VERSION = "0.1.0" @@ -9,7 +5,11 @@ with open('README.md') as readme_file: readme = readme_file.read() -requirements = ['Click>=7.0', ] +requirements = [ + 'Click>=7.0', + 'requests', + 'beautifulsoup4' +] test_requirements = ['pytest>=3', ] @@ -30,19 +30,20 @@ description="A command-line tool designed to streamline the process of downloading machine learning models and related files from the Hugging Face model hub mirror site.", entry_points={ 'console_scripts': [ - 'hf_mirror_fetch=hf_mirror_fetch.cli:main', + 'hfmf=hf_mirror_fetch.mirror_download:download_from_mirror_page', ], }, install_requires=requirements, license="MIT license", long_description=readme, + long_description_content_type='text/markdown', include_package_data=True, keywords='hf_mirror_fetch', name='hf_mirror_fetch', packages=find_packages(include=['hf_mirror_fetch', 'hf_mirror_fetch.*']), test_suite='tests', tests_require=test_requirements, - url='https://github.com/Qing25/hf_mirror_fetch', + url='https://github.com/CubeNLP/hf_mirror_fetch', version=VERSION, zip_safe=False, )