From 96c44a39f5889fdfd44c2c6e0f641758517ee943 Mon Sep 17 00:00:00 2001
From: rex <1073853456@qq.com>
Date: Mon, 18 Mar 2024 10:08:58 +0800
Subject: [PATCH] add hfmf cli

---
 hf_mirror_fetch/cli.py             |  1 +
 hf_mirror_fetch/mirror_download.py | 15 ++++++++++-----
 setup.cfg                          |  4 ++++
 setup.py                           | 15 ++++++++-------
 4 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/hf_mirror_fetch/cli.py b/hf_mirror_fetch/cli.py
index 98cff60..34b2953 100644
--- a/hf_mirror_fetch/cli.py
+++ b/hf_mirror_fetch/cli.py
@@ -1,6 +1,7 @@
 """Console script for hf_mirror_fetch."""
 import sys
 import click
+from hf_mirror_fetch.mirror_download import get_url2names, download_files
 
 
 @click.command()
diff --git a/hf_mirror_fetch/mirror_download.py b/hf_mirror_fetch/mirror_download.py
index bf0cdf7..5fc7502 100644
--- a/hf_mirror_fetch/mirror_download.py
+++ b/hf_mirror_fetch/mirror_download.py
@@ -20,7 +20,7 @@
 import click
 import requests
 from bs4 import BeautifulSoup
-from urllib.parse import unquote, quote_plus
+from urllib.parse import unquote, quote_plus, quote
 from pathlib import Path
 
 ROOT = "https://hf-mirror.com"
@@ -57,8 +57,13 @@ def get_next_page_items(soup, url):
         all_items = current_items
 
     download_url = url.replace('tree/main', 'resolve/main')
-    url2names = [(f"{download_url}/{item['path']}?download=true", item['path'])
-                 for item in all_items if item.get('type') == 'file']
+    url2names = []
+    for item in all_items:
+        if item.get('type') == 'file':
+            name = item['path']
+            encoded_name = quote(name, safe='')  # percent-encode the file name so the URL stays valid (safe='' also escapes '/')
+            _url = f"{download_url}/{encoded_name}?download=true"  # build the download URL from the encoded file name
+            url2names.append((_url, name))
     return url2names
 
 def get_url2names(url):
@@ -92,8 +97,8 @@ def save_with_wget(url, file):
     os.system(f"wget -c {url} -O {file}")
 
 @click.command()
-@click.option('--url', required=True, help='The URL of the model\'s page on hf-mirror.com or huggingface.co.')
-@click.option('--tgt_folder', default=None, type=str, help='The target folder to save the downloaded files.')
+@click.option('-u', '--url', required=True, help='The URL of the model\'s page on hf-mirror.com or huggingface.co.')
+@click.option('-f', '--tgt_folder', default=None, type=str, help='The target folder to save the downloaded files.')
 @click.option('--update', is_flag=True, help='Update existing files except for weights.')
 def download_from_mirror_page(url, tgt_folder, update):
     """Downloads models from hf-mirror.com, supporting file resumption."""
diff --git a/setup.cfg b/setup.cfg
index 1fa1592..920503d 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -18,3 +18,7 @@ universal = 1
 exclude = docs
 [tool:pytest]
 addopts = --ignore=setup.py
+
+[options.entry_points]
+console_scripts =
+    hfmf = hf_mirror_fetch.mirror_download:download_from_mirror_page
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 8615201..b858ad1 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,3 @@
-#!/usr/bin/env python
-
-"""The setup script."""
-
 from setuptools import setup, find_packages
 
 VERSION = "0.1.0"
@@ -9,7 +5,11 @@
 with open('README.md') as readme_file:
     readme = readme_file.read()
 
-requirements = ['Click>=7.0', ]
+requirements = [
+    'Click>=7.0',
+    'requests',
+    'beautifulsoup4'
+]
 
 test_requirements = ['pytest>=3', ]
 
@@ -30,19 +30,20 @@
     description="A command-line tool designed to streamline the process of downloading machine learning models and related files from the Hugging Face model hub mirror site.",
     entry_points={
         'console_scripts': [
-            'hf_mirror_fetch=hf_mirror_fetch.cli:main',
+            'hfmf=hf_mirror_fetch.mirror_download:download_from_mirror_page',
         ],
     },
     install_requires=requirements,
     license="MIT license",
     long_description=readme,
+    long_description_content_type='text/markdown',
     include_package_data=True,
     keywords='hf_mirror_fetch',
     name='hf_mirror_fetch',
     packages=find_packages(include=['hf_mirror_fetch', 'hf_mirror_fetch.*']),
     test_suite='tests',
     tests_require=test_requirements,
-    url='https://github.com/Qing25/hf_mirror_fetch',
+    url='https://github.com/CubeNLP/hf_mirror_fetch',
     version=VERSION,
     zip_safe=False,
 )