参考链接：https://zhuanlan.zhihu.com/p/369531344

文件操作：https://blog.csdn.net/Baozijiaruqing/article/details/103900387

关于 `VOT` 数据集下载，直接看官方源码：https://github.com/votchallenge/toolkit

下载链接：https://data.votchallenge.net/vot2019/longterm/description.json

另外一种下载数据集的方法：https://blog.csdn.net/yiyayi1/article/details/103605762

`VOT` 数据集格式：
    vot[lt]year
        - sequences
            - airplane
                - color
                    - ....jpg
                    - ...
                - groundtruth.txt
            - ...
        - list.txt
        - [description.json]
        - [vot[lt]year.json]

In [None]:
# !pip freeze > requirements.txt

In [None]:
# !pip install requests pandas
# !pip install -U "urllib3<1.25"

In [None]:
import os
import requests
from urllib.parse import urlparse, urljoin


def get_data(stack="vot-lt2022"):
    """Fetch a VOT dataset description and collect its per-sequence URLs.

    The ``description.json`` registered for ``stack`` is downloaded and three
    module-level lists are replaced with fresh ones (index ``i`` of each list
    refers to the same sequence):

    - ``fnames``: sequence names
    - ``frames_url``: URL of the color-frame archive
    - ``annos_url``: URL of the annotation archive

    URLs given relative to the description file are resolved against the
    directory that contains it. The globals are only replaced once the whole
    description has been processed, so a failure leaves earlier results intact.

    Args:
        stack: key of the dataset in the table below, e.g. ``"vot-lt2019"``.

    Returns:
        None. Results are published through the module-level lists.

    Raises:
        KeyError: if ``stack`` is not a known dataset key.
        Exception: if the description cannot be downloaded (network failure
            or HTTP error status) or the response body is not JSON.
    """
    VOT_DATASETS = {
        "vot2013": "http://data.votchallenge.net/vot2013/dataset/description.json",
        "vot2014": "http://data.votchallenge.net/vot2014/dataset/description.json",
        "vot2015": "http://data.votchallenge.net/vot2015/dataset/description.json",
        "vot-tir2015": "http://www.cvl.isy.liu.se/research/datasets/ltir/version1.0/ltir_v1_0_8bit.zip",
        "vot2016": "http://data.votchallenge.net/vot2016/main/description.json",
        "vot-tir2016": "http://data.votchallenge.net/vot2016/vot-tir2016.zip",
        "vot2017": "http://data.votchallenge.net/vot2017/main/description.json",
        "vot-st2018": "http://data.votchallenge.net/vot2018/main/description.json",
        "vot-lt2018": "http://data.votchallenge.net/vot2018/longterm/description.json",
        "vot-st2019": "http://data.votchallenge.net/vot2019/main/description.json",
        "vot-lt2019": "http://data.votchallenge.net/vot2019/longterm/description.json",
        "vot-rgbd2019": "http://data.votchallenge.net/vot2019/rgbd/description.json",
        "vot-rgbt2019": "http://data.votchallenge.net/vot2019/rgbtir/meta/description.json",
        "vot-st2020": "https://data.votchallenge.net/vot2020/shortterm/description.json",
        "vot-rgbt2020": "http://data.votchallenge.net/vot2020/rgbtir/meta/description.json",
        "vot-st2021": "https://data.votchallenge.net/vot2021/shortterm/description.json",
        "vot-lt2022": "https://data.votchallenge.net/vot2022/lt/description.json",
        "test": "http://data.votchallenge.net/toolkit/test.zip",
        "segmentation": "http://box.vicos.si/tracking/vot20_test_dataset.zip",
    }
    try:
        url = VOT_DATASETS[stack]
    except KeyError:
        raise KeyError(
            f"Unknown dataset {stack!r}; choose one of {sorted(VOT_DATASETS)}"
        ) from None
    base_url = url.rsplit("/", 1)[0] + "/"

    try:
        response = requests.get(url, timeout=30)
        # Without this an HTTP error page would be handed to the JSON parser.
        response.raise_for_status()
        meta = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding errors on requests versions whose
        # decode error is not a RequestException (some table entries point at
        # zip archives rather than JSON).
        raise Exception("Unable to read JSON file {}".format(e)) from e

    global frames_url, annos_url, fnames
    names, frame_links, anno_links = [], [], []
    for sequence in meta["sequences"]:
        names.append(sequence["name"])
        # urljoin returns absolute URLs unchanged and resolves relative ones
        # against base_url, so no separate netloc check is needed.
        anno_links.append(urljoin(base_url, sequence["annotations"]["url"]))
        frame_links.append(urljoin(base_url, sequence["channels"]["color"]["url"]))

    frames_url, annos_url, fnames = frame_links, anno_links, names


In [None]:
# Dataset key to download; it has to be one of the keys known to get_data
stack = "vot-lt2019"
get_data(stack)

# List every sequence together with the URL of its frame archive
for fname, url in zip(fnames, frames_url):
    print(fname, url, sep=": ")

In [None]:
# 将数据写入 csv 文件
import csv
import os
import re

import pandas as pd


# 方法一：使用 csv 自带的接口，适合每行长度不一定全相等的情况
def write2csv1(csvfile, fnames, urls):
    """Write a download manifest using the standard-library ``csv`` module.

    The file starts with the header ``filename,urls,state`` followed by one
    row per ``(fname, url)`` pair; every ``state`` cell is the text ``False``.
    Any existing file at ``csvfile`` is replaced.

    Args:
        csvfile: path of the CSV file to (re)create.
        fnames: sequence names.
        urls: download URLs, paired with ``fnames`` by position (extra items
            in the longer iterable are ignored).
    """
    if os.path.exists(csvfile):
        print(f"deleting {csvfile}...")

    # "w" truncates a previous manifest. newline="" is what the csv module
    # asks for, so that its row terminators are written untranslated.
    with open(csvfile, "w", newline="", encoding="utf-8") as fout:
        writer = csv.writer(fout)
        writer.writerow(["filename", "urls", "state"])
        writer.writerows([fname, url, "False"] for fname, url in zip(fnames, urls))

# 方法二：使用 pandas
def write2csv2(csvfile, fnames, urls):
    """Write a download manifest using pandas.

    Produces the same layout as ``write2csv1``: the columns ``filename``,
    ``urls`` and ``state`` (initially ``False``), one row per sequence and no
    extra index column. Any existing file at ``csvfile`` is replaced.

    Args:
        csvfile: path of the CSV file to (re)create.
        fnames: sequence names.
        urls: download URLs; must have as many items as ``fnames``.

    Raises:
        ValueError: if ``fnames`` and ``urls`` differ in length.
    """
    if os.path.exists(csvfile):
        print(f"deleting {csvfile}...")

    names = list(fnames)
    df = pd.DataFrame(
        {
            "filename": names,
            "urls": list(urls),
            # One flag per row instead of a fixed 50.
            "state": [False] * len(names),
        }
    )
    # `index` is a boolean switch. False keeps "filename" as the first column,
    # which is what the readers using index_col=0 expect.
    df.to_csv(csvfile, index=False, sep=",")


In [None]:
# Keep only the digits of the dataset key, e.g. "vot-lt2019" -> "2019"
version = "".join(re.findall("[0-9]", stack))
# Manifest file names, keyed by what they list
csvfile = {kind: f"votlt{version}_{kind}.csv" for kind in ("frames", "annos")}

def run_writer():
    """(Re)create the frames and annotations manifests via ``write2csv1``."""
    for kind, urls in (("frames", frames_url), ("annos", annos_url)):
        write2csv1(csvfile[kind], fnames, urls)

In [None]:
# Write both manifests; every row starts with state "False"
run_writer()

In [None]:
import pandas as pd
import time

import requests, os
from tqdm import tqdm

# Silence urllib3 warnings
requests.packages.urllib3.disable_warnings()
# Proxy settings passed to requests when a download runs with use_proxy=True
proxies = {
    # Change this to the port of your own proxy; it can be looked up or changed
    # in the proxy client (Clash defaults to 7890)
    "http": "http://127.0.0.1:7890",
    "https": "http://127.0.0.1:7890",
}
# response = requests.get(url, proxies=proxies)
# Request headers with a browser-like User-Agent
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36",
}

In [None]:
## 初级版：最原始的下载方法
def download_v0(url, folder_path, fname, use_proxy=True):
    """Download ``url`` to ``<folder_path>/<fname>.zip`` unless it already exists.

    The body is streamed to ``<fname>.zip.part`` and renamed once the transfer
    finished, so an interrupted run never leaves a truncated ``.zip`` that a
    later call would mistake for a complete download.

    Args:
        url: address of the archive.
        folder_path: directory that receives the archive; created if missing.
        fname: archive name without extension, e.g. ``"color"`` (video frames)
            or ``"groundtruth"`` (bounding-box annotations).
        use_proxy: send the request through the module-level ``proxies``.

    Returns:
        None

    Raises:
        requests.exceptions.RequestException: on connection problems or an
            HTTP error status.
    """
    os.makedirs(folder_path, exist_ok=True)
    target = os.path.join(folder_path, f"{fname}.zip")

    if os.path.isfile(target):
        print(f"{target} exists and have totaly been downloaded!")
        return

    partial = target + ".part"
    with requests.get(
        url,
        stream=True,
        proxies=proxies if use_proxy else None,
        headers=headers,
        timeout=(10, 60),  # (connect, read) seconds, so a dead connection cannot hang forever
    ) as response:
        # Do not store an HTML error page under a .zip name.
        response.raise_for_status()
        with open(partial, "wb") as fout:
            # Stream to disk in 32 KiB chunks instead of buffering the archive.
            for chunk in response.iter_content(chunk_size=1024 * 32):
                if chunk:
                    fout.write(chunk)
    os.replace(partial, target)


In [None]:
## 进阶版：使用 tqdm 显示下载进度
def download_v1(url, folder_path, fname, use_proxy=True):
    """Download ``url`` to ``<folder_path>/<fname>.zip`` with resume and a progress bar.

    A first request is used only to learn the total size. If a local file of
    exactly that size exists, nothing is downloaded. Otherwise the transfer
    continues after the bytes already on disk via an HTTP ``Range`` request.
    When the size is unknown, the local file is larger than the remote one, or
    the server ignores the range, the file is written again from the start.

    Args:
        url: address of the archive.
        folder_path: directory that receives the archive; created if missing.
        fname: archive name without extension, e.g. ``"color"`` (video frames)
            or ``"groundtruth"`` (bounding-box annotations).
        use_proxy: send the requests through the module-level ``proxies``.

    Returns:
        None

    Raises:
        requests.exceptions.RequestException: on connection problems or an
            HTTP error status.
    """
    os.makedirs(folder_path, exist_ok=True)
    target = os.path.join(folder_path, f"{fname}.zip")
    # Messages and the progress bar are labelled with the name of the
    # containing folder plus ".zip".
    label = os.path.basename(os.path.dirname(os.path.abspath(target))) + ".zip"
    request_proxies = proxies if use_proxy else None
    timeout = (10, 60)  # (connect, read) seconds

    # NOTE(review): verify=False (kept from the original) disables TLS
    # certificate checks -- confirm this is still wanted.
    # Probe request: only the headers are read, then the connection is released.
    with requests.get(
        url, stream=True, verify=False, proxies=request_proxies, timeout=timeout
    ) as probe:
        probe.raise_for_status()
        total_size = int(probe.headers.get("Content-Length", 0))

    done = os.path.getsize(target) if os.path.isfile(target) else 0
    if total_size and done == total_size:
        print(f"{label} exists and have totaly been downloaded!")
        return
    if not total_size or done > total_size:
        # Size unknown, or the local file cannot be a prefix of the remote one.
        done = 0

    remaining = 1 - done / total_size if total_size else 1.0
    print(
        f"{label} downloaded: {done/(1024*1024):.2f}MB || Total size: {total_size/(1024*1024):.2f}MB || Remaining download rate {remaining:.2f}"
    )

    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36",
    }
    if done:
        # Open-ended range: everything from the first missing byte onwards.
        request_headers["Range"] = f"bytes={done}-"

    with requests.get(
        url,
        stream=True,
        verify=False,
        headers=request_headers,
        proxies=request_proxies,
        timeout=timeout,
    ) as response:
        response.raise_for_status()
        # Only a 206 answer carries just the missing tail. Any other success
        # status carries the whole file, which must replace the partial one.
        resuming = done > 0 and response.status_code == 206
        with open(target, "ab" if resuming else "wb") as fh:
            with tqdm.wrapattr(
                fh,
                "write",
                miniters=1,
                desc=label,
                total=int(response.headers.get("content-length", 0)),
            ) as fout:
                for chunk in response.iter_content(chunk_size=4096):
                    if chunk:
                        fout.write(chunk)
                fout.flush()


改进3，参考：https://blog.csdn.net/yiyayi1/article/details/103605762

使用 `wget` 进行下载。

需要注意一点的就是代理池的设置，代理软件不同，端口号不同，根据个人实际情况进行修改！

使用连接池可以使得下载更加稳定，并且断线的风险比较小，当然，本项目中使用了断点续传的技术，断线后也不会重新下载，只会下载未下载的部分。

断点续传参考链接：
- https://blog.csdn.net/qq_35203425/article/details/80987880
- https://blog.csdn.net/thewindkee/article/details/80189434
- https://huyi-aliang.blog.csdn.net/article/details/120926552?spm=1001.2101.3001.6650.1&utm_medium=distribute.pc_relevant.none-task-blog-2%7Edefault%7ECTRLIST%7ERate-1.pc_relevant_paycolumn_v3&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2%7Edefault%7ECTRLIST%7ERate-1.pc_relevant_paycolumn_v3&utm_relevant_index=2 这篇比较准确

改进：边下载边解压，这里的意思是下载完成一个，解压缩一个。

`download` 之后再进行的 `unzip` 操作，由于 `download` 时我们采用了断点续传的技术，所以会保证这是一个完整的压缩包。

如果在解压缩之后进行删除，那么就需要进行判断，用一个字典等来保存是否解压，是否成功下载完成。

In [None]:
import zipfile


def unzip_filev0(zip_src: str, dst_dir: str):
    """Extract every member of ``zip_src`` into ``dst_dir``, then delete the archive.

    Args:
        zip_src: path of the zip archive.
        dst_dir: directory that receives the extracted members.

    Returns:
        True when all members were extracted, False when the archive is
        missing, unreadable or not a valid zip file (the archive is then left
        in place).
    """
    # Messages are labelled with the name of the folder holding the archive.
    # abspath makes this work for a bare file name as well.
    label = os.path.basename(os.path.dirname(os.path.abspath(zip_src)))
    try:
        with zipfile.ZipFile(zip_src, "r") as zip_file:
            print(f"Start unzip {label}")
            zip_file.extractall(dst_dir)
    except zipfile.BadZipFile:
        print("This is not zip file!")
        return False
    except Exception as err:
        # Callers store the boolean result as download state, so failures are
        # reported with their real cause instead of being raised.
        print(f"Failed to unzip {zip_src}: {err!r}")
        return False

    print(f"Done! {label.upper()} has been unziped fully!")
    try:
        os.remove(zip_src)
    except OSError as err:
        # Extraction itself succeeded; a leftover archive is only untidy.
        print(f"Could not delete {zip_src}: {err}")
    return True


def unzip_filev1(zip_src: str, dst_dir: str):
    """Extract every member of ``zip_src`` into ``dst_dir``, then delete the archive.

    Args:
        zip_src: path of the zip archive.
        dst_dir: directory that receives the extracted members.

    Returns:
        True when all members were extracted, False when the archive is
        missing, unreadable or not a valid zip file (the archive is then left
        in place).
    """
    # Messages are labelled with the name of the folder holding the archive.
    # abspath makes this work for a bare file name as well.
    label = os.path.basename(os.path.dirname(os.path.abspath(zip_src)))
    try:
        with zipfile.ZipFile(file=zip_src) as zip_file:
            print(f"Start unzip {label}")
            zip_file.extractall(path=dst_dir)
    except zipfile.BadZipFile:
        print("This is not zip file!")
        return False
    except Exception as err:
        # Callers store the boolean result as download state, so failures are
        # reported with their real cause instead of being raised.
        print(f"Failed to unzip {zip_src}: {err!r}")
        return False

    print(f"Done! {label.upper()} has been unziped fully!")
    # The archive is deleted only after it has been closed; removing a file
    # that is still open fails on Windows.
    try:
        os.remove(zip_src)
    except OSError as err:
        print(f"Could not delete {zip_src}: {err}")
    return True


- 有关 `Python` 传参的一些知识：https://blog.csdn.net/xylin1012/article/details/81236122
- 解压 `zip` 文件：https://wanglinyong.github.io/2018/06/28/Python%E5%8E%8B%E7%BC%A9%E8%A7%A3%E5%8E%8Bzip%E6%96%87%E4%BB%B6/

In [None]:
# Folder that receives one sub-folder per sequence
root = f"./VOT{version}_LT/sequences"
# Load the manifests: the first column becomes the index, the next two columns
# are kept as data (write2csv1 writes them as filename, urls, state)
frames = pd.read_csv(csvfile["frames"], header=0, index_col=0, usecols=[0, 1, 2])
annos = pd.read_csv(csvfile["annos"], header=0, index_col=0, usecols=[0, 1, 2])

# print(frames.loc['bag', 'state'])
# for fname, (url, state) in annos.iterrows():
#     print(url, state)

## V0.使用一个状态变量来记录是否已经下载成功
def download_annos(use_proxy=True):
    """Download and unpack the annotation archive of every pending sequence.

    Walks the module-level ``annos`` manifest. Rows whose ``state`` is truthy
    are skipped. For the others ``groundtruth.zip`` is downloaded into
    ``<cwd>/<root>/<sequence>`` and unpacked there. After each attempt the
    result is stored in the ``state`` column and the manifest is written back
    to ``csvfile["annos"]``, so an interrupted run can pick up where it stopped.

    Args:
        use_proxy: forwarded to ``download_v1``.
    """
    for fname, (url, state) in annos.iterrows():
        if state:
            print(f"{fname} exists and have totaly been downloaded and unziped! Going to download and unzip the next.")
            continue

        folder_path = os.path.join(os.getcwd(), root, fname)
        download_v1(url, folder_path, "groundtruth", use_proxy)

        zip_src = folder_path + "/groundtruth.zip"
        state = unzip_filev1(zip_src, folder_path)

        # Persist the new state right away.
        annos.loc[fname, "state"] = state
        # `index` is a boolean switch: always write the filename index so the
        # file can be read back with index_col=0, independent of `fnames`.
        annos.to_csv(csvfile["annos"], index=True, encoding="utf-8")


def download_frames(use_proxy=True):
    """Download and unpack the frame archive of every pending sequence.

    Walks the module-level ``frames`` manifest. Rows whose ``state`` is truthy
    are skipped. For the others ``color.zip`` is downloaded into
    ``<cwd>/<root>/<sequence>`` and unpacked into its ``color`` sub-folder.
    After each attempt the result is stored in the ``state`` column and the
    manifest is written back to ``csvfile["frames"]``, so an interrupted run
    can pick up where it stopped.

    Args:
        use_proxy: forwarded to ``download_v1``.
    """
    for fname, (url, state) in frames.iterrows():
        if state:
            print(f"{fname} exists and have totaly been downloaded and unziped! Going to download and unzip the next.")
            continue

        folder_path = os.path.join(os.getcwd(), root, fname)
        download_v1(url, folder_path, "color", use_proxy)

        zip_src = folder_path + "/color.zip"
        dest_dir = os.path.join(folder_path, "color")
        state = unzip_filev0(zip_src, dest_dir)

        frames.loc[fname, "state"] = state
        # Write to the frames manifest: the annotations manifest must not be
        # overwritten with frame URLs. `index` is a boolean switch, so always
        # write the filename index for reading back with index_col=0.
        frames.to_csv(csvfile["frames"], index=True, encoding="utf-8")


遇到的问题，成功解决参考链接：

- https://stackoverflow.com/questions/20625582/how-to-deal-with-settingwithcopywarning-in-pandas
- https://jiaxiaochu.gitee.io/ibook/%E7%AC%AC2%E8%AF%BE/python%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90%E7%AC%AC%E4%BA%8C%E8%AF%BE.html
- https://www.pythonf.cn/read/139075

这里有第二种下载方案：就不用一个新的 `state` 来保存是否下载成功，而是直接使用 `os.path.exists(file)` 进行判断，因为只有成功下载并解压后，这文件才存在：`groundtruth.txt` 以及 `color/00000001.jpg`.

这样更改的话会导致修改的地方有很多：

1. `csv` 文件的写入格式要重新更改；
2. `unzip` 部分需要修改，因为用不到了解压成功的状态；
3. `download` 部分需要修改，包括使用 `pandas` 读取部分，以及下载代码块部分，删去状态标记以及重新写入 `csv` 文件的部分，然后更改判断条件即可。

此次感悟：表层做判断，底层做细节处理，不要把判断放在后面（这里的后面不是说的最新添加的代码块，而是之前写的部分），原因是判断需要传值，越往里需要重复传入的参数次数越多，本来不是很复杂，但是经过这么一番操作之后，逻辑变得特别复杂，不容易看懂，降低了可读性。因此，应该尽量将逻辑处理部分放在最外层！！！


执行下载：如果在下载完成后自动解压，没有实现断点续传，只能将该值设置为 `False` !

In [15]:
# Download and unpack the annotations (groundtruth.txt)
download_annos(use_proxy=True)
print("Done, groundtruth has been downloaded!")

ballet exists and have totaly been downloaded and unziped! Going to download and unzip the next.
bicycle exists and have totaly been downloaded and unziped! Going to download and unzip the next.
bike1 exists and have totaly been downloaded and unziped! Going to download and unzip the next.
bird1 exists and have totaly been downloaded and unziped! Going to download and unzip the next.
boat exists and have totaly been downloaded and unziped! Going to download and unzip the next.
bull exists and have totaly been downloaded and unziped! Going to download and unzip the next.
car1 exists and have totaly been downloaded and unziped! Going to download and unzip the next.
car3 exists and have totaly been downloaded and unziped! Going to download and unzip the next.
car6 exists and have totaly been downloaded and unziped! Going to download and unzip the next.
car8 exists and have totaly been downloaded and unziped! Going to download and unzip the next.
car9 exists and have totaly been downloaded

kitesurfing.zip: 100%|██████████| 121k/121k [00:00<00:00, 140kB/s]  


Start unzip kitesurfing
Done! KITESURFING has been unziped fully!
liverRun.zip downloaded: 0.00MB || Total size: 0.72MB || Remaining download rate 1.00


liverRun.zip: 100%|██████████| 739k/739k [00:01<00:00, 503kB/s]  


Start unzip liverRun
Done! LIVERRUN has been unziped fully!
longboard.zip downloaded: 0.00MB || Total size: 0.14MB || Remaining download rate 1.00


longboard.zip: 100%|██████████| 148k/148k [00:00<00:00, 193kB/s]  


Start unzip longboard
Done! LONGBOARD has been unziped fully!
nissan.zip downloaded: 0.00MB || Total size: 0.09MB || Remaining download rate 1.00


nissan.zip: 100%|██████████| 95.0k/95.0k [00:00<00:00, 125kB/s] 


Start unzip nissan
Done! NISSAN has been unziped fully!
parachute.zip downloaded: 0.00MB || Total size: 0.07MB || Remaining download rate 1.00


parachute.zip: 100%|██████████| 73.8k/73.8k [00:00<00:00, 129kB/s] 


Start unzip parachute
Done! PARACHUTE has been unziped fully!
person2.zip downloaded: 0.00MB || Total size: 0.06MB || Remaining download rate 1.00


person2.zip: 100%|██████████| 57.6k/57.6k [00:00<00:00, 114kB/s] 


Start unzip person2
Done! PERSON2 has been unziped fully!
person4.zip downloaded: 0.00MB || Total size: 0.06MB || Remaining download rate 1.00


person4.zip: 100%|██████████| 59.5k/59.5k [00:00<00:00, 106kB/s] 


Start unzip person4
Done! PERSON4 has been unziped fully!
person5.zip downloaded: 0.00MB || Total size: 0.05MB || Remaining download rate 1.00


person5.zip: 100%|██████████| 47.0k/47.0k [00:00<00:00, 93.8kB/s]


Start unzip person5
Done! PERSON5 has been unziped fully!
person7.zip downloaded: 0.00MB || Total size: 0.04MB || Remaining download rate 1.00


person7.zip: 100%|██████████| 44.9k/44.9k [00:00<00:00, 83.3kB/s]


Start unzip person7
Done! PERSON7 has been unziped fully!
person14.zip downloaded: 0.00MB || Total size: 0.06MB || Remaining download rate 1.00


person14.zip: 100%|██████████| 62.7k/62.7k [00:00<00:00, 121kB/s] 


Start unzip person14
Done! PERSON14 has been unziped fully!
person17.zip downloaded: 0.00MB || Total size: 0.05MB || Remaining download rate 1.00


person17.zip: 100%|██████████| 50.3k/50.3k [00:00<00:00, 105kB/s] 


Start unzip person17
Done! PERSON17 has been unziped fully!
person19.zip downloaded: 0.00MB || Total size: 0.09MB || Remaining download rate 1.00


person19.zip: 100%|██████████| 93.3k/93.3k [00:00<00:00, 157kB/s] 


Start unzip person19
Done! PERSON19 has been unziped fully!
person20.zip downloaded: 0.00MB || Total size: 0.04MB || Remaining download rate 1.00


person20.zip: 100%|██████████| 41.2k/41.2k [00:00<00:00, 81.2kB/s]


Start unzip person20
Done! PERSON20 has been unziped fully!
rollerman.zip downloaded: 0.00MB || Total size: 0.03MB || Remaining download rate 1.00


rollerman.zip: 100%|██████████| 35.8k/35.8k [00:00<00:00, 153kB/s] 


Start unzip rollerman
Done! ROLLERMAN has been unziped fully!
sitcom.zip downloaded: 0.00MB || Total size: 0.08MB || Remaining download rate 1.00


sitcom.zip: 100%|██████████| 81.4k/81.4k [00:00<00:00, 142kB/s] 


Start unzip sitcom
Done! SITCOM has been unziped fully!
skiing.zip downloaded: 0.00MB || Total size: 0.06MB || Remaining download rate 1.00


skiing.zip: 100%|██████████| 57.1k/57.1k [00:00<00:00, 108kB/s] 


Start unzip skiing
Done! SKIING has been unziped fully!
sup.zip downloaded: 0.00MB || Total size: 0.08MB || Remaining download rate 1.00


sup.zip: 100%|██████████| 86.1k/86.1k [00:00<00:00, 162kB/s] 


Start unzip sup
Done! SUP has been unziped fully!
tightrope.zip downloaded: 0.00MB || Total size: 0.06MB || Remaining download rate 1.00


tightrope.zip: 100%|██████████| 56.8k/56.8k [00:00<00:00, 107kB/s] 


Start unzip tightrope
Done! TIGHTROPE has been unziped fully!
uav1.zip downloaded: 0.00MB || Total size: 0.07MB || Remaining download rate 1.00


uav1.zip: 100%|██████████| 72.4k/72.4k [00:00<00:00, 146kB/s] 


Start unzip uav1
Done! UAV1 has been unziped fully!
volkswagen.zip downloaded: 0.00MB || Total size: 0.18MB || Remaining download rate 1.00


volkswagen.zip: 100%|██████████| 183k/183k [00:00<00:00, 213kB/s]  


Start unzip volkswagen
Done! VOLKSWAGEN has been unziped fully!
warmup.zip downloaded: 0.00MB || Total size: 0.10MB || Remaining download rate 1.00


warmup.zip: 100%|██████████| 103k/103k [00:00<00:00, 137kB/s]  


Start unzip warmup
Done! WARMUP has been unziped fully!
wingsuit.zip downloaded: 0.00MB || Total size: 0.06MB || Remaining download rate 1.00


wingsuit.zip: 100%|██████████| 58.1k/58.1k [00:00<00:00, 109kB/s] 


Start unzip wingsuit
Done! WINGSUIT has been unziped fully!
yamaha.zip downloaded: 0.00MB || Total size: 0.07MB || Remaining download rate 1.00


yamaha.zip: 100%|██████████| 72.7k/72.7k [00:00<00:00, 130kB/s] 

Start unzip yamaha
Done! YAMAHA has been unziped fully!
Done, groundtruth has been downloaded!





In [None]:
## Clear the download state: rewrite both manifests with every state reset to False
run_writer()

In [None]:
download_frames(use_proxy=True)  # NOTE(review): the original note says this call is commented out for demos because the archives are large, yet it is active here
print("Done, color.zip downloaded!")

In [None]:
# 解压文件
# !./traverse.sh

安装 `tree` 工具：`sudo apt-get install tree -y` .

In [None]:
# 查看当前目录结构
!tree ./VOT2022_LT -L 4

可以看到有 `100` 个文件（50 个 `zip`，50 个 `.txt`）

在执行以下命令删除压缩包之前，最好先保存一份，毕竟压缩包还是挺大的，防止意外发生。

In [None]:
# 删除当前目录中所有的 .zip 文件！！！这里需要谨慎操作
# !find -name "groundtruth.zip" | xargs rm -r
# !find -name "color.zip" | xargs rm -r

# 也可以一次删除
# !find -name "*.zip" | xargs rm -r

`Python` 爬虫教程：http://c.biancheng.net/view/2011.html

`Python` 文件读写：
  - http://www.itheima.com/news/20210412/113009.html
  - https://www.cnblogs.com/zdz8207/p/python-updateFile-re-sub.html


`VOT2022-LT`: https://data.votchallenge.net/vot2022/lt/description.json

`sequence` 文件：
```
channels.color=color/%08d.jpg
format=default
fps=30
name=agility

```

In [None]:
# Sequence folder. NOTE(review): hard-coded to 2022 here, while the download
# cell builds the folder name from `version` -- confirm the two should match
root = r"./VOT2022_LT/sequences"
# Fixed leading lines of every per-sequence "sequence" file
sequence = ["channels.color=color/%08d.jpg\r\n", "format=default\r\n", "fps=30\r\n"]

# 测试代码
# fsequence = "./test.txt"
# fsequence = open(fsequence, encoding="utf-8", mode="w")
# fsequence.writelines(sequence)
# fsequence.flush()
# fsequence.close()

## 添加 list.txt 文件
def write2list():
    """Create ``<root>/list.txt`` holding one sequence name per line.

    ``root`` is created first when it does not exist; the names come from the
    module-level ``fnames``.
    """
    if not os.path.exists(root):
        os.makedirs(root)

    with open(os.path.join(root, "list.txt"), "w") as listing:
        listing.write("".join(name + "\n" for name in fnames))


def write2squence():
    """Write the ``sequence`` metadata file of every sequence that lacks one.

    For each name in the module-level ``fnames`` the file
    ``<root>/<name>/sequence`` receives the shared ``sequence`` lines followed
    by ``name=<name>``. Existing files are left untouched.

    Raises:
        OSError: if a sequence folder does not exist or is not writable.
    """
    for fname in fnames:
        meta_path = os.path.join(root, fname, "sequence")
        if os.path.exists(meta_path):
            continue
        # newline="" writes the explicit "\r\n" endings as they are; translated
        # text mode would turn them into "\r\r\n" on Windows.
        with open(meta_path, encoding="utf-8", mode="w", newline="") as fout:
            fout.writelines(sequence)
            fout.write(f"name={fname}\r\n")

In [None]:
# Create list.txt, then the per-sequence "sequence" files
write2list()
write2squence()

In [None]:
!tree -L 4

## 补充：有关多进程下载以及下载进度条显示

## 一、下载进度条显示

### Python tqdm 工具包使用

> 官网：https://pypi.org/project/tqdm/#examples-and-advanced-usage

> 有关 `tqdm` 用法参考链接：https://pypi.org/project/tqdm/#examples-and-advanced-usage


```python
import urllib, os
from tqdm import tqdm

eg_link = "https://caspersci.uk.to/matryoshka.zip"
# Uses urllib.request when that attribute is present, otherwise urllib itself
response = getattr(urllib, 'request', urllib).urlopen(eg_link)
# The downloaded data is discarded (os.devnull); the wrapper around write()
# is there for the progress bar, which is named after the last URL segment
with tqdm.wrapattr(open(os.devnull, "wb"), "write",
                   miniters=1, desc=eg_link.split('/')[-1],
                   total=getattr(response, 'length', None)) as fout:
    for chunk in response:
        fout.write(chunk)
```

还可以使用：
```python
import requests, os
from tqdm import tqdm

eg_link = "https://caspersci.uk.to/matryoshka.zip"
# stream=True: the body is fetched chunk by chunk in the loop below
response = requests.get(eg_link, stream=True)
# The downloaded data is discarded (os.devnull); the bar's total comes from
# the Content-Length header (0 when the header is absent)
with tqdm.wrapattr(open(os.devnull, "wb"), "write",
                   miniters=1, desc=eg_link.split('/')[-1],
                   total=int(response.headers.get('content-length', 0))) as fout:
    for chunk in response.iter_content(chunk_size=4096):
        fout.write(chunk)
```

## 二、Python 多进程下载

## 三、使用 `MD5` 进行文件完整性校验

`MD5` 是一种哈希（消息摘要）算法，而不是加密手段；可以通过比较文件的 `MD5` 值进行完整性校验。

> 参考链接：https://blog.csdn.net/python_neophyte/article/details/102645477

```python
import hashlib
import os


# Directory to check, entered interactively
f_path = input('File path: ')
# Files to verify: every *.bin, plus every *.exe whose base name has no '%'
SETUP_FILE = [file for file in os.listdir(f_path) if os.path.splitext(file)[1] == '.bin' or
              (os.path.splitext(file)[1] == '.exe' and '%' not in os.path.splitext(file)[0])]
# Files with the .md5 extension in the same directory
MD5_FILE = [file for file in os.listdir(f_path) if os.path.splitext(file)[1] == '.md5']

print('所有安装文件：', SETUP_FILE)
print('MD5储存文件：', MD5_FILE)


def get_correct_md5():
    """Return the lines of all ``.md5`` files in ``f_path`` as one flat list."""
    lines = []
    for name in MD5_FILE:
        with open(os.path.join(f_path, name)) as handle:
            lines += handle.readlines()
    return lines

def get_file_md5(file):
    """Return the upper-case hex MD5 digest of ``file`` inside ``f_path``.

    The file name and its size are printed first; while hashing, the size of
    each chunk just read (in MiB) is printed on one self-overwriting line.
    """
    target = os.path.join(f_path, file)
    digest = hashlib.md5()
    size_mb = '{:.2f}'.format(os.path.getsize(target) / (1024 ** 2))
    print('正在验证文件名称：%s， 文件大小：%s Mb' % (file, size_mb))
    with open(target, 'rb') as stream:
        # The sentinel None never matches, so the loop ends through the break
        # below -- after the status line was printed for the final empty read.
        for chunk in iter(lambda: stream.read(99999999), None):
            print('验证速度：%.2f Mb/s' % (len(chunk) / (1024 ** 2)), end='\r')
            if not chunk:
                break
            digest.update(chunk)

    return digest.hexdigest().upper()


def main():
    """Verify every file in ``SETUP_FILE`` against the collected ``.md5`` lines.

    For each file the first line containing its name is used: the text before
    the first space is compared with the computed digest. Files without such a
    line are skipped. A summary is printed at the end.
    """
    expected = get_correct_md5()
    damaged = 0
    print('开始验证：')
    for file in SETUP_FILE:
        md5 = get_file_md5(file)
        entry = next((line for line in expected if file in line), None)
        if entry is None:
            print('此文件没有找到对应的md5，因此跳过验证。')
        elif md5 == entry.split(' ')[0]:
            print(file, '\n验证通过！\n')
        else:
            print(file, '\n文件损坏！\n')
            damaged += 1

    print('所有文件验证完成！')

    if damaged:
        print('共有 %s 个文件损坏，请重新下载损坏文件！' % damaged)
    else:
        print('所有文件全部通过验证，可以直接安装！')


# Run the verification
main()
```

## `Git` 使用教程

- VSCode上传本地项目到github https://www.cxyzjd.com/article/Le___Le/103585617
- https://blog.csdn.net/qq_32578989/article/details/87994300

### Step1: 在 GitHub 创建一个新的仓库，用于存储要提交的项目
```bash
cd workspace
```

### Step2: 与 GitHub 远程仓库建立联系
```bash
git init
git remote rm origin
git remote add origin https://github.com/blainetse/dataset_toolkits.git  # 换成自己仓库的 ssh/https 地址（要保存在 GitHub 的仓库位置）
git remote -v  # 查看状态
```

### Step3: 拉取远程内容，提交后 push 到主分支
```bash
git pull origin master
```

注意：如果项目里已经有东西了，就可能会出现什么远程仓库和本地仓库不相关的错误，所以要
```shell
git pull origin master --allow-unrelated-histories
```
将 README等已有的文件强行拉下来！

```bash
git commit -m "注释内容"  # 说明提交的状态等信息，字符串格式
```

如果有什么 nothing added to commit but untracked files present 的事，就直接 git add xxx.txt 或者 git add xxx/ 或者直接 git add -A 加所有，再 commit 

然后再 push 上去，git push -u origin master

## `Git` 配置问题记录

`ERROR: Repository not found. Fatal: Could not read from remote repository.`
  - https://blog.csdn.net/weixin_40886892/article/details/80725071