# Stanford Dogs 数据集

数据集树形结构
```
./datasets/
    stanford_dogs/
        annotation.tar
        Annotation/
            ...

        lists.tar
        lists/
            file_list.mat
            test_list.mat
            train_list.mat

        images.tar
        Images/
            n02085620-Chihuahua/
                n02085620_7.jpg
                ...
            ...
```

In [9]:
import os
from os.path import join

# Constants used to download the dataset and to locate it on disk.
DOWNLOAD_URL_PREFIX = "http://vision.stanford.edu/aditya86/ImageNetDogs"

DATASET_ROOT = "./datasets/"
DATASET_NAME = "stanford_dogs"
# Directory of this dataset: DATASET_NAME placed under DATASET_ROOT.
DATASET_PATH = os.path.join(DATASET_ROOT, DATASET_NAME)

## 1. 数据集下载与解压

In [None]:
from torchvision.datasets.utils import download_url, list_dir, list_files
import tarfile

# Locations of the "Annotation" and "Images" folders inside the dataset directory.
ANNOTATION_PATH = os.path.join(DATASET_PATH, "Annotation")
IMAGES_PATH = os.path.join(DATASET_PATH, "Images")

def download():
    """Download and extract the dataset archives into ``DATASET_PATH``.

    Returns early (after printing a message) when ``IMAGES_PATH`` and
    ``ANNOTATION_PATH`` are both directories containing exactly 120 entries
    each. Otherwise, for each of ``annotation.tar``, ``lists.tar`` and
    ``images.tar``, the archive is downloaded if the tar file is not on disk
    yet and is then extracted into ``DATASET_PATH``.

    Extraction is deliberately NOT conditional on a fresh download: reaching
    that point means the extracted folders failed verification, so an archive
    that is already on disk (e.g. from an interrupted earlier run) still has
    to be unpacked.
    """
    expected_entries = 120  # entry count required in Images/ and Annotation/

    if os.path.isdir(IMAGES_PATH) and os.path.isdir(ANNOTATION_PATH):
        if len(os.listdir(IMAGES_PATH)) == len(os.listdir(ANNOTATION_PATH)) == expected_entries:
            print('Files already downloaded and verified')
            return

    # The archives are fetched over plain HTTP without a checksum, so use the
    # safe "data" extraction filter on Python versions that provide it.
    extract_kwargs = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}

    for tar_filename in ['annotation.tar', 'lists.tar', 'images.tar']:
        tar_file_path = join(DATASET_PATH, tar_filename)
        if not os.path.exists(tar_file_path):
            url = DOWNLOAD_URL_PREFIX + '/' + tar_filename
            # No md5 is supplied (None), so the download is not integrity-checked.
            download_url(url, DATASET_PATH, tar_filename, None)

        print('Extracting downloaded file: ' + tar_file_path)
        with tarfile.open(tar_file_path, 'r') as tar_file:
            tar_file.extractall(DATASET_PATH, **extract_kwargs)


download()

## 2. 训练集与测试集分割

In [None]:
import scipy
import shutil

# Import the submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.io` is loaded on every SciPy version.
import scipy.io

# Maps each split name (also used as the output folder name) to its list file.
stage2file = {
    "train": "train_list.mat",
    "test": "test_list.mat",
}

# Copy every image named in a split list from Images/ into <stage>/,
# keeping the "<dir_name>/<image>" layout of the list entries.
for stage, file in stage2file.items():
    split_file = join(DATASET_PATH, "lists", file)
    data = scipy.io.loadmat(split_file)["annotation_list"]
    # Unwrap each entry to its relative path string; ".jpg" is appended
    # below, so the entries are used as extension-less paths.
    split = [it[0][0] for it in data]

    for file_path in split:
        src = join(DATASET_PATH, "Images", file_path + ".jpg")
        dst = join(DATASET_PATH, stage, file_path + ".jpg")
        # exist_ok avoids the separate check-then-create step for the
        # per-class directory.
        os.makedirs(os.path.dirname(dst), exist_ok=True)
        shutil.copy(src, dst)