MNISTをダウンロードしてCSVにします

In [2]:
import numpy as np
import os
import gzip

# Download one MNIST archive unless it is already on disk
def download_mnist(url, filename):
    """Fetch ``url`` into ``filename`` unless that path already exists.

    The payload is written to a sibling ``<filename>.part`` file first and
    only renamed to ``filename`` once the transfer has finished. An
    interrupted or failed download therefore never leaves a truncated file
    at ``filename`` that a later call would mistake for a complete one.

    Args:
        url: Location to download from (any scheme ``urlretrieve`` accepts).
        filename: Destination path on the local filesystem.

    Raises:
        Any exception raised by ``urlretrieve`` or ``os.replace`` is
        propagated after the temporary file has been cleaned up.
    """
    if os.path.exists(filename):
        return

    from urllib.request import urlretrieve

    target = os.fspath(filename)
    # Build the temporary name with the same string type as the target path.
    partial = target + (b'.part' if isinstance(target, bytes) else '.part')
    try:
        urlretrieve(url, partial)
        # Rename within the same directory so the final file appears all at once.
        os.replace(partial, target)
    finally:
        # After a successful rename the temporary file is gone; anything still
        # present here is the residue of a failed transfer.
        if os.path.exists(partial):
            os.remove(partial)

# Decode the leading big-endian 32-bit fields of an IDX header and check its magic number
def _read_idx_header(raw, field_count, expected_magic, description):
    header_size = 4 * field_count
    if len(raw) < header_size:
        raise ValueError('{} is too short to hold an IDX header'.format(description))
    fields = [int.from_bytes(raw[pos:pos + 4], 'big') for pos in range(0, header_size, 4)]
    if fields[0] != expected_magic:
        raise ValueError('{} has IDX magic number {:#010x}, expected {:#010x}'.format(
            description, fields[0], expected_magic))
    return fields[1:]


# Load a gzip-compressed MNIST image/label pair
def load_mnist(images_path, labels_path):
    """Read gzip-compressed IDX image and label files into NumPy arrays.

    The headers are validated instead of being skipped blindly, so passing
    the wrong file (or the two paths swapped) fails with a clear error
    rather than returning misaligned data. The row width is taken from the
    image header, which is 784 for the standard 28x28 MNIST files.

    Args:
        images_path: Path to the gzip-compressed IDX3 image file.
        labels_path: Path to the gzip-compressed IDX1 label file.

    Returns:
        ``(images, labels)`` where ``images`` is a ``uint8`` array of shape
        ``(count, rows * cols)`` and ``labels`` is a ``uint8`` array of shape
        ``(count,)``. Both are views over the decompressed bytes.

    Raises:
        ValueError: If a header is missing or has the wrong magic number, if
            the payload size disagrees with the header, or if the two files
            hold a different number of items.
    """
    with gzip.open(labels_path, 'rb') as labels_file:
        label_bytes = labels_file.read()
    with gzip.open(images_path, 'rb') as images_file:
        image_bytes = images_file.read()

    # IDX magic numbers: 0x0801 = unsigned-byte vector, 0x0803 = unsigned-byte 3-D array.
    (label_count,) = _read_idx_header(label_bytes, 2, 0x00000801, 'label file')
    image_count, rows, cols = _read_idx_header(image_bytes, 4, 0x00000803, 'image file')

    labels = np.frombuffer(label_bytes, dtype=np.uint8, offset=8)
    pixels = np.frombuffer(image_bytes, dtype=np.uint8, offset=16)

    if len(labels) != label_count:
        raise ValueError('label file declares {} labels but contains {}'.format(
            label_count, len(labels)))
    if pixels.size != image_count * rows * cols:
        raise ValueError('image file declares {}x{}x{} pixels but contains {}'.format(
            image_count, rows, cols, pixels.size))
    if image_count != label_count:
        raise ValueError('image count {} does not match label count {}'.format(
            image_count, label_count))

    images = pixels.reshape(image_count, rows * cols)
    return images, labels

# Download the four MNIST archives (train/test images and labels), then load both splits
mnist_sources = (
    ('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', 'train-images.gz'),
    ('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', 'train-labels.gz'),
    ('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', 'test-images.gz'),
    ('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', 'test-labels.gz'),
)
for source_url, local_name in mnist_sources:
    download_mnist(source_url, local_name)

# Each split yields an (images, labels) pair
train_images, train_labels = load_mnist(
    images_path='train-images.gz',
    labels_path='train-labels.gz',
)
test_images, test_labels = load_mnist(
    images_path='test-images.gz',
    labels_path='test-labels.gz',
)

# Write the training split, then the test split, as CSV: one sample per line,
# label first, followed by that sample's pixel values
csv_targets = (
    ('train.csv', train_labels, train_images),
    ('test.csv', test_labels, test_images),
)
for csv_name, split_labels, split_images in csv_targets:
    with open(csv_name, 'w') as csv_out:
        for label, pixels in zip(split_labels, split_images):
            fields = [str(label)]
            fields.extend(map(str, pixels))
            csv_out.write(','.join(fields) + '\n')

# Compress each CSV file into its gzip counterpart
compression_jobs = (
    ('train.csv', 'train_csv.gz'),
    ('test.csv', 'test_csv.gz'),
)
for csv_name, archive_name in compression_jobs:
    with open(csv_name, 'rb') as plain, gzip.open(archive_name, 'wb') as packed:
        for line in plain:
            packed.write(line)

# Delete the uncompressed CSV files, only after both archives have been written
for csv_name, _ in compression_jobs:
    os.remove(csv_name)
