In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import warnings

from skimage.io import imread, imshow, show, imshow_collection
from skimage.transform import resize, rescale, rotate

# Silence every warning for the rest of the session. Note that this also hides
# deprecation notices from the imported libraries.
warnings.filterwarnings('ignore')

# Prepare Data

In [2]:
# Walk train/<category>/<image> and build one flattened pixel row per image,
# labelled with the name of the category directory it was found in.
train_dir = "train"

# os.listdir returns entries in arbitrary order, so sort for a reproducible
# sample order (later cells split the arrays by position). Keep directories
# only, so a stray file such as .DS_Store does not break the inner listing.
categories = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)

data_train = []
labels_train = []

for category in categories:
    category_dir = os.path.join(train_dir, category)
    for filename in sorted(os.listdir(category_dir)):
        image_path = os.path.join(category_dir, filename)
        # flatten() turns the image array into a single 1-D feature row.
        data_train.append(imread(image_path).flatten())
        labels_train.append(category)
    print(category, "DONE")

# Convert the accumulated Python lists to NumPy arrays for slicing and pickling.
data_train = np.asarray(data_train)
labels_train = np.asarray(labels_train)

airplane DONE
automobile DONE
bird DONE
cat DONE
deer DONE
dog DONE
frog DONE
horse DONE
ship DONE
truck DONE


In [3]:
# Walk test/<category>/<image> and build one flattened pixel row per image,
# labelled with the name of the category directory it was found in.
test_dir = "test"

# os.listdir returns entries in arbitrary order, so sort for a reproducible
# sample order. Keep directories only, so a stray file such as .DS_Store does
# not break the inner listing.
categories = sorted(
    entry for entry in os.listdir(test_dir)
    if os.path.isdir(os.path.join(test_dir, entry))
)

data_test = []
labels_test = []

for category in categories:
    category_dir = os.path.join(test_dir, category)
    for filename in sorted(os.listdir(category_dir)):
        image_path = os.path.join(category_dir, filename)
        # flatten() turns the image array into a single 1-D feature row.
        data_test.append(imread(image_path).flatten())
        labels_test.append(category)
    print(category, "DONE")

# Convert the accumulated Python lists to NumPy arrays for pickling.
data_test = np.asarray(data_test)
labels_test = np.asarray(labels_test)

airplane DONE
automobile DONE
bird DONE
cat DONE
deer DONE
dog DONE
frog DONE
horse DONE
ship DONE
truck DONE


## Split into two batches

In [4]:
# Cut the colour training set into two consecutive batches at row 25000.
# The label array is cut at the same row so samples and labels stay aligned.
data_train_1, data_train_2 = np.split(data_train, [25000])
labels_train_1, labels_train_2 = np.split(labels_train, [25000])

## Save data with pickle

In [5]:
# Persist the two colour training batches and the test set as pickle files in
# the current working directory.
color_pickles = (
    ("data_train_flatten_batch_1.pkl", data_train_1),
    ("data_train_flatten_batch_2.pkl", data_train_2),
    ("labels_train_batch_1.pkl", labels_train_1),
    ("labels_train_batch_2.pkl", labels_train_2),
    ("data_test_flatten.pkl", data_test),
    ("labels_test.pkl", labels_test),
)

for pickle_name, payload in color_pickles:
    # The context manager flushes and closes each file as soon as the dump
    # finishes, so no handle is left open waiting for garbage collection.
    with open(pickle_name, "wb") as pickle_file:
        pickle.dump(payload, pickle_file)

# Grayscale (Black and White) Data

In [6]:
# Walk train/<category>/<image> again, this time reading each image with
# as_gray=True, and build one flattened row per image plus its category label.
train_dir = "train"

# os.listdir returns entries in arbitrary order, so sort for a reproducible
# sample order (later cells split the arrays by position). Keep directories
# only, so a stray file such as .DS_Store does not break the inner listing.
categories = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)

data_train_bw = []
labels_train_bw = []

for category in categories:
    category_dir = os.path.join(train_dir, category)
    for filename in sorted(os.listdir(category_dir)):
        image_path = os.path.join(category_dir, filename)
        # flatten() turns the image array into a single 1-D feature row.
        data_train_bw.append(imread(image_path, as_gray=True).flatten())
        labels_train_bw.append(category)
    print(category, "DONE")

# Convert the accumulated Python lists to NumPy arrays for slicing and pickling.
data_train_bw = np.asarray(data_train_bw)
labels_train_bw = np.asarray(labels_train_bw)

airplane DONE
automobile DONE
bird DONE
cat DONE
deer DONE
dog DONE
frog DONE
horse DONE
ship DONE
truck DONE


In [7]:
# Walk test/<category>/<image>, reading each image with as_gray=True, and build
# one flattened row per image plus its category label.
test_dir = "test"

# os.listdir returns entries in arbitrary order, so sort for a reproducible
# sample order. Keep directories only, so a stray file such as .DS_Store does
# not break the inner listing.
categories = sorted(
    entry for entry in os.listdir(test_dir)
    if os.path.isdir(os.path.join(test_dir, entry))
)

data_test_bw = []
labels_test_bw = []

for category in categories:
    category_dir = os.path.join(test_dir, category)
    for filename in sorted(os.listdir(category_dir)):
        image_path = os.path.join(category_dir, filename)
        # flatten() turns the image array into a single 1-D feature row.
        data_test_bw.append(imread(image_path, as_gray=True).flatten())
        labels_test_bw.append(category)
    print(category, "DONE")

# Convert the accumulated Python lists to NumPy arrays for pickling.
data_test_bw = np.asarray(data_test_bw)
labels_test_bw = np.asarray(labels_test_bw)

airplane DONE
automobile DONE
bird DONE
cat DONE
deer DONE
dog DONE
frog DONE
horse DONE
ship DONE
truck DONE


## Split into five batches

In [8]:
# Row indices at which the grayscale training set is cut. Four cut points give
# five consecutive batches, the last one holding everything from row 40000 on.
bw_cut_rows = [10000, 20000, 30000, 40000]

(
    data_train_bw_1,
    data_train_bw_2,
    data_train_bw_3,
    data_train_bw_4,
    data_train_bw_5,
) = np.split(data_train_bw, bw_cut_rows)

# Labels are cut at the same rows so every batch keeps its matching labels.
(
    labels_train_bw_1,
    labels_train_bw_2,
    labels_train_bw_3,
    labels_train_bw_4,
    labels_train_bw_5,
) = np.split(labels_train_bw, bw_cut_rows)

## Save data with pickle

In [9]:
# Persist the five grayscale training batches and the grayscale test set as
# pickle files in the current working directory.

# Save train
bw_train_pickles = (
    ("data_train_bw_flatten_batch_1.pkl", data_train_bw_1),
    ("data_train_bw_flatten_batch_2.pkl", data_train_bw_2),
    ("data_train_bw_flatten_batch_3.pkl", data_train_bw_3),
    ("data_train_bw_flatten_batch_4.pkl", data_train_bw_4),
    ("data_train_bw_flatten_batch_5.pkl", data_train_bw_5),
    ("labels_train_bw_batch_1.pkl", labels_train_bw_1),
    ("labels_train_bw_batch_2.pkl", labels_train_bw_2),
    ("labels_train_bw_batch_3.pkl", labels_train_bw_3),
    ("labels_train_bw_batch_4.pkl", labels_train_bw_4),
    ("labels_train_bw_batch_5.pkl", labels_train_bw_5),
)

# Save test
bw_test_pickles = (
    ("data_test_bw_flatten.pkl", data_test_bw),
    ("labels_test_bw.pkl", labels_test_bw),
)

for pickle_name, payload in bw_train_pickles + bw_test_pickles:
    # The context manager flushes and closes each file as soon as the dump
    # finishes, so no handle is left open waiting for garbage collection.
    with open(pickle_name, "wb") as pickle_file:
        pickle.dump(payload, pickle_file)