In [12]:
import os
import pickle
import numpy as np

# Get the current working directory
current_dir = os.getcwd()

# Specify the name of the folder containing CIFAR-10 batches
folder_name = 'cifar10_batches'  # Replace with your actual folder name

# Construct the path to the folder
folder_path = os.path.join(current_dir, folder_name)

# Initialize empty lists to store data and labels
all_data = []
all_labels = []

# Load and concatenate training batches
for i in range(1, 6):
    batch_file_path = os.path.join(folder_path, f'data_batch_{i}')
    with open(batch_file_path, 'rb') as file:
        batch_data = pickle.load(file, encoding='bytes')
        all_data.append(batch_data[b'data'])
        all_labels.extend(batch_data[b'labels'])

# Load test batch
test_batch_file_path = os.path.join(folder_path, 'test_batch')
with open(test_batch_file_path, 'rb') as file:
    test_batch_data = pickle.load(file, encoding='bytes')
    all_data.append(test_batch_data[b'data'])
    all_labels.extend(test_batch_data[b'labels'])

# Convert the lists to NumPy arrays
x_train_and_test = np.concatenate(all_data, axis=0).astype(float)
y_train_and_test = np.array(all_labels).astype(float)

# Reshape the data if needed (CIFAR-10 images are 32x32x3)
x_train_and_test = x_train_and_test.reshape((len(x_train_and_test), 32, 32, 3))

# Split into training and test sets
x_train = x_train_and_test[:-len(test_batch_data[b'labels'])]
y_train = y_train_and_test[:-len(test_batch_data[b'labels'])]
x_test = x_train_and_test[-len(test_batch_data[b'labels']):]
y_test = y_train_and_test[-len(test_batch_data[b'labels']):]

# Print the shape of the loaded data
print("Training data shape:", x_train.shape)
print("Training labels shape:", y_train.shape)
print("Test data shape:", x_test.shape)
print("Test labels shape:", y_test.shape)


Training data shape: (50000, 32, 32, 3)
Training labels shape: (50000,)
Test data shape: (10000, 32, 32, 3)
Test labels shape: (10000,)


In [13]:
x_train_flat = x_train.reshape((len(x_train), -1))

In [9]:
print(x_train[1])

[[[154. 126. 105.]
  [102. 125. 155.]
  [172. 180. 142.]
  ...
  [ 88. 103.  94.]
  [ 65.  83.  90.]
  [ 79.  68.  67.]]

 [[136. 137. 122.]
  [132. 151. 181.]
  [203. 208. 208.]
  ...
  [ 92.  88.  78.]
  [ 87.  98.  76.]
  [ 67.  81.  91.]]

 [[146. 124.  88.]
  [ 85.  87.  84.]
  [ 75.  78.  69.]
  ...
  [169. 113.  89.]
  [ 84.  65.  56.]
  [ 88.  81.  63.]]

 ...

 [[158.  83.  55.]
  [ 46.  51.  52.]
  [ 48.  46.  57.]
  ...
  [134. 121.  61.]
  [ 51.  33.  21.]
  [ 53.  51. 107.]]

 [[172. 166. 123.]
  [140. 160. 155.]
  [139. 131. 126.]
  ...
  [ 96. 101. 135.]
  [ 87.  78.  29.]
  [ 84.  73.  94.]]

 [[166. 160. 170.]
  [163. 165. 171.]
  [180. 186. 174.]
  ...
  [ 42.  67. 101.]
  [122. 133. 136.]
  [139. 142. 144.]]]


In [16]:
import os
import pickle
import numpy as np

# Get the current working directory
current_dir = os.getcwd()

# Specify the name of the folder containing CIFAR-10 batches
folder_name = 'cifar10_batches'  # Replace with your actual folder name

# Construct the path to the folder
folder_path = os.path.join(current_dir, folder_name)

# Initialize empty lists to store data and labels
all_data = []
all_labels = []

# Load and concatenate training batches
for i in range(1, 6):
    batch_file_path = os.path.join(folder_path, f'data_batch_{i}')
    with open(batch_file_path, 'rb') as file:
        batch_data = pickle.load(file, encoding='bytes')
        all_data.append(batch_data[b'data'])
        all_labels.extend(batch_data[b'labels'])

# Load test batch
test_batch_file_path = os.path.join(folder_path, 'test_batch')
with open(test_batch_file_path, 'rb') as file:
    test_batch_data = pickle.load(file, encoding='bytes')
    all_data.append(test_batch_data[b'data'])
    all_labels.extend(test_batch_data[b'labels'])

# Convert the lists to NumPy arrays and cast to float
x_train_and_test = np.concatenate(all_data, axis=0).astype(float)
y_train_and_test = np.array(all_labels).astype(float)

# Reshape the data if needed (CIFAR-10 images are 32x32x3)
x_train_and_test = x_train_and_test.reshape((len(x_train_and_test), 32, 32, 3))

# Split into training and test sets
x_train = x_train_and_test[:-len(test_batch_data[b'labels'])]
y_train = y_train_and_test[:-len(test_batch_data[b'labels'])]
x_test = x_train_and_test[-len(test_batch_data[b'labels']):]
y_test = y_train_and_test[-len(test_batch_data[b'labels']):]

# Flatten each image in the training set
x_train_flat = x_train.reshape((len(x_train), -1))  # -1 automatically computes the size of the remaining dimensions

# Print the data types and the shape of the loaded and flattened training data
print("Data type of x_train_flat:", x_train_flat.dtype)
print("Flattened training data shape:", x_train_flat.shape)


Data type of x_train_flat: float64
Flattened training data shape: (50000, 3072)


In [17]:
print(x_train_flat[1])

[154. 126. 105. ... 139. 142. 144.]


In [18]:
import os
import pickle
import numpy as np

# Get the current working directory
current_dir = os.getcwd()

# Specify the name of the folder containing CIFAR-10 batches
folder_name = 'cifar10_batches'  # Replace with your actual folder name

# Construct the path to the folder
folder_path = os.path.join(current_dir, folder_name)

# Initialize empty lists to store data and labels
all_data = []
all_labels = []

# Load and concatenate training batches
for i in range(1, 6):
    batch_file_path = os.path.join(folder_path, f'data_batch_{i}')
    with open(batch_file_path, 'rb') as file:
        batch_data = pickle.load(file, encoding='bytes')
        all_data.append(batch_data[b'data'])
        all_labels.extend(batch_data[b'labels'])

# Load test batch
test_batch_file_path = os.path.join(folder_path, 'test_batch')
with open(test_batch_file_path, 'rb') as file:
    test_batch_data = pickle.load(file, encoding='bytes')
    all_data.append(test_batch_data[b'data'])
    all_labels.extend(test_batch_data[b'labels'])

# Convert the lists to NumPy arrays, cast to float, and normalize
x_train_and_test = np.concatenate(all_data, axis=0).astype(float) / 255.0
y_train_and_test = np.array(all_labels).astype(float)

# Reshape the data if needed (CIFAR-10 images are 32x32x3)
x_train_and_test = x_train_and_test.reshape((len(x_train_and_test), 32, 32, 3))

# Split into training and test sets
x_train = x_train_and_test[:-len(test_batch_data[b'labels'])]
y_train = y_train_and_test[:-len(test_batch_data[b'labels'])]
x_test = x_train_and_test[-len(test_batch_data[b'labels']):]
y_test = y_train_and_test[-len(test_batch_data[b'labels']):]

# Flatten each image in the training set
x_train_flat = x_train.reshape((len(x_train), -1))  # -1 automatically computes the size of the remaining dimensions

# Print the data types and the shape of the loaded, normalized, and flattened training data
print("Data type of x_train_flat:", x_train_flat.dtype)
print("Normalized and flattened training data shape:", x_train_flat.shape)


Data type of x_train_flat: float64
Normalized and flattened training data shape: (50000, 3072)


In [19]:
print(x_train_flat[1])

[0.60392157 0.49411765 0.41176471 ... 0.54509804 0.55686275 0.56470588]
