In [2]:
import tensorflow.keras as keras

In [3]:
(xtrain, ytrain), (xtest,ytest) = keras.datasets.fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [6]:
print(xtrain.shape)
print(ytrain.shape)
print(xtest.shape)
print(ytest.shape)

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


In [11]:
xtrain = xtrain.astype('float32')/255
xtest = xtest.astype('float32')/255

In [12]:
import sqlite3

In [13]:
conn = sqlite3.connect('fashion_mnist.db')

In [14]:
conn.execute('''CREATE TABLE IF NOT EXISTS images
             (id INTEGER PRIMARY KEY AUTOINCREMENT,
             image BLOB NOT NULL, label INTEGER NOT NULL);''')

<sqlite3.Cursor at 0x7fecb6faeab0>

In [15]:
for i in range(xtrain.shape[0]):
    conn.execute('INSERT INTO images (image, label) VALUES (?, ?)',
                [sqlite3.Binary(xtrain[i]), ytrain[i]])
    
conn.commit()

In [16]:
for i in range(xtest.shape[0]):
    conn.execute('INSERT INTO images (image, label) VALUES (?, ?)',
                [sqlite3.Binary(xtest[i]), xtest[i]])
    
conn.commit()
conn.close()

In [17]:
# The first line imports a library called sqlite3, which allows us to work with SQLite databases in Python;
# We then create a connection to the database;
# Next, we create a table in the database called “images”;
# We use a loop to loop through each image in the training data and insert it into the “images” table (along with the labels);
# We use the commit() method to save the changes we made to the database;
# We then use another loop to loop through each image in the test data and insert it into the “images” table (along with the labels);
# We use the commit() method again to save the changes we made to the database;
# Finally, we close the connection to the data

In [18]:
# So this is how we can create a Data ETL pipeline using Python. Our ETL pipeline takes the 
# Fashion MNIST dataset and stores it in an SQLite database so that we can easily access and manipulate 
# the data later.

In [19]:
import sqlite3

In [20]:
conn = sqlite3.connect('fashion_mnist.db')
cursor = conn.cursor()

In [21]:
cursor.execute('SELECT * FROM images')
rows = cursor.fetchall()

In [22]:
import pandas as pd

In [23]:
data = pd.read_sql_query('SELECT * FROM images', conn)

In [25]:
data.head()

Unnamed: 0,id,image,label
0,1,"b""\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00...",b'\t'
1,2,b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00...,b'\x00'
2,3,b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00...,b'\x00'
3,4,"b""\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00...",b'\x03'
4,5,b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00...,b'\x00'
