In [1]:
# Remote archive the flower images are downloaded from.
DATASET_URL = "https://thor.robots.ox.ac.uk/datasets/flowers-102/102flowers.tgz"
# Local directory the archive is unpacked into.
DATASET_PATH = "../dataset"
# SQLite file holding the image index.
DATABASE_PATH = "../database/flower.db.sqlite"
# Directory the classification pipeline is saved to.
SAVED_MODEL_PATH = "./saved"


In [2]:
# Hugging Face checkpoint: https://huggingface.co/dima806/oxford_flowers_image_detection
# AutoModelForImageClassification stays imported for anyone who wants to load the
# bare model without the pipeline wrapper.
from transformers import AutoModelForImageClassification, pipeline

# Single source of truth for the checkpoint name.
MODEL_ID = "dima806/oxford_flowers_image_detection"

# High-level helper used for all predictions in this notebook.
pipe = pipeline("image-classification", model=MODEL_ID)

# Reuse the model instance the pipeline already holds instead of loading the
# same checkpoint a second time (saves load time and memory).
model = pipe.model


  from .autonotebook import tqdm as notebook_tqdm





In [4]:
# Download and unpack the dataset.
# Dataset home page: https://www.robots.ox.ac.uk/%7Evgg/data/flowers/102/
import tarfile
import urllib.request

# Stream the archive straight from the HTTP response into the extractor
# ("r|gz" reads a non-seekable gzip stream), so no temporary .tgz is written.
# Both handles are context-managed so they are closed even if extraction fails.
with urllib.request.urlopen(DATASET_URL) as response:
    with tarfile.open(fileobj=response, mode="r|gz") as archive:
        if hasattr(tarfile, "data_filter"):
            # Refuse absolute paths, path traversal and special files from the
            # remote archive where this Python supports extraction filters.
            archive.extractall(DATASET_PATH, filter="data")
        else:
            archive.extractall(DATASET_PATH)

In [None]:
# Show every label known to this model.
# NOTE(review): id2label is presumably a mapping from class index to label name — confirm.
print(model.config.id2label)

In [None]:
# Run the classifier over every image in the dataset directory.

from PIL import Image
from pathlib import Path

# Maps image path (as str) -> whatever the pipeline returns for that image.
data = {}

# Search recursively: the archive may unpack its images into a subfolder, which
# a top-level glob('*.jpg') would miss. Sorting makes the processing order (and
# therefore the database row order) reproducible across runs.
image_paths = sorted(Path(DATASET_PATH).rglob('*.jpg'))

for path in image_paths:
    path_str = str(path)
    # Context manager closes the file handle as soon as the prediction is done.
    with Image.open(path_str) as image:
        data[path_str] = pipe(image)

In [None]:
# Set up the database
import sqlite3
from pathlib import Path

# sqlite3 creates the database file but not its parent directory, so make sure
# the directory exists before connecting.
Path(DATABASE_PATH).parent.mkdir(parents=True, exist_ok=True)

# For a throwaway in-memory database use sqlite3.connect(':memory:') instead.
conn = sqlite3.connect(DATABASE_PATH)
c = conn.cursor()

# flower_img: one row per image file, with an auto-assigned pid.
c.execute('''CREATE TABLE IF NOT EXISTS flower_img(pid INTEGER PRIMARY KEY AUTOINCREMENT, filename TEXT)''')
# flowers_vector: FTS5 full-text table holding label tokens alongside a pid column.
c.execute('''CREATE VIRTUAL TABLE IF NOT EXISTS flowers_vector USING fts5(pid, tokens)''')

In [None]:
# Process predict data and save to DB
import os

def process_label(in_label: str) -> str:
    """Remove every occurrence of "flower" from a label and trim outer whitespace."""
    without_flower = "".join(in_label.split("flower"))
    return without_flower.strip()

def save_img_to_db(filename, predicts, min_score=0.1):
    """Store one image row plus one search row per sufficiently confident prediction.

    Uses the notebook-level cursor ``c`` and connection ``conn``.

    Args:
        filename: Value written to ``flower_img.filename``.
        predicts: Iterable of mappings with ``"label"`` and ``"score"`` keys.
        min_score: Predictions scoring below this value are skipped.
    """
    c.execute("INSERT INTO flower_img(filename) VALUES (?)", [filename])
    # Capture the image's row id immediately: each later INSERT on this cursor
    # overwrites c.lastrowid, so reading it inside the loop would link the
    # second and following predictions to the wrong pid.
    pid = c.lastrowid

    for predict in predicts:
        if predict["score"] < min_score:
            continue
        # NOTE(review): `rank` is not a declared column of flowers_vector; it looks
        # like FTS5's hidden column, so the score may not actually be stored — confirm.
        c.execute("INSERT INTO flowers_vector(pid, tokens, rank) VALUES (?, ?, ?)",
                  [pid, process_label(predict["label"]), predict["score"]])

    # One commit per image: the image row and its tokens are persisted together.
    conn.commit()

# Store each image under its bare file name together with its predictions.
for path, predicts in data.items():
    save_img_to_db(os.path.basename(path), predicts)

In [None]:
# Close the SQLite connection created by sqlite3.connect earlier in the notebook.
# Run this only after all rows have been written.
conn.close()

In [None]:
# Save the pipeline to SAVED_MODEL_PATH ("./saved").
# NOTE(review): presumably writes the model weights and config so the pipeline
# can be reloaded from disk — confirm what is included.
pipe.save_pretrained(SAVED_MODEL_PATH)