# Training with AutoML

Exercise:
* Generate a CSV describing the dataset and upload the file to the inputs bucket
* Create a dataset in the UI using your dataset file
* Train a model using AutoML (max 1 hour, please only do this once)
* Deploy the trained model to an endpoint
* Submit some test samples to test the endpoint

Bonus:
* Implement the training and deployment of the AutoML model as a Vertex AI pipeline (for an example, see https://cloud.google.com/vertex-ai/docs/pipelines/build-pipeline).

In [None]:
from dataclasses import dataclass
from typing import Iterable

from google.cloud.storage import Client

@dataclass
class DatasetItem:
    """
    A single image entry of the dataset.

    Attributes:
        ml_use: How the item should be used in the model (should be TRAINING or TEST).
        url: GCS url to the image (e.g. gs://<bucket>/path/to/image.jpg).
        label: Label of the image (e.g. bag, sneaker).
    """

    ml_use: str
    url: str
    label: str
    

# Maps the top-level folder name of a blob to the matching ml_use value.
ML_USE_MAP = dict(train="TRAINING", test="TEST")

def generate_items_from_bucket(bucket_name: str) -> Iterable[DatasetItem]:
    """
    Lazily yields a train/test dataset item for every image in the given bucket.

    Only blobs laid out as ``<split>/<label>/<file>.jpg`` are used, where
    ``<split>`` is one of the keys of ``ML_USE_MAP`` (``train`` or ``test``).
    Any other blob in the bucket is skipped.

    Args:
        bucket_name: Name of the GCS bucket to scan (without the ``gs://`` prefix).

    Yields:
        One ``DatasetItem`` per matching blob, holding the mapped ``ml_use``,
        the full ``gs://`` url of the blob and the label taken from its folder.
    """

    client = Client()

    for blob in client.list_blobs(bucket_name):
        if not blob.name.endswith(".jpg"):
            continue

        parts = blob.name.split("/")

        # A plain prefix check would also let through names such as
        # "training/x/y.jpg" or "train/a/b/c.jpg", which then fail on the
        # ML_USE_MAP lookup or on the three-way unpacking below.
        if len(parts) != 3 or parts[0] not in ML_USE_MAP:
            continue

        ml_use, label, _ = parts
        url = f"gs://{bucket_name}/{blob.name}"
        yield DatasetItem(ml_use=ML_USE_MAP[ml_use], url=url, label=label)
            
# Should return something like:
#   DatasetItem(ml_use='TEST', url='gs://gdd-cb-vertex-fashion-inputs/test/bag/bag0.jpg', label='bag')
next(generate_items_from_bucket("gdd-cb-vertex-fashion-inputs"))

In [None]:
import csv
from pathlib import Path
from typing import Union

def write_items_to_csv(dataset_items: Iterable[DatasetItem], output_path: Union[str, Path]):
    """
    Writes the given dataset items to a CSV file, one row per item.

    Each row holds the item's ``ml_use``, ``url`` and ``label`` (in that order);
    no header row is written. An existing file at ``output_path`` is overwritten.

    Args:
        dataset_items: Items to write; iterated exactly once, so a generator is fine.
        output_path: Location of the CSV file to create.
    """
    # newline="" leaves line endings to the csv module; without it, platforms
    # that translate "\n" on write (e.g. Windows) produce "\r\r\n" row endings.
    with open(output_path, "w", encoding="utf-8", newline="") as file_:
        writer = csv.writer(file_, quoting=csv.QUOTE_MINIMAL)
        writer.writerows(
            (item.ml_use, item.url, item.label) for item in dataset_items
        )
            
# Stream every train/test item found in the inputs bucket into a local CSV file.
write_items_to_csv(
    dataset_items=generate_items_from_bucket("gdd-cb-vertex-fashion-inputs"),
    output_path="fashion_dataset.csv",
)

In [None]:
# Print the last lines of the generated CSV as a quick sanity check.
! tail fashion_dataset.csv

In [None]:
# Upload the dataset CSV to the root of the inputs bucket.
! gsutil cp fashion_dataset.csv gs://gdd-cb-vertex-fashion-inputs