# Imports

In [2]:
import os 
import sys
import jsonlines
from google.cloud import aiplatform, storage
from google.protobuf import json_format
from datetime import datetime

%env GOOGLE_APPLICATION_CREDENTIALS /media/david/warehaus1/youtube_series/proven-script.json

REGION = 'us-central1'
PROJECT_ID = 'proven-script-347020'

# initiate project
aiplatform.init(project=PROJECT_ID, location=REGION)

env: GOOGLE_APPLICATION_CREDENTIALS=/media/david/warehaus1/youtube_series/proven-script.json


# Create and import dataset

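- Create a Cloud Storage bucket
- Upload the data file (`bbc-text.csv`) to the bucket
- Create the dataset from the uploaded file (the file must conform to the text data preparation requirements)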

In [2]:
# Create the Cloud Storage bucket that will hold the data file;
# `-l us-central1` sets the bucket location.
!gsutil mb -l us-central1 gs://text-classification-347020 

Creating gs://text-classification-347020/...


In [3]:
# Copy the local bbc-text.csv file into the bucket.
!gsutil cp bbc-text.csv gs://text-classification-347020

Copying file://bbc-text.csv [Content-Type=text/csv]...
\ [1 files][  4.8 MiB/  4.8 MiB]                                                
Operation completed over 1 objects/4.8 MiB.                                      


In [2]:
# Location of the uploaded CSV in Cloud Storage.
src_uri = "gs://text-classification-347020/bbc-text.csv"

# Use a second-resolution timestamp so each run gets a distinct display name.
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
display_name = "bbc-text-data-" + timestamp

In [9]:
# Create a Vertex AI text dataset and import the CSV at `src_uri` into it,
# using the single-label classification import schema.
# sync=True runs the call synchronously rather than in the background.
ds = aiplatform.TextDataset.create(
    gcs_source=src_uri,
    display_name=display_name,
    import_schema_uri=aiplatform.schema.dataset.ioformat.text.single_label_classification,
    sync=True,
)

INFO:google.cloud.aiplatform.datasets.dataset:Creating TextDataset
INFO:google.cloud.aiplatform.datasets.dataset:Create TextDataset backing LRO: projects/167901156608/locations/us-central1/datasets/4506251649416691712/operations/1540783645873143808
INFO:google.cloud.aiplatform.datasets.dataset:TextDataset created. Resource name: projects/167901156608/locations/us-central1/datasets/4506251649416691712
INFO:google.cloud.aiplatform.datasets.dataset:To use this TextDataset in another session:
INFO:google.cloud.aiplatform.datasets.dataset:ds = aiplatform.TextDataset('projects/167901156608/locations/us-central1/datasets/4506251649416691712')
INFO:google.cloud.aiplatform.datasets.dataset:Importing TextDataset data: projects/167901156608/locations/us-central1/datasets/4506251649416691712
INFO:google.cloud.aiplatform.datasets.dataset:Import TextDataset data backing LRO: projects/167901156608/locations/us-central1/datasets/4506251649416691712/operations/3526871081543532544
INFO:google.cloud.aipl

# Develop a text classification model with Vertex AI AutoML

In [25]:
# Define the AutoML training job (it is started in the next cell).
# A fresh timestamp keeps the job's display name distinct between runs;
# the same `timestamp` value is reused for the model name in the next cell.
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
training_job_display_name = "text-training-job-" + timestamp

# Single-label classification: one category per document.
job = aiplatform.AutoMLTextTrainingJob(
    prediction_type="classification",
    multi_label=False,
    display_name=training_job_display_name,
)

In [26]:
# Run the training job defined in the previous cell.
# The model name reuses `timestamp` from that cell.
model_display_name = "text-classification-model-" + timestamp

# Re-attach to the existing dataset by its numeric ID (the ID reported in the
# dataset-creation log above) instead of relying on `ds` still being in memory.
text_dataset = aiplatform.TextDataset("4506251649416691712")

# 70% / 20% / 10% train / validation / test split.
# sync=True runs the call synchronously rather than in the background.
model = job.run(
    dataset=text_dataset,
    model_display_name=model_display_name,
    test_fraction_split=0.1,
    validation_fraction_split=0.2,
    training_fraction_split=0.7,
    sync=True,
)

View Training:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/1436875330282323968?project=167901156608
AutoMLTextTrainingJob projects/167901156608/locations/us-central1/trainingPipelines/1436875330282323968 current state:
PipelineState.PIPELINE_STATE_RUNNING
AutoMLTextTrainingJob projects/167901156608/locations/us-central1/trainingPipelines/1436875330282323968 current state:
PipelineState.PIPELINE_STATE_RUNNING
AutoMLTextTrainingJob projects/167901156608/locations/us-central1/trainingPipelines/1436875330282323968 current state:
PipelineState.PIPELINE_STATE_RUNNING
AutoMLTextTrainingJob projects/167901156608/locations/us-central1/trainingPipelines/1436875330282323968 current state:
PipelineState.PIPELINE_STATE_RUNNING
AutoMLTextTrainingJob projects/167901156608/locations/us-central1/trainingPipelines/1436875330282323968 current state:
PipelineState.PIPELINE_STATE_RUNNING
AutoMLTextTrainingJob projects/167901156608/locations/us-central1/trainingPipelines/1436

KeyboardInterrupt: 

# Deploying Model

In [3]:
# Look up the trained model by its display name.
# Model.list returns a list, so `model` is a list here and the
# following cells index into it with model[0].
model = aiplatform.Model.list(
    filter='display_name="text-classification-model-20220430072547"',
)

In [5]:
# Display the first entry of the lookup result to confirm which model was found.
model[0]

<google.cloud.aiplatform.models.Model object at 0x7fe0eb074bb0> 
resource name: projects/167901156608/locations/us-central1/models/882094198499573760

In [8]:
# Deploy the first matching model under a timestamped display name.
# No endpoint is passed, and the log below shows a new Endpoint being created.
# sync=True runs the call synchronously rather than in the background.
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
deployed_model_display_name = "deployed-text-classification-model-" + timestamp
endpoint = model[0].deploy(
    sync=True,
    deployed_model_display_name=deployed_model_display_name,
)

Creating Endpoint
Create Endpoint backing LRO: projects/167901156608/locations/us-central1/endpoints/569116014629879808/operations/3537769440798048256
Endpoint created. Resource name: projects/167901156608/locations/us-central1/endpoints/569116014629879808
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/167901156608/locations/us-central1/endpoints/569116014629879808')
Deploying model to Endpoint : projects/167901156608/locations/us-central1/endpoints/569116014629879808
Deploy Endpoint model backing LRO: projects/167901156608/locations/us-central1/endpoints/569116014629879808/operations/4132244591610953728
Endpoint model deployed. Resource name: projects/167901156608/locations/us-central1/endpoints/569116014629879808
