# Google AutoML Evaluation
At the time of execution, Google AutoML was trainable through the UI; the test dataset could then be evaluated offline, as shown here.

In [None]:
from google.cloud import automl_v1beta1
from google.cloud.automl_v1beta1.proto import service_pb2
from google.protobuf.json_format import MessageToDict


import os
import sys
import pandas as pd
import numpy as np
import datetime
import pickle
import time
import requests
import json
from joblib import Parallel, delayed

# Config Params
This script assumes you already have an AutoML project set up, with the data uploaded and a model trained on it. The script then connects to AutoML and runs predictions.

In [None]:
# Dataset names (keys of `dataset_models`) that the driver loop should skip.
skip_sets = []  # List ones to skip here

# Directory that holds one sub-directory per dataset; joined with each dataset name.
root_data = '{PATH_TO_ROOT_DATASET_DIR}'

# Upper bound on the number of test files sent for prediction per dataset.
max_predict = 1000

# Google Cloud project that owns the trained AutoML models.
project_id = '{PROJECT_NAME}'

# NOTE(review): not referenced anywhere in the visible code; kept for compatibility.
skip_tests = []

# List of dataset name and model IDs that were trained (from rsanders2@urbn.com account)
# Each value is passed as `model_id` when the dataset is evaluated.
dataset_models = {
    'fashion_mnist_10p': '{DATASET_API_KEY}',
    'cifar10_20p': '{DATASET_API_KEY}',
    'uo_dress': '{DATASET_API_KEY}',
    'fashion_mnist_tiny': '{DATASET_API_KEY}',
    'cifar10_tiny': '{DATASET_API_KEY}',
    'uo_dress_tiny': '{DATASET_API_KEY}',
    'mnist_tiny': '{DATASET_API_KEY}',
}

# Prediction API Helper Function

In [None]:
def get_prediction(file_path, project_id, model_id):
    """Send one image file to an AutoML model and return the prediction response.

    Args:
        file_path: Path of the image to classify; it is read in binary mode.
        project_id: Google Cloud project identifier placed in the model resource name.
        model_id: AutoML model identifier placed in the model resource name.

    Returns:
        The object returned by ``PredictionServiceClient.predict`` for this image.
    """
    client = automl_v1beta1.PredictionServiceClient()

    with open(file_path, 'rb') as image_file:
        image_bytes = image_file.read()

    # Fully-qualified model resource name; the location is fixed to us-central1.
    model_resource = 'projects/{}/locations/us-central1/models/{}'.format(project_id, model_id)

    # NOTE(review): a threshold of '0' presumably makes the service return a
    # score for every label rather than only confident ones -- confirm.
    response = client.predict(
        model_resource,
        {'image': {'image_bytes': image_bytes}},
        {'score_threshold': '0'},
    )
    return response  # the call blocks until the response is available

# Function to evaluate dataset using AutoML model given above params

In [None]:
def evaluate(dataset_path, project_id, model_id):
    """Predict the test split of one dataset with an AutoML model and pickle the results.

    ``labels.csv`` inside ``dataset_path`` is read without a header: column 0
    is a file path relative to ``dataset_path`` and column 1 is the label.
    Rows whose path contains ``train`` or ``val`` form the training set (used
    only to derive the class labels); rows whose path contains ``test`` are
    sent for prediction. If there are more test files than the module-level
    ``max_predict``, a uniform random subset of that size is used.

    Args:
        dataset_path: Directory of the dataset; its base name is used in the
            name of the results file.
        project_id: Google Cloud project identifier forwarded to ``get_prediction``.
        model_id: AutoML model identifier forwarded to ``get_prediction``.

    Returns:
        Name of the pickle file written to the current working directory.
        The pickled dict has the keys ``y_true``, ``y_pred``, ``scores``,
        ``class_labels``, ``model_name``, ``model``, ``train_files``,
        ``test_files`` and ``returned_files``. A failed prediction is stored
        as ``None`` in both ``y_pred`` and ``scores``.
    """
    dataset_name = os.path.basename(dataset_path)
    timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    project_name = '{}_{}'.format(dataset_name, timestamp)

    # Parse the label file. The pattern has no capture group (pandas warns
    # about match groups in str.contains) and na=False keeps rows with a
    # missing path from breaking the boolean mask.
    labels = pd.read_csv(os.path.join(dataset_path, 'labels.csv'), header=None, dtype=str)
    file_column = labels[0]
    train_files = labels[file_column.str.contains('train|val', na=False)].values
    test_files = labels[file_column.str.contains('test', na=False)].values

    # np.unique already returns the unique labels in sorted order.
    class_labels = np.unique(train_files[:, 1])

    # Ground truth and absolute paths for the test split.
    y_true = test_files[:, 1]
    test_filenames = [os.path.join(dataset_path, c) for c in test_files[:, 0]]
    n_pred = len(test_filenames)
    print('{} prediction files found.'.format(n_pred))

    # Cap the number of requests; the subset is drawn uniformly at random
    # (not stratified by class) and y_true is subset with the same indices.
    if n_pred > max_predict:
        print('Number of prediction files ({}) exceeds maximum ({}). randomly sampling down to max.'.format(n_pred, max_predict))
        inds = np.random.choice(n_pred, max_predict, replace=False)
        y_true = y_true[inds]
        test_filenames = [test_filenames[i] for i in inds]
        print("New length: {}".format(len(test_filenames)))

    def predict_worker(filename):
        """Predict one file; return (basename, predicted label, scores) or Nones on failure."""
        basename = os.path.basename(filename)
        try:
            response = get_prediction(filename, project_id, model_id)
            pred_set = MessageToDict(response)['payload']

            names = np.array([z['displayName'] for z in pred_set])
            # MessageToDict omits fields that hold their default value, so a
            # score of exactly 0.0 is absent from the dict; treat it as 0.0.
            class_scores = np.array(
                [z.get('classification', {}).get('score', 0.0) for z in pred_set]
            )

            # Order the scores by label name so every file shares one column order.
            order = np.argsort(names)
            # Drop the first sorted entry.
            # NOTE(review): assumes the response always contains a '--other--'
            # label and that it sorts before every real class name -- confirm.
            names = names[order][1:]
            class_scores = class_scores[order][1:]
            return basename, names[np.argmax(class_scores)], class_scores
        except Exception as err:
            # Best effort: one failing file must not abort the whole run, but
            # report why it failed instead of hiding the error.
            print('Error occured on prediction: {}. Skipping save. ({!r})'.format(filename, err))
            return basename, None, None

    # Run parallel calls to make faster
    t_start = time.time()
    with Parallel(n_jobs=-1, verbose=5) as parallel:
        results = parallel(delayed(predict_worker)(f) for f in test_filenames)
    t_elapsed = time.time() - t_start
    print(len(results))
    print('{:0.3f} secs elapsed for predicting {} images'.format(t_elapsed, len(test_filenames)))

    # Parse the parallel output
    returned_files = [r[0] for r in results]
    y_pred = [r[1] for r in results]
    scores = [r[2] for r in results]
    try:
        scores = np.array(scores)
    except ValueError:
        # A failed prediction leaves a None among the per-file score arrays;
        # recent NumPy refuses to build such a ragged array implicitly.
        scores = np.array(scores, dtype=object)

    print('Number of predictions: {}'.format(len(y_pred)))
    print('Number of fails: {}.'.format(sum(yy is None for yy in y_pred)))

    #
    # Save results
    #
    save_file = '{}-results.p'.format(project_name)
    save_dict = {
        'y_true': y_true,
        'y_pred': y_pred,
        'scores': scores,
        'class_labels': class_labels,
        'model_name': project_name,
        'model': None,
        'train_files': train_files,
        'test_files': test_files,
        'returned_files': returned_files,
    }
    with open(save_file, 'wb') as f:
        pickle.dump(save_dict, f)
    print('Saved to {}'.format(save_file))

    return save_file

# Loop through datasets and run AutoML evaluation

In [None]:
# Evaluate every configured dataset whose name is not listed in skip_sets.
for dataset_name, trained_model_id in dataset_models.items():
    if dataset_name not in skip_sets:
        # Each dataset lives in a sub-directory of root_data named after its key.
        dataset_path = os.path.join(root_data, dataset_name)
        print('EXECUTING DATASET: {}'.format(dataset_path))

        # Predict the test split and pickle the results.
        evaluate(dataset_path, project_id, trained_model_id)

        # Printed once per evaluated dataset.
        print('Done!')