In [7]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
from google.cloud import storage
from io import StringIO
import pandas as pd
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

def get_df_from_gcs_blob(blob, bucket='recipe-data-bucket'):
    """Download a CSV object from Google Cloud Storage into a DataFrame.

    Args:
        blob: Name of the object inside the bucket.
        bucket: Name of the bucket that holds the object.

    Returns:
        The DataFrame produced by ``pd.read_csv`` on the object's decoded text.
    """
    # START: COPIED FROM https://github.com/googleapis/python-storage/blob/HEAD/samples/snippets/storage_fileio_write_read.py
    storage_client = storage.Client()
    gcs_bucket = storage_client.get_bucket(bucket)

    # download_as_bytes() is the supported replacement for the deprecated
    # download_as_string() alias; both return the raw object bytes.
    raw_bytes = gcs_bucket.blob(blob).download_as_bytes()
    csv_buffer = StringIO(raw_bytes.decode())  # transform bytes to string here
    return pd.read_csv(csv_buffer)
    # END: COPIED FROM https://github.com/googleapis/python-storage/blob/HEAD/samples/snippets/storage_fileio_write_read.py

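Demo: view results on the train/test data. Leave MOD PATH empty and pick a `*_final_results.csv` file to browse precomputed generations, or enter a local model path and pick a two-column dataset to run inference live.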

In [9]:
# CODE NOT COPIED BUT INSPIRED FROM IPYWIDGETS DOCUMENTATION
# Input controls for the dataset demo; they are wired to run_inference via @interact.

# Free-text path of the fine-tuned model/tokenizer directory.
mod_path_w = widgets.Text(
    value=None,
    placeholder='Enter local path to model and tokenizer.',
    description='MOD PATH:',
    disabled=False
)

# CSV object to load from the data bucket.
df_path_w = widgets.Dropdown(
    options=['train_only_cal.csv', 'test_only_cal.csv', 'train.csv', 'test.csv', 'train_all_nutrition.csv', 'test_all_nutrition.csv', 'train_only_cal_final_results.csv', 'test_only_cal_final_results.csv'],
    value='train_only_cal_final_results.csv',
    description='DATA PATH:',
)

# Torch device that models and input tensors are moved to.
device_w = widgets.Dropdown(
    options=['cuda', 'cpu'],
    value='cpu',
    description='DEVICE:',
)

# Row index into the loaded dataset; run_inference updates `max` once the
# dataset size is known.
i_w = widgets.IntSlider(
    value=0,
    min=0,
    max=10,
    step=1,
    description='DATA INDEX:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)

# Beam count. The minimum is 1: generation needs at least one beam (1 means
# no beam search), so a value of 0 is not a usable setting.
num_beams_w = widgets.IntSlider(
    value=1,
    min=1,
    max=5,
    step=1,
    description='# BEAMS:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)

@interact(mod_path=mod_path_w, df_path=df_path_w, i=i_w, num_beams=num_beams_w, device=device_w)
def run_inference(mod_path, df_path, i, num_beams, device):
    """Display one dataset row next to model generations.

    Two modes are selected from the inputs:

    * Live inference - ``mod_path`` is non-empty and the dataset has exactly
      two columns. The model at ``mod_path`` and the stock ``t5-small``
      checkpoint both generate from the row's ``input`` value.
    * Precomputed - otherwise, stored generations are read from the row's
      ``output_gt`` / ``output_t5_b<N>`` / ``output_t5_oob_b<N>`` /
      ``output_gpt`` columns. If any of those columns is absent, the raw row
      is shown with a hint instead of raising ``KeyError``.

    Args:
        mod_path: Local path to the fine-tuned model and tokenizer; may be empty.
        df_path: Name of the CSV object to load through ``get_df_from_gcs_blob``.
        i: Row index to display; clamped to the last row of the dataset.
        num_beams: Beam count for generation, or the ``<N>`` suffix of the
            precomputed columns.
        device: Torch device string that models and inputs are moved to.
    """
    # NOTE(review): the dataset is re-downloaded and the models are re-loaded
    # on every widget change; consider caching if the demo feels slow.
    df = get_df_from_gcs_blob(df_path)
    if df.empty:
        # A slider max of -1 would fall below its min of 0, so stop here.
        print(f'{df_path} contains no rows.')
        return

    last_row = df.shape[0] - 1
    i_w.max = last_row
    # The slider may still carry an index chosen for a longer dataset.
    i = min(i, last_row)
    row = df.iloc[i]

    if mod_path and len(df.columns) == 2:
        base_model = T5ForConditionalGeneration.from_pretrained('t5-small').to(device)
        model = T5ForConditionalGeneration.from_pretrained(mod_path).to(device)
        tokenizer = T5Tokenizer.from_pretrained(mod_path)

        inp = row['input']
        input_ids = tokenizer(inp, return_tensors="pt").input_ids.to(device)
        generate_kwargs = {
            # Length of the longest target (len() of each 'output' value),
            # cast from a numpy integer to a plain int.
            'max_length': int(df['output'].map(len).max()),
            # generate() needs at least one beam.
            'num_beams': max(1, num_beams),
        }
        outputs = model.generate(input_ids, **generate_kwargs)
        base_outputs = base_model.generate(input_ids, **generate_kwargs)

        example = {
            'Input': inp,
            'Target': row['output'],
            'T5 Fine-Tuned Generation': tokenizer.decode(outputs[0], skip_special_tokens=True),
            'T5 OOB Generation': tokenizer.decode(base_outputs[0], skip_special_tokens=True),
        }
    else:
        tuned_col = f'output_t5_b{num_beams}'
        oob_col = f'output_t5_oob_b{num_beams}'
        required = ['input', 'output_gt', tuned_col, oob_col, 'output_gpt']
        missing = [col for col in required if col not in df.columns]
        if missing:
            # E.g. a raw dataset selected without a model path, or a beam
            # count that was never precomputed.
            print(f'{df_path} has no precomputed column(s) {missing}; '
                  'showing the raw row. Enter a model path with a two-column '
                  'dataset to run inference, or pick a *_final_results.csv file.')
            with pd.option_context('display.max_colwidth', None):
                display(row.to_frame(name='Example'))
            return

        example = {
            'Input': row['input'],
            'Target': row['output_gt'],
            'T5 Fine-Tuned Generation': row[tuned_col],
            'T5 OOB Generation': row[oob_col],
            'GPT Generation': row['output_gpt'],
        }

    with pd.option_context('display.max_colwidth', None):
        display(pd.Series(example).to_frame(name='Example'))
    

interactive(children=(Text(value='', description='MOD PATH:', placeholder='Enter local path to model and token…

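Demo: run the fine-tuned model and the base `t5-small` model on custom input text. Nothing is displayed until both MOD PATH and INPUT are filled in.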

In [6]:
# Input controls for the custom-input demo; they are wired to run_inference via @interact.

# Free-text prompt passed to both models.
input_w = widgets.Text(
    value=None,
    placeholder='Enter input here.',
    description='INPUT:',
    disabled=False
)

# Free-text path of the fine-tuned model/tokenizer directory.
mod_path_w = widgets.Text(
    value=None,
    placeholder='Enter local path to model and tokenizer.',
    description='MOD PATH:',
    disabled=False
)

# Torch device that models and input tensors are moved to.
device_w = widgets.Dropdown(
    options=['cuda', 'cpu'],
    value='cpu',
    description='DEVICE:',
)


# Beam count. The minimum is 1: generation needs at least one beam (1 means
# no beam search), so a value of 0 is not a usable setting.
num_beams_w = widgets.IntSlider(
    value=1,
    min=1,
    max=5,
    step=1,
    description='# BEAMS:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)

# Value passed as max_length to generate().
max_len_w = widgets.IntSlider(
    value=20,
    min=1,
    max=200,
    step=1,
    description='MAX LEN:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)

@interact(mod_path=mod_path_w, inp=input_w, num_beams=num_beams_w, device=device_w, max_len=max_len_w)
def run_inference(mod_path, inp, num_beams, device, max_len):
    """Generate from custom text with the fine-tuned and stock T5 models.

    Nothing is displayed unless both ``mod_path`` and ``inp`` are non-empty.

    Args:
        mod_path: Local path to the fine-tuned model and tokenizer.
        inp: Text to feed to both models.
        num_beams: Beam count passed to ``generate``.
        device: Torch device string that models and inputs are moved to.
        max_len: Value passed as ``max_length`` to ``generate``.
    """
    # Wait until the user has supplied both a model path and some text.
    if not (mod_path and inp):
        return

    stock_t5 = T5ForConditionalGeneration.from_pretrained('t5-small').to(device)
    tuned_t5 = T5ForConditionalGeneration.from_pretrained(mod_path).to(device)
    tok = T5Tokenizer.from_pretrained(mod_path)

    prompt_ids = tok(inp, return_tensors="pt").input_ids.to(device)
    generation_args = dict(max_length=max_len, num_beams=num_beams)
    tuned_ids = tuned_t5.generate(prompt_ids, **generation_args)
    stock_ids = stock_t5.generate(prompt_ids, **generation_args)

    # Both generations are decoded with the tokenizer loaded from mod_path.
    comparison = {
        'Input': inp,
        'T5 Fine-Tuned Generation': tok.decode(tuned_ids[0], skip_special_tokens=True),
        'T5 OOB Generation': tok.decode(stock_ids[0], skip_special_tokens=True),
    }
    with pd.option_context('display.max_colwidth', None):
        display(pd.Series(comparison).to_frame().rename(columns={0: 'Example'}))

interactive(children=(Text(value='', description='MOD PATH:', placeholder='Enter local path to model and token…