# Test reloading the saved model and using it for inference


In [4]:
import gc

import torch

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Load finetuned model

In [12]:
# Run on the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Fine-tuned checkpoint to load.
# (To load the base model instead, use "microsoft/Phi-3-mini-4k-instruct".)
MODEL_NAME = "benjaminzwhite/phi-3-mini-4k-instruct-ABSA-QUAD"

# NOTE(review): trust_remote_code=True allows Python code shipped in the model
# repository to run while loading -- only use it with repositories you trust.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype="auto",
    device_map=device,
    trust_remote_code=True,
)

# The tokenizer is loaded from the same repository as the weights.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/172 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.31k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/455 [00:00<?, ?B/s]

In [13]:
# Prompt skeleton with system / user / assistant turns. `{review_text}` is filled
# in with str.format(); the text ends right after the assistant tag so the
# model's continuation is the answer.
generation_template = (
    "<|system|>\n"
    "You are a computer program who only replies with valid JSON lists.<|end|>\n"
    "<|user|>\n"
    "Perform a full aspect-based sentiment analysis of the following restaurant review:\n"
    "\n"
    "{review_text}\n"
    "<|end|>\n"
    "<|assistant|>\n"
)

In [15]:
# Sample restaurant review used as the input for both models below.
example_review = (
    "This place's burgers are the absolute best in town, "
    "and even though the service is incredibly slow I'd definitely come back - "
    "I want to try the tomato sauce that my friend had which looked delicious!"
)

In [16]:
# Build a text-generation pipeline around the model and tokenizer loaded above.
# - return_full_text=False: return only the generated continuation, not the prompt.
# - max_new_tokens instead of max_length: the 500-token budget applies to the
#   generated answer only. max_length also counts the prompt tokens, so a long
#   review would eat into the room left for the answer.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=500,
    return_full_text=False,
)

# Fill the review into the chat template and generate.
prompt = generation_template.format(review_text=example_review)
result = pipe(prompt)

# The pipeline returns a list with one dict per generated sequence.
print(result[0]['generated_text'])

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


 [{'opinion term': 'burgers', 'aspect category': 'food quality','sentiment': 'positive', 'justification': 'best'},{'opinion term':'service', 'aspect category':'service general','sentiment': 'negative', 'justification':'slow'},{'opinion term': 'tomato sauce', 'aspect category': 'food quality','sentiment': 'positive', 'justification': 'delicious'}]


# Compare with baseline Phi-3 (not fine-tuned)

- Note that the fine-tuning process is currently very simple: only 2,000 SFT examples, 1 epoch, etc.
- Let's see whether the baseline Phi-3 understands the output requirements on its own.

In [17]:
# Release the previously loaded model before loading the baseline. The pipeline
# built earlier keeps its own reference to that model, so `pipe` must be cleared
# too; otherwise both sets of weights stay resident at the same time.
model = tokenizer = pipe = None
gc.collect()
if device == "cuda":
    # Hand the cached, now-unused GPU blocks back so the next load can use them.
    torch.cuda.empty_cache()

# Base (not fine-tuned) checkpoint, used as the comparison point.
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
#MODEL_NAME = "benjaminzwhite/phi-3-mini-4k-instruct-ABSA-QUAD"

# Same loading options as for the fine-tuned model, so only the weights differ.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map=device,
    torch_dtype="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [19]:
# Build a text-generation pipeline around the currently loaded model and tokenizer.
# - return_full_text=False: return only the generated continuation, not the prompt.
# - max_new_tokens instead of max_length: the 500-token budget applies to the
#   generated answer only. max_length also counts the prompt tokens, so a long
#   review would eat into the room left for the answer.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=500,
    return_full_text=False,
)

# Same template and review as before, so the outputs are directly comparable.
prompt = generation_template.format(review_text=example_review)
result = pipe(prompt)

# The pipeline returns a list with one dict per generated sequence.
print(result[0]['generated_text'])

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


 ```json

{

  "aspects": [

    {

      "aspect": "burgers",

      "sentiment": "positive",

      "sentiment_justification": "The reviewer describes the burgers as the 'absolute best in town', indicating a very positive sentiment."

    },

    {

      "aspect": "service",

      "sentiment": "negative",

      "sentiment_justification": "The service is described as 'incredibly slow', which is a negative sentiment."

    },

    {

      "aspect": "returning",

      "sentiment": "positive",

      "sentiment_justification": "Despite the slow service, the reviewer states they 'would definitely come back', showing a positive sentiment towards the overall experience."

    },

    {

      "aspect": "tomato sauce",

      "sentiment": "positive",

      "sentiment_justification": "The reviewer expresses a desire to try the tomato sauce because it 'looked delicious', which is a positive sentiment."

    }

  ]

}

```
