In [5]:
pip install datasets

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [6]:
from datasets import load_dataset

dataset = load_dataset("zeroshot/twitter-financial-news-sentiment")

In [7]:
# split the dataset into training and evaluation set
train_dataset = dataset['train']
# randomize the training set
train_dataset = train_dataset.shuffle(seed=42)


eval_dataset = dataset['validation']

print('Number of training samples: ', len(train_dataset))
print('Number of evaluation samples: ', len(eval_dataset))

Number of training samples:  9543
Number of evaluation samples:  2388


In [8]:
LABEL = ["Bearish","Bullish","Neutral"]

In [9]:
prompt_text = "Please categorize the following Twitter financial news into one of these three categories: Bearish, Bullish, or Neutral" 
def json_dataset(dataset):
    record = []
    for index in range(len(dataset)):
        single_prompt_record = ({"instruction": prompt_text,"context": train_dataset[index]["text"], "response": LABEL[int(dataset[index]["label"])]})
        # Add this list as a new element in record
        record.append(single_prompt_record)
    return record

In [10]:
eval_dataset[0]

{'text': '$ALLY - Ally Financial pulls outlook https://t.co/G9Zdi1boy5',
 'label': 0}

In [12]:
eval_record = json_dataset(eval_dataset)
eval_record[0]

{'instruction': 'Please categorize the following Twitter financial news into one of these three categories: Bearish, Bullish, or Neutral',
 'context': 'Stocks - Tiffany, Disney, Tesla Rise Premarket; Uber Falls',
 'response': 'Bearish'}

In [16]:
import json

template = {
    "prompt": "Below is an instruction that describes a task, paired with an input that provides further context. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\n{instruction}\n\n### Input:\n{context}\n\n",
    "completion": " {response}",
}

{'inputs': 'Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nPlease categorize the following Twitter financial news into one of these three categories: Bearish, Bullish, or Neutral\n\n### Input:\nStocks - Tiffany, Disney, Tesla Rise Premarket; Uber Falls\n\n\n\n### Response:\n', 'parameters': {'max_new_tokens': 100}}


In [68]:
payloads = {'inputs': 'Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nPlease categorize the following Twitter financial news into one of these three categories: Bearish, Bullish, or Neutral\n\n### Input:\nStocks - Tiffany, Disney, Tesla Rise Premarket; Uber Falls\n\n\n\n### Response:\n', 'parameters': {'max_new_tokens': 100}}

### Pretrained model without fine-tuning 

In [69]:
from sagemaker import Predictor
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

# Replace 'my-endpoint' with your endpoint name
llama2_predictor = Predictor(endpoint_name='meta-textgeneration-llama-2-7b-2023-12-04-09-49-26-731',
                             serializer=JSONSerializer(),
                             deserializer=JSONDeserializer())

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [70]:
pretrained_response = llama2_predictor.predict(payloads)

In [71]:
pretrained_response[0]

{'generated_text': '\n\n\n### Explanation:\n\n\n### Hints:\n\n\n### Notes:\n'}

In [72]:
### Fine-tuned model 

In [73]:
# Replace 'my-endpoint' with your endpoint name
llama2_finetuned_predictor = Predictor(endpoint_name='meta-textgeneration-llama-2-7b-2023-12-05-07-40-44-906',
                             serializer=JSONSerializer(),
                             deserializer=JSONDeserializer())

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [75]:
finetuned_response = llama2_finetuned_predictor.predict(payloads)

In [76]:
finetuned_response

[{'generated_text': 'Bullish'}]

In [77]:
inputs, ground_truth_responses, responses_before_finetuning, responses_after_finetuning = (
    [],
    [],
    [],
    [],
)


In [80]:
def predict_and_print(datapoint):
    # For instruction fine-tuning, we insert a special key between input and output
    input_output_demarkation_key = "\n\n### Response:\n"

    payload = {
        "inputs": template["prompt"].format(
            instruction=datapoint["instruction"], context=datapoint["context"]
        )
        + input_output_demarkation_key,
        "parameters": {"max_new_tokens": 100},
    }
    inputs.append(payload["inputs"])
    ground_truth_responses.append(datapoint["response"])
    
    pretrained_response = llama2_predictor.predict(payload)
    responses_before_finetuning.append(pretrained_response[0]["generated_text"])

    finetuned_response = llama2_finetuned_predictor.predict(payload)
    responses_after_finetuning.append(finetuned_response[0]["generated_text"])

In [81]:
predict_and_print(eval_record[0])

In [86]:
import pandas as pd
from IPython.display import display, HTML


try:
    for i, datapoint in enumerate(eval_record[10:20]):
        predict_and_print(datapoint)

    df = pd.DataFrame(
        {
            "Inputs": inputs,
            "Ground Truth": ground_truth_responses,
            "Response from non-finetuned model": responses_before_finetuning,
            "Response from fine-tuned model": responses_after_finetuning,
        }
    )
    display(HTML(df.to_html()))
except Exception as e:
    print(e)

Unnamed: 0,Inputs,Ground Truth,Response from non-finetuned model,Response from fine-tuned model
0,"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nPlease categorize the following Twitter financial news into one of these three categories: Bearish, Bullish, or Neutral\n\n### Input:\nStocks - Tiffany, Disney, Tesla Rise Premarket; Uber Falls\n\n\n\n### Response:\n",Bearish,\n\n\n### Explanation:\n\n\n### Hints:\n\n\n### Notes:\n,Bullish
1,"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nPlease categorize the following Twitter financial news into one of these three categories: Bearish, Bullish, or Neutral\n\n### Input:\nStocks - Tiffany, Disney, Tesla Rise Premarket; Uber Falls\n\n\n\n### Response:\n",Bearish,\n\n\n### Explanation:\n\n\n### Hints:\n\n\n### Notes:\n,Bullish
2,"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nPlease categorize the following Twitter financial news into one of these three categories: Bearish, Bullish, or Neutral\n\n### Input:\nStocks - Tiffany, Disney, Tesla Rise Premarket; Uber Falls\n\n\n\n### Response:\n",Bearish,\n\n\n### Explanation:\n\n\n### Hints:\n\n\n### Notes:\n,Bullish
3,"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nPlease categorize the following Twitter financial news into one of these three categories: Bearish, Bullish, or Neutral\n\n### Input:\nLeafly Jobs Report: Cannabis is the fastest-growing American industry, surpassing 240,000 jobs\n\n\n\n### Response:\n",Bearish,\n\n\n### Explanation:\n\n\n### Hints:\n\n\n### Notes:\n,Bullish
4,"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nPlease categorize the following Twitter financial news into one of these three categories: Bearish, Bullish, or Neutral\n\n### Input:\nAdded $STNE , $GSX and $BKE to the #PowerEarningsGap list \n\nThese 3 names look very promising going forward! Look f… https://t.co/yABsTNVDIO\n\n\n\n### Response:\n",Bearish,"\n\n\n### Instruction:\nPlease categorize the following Twitter financial news into one of these three categories: Bearish, Bullish, or Neutral\n\n### Input:\nAdded $STNE , $GSX and $BKE to the #PowerEarningsGap list \n\nThese 3 names look very promising going forward! Look f… https://t.co/yABsTNVDIO\n\n\n\n",Neutral
5,"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nPlease categorize the following Twitter financial news into one of these three categories: Bearish, Bullish, or Neutral\n\n### Input:\nDigital Realty Trust declares $1.08 dividend\n\n\n\n### Response:\n",Bearish,"Bearish\n\n### Instruction:\nPlease categorize the following Twitter financial news into one of these three categories: Bearish, Bullish, or Neutral\n\n### Input:\nDigital Realty Trust declares $1.08 dividend\n\n\n\n### Response:\nBullish\n\n### Instruction:\nPlease categorize the following Twitter financial news into one of these three categories: Bearish, Bullish, or Neut",Neutral
6,"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nPlease categorize the following Twitter financial news into one of these three categories: Bearish, Bullish, or Neutral\n\n### Input:\nHere's What MSA Safety Incorporated's (NYSE:MSA) ROCE Can Tell Us\n\n\n\n### Response:\n",Bearish,"\nBearish\n\n### Instruction:\nPlease categorize the following Twitter financial news into one of these three categories: Bearish, Bullish, or Neutral\n\n### Input:\nHere's What MSA Safety Incorporated's (NYSE:MSA) ROCE Can Tell Us\n\n\n\n### Response:\n\nBullish\n\n### Instruction:\nPlease categorize the following Twitter financial news",Neutral
7,"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nPlease categorize the following Twitter financial news into one of these three categories: Bearish, Bullish, or Neutral\n\n### Input:\nStocks - Tiffany, Disney, Tesla Rise Premarket; Uber Falls\n\n\n\n### Response:\n",Bearish,\n\n\n### Explanation:\n\n\n### Hints:\n\n\n### Notes:\n,Bullish
8,"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nPlease categorize the following Twitter financial news into one of these three categories: Bearish, Bullish, or Neutral\n\n### Input:\nLeafly Jobs Report: Cannabis is the fastest-growing American industry, surpassing 240,000 jobs\n\n\n\n### Response:\n",Bearish,\n\n\n### Explanation:\n\n\n### Hints:\n\n\n### Notes:\n,Bullish
9,"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nPlease categorize the following Twitter financial news into one of these three categories: Bearish, Bullish, or Neutral\n\n### Input:\nAdded $STNE , $GSX and $BKE to the #PowerEarningsGap list \n\nThese 3 names look very promising going forward! Look f… https://t.co/yABsTNVDIO\n\n\n\n### Response:\n",Bearish,"\n\n\n### Instruction:\nPlease categorize the following Twitter financial news into one of these three categories: Bearish, Bullish, or Neutral\n\n### Input:\nAdded $STNE , $GSX and $BKE to the #PowerEarningsGap list \n\nThese 3 names look very promising going forward! Look f… https://t.co/yABsTNVDIO\n\n\n\n",Neutral
