In [2]:
import pandas as pd

tweets = pd.read_csv('data/train.csv', encoding='unicode_escape')
tweets.shape

(27481, 10)

In [3]:
def extract_text_for_llm(tweet_arr):
    tweet_txt = tweet_arr[0][1]

    tweet_info = f'''The tweet has the following information:\n
Tweet Content:\n{tweet_txt}'''

    return tweet_info

# for providing N-shot examples for the LLM in the prompt
def extract_sentiment_for_llm(tweet_arr):
    relevant_tokens = tweet_arr[0][2]
    sentiment = tweet_arr[0][3]

    tweet_sentiment = f'''Sentiment for the above tweet is:\n[{sentiment}]\n
Relevant text from the original tweet that leads to the conclusion that the sentiment in the tweet is [{sentiment}] are:\n[{relevant_tokens}]\n\n
Final answer: {sentiment}'''

    return tweet_sentiment

In [4]:
sample = tweets.sample(1).values
sample

array([['17e8450ef0', 'Tomorrow is house shopping...',
        'Tomorrow is house shopping...', 'neutral', 'noon', '60-70',
        "Côte d'Ivoire", 26378274, 318000.0, 83]], dtype=object)

In [8]:
sample_tweet_content = extract_text_for_llm(sample)
print(sample_tweet_content, end='\n\n')

sample_tweet_sentiment = extract_sentiment_for_llm(sample)
print(sample_tweet_sentiment)

The tweet has the following information:

Tweet Content:
Tomorrow is house shopping...

Sentiment for the above tweet is:
[neutral]

Relevant text from the original tweet that leads to the conclusion that the sentiment in the tweet is [neutral] are:
[Tomorrow is house shopping...]


Final answer: neutral


In [17]:
system_prompt = f'''You are an expert at linguistic analysis \
and your area of expertise is interpreting the sentiment of \
texts from tweets published online. You are very intelligent and \
are extremely good at your job. 

You task will go as follows: you will be provided \
with some information containing the text from a tweet.\
You will analyze it expertly and respond with 2 pieces of information.

First, the sentiment of the tweet. It could be \
one of three options: [positive, negative, neutral].\

Secondly, you will point out the words \
from the original tweet that supports that sentiment inference \
in your expert and accurate analysis.'''

one_shot_example = f'''\n\nFor example:-\n
{sample_tweet_content}\n\n\
{sample_tweet_sentiment}'''

one_shot = True 

if one_shot:
    system_prompt = system_prompt + one_shot_example

In [18]:
print(system_prompt)

You are an expert at linguistic analysis and your area of expertise is interpreting the sentiment of texts from tweets published online. You are very intelligent and are extremely good at your job. 

You task will go as follows: you will be provided with some information containing the text from a tweet.You will analyze it expertly and respond with 2 pieces of information.

First, the sentiment of the tweet. It could be one of three options: [positive, negative, neutral].
Secondly, you will point out the words from the original tweet that supports that sentiment inference in your expert and accurate analysis.

For example:-

The tweet has the following information:

Tweet Content:
Tomorrow is house shopping...

Sentiment for the above tweet is:
[neutral]

Relevant text from the original tweet that leads to the conclusion that the sentiment in the tweet is [neutral] are:
[Tomorrow is house shopping...]


Final answer: neutral


In [20]:
from llama_cpp import Llama

mpath = r'C:/Users/chinm/.cache/lm-studio/models/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/mistral-7b-instruct-v0.1.Q5_K_M.gguf'

llm = Llama(model_path=mpath, system_prompt=system_prompt)

llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from C:/Users/chinm/.cache/lm-studio/models/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/mistral-7b-instruct-v0.1.Q5_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u

In [19]:
test_sample = tweets.sample(1).values
test_sample

array([['b4024682ca',
        '  hope you guys are having fun!  Can`t wait for ya`ll to be back, Wilmy isn`t the same without you',
        'g fun', 'positive', 'noon', '60-70', 'Chad', 16425864,
        1259200.0, 13]], dtype=object)

In [22]:
test_tweet_content = extract_text_for_llm(test_sample)
print(test_tweet_content, end='\n\n')

test_tweet_sentiment = extract_sentiment_for_llm(test_sample)
print(test_tweet_sentiment)

The tweet has the following information:

Tweet Content:
  hope you guys are having fun!  Can`t wait for ya`ll to be back, Wilmy isn`t the same without you

Sentiment for the above tweet is:
[positive]

Relevant text from the original tweet that leads to the conclusion that the sentiment in the tweet is [positive] are:
[g fun]


Final answer: positive


In [26]:
question_prompt = system_prompt + \
    '\n---------------------\n' +\
    'Now, take a deep breath and answer this for the following tweet.\n' +\
        test_tweet_content

output = llm(
    question_prompt, # Prompt
    max_tokens= None, # Generate up to [max_tokens] tokens, set to None to generate up to the end of the context window
    echo=True, # Echo the prompt back in the output
    temperature=0 # The temperature parameter (higher for more "creative" or "out of distribution" output)
    )

Llama.generate: prefix-match hit

llama_print_timings:        load time =    9374.52 ms
llama_print_timings:      sample time =      16.43 ms /    84 runs   (    0.20 ms per token,  5112.91 tokens per second)
llama_print_timings: prompt eval time =   21306.93 ms /   289 tokens (   73.73 ms per token,    13.56 tokens per second)
llama_print_timings:        eval time =   18553.91 ms /    83 runs   (  223.54 ms per token,     4.47 tokens per second)
llama_print_timings:       total time =   40115.72 ms /   372 tokens


In [27]:
output

{'id': 'cmpl-07ad5dd6-4d69-4751-b00b-040d0a72f6e9',
 'object': 'text_completion',
 'created': 1714526952,
 'model': 'C:/Users/chinm/.cache/lm-studio/models/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/mistral-7b-instruct-v0.1.Q5_K_M.gguf',
 'choices': [{'text': 'You are an expert at linguistic analysis and your area of expertise is interpreting the sentiment of texts from tweets published online. You are very intelligent and are extremely good at your job. \n\nYou task will go as follows: you will be provided with some information containing the text from a tweet.You will analyze it expertly and respond with 2 pieces of information.\n\nFirst, the sentiment of the tweet. It could be one of three options: [positive, negative, neutral].\nSecondly, you will point out the words from the original tweet that supports that sentiment inference in your expert and accurate analysis.\n\nFor example:-\n\nThe tweet has the following information:\n\nTweet Content:\nTomorrow is house shopping...\n\nSentimen

In [28]:
print(output['choices'][0]['text'])

You are an expert at linguistic analysis and your area of expertise is interpreting the sentiment of texts from tweets published online. You are very intelligent and are extremely good at your job. 

You task will go as follows: you will be provided with some information containing the text from a tweet.You will analyze it expertly and respond with 2 pieces of information.

First, the sentiment of the tweet. It could be one of three options: [positive, negative, neutral].
Secondly, you will point out the words from the original tweet that supports that sentiment inference in your expert and accurate analysis.

For example:-

The tweet has the following information:

Tweet Content:
Tomorrow is house shopping...

Sentiment for the above tweet is:
[neutral]

Relevant text from the original tweet that leads to the conclusion that the sentiment in the tweet is [neutral] are:
[Tomorrow is house shopping...]


Final answer: neutral
---------------------
Now, take a deep breath and answer this