In [1]:
import pandas as pd

tweets = pd.read_csv('data/train.csv', encoding='unicode_escape')
tweets.shape

(27481, 10)

Pre-processing Tweet Text

In [2]:
def extract_text_for_llm(tweet_arr):
    tweet_txt = tweet_arr[0][1]

    tweet_info = f'''Tweet Content: {tweet_txt}'''

    return tweet_info

def extract_sentiment_for_llm(tweet_arr):
    '''for providing N-shot examples for the LLM in the prompt'''
    
    # relevant_tokens = tweet_arr[0][2]
    sentiment = tweet_arr[0][3]

    tweet_sentiment = f'''Sentiment: [{sentiment}]'''

    return tweet_sentiment

In [3]:
positive_sample = tweets[tweets.sentiment == "positive"].sample(1).values
print(positive_sample, end='\n\n')

negative_sample = tweets[tweets.sentiment == "negative"].sample(1).values
print(negative_sample, end='\n\n')

neutral_sample = tweets[tweets.sentiment == "neutral"].sample(1).values
print(neutral_sample)

[['77a409775e'
  'wooo am recovering from running race for life yest!!! i managed 36 mins 44secs, not bad for absolutley no trianing'
  'not bad fo' 'positive' 'noon' '21-30' 'Myanmar (formerly Burma)'
  54409800 653290.0 83]]

[['8fd6a28145'
  ' it`s tongue in cheek of some mentalities.There is another one which takes the piss of arabs, but its too long'
  '.There is another one which takes the piss of arabs,' 'negative'
  'morning' '46-60' 'Mali' 20250833 1220190.0 17]]

[['714d7c7a34'
  'On train with at least two gaggles of teenagers sitting & the commuters squished standing in the back...at least the teenagers let me sit'
  'On train with at least two gaggles of teenagers sitting & the commuters squished standing in the back...at least the teenagers let me sit'
  'neutral' 'morning' '0-20' 'Singapore' 5850342 700.0 8358]]


Creating the System Prompt

In [4]:
system_prompt = f'''You are an expert at linguistic analysis \
and your area of expertise is interpreting the sentiment of \
texts from tweets published online. You are very intelligent and \
are extremely good at your job. 

You task will go as follows: you will be provided \
with some information containing the text from a tweet.\
You will analyze it expertly and respond with 2 pieces of information.

First, the sentiment of the tweet. It could be \
one of three options: [positive, negative, neutral].\

Secondly, you will point out the words \
from the original tweet that supports that sentiment inference \
in your expert and accurate analysis.'''

1-shot example from each sentiment class in the Prompt

In [5]:
system_prompt = system_prompt + "\n\n3 examples, one from each class are given below:-\n"
    
n_shot_examples = [positive_sample, negative_sample, neutral_sample]

for sample in n_shot_examples:

    sample_tweet_content = extract_text_for_llm(sample)
    sample_tweet_sentiment = extract_sentiment_for_llm(sample)

    one_shot_example = f'''\n{sample_tweet_content}\n\n\
{sample_tweet_sentiment}\n'''
    
    system_prompt = system_prompt + one_shot_example + "\n"

system_prompt = system_prompt +\
'''\nNow, you are going to be given information for a single tweet.\n'''

In [6]:
print(system_prompt)

You are an expert at linguistic analysis and your area of expertise is interpreting the sentiment of texts from tweets published online. You are very intelligent and are extremely good at your job. 

You task will go as follows: you will be provided with some information containing the text from a tweet.You will analyze it expertly and respond with 2 pieces of information.

First, the sentiment of the tweet. It could be one of three options: [positive, negative, neutral].
Secondly, you will point out the words from the original tweet that supports that sentiment inference in your expert and accurate analysis.

3 examples, one from each class are given below:-

Tweet Content: wooo am recovering from running race for life yest!!! i managed 36 mins 44secs, not bad for absolutley no trianing

Sentiment: [positive]


Tweet Content:  it`s tongue in cheek of some mentalities.There is another one which takes the piss of arabs, but its too long

Sentiment: [negative]


Tweet Content: On train w

In [7]:
def get_prediction(prompt):
    return prompt.split('[')[-1].split(']')[0]

get_prediction(system_prompt)

'neutral'

Loading LLM Model (mistral-7b-instruct-v0.1.Q5_K_M.gguf)

In [8]:
from llama_cpp import Llama

mpath = r'C:/Users/chinm/.cache/lm-studio/models/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/mistral-7b-instruct-v0.1.Q5_K_M.gguf'

llm = Llama(
        model_path=mpath, # model path
        system_prompt=system_prompt, # system prompt
        n_ctx=2048, # model context
        n_gpu_layers=-1 # numper of layers to offload for GPU acceleration 
        )

llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from C:/Users/chinm/.cache/lm-studio/models/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/mistral-7b-instruct-v0.1.Q5_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u

In [9]:
test_sample = tweets.sample(1).values
test_sample

array([['ca77a6fba2',
        'had an answered prayer which caught me by surprise. LORD, you are amazing!',
        'amazing!', 'positive', 'night', '31-45', 'Equatorial Guinea',
        1402985, 28050.0, 50]], dtype=object)

In [10]:
test_tweet_content = extract_text_for_llm(test_sample)
print(test_tweet_content, end='\n\n')

test_tweet_sentiment = extract_sentiment_for_llm(test_sample)
print(test_tweet_sentiment)

Tweet Content: had an answered prayer which caught me by surprise. LORD, you are amazing!

Sentiment: [positive]


In [11]:
question_prompt = system_prompt + test_tweet_content + \
'''\nPredict the sentiment class from positive/negative/neutral.
Do not continue after making the prediction for this single tweet.\n
Sentiment for '''

output = llm(
    question_prompt, # Prompt
    max_tokens= 50, # Generate up to [max_tokens] tokens, set to None to generate up to the end of the context window
    echo=True, # Echo the prompt back in the output
    temperature=0, # The temperature parameter (higher for more "creative" or "out of distribution" output)
    )


llama_print_timings:        load time =   19228.15 ms
llama_print_timings:      sample time =       1.86 ms /     9 runs   (    0.21 ms per token,  4843.92 tokens per second)
llama_print_timings: prompt eval time =   19227.82 ms /   363 tokens (   52.97 ms per token,    18.88 tokens per second)
llama_print_timings:        eval time =    1598.62 ms /     8 runs   (  199.83 ms per token,     5.00 tokens per second)
llama_print_timings:       total time =   20854.83 ms /   371 tokens


In [12]:
output

{'id': 'cmpl-eed2aec7-479d-4709-aa4a-a7761af32831',
 'object': 'text_completion',
 'created': 1715923385,
 'model': 'C:/Users/chinm/.cache/lm-studio/models/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/mistral-7b-instruct-v0.1.Q5_K_M.gguf',
 'choices': [{'text': 'You are an expert at linguistic analysis and your area of expertise is interpreting the sentiment of texts from tweets published online. You are very intelligent and are extremely good at your job. \n\nYou task will go as follows: you will be provided with some information containing the text from a tweet.You will analyze it expertly and respond with 2 pieces of information.\n\nFirst, the sentiment of the tweet. It could be one of three options: [positive, negative, neutral].\nSecondly, you will point out the words from the original tweet that supports that sentiment inference in your expert and accurate analysis.\n\n3 examples, one from each class are given below:-\n\nTweet Content: wooo am recovering from running race for life yest

In [13]:
print(output['choices'][0]['text'])

You are an expert at linguistic analysis and your area of expertise is interpreting the sentiment of texts from tweets published online. You are very intelligent and are extremely good at your job. 

You task will go as follows: you will be provided with some information containing the text from a tweet.You will analyze it expertly and respond with 2 pieces of information.

First, the sentiment of the tweet. It could be one of three options: [positive, negative, neutral].
Secondly, you will point out the words from the original tweet that supports that sentiment inference in your expert and accurate analysis.

3 examples, one from each class are given below:-

Tweet Content: wooo am recovering from running race for life yest!!! i managed 36 mins 44secs, not bad for absolutley no trianing

Sentiment: [positive]


Tweet Content:  it`s tongue in cheek of some mentalities.There is another one which takes the piss of arabs, but its too long

Sentiment: [negative]


Tweet Content: On train w

In [14]:
predicted_class = get_prediction(output['choices'][0]['text'])
print(f"Predicted = {predicted_class}\nTrue {test_tweet_sentiment}")

Predicted = positive
True Sentiment: [positive]
