In [1]:
#%pip install python-decouple

In [None]:
#import libraries
from decouple import config
from ibm_watsonx_ai import APIClient
from ibm_watsonx_ai import Credentials
from ibm_watsonx_ai.foundation_models import ModelInference
from ibm_watsonx_ai.foundation_models.prompts import PromptTemplate, PromptTemplateManager
from ibm_watsonx_ai.foundation_models.utils.enums import ModelTypes
from ibm_watsonx_ai.foundation_models.schema import TextGenParameters
import pandas as pd
from sklearn.metrics import classification_report 
import import_ipynb
import sys
sys.path.append('../')
from Preprocessing import Train_Test_split as ts

In [2]:
# Read the watsonx.ai API key and project ID from the environment / .env file
# via python-decouple, so no secret has to live in the notebook.
# WX_API_KEY deliberately has no default: it must be supplied by the user.
WX_API_KEY = config('WX_API_KEY')
# The project ID can be overridden with WX_PROJECT_ID; when that setting is
# absent the original project is used, so existing setups keep working.
PROJECT_ID = config('WX_PROJECT_ID', default="f79c2d38-7ee2-4de6-931a-dad71b72d34f")

In [3]:
# Service credentials: the us-south watsonx.ai endpoint plus the API key that
# was read from configuration (WX_API_KEY).
credentials = Credentials(
    url = "https://us-south.ml.cloud.ibm.com",
    api_key = WX_API_KEY
)

# API client bound to those credentials and to PROJECT_ID; it is handed to
# ModelInference when the model handle is created.
client = APIClient(
    credentials=credentials, 
    project_id=PROJECT_ID
)

In [None]:
# Generation settings passed to ModelInference as its `params`.
# The answer is expected to be a single digit, hence the very small token budget.
# NOTE(review): no random seed is set here; if the service samples with this
# temperature, labels may differ between runs — confirm and pin a seed if
# reproducibility matters.
PARAMS = TextGenParameters(
    temperature=0.2,      # Higher temperature means more randomness
    max_new_tokens=3, # Maximum number of tokens to generate
    min_new_tokens=1, # Minimum number of tokens to generate
)

# Inference handle for the chosen foundation model, using the authenticated
# client and the generation settings above.
model = ModelInference(
    api_client=client,
    model_id="mistralai/mistral-small-3-1-24b-instruct-2503",
    params=PARAMS
)
# Exploration aid: presumably prints the parameters TextGenParameters accepts;
# it has no effect on `model` or `PARAMS`.
TextGenParameters.show()

In [None]:
#This is partly generated using Copilot suggestions

def sentiment_prompt(text):
    """Build the single-line sentiment-classification prompt for one review.

    The instructions ask the model to reply with exactly one character:
    1 (Negative), 2 (Neutral) or 3 (Positive).

    Args:
        text: The review to classify. It is converted with ``str()`` so
            non-string values are still formatted into the prompt.

    Returns:
        str: The prompt on a single line (no newline characters), with the
        review inserted after ``text:``.
    """
    template = """
    Return only a number based on the sentiment for the text. Do not give an explanation. Your response should only be a single character:
    1 for Negative
    2 for Neutral
    3 for Positive.

    text: {}

    """
    # Flatten the template before inserting the review, so that removing the
    # template's line breaks cannot also delete line breaks inside the review.
    flat_template = template.replace('\n', '')
    # Line breaks and other whitespace runs inside the review become single
    # spaces; simply deleting them would glue neighbouring words together.
    review = ' '.join(str(text).split())
    return flat_template.format(review)

In [None]:
#This is also generated using Copilot suggestions

def get_sentiment_llm(review):
    """Classify one review with the LLM and return the raw model reply.

    The review is wrapped in the instructions produced by
    ``sentiment_prompt`` and sent through the module-level ``model``.
    Whatever ``model.generate_text`` returns is handed back unchanged:
    no stripping, validation or casting is applied here.
    """
    return model.generate_text(sentiment_prompt(review))

In [None]:
# Keep only the two columns needed for labelling: the business identifier and
# the review text. BI_df comes from the Train_Test_split preprocessing notebook.
df = ts.BI_df[["business_id", "text"]]
# Display the selection (pandas truncates the rendered table).
df

Unnamed: 0,business_id,text
31,aJvxWyQIG5OLfBw3qAe8xA,"Ordered Caramel frappe at Drive thru, BIG MIST..."
46,MjZQqZAmJeMco_Vq-Y9h-g,Drum-roll please! Review #100 coming right up!...
54,u7MJKcNdZXYyTeb67vD5jw,We stopped here for my Chai and Hubby's coffee...
89,saJFbz12EnzanelpD8_xXQ,There's been three times that I've ordered a g...
104,KiE0h68HGOO7ZXAqkMBdiw,"I went in when they had 4 people working, wait..."
...,...,...
586567,-85kJMtb9wqNWDT8yLbitw,Fair service- messed up my drink cuppa times....
586568,Ddg-J_j0YFErk7wpMtH_0A,On my way out of Reno last week I noted a new ...
586638,2rmpfdyV2POqpXtmPHO_IQ,always closing drive thru. i've driven here so...
586646,GxuxCctcz3Hyk0wnuly7vQ,This is now one of two Starbucks in the shoppi...


In [13]:
# Batch 1: the first 5000 rows by position. `assign` returns a new frame, so
# `df` itself is left untouched; each review costs one LLM request.
df1 = df.iloc[:5000].assign(
    llm_sentiment=lambda batch: batch["text"].apply(get_sentiment_llm)
)

In [14]:
# Batch 2: positional rows 5000-9999. `assign` returns a new frame, so `df`
# itself is left untouched; each review costs one LLM request.
df2 = df.iloc[5000:10000].assign(
    llm_sentiment=lambda batch: batch["text"].apply(get_sentiment_llm)
)

In [15]:
# Batch 3: positional rows 10000-14999. `assign` returns a new frame, so `df`
# itself is left untouched; each review costs one LLM request.
df3 = df.iloc[10000:15000].assign(
    llm_sentiment=lambda batch: batch["text"].apply(get_sentiment_llm)
)

In [16]:
# Batch 4: every remaining row from position 15000 onwards. `assign` returns a
# new frame, so `df` itself is left untouched; each review costs one LLM request.
df4 = df.iloc[15000:].assign(
    llm_sentiment=lambda batch: batch["text"].apply(get_sentiment_llm)
)

In [17]:
# Stitch the four labelled batches back together in their original order.
# No ignore_index is passed, so the original row labels are kept.
final_df = pd.concat([df1,df2,df3,df4])

In [18]:
# Sanity check: row count, column dtypes and non-null counts of the labelled frame.
final_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 21575 entries, 31 to 586656
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   business_id    21575 non-null  object
 1   text           21575 non-null  object
 2   llm_sentiment  21575 non-null  object
dtypes: object(3)
memory usage: 674.2+ KB


In [19]:
# Preview the first ten labelled reviews to eyeball the model's answers.
final_df.head(10)

Unnamed: 0,business_id,text,llm_sentiment
31,aJvxWyQIG5OLfBw3qAe8xA,"Ordered Caramel frappe at Drive thru, BIG MIST...",1
46,MjZQqZAmJeMco_Vq-Y9h-g,Drum-roll please! Review #100 coming right up!...,3
54,u7MJKcNdZXYyTeb67vD5jw,We stopped here for my Chai and Hubby's coffee...,3
89,saJFbz12EnzanelpD8_xXQ,There's been three times that I've ordered a g...,1
104,KiE0h68HGOO7ZXAqkMBdiw,"I went in when they had 4 people working, wait...",1
135,saJFbz12EnzanelpD8_xXQ,Most of the time I go through the drive thru h...,3
165,BauybYsfqd0y6KDrJ6ZxjQ,i dont know what has happened to the in store ...,1
169,BauybYsfqd0y6KDrJ6ZxjQ,Nothing makes my busy day easy like my iced co...,1
205,RCy4M2ND4YK0uRbodV_v8g,Starbucks...so aren't they all just clones? a...,1
219,aJvxWyQIG5OLfBw3qAe8xA,Much better than the one on Bell Rd. near Red ...,3


In [21]:
# Installs pyarrow into the kernel's environment; presumably required as the
# engine for the parquet export that follows.
# TODO(review): the install is unpinned and sits mid-notebook — consider pinning
# the version and moving it to a setup cell at the top.
%pip install pyarrow

Collecting pyarrow
  Downloading pyarrow-20.0.0-cp312-cp312-win_amd64.whl.metadata (3.4 kB)
Downloading pyarrow-20.0.0-cp312-cp312-win_amd64.whl (25.7 MB)
   ---------------------------------------- 0.0/25.7 MB ? eta -:--:--
   ------------- -------------------------- 8.9/25.7 MB 50.3 MB/s eta 0:00:01
   ------------------------- -------------- 16.3/25.7 MB 40.9 MB/s eta 0:00:01
   ----------------------------------- ---- 22.8/25.7 MB 38.0 MB/s eta 0:00:01
   ---------------------------------------- 25.7/25.7 MB 33.9 MB/s eta 0:00:00
Installing collected packages: pyarrow
Successfully installed pyarrow-20.0.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [22]:
# Persist the labelled reviews to a parquet file in the current working directory,
# so the LLM calls do not have to be repeated.
final_df.to_parquet('llm_sentiment.parquet')

In [25]:
# Also export as JSON Lines (one record per line) in the current working directory.
# With orient="records" the row index is not written to the file.
final_df.to_json("llm_sentiment.json", orient="records", lines=True)