In [1]:
!pip install datasets
!pip install -q -U google-generativeai
!pip install openai

Collecting datasets
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: xxhash, dill, multiprocess, datasets
Successfully installed datasets-2

In [10]:
from transformers import AutoTokenizer
from datasets import load_dataset
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support
from tqdm import tqdm
import google.generativeai as genai
import openai

def query_gpt(prompts):
    openai.api_key = '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'
    predicted_labels = []

    for i, prompt in tqdm(enumerate(prompts), total=len(prompts), desc="Processing prompts with GPT-3"):
        messages = [
            {"role": "system", "content": f"You are an expert in predicting the sentiment of the given prompt. You will exactly return one of 'positive' or 'negative' in lowercases without any prefix."},
            {"role": "user", "content": f"You must predict for this prompt {i+1} of {len(prompts)}: '{prompt}'."}
        ]

        try:
            response = openai.chat.completions.create(
                model="gpt-3.5-turbo-0125",
                messages=messages
            )
            predicted_sentiment = response.choices[0].message.content.split(":")[-1].strip()
            predicted_labels.append(predicted_sentiment)

        except Exception as e:
            print(f"An error occurred during the OpenAI API request: {e}")

    return predicted_labels

def query_gemini(prompts):
    GOOGLE_API_KEY = 'AIzaSyA5nXUXPmxS9SlIC9NhexJCOkyavPTvbWY'
    genai.configure(api_key=GOOGLE_API_KEY)
    model = genai.GenerativeModel('gemini-pro')
    predicted_labels = []

    for i, prompt in tqdm(enumerate(prompts), total=len(prompts), desc="Processing prompts with Gemini"):
        messages = [
            f"You are an expert in predicting the sentiment of the given prompt without missing. You will exactly return one of 'positive' or 'negative' in lowercase without any prefix.",
            f"You must predict for this prompt {i+1} of {len(prompts)}: '{prompt}'."
        ]

        try:
            response = model.generate_content(
                messages,
                safety_settings=[
                    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}
                ])
            if response:
                predicted_sentiment = response.text.strip()
                predicted_labels.append(predicted_sentiment)
            else:
                print(f"Warning: No response received from Gemini API for prompt {i+1}.")

        except Exception as e:
            print(f"An error occurred during the Gemini API request for prompt {i+1}:{prompt} -> {e}")

    return predicted_labels


dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "english", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "arabic", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "french", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "german", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "hindi", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "italian", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "portuguese", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "spanish", split='test')

labels = ["negative", "neutral", "positive"]

true_labels = []
predicted_labels = []
N = 25

positive_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'positive'][:N]
negative_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'negative'][:N]
prompts = positive_samples + negative_samples

true_labels.extend(['positive'] * N + ['negative'] * N)

print(f"Prompts: {prompts}")
print(f"True Labels: {true_labels},{len(true_labels)}")
gemini_predicted_labels = query_gemini(prompts)
print(f"Gemini Predicted Labels: {gemini_predicted_labels},{len(gemini_predicted_labels)}")
gpt_predicted_labels = query_gpt(prompts)
print(f"GPT-3 Predicted Labels: {gpt_predicted_labels},{len(gpt_predicted_labels)}")

if gemini_predicted_labels and true_labels:
      gemini_classification_report_output = classification_report(true_labels, gemini_predicted_labels, target_names=['positive', 'negative'])
      print(f"Gemini Classification Report:")
      print(gemini_classification_report_output)
else:
    print("Error: No predicted labels were returned by Gemini.")

if gpt_predicted_labels and true_labels:
      gpt_classification_report_output = classification_report(true_labels, gpt_predicted_labels, target_names=['positive', 'negative'])
      print(f"GPT-3 Classification Report:")
      print(gpt_classification_report_output)
else:
    print("Error: No predicted labels were returned by GPT-3.")


Prompts: ['@user You are a stand up guy and a Gentleman Vice President Pence ', "i'm not even catholic, but pope francis is my dude. like i just need him to hug me and tell me everything is okay. ", 'Samsung to Bring Android 7.0 Nougat to Galaxy S6, S6 edge, Note 5, and Tab S2 - Softpedia News ', "I will go so far to say s1 of westworld isn't just good, it's brilliant. A story within a story within a story about storytelling ", '#NationalFastFoodDay Would love to live there. Chick-fil-A 😍 ', 'The Reputation Doctor weighs in on Tony Romo #NFL @user joins @user on #TheMorningRush LISTEN: ', 'So proud of way @user & #trumpTransitionteam are molding strong leadership group for #America #TeamTrump #MakeAmericaGreatAgain ', '@user @user - #ScreamQueens so lucky to get to work with TK. ', 'This is a big deal and a smart move by Microsoft: ', 'Digesting while watching #ScreamQueens s1 Thanksgiving & Black Friday eps. SO SO GOOD!! ', "I've been listening to Leonard Cohen nonstop for the past mo

Processing prompts with Gemini: 100%|██████████| 60/60 [01:42<00:00,  1.72s/it]


Gemini Predicted Labels: ['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'negative', 'positive', 'negative', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'negative', 'negative', 'positive', 'negative', 'positive', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'positive', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative'],60


Processing prompts with GPT-3: 100%|██████████| 60/60 [19:16<00:00, 19.27s/it]

GPT-3 Predicted Labels: ['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative'],60
Gemini Classification Report:
              precision    recall  f1-score   support

    positive       0.93      0.90      0.92        30
    negative       0.90      0.93      0.92        30

    accuracy                           0.92        60
   m




In [5]:
from transformers import AutoTokenizer
from datasets import load_dataset
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support
from tqdm import tqdm
import google.generativeai as genai
import openai

def query_gpt(prompts):
    openai.api_key = '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'
    predicted_labels = []

    for i, prompt in tqdm(enumerate(prompts), total=len(prompts), desc="Processing prompts with GPT-3"):
        messages = [
            {"role": "system", "content": f"You are an expert in predicting the sentiment of the given prompt. You will exactly return one of 'positive' or 'negative' in lowercases without any prefix."},
            {"role": "user", "content": f"You must predict for this prompt {i+1} of {len(prompts)}: '{prompt}'."}
        ]

        try:
            response = openai.chat.completions.create(
                model="gpt-3.5-turbo-0125",
                messages=messages
            )
            predicted_sentiment = response.choices[0].message.content.split(":")[-1].strip()
            predicted_labels.append(predicted_sentiment)

        except Exception as e:
            print(f"An error occurred during the OpenAI API request: {e}")

    return predicted_labels

def query_gemini(prompts):
    GOOGLE_API_KEY = 'AIzaSyA5nXUXPmxS9SlIC9NhexJCOkyavPTvbWY'
    genai.configure(api_key=GOOGLE_API_KEY)
    model = genai.GenerativeModel('gemini-pro')
    predicted_labels = []

    for i, prompt in tqdm(enumerate(prompts), total=len(prompts), desc="Processing prompts with Gemini"):
        messages = [
            f"You are an expert in predicting the sentiment of the given prompt without missing. You will exactly return one of 'positive' or 'negative' in lowercase without any prefix.",
            f"You must predict for this prompt {i+1} of {len(prompts)}: '{prompt}'."
        ]

        try:
            response = model.generate_content(
                messages,
                safety_settings=[
                    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}
                ])
            if response:
                predicted_sentiment = response.text.strip()
                predicted_labels.append(predicted_sentiment)
            else:
                print(f"Warning: No response received from Gemini API for prompt {i+1}.")

        except Exception as e:
            print(f"An error occurred during the Gemini API request for prompt {i+1}:{prompt} -> {e}")

    return predicted_labels


#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "english", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "arabic", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "french", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "german", split='test')
dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "hindi", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "italian", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "portuguese", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "spanish", split='test')

labels = ["negative", "neutral", "positive"]

true_labels = []
predicted_labels = []
N = 25

positive_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'positive'][:N]
negative_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'negative'][:N]
prompts = positive_samples + negative_samples

true_labels.extend(['positive'] * N + ['negative'] * N)

print(f"Prompts: {prompts}")
print(f"True Labels: {true_labels},{len(true_labels)}")
gemini_predicted_labels = query_gemini(prompts)
print(f"Gemini Predicted Labels: {gemini_predicted_labels},{len(gemini_predicted_labels)}")

if gemini_predicted_labels and true_labels:
      gemini_classification_report_output = classification_report(true_labels, gemini_predicted_labels, target_names=['positive', 'negative'])
      print(f"Gemini Classification Report:")
      print(gemini_classification_report_output)
else:
    print("Error: No predicted labels were returned by Gemini.")

gpt_predicted_labels = query_gpt(prompts)
print(f"GPT-3 Predicted Labels: {gpt_predicted_labels},{len(gpt_predicted_labels)}")

if gpt_predicted_labels and true_labels:
      gpt_classification_report_output = classification_report(true_labels, gpt_predicted_labels, target_names=['positive', 'negative'])
      print(f"GPT-3 Classification Report:")
      print(gpt_classification_report_output)
else:
    print("Error: No predicted labels were returned by GPT-3.")


Prompts: ['ha ha ha ha ha ha ha .  .  .  .  . :d ye mast tha .  .  .  . cute aur comic .  .  .  . :)', 'jai hind', 'rt @mukhijanidhi: hai teri hasti aisi jo dekhe wo mar mite #msgyouthicon #msgrevolution', 'aisa school ho to me kabhi ghar hi na jau .  .  . ', "i'll never forget that first girl i was crazy about in 5th grade .  i still got her valentine day card in my secret safe . ", 'hahaha sai m .  .  ek baar class se nikal jate the to pura school naap ke wapas ate the', 'Jaanma main bol rahi hu ki,tum mere twits dekho :/', 'rt psharma2525: rt komalinsaan: gurmeetramrahim #lovetoseemsg #msgincinemas schi agr duniya ye movi dekh le to yaha swrg bn skta h', "rare ,  300kg meteorite discovered in poland ,  biggest in eastern europe and may provide clues about earth's core: http/URL", 'durdarshan wah wah  .  .  .  . ', 'Salman bai ap kese hen ma b ap ka fen hun', 'kiranji ko delhi ka cm ummidwar ghosit karna modiji ki doodarshita wa samghdaari ko darshata hai . ', 'aage se jab bhi wc mei

Processing prompts with Gemini: 100%|██████████| 50/50 [01:01<00:00,  1.22s/it]


Gemini Predicted Labels: ['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'negative', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'negative', 'negative', 'positive', 'negative', 'negative', 'negative', 'positive', 'negative', 'negative', 'positive', 'negative', 'negative', 'negative', 'positive', 'negative', 'negative', 'positive', 'negative', 'negative', 'negative', 'negative', 'positive', 'negative'],50
Gemini Classification Report:
              precision    recall  f1-score   support

    positive       0.94      0.68      0.79        25
    negative       0.75      0.96      0.84        25

    accuracy                           0.82        50
   macro avg       0.85      0.82      0.82        50
weighted avg       0.85      0.82      0.82        50



Processing prompts with GPT-3:   8%|▊         | 4/50 [00:23<06:55,  9.03s/it]

An error occurred during the OpenAI API request: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo-0125 in organization org-wlQQnezHpDw43ZOcpWhMra0h on requests per min (RPM): Limit 3, Used 3, Requested 1. Please try again in 20s. Visit https://platform.openai.com/account/rate-limits to learn more. You can increase your rate limit by adding a payment method to your account at https://platform.openai.com/account/billing.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}


Processing prompts with GPT-3:  10%|█         | 5/50 [00:45<10:15, 13.68s/it]

An error occurred during the OpenAI API request: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo-0125 in organization org-wlQQnezHpDw43ZOcpWhMra0h on requests per min (RPM): Limit 3, Used 3, Requested 1. Please try again in 20s. Visit https://platform.openai.com/account/rate-limits to learn more. You can increase your rate limit by adding a payment method to your account at https://platform.openai.com/account/billing.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}


Processing prompts with GPT-3:  12%|█▏        | 6/50 [01:07<12:03, 16.45s/it]

An error occurred during the OpenAI API request: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-3.5-turbo-0125 in organization org-wlQQnezHpDw43ZOcpWhMra0h on requests per min (RPM): Limit 3, Used 3, Requested 1. Please try again in 20s. Visit https://platform.openai.com/account/rate-limits to learn more. You can increase your rate limit by adding a payment method to your account at https://platform.openai.com/account/billing.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}


Processing prompts with GPT-3:  12%|█▏        | 6/50 [01:13<09:00, 12.27s/it]


KeyboardInterrupt: 

## **UI**

In [3]:
from transformers import AutoTokenizer
from datasets import load_dataset
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support
from tqdm import tqdm
import google.generativeai as genai
import openai
import random

def query_gpt(prompts):

    openai.api_key = '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'
    messages = [{"role": "system", "content": f"You are an expert in predicting the sentiment of all the given '{len(prompts)}' prompts. You will exactly return one of 'positive' or 'negative' in lowercases for each of the prompt without any prefix."}]

    user_prompts = []
    for i, prompt in enumerate(prompts):
        #user_prompt = f" Predict for this {i}/{len(prompts)} prompt: '{prompt}'."
        user_prompt = f" You must Predict for this {i} of {len(prompts)}prompt: '{prompt}'."
        user_prompts.append(user_prompt)

    messages.append({"role": "user", "content": "\n".join(user_prompts)})
    return messages

    '''try:
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            messages=messages
        )

        print(f"Response:{response}")
        predicted_labels = []

        # Extracting predicted sentiments
        for sentiment in response.choices[0].message.content.split("\n"):
            if sentiment:  # Exclude empty lines
                # Split by ":" and take the last part
                #print(f"Predicted Sentiment: {sentiment}")
                sentiment = sentiment.split(": ")[-1].strip()
                #print(f"Modified Sentiment: {sentiment}")
                predicted_labels.append(sentiment)
        #print(f"predicted_labels: {predicted_labels}")
        return predicted_labels

    except Exception as e:
        print(f"An error occurred during the OpenAI API request: {e}")
        return ""'''


#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "english", split='test')
dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "arabic", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "french", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "german", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "hindi", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "italian", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "portuguese", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "spanish", split='test')

labels = ["negative", "neutral", "positive"]

true_labels = []
predicted_labels = []
N = 30

positive_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'positive']
negative_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'negative']

# Shuffle the positive and negative samples
random.shuffle(positive_samples)
random.shuffle(negative_samples)

# Select N random samples from each category
positive_samples = positive_samples[:N]
negative_samples = negative_samples[:N]

prompts = positive_samples + negative_samples

true_labels.extend(['positive'] * N + ['negative'] * N)

print(f"Prompts: {prompts}")

gpt_predicted_labels = ['negative','positive','negative','positive','negative','positive','positive','positive','negative','positive','positive','positive','negative','positive','positive','positive','positive','negative','positive','negative','positive','positive','negative','negative','positive','negative','positive','negative','negative','negative','positive','negative','negative','negative','negative','negative','negative','negative','positive','negative','negative','negative','negative','negative','negative','positive','negative','negative','negative','negative','positive','positive','negative','negative','negative','positive','negative','negative','positive','negative','negative']
gemini_predicted_labels =
print(f"GPT-3 Predicted Labels: {gpt_predicted_labels},{len(gpt_predicted_labels)}")




The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading data:   0%|          | 0.00/173k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/32.4k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/74.9k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1839 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/324 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/870 [00:00<?, ? examples/s]

Prompts: ['الحريري: نعول على حكمة وتجربة الرئيس ميشال\xa0عون! http http', 'ضحكنا اوي اوي و استفدنا اوي اوي 💗😘#في_بيتنا_ضيف #نادين_نسيب_نجيم @user http', '@user @user متل نجومية نوال الزغبي ماحدا محقق و نقطه عالسطر http', 'ربآااه ما الذي أرى أمامي 😱😱😱😳.بعد جمالها مستحيل يحلى بعيوني شي 😍😍😍@user ❤#نادين_نسيب_نجيم http', 'عم بحكي مع حالي #نوال_الزغبي http', 'يسعد رب التواضع و الجمال مع سواا 💓 @user  #نادين_نسيب_نجيم http', 'جنرال الجمهورية باحلى خطاب بأغلى عيد بعيد استقلال وطني لبنان شكرا ميشال عون لكلماتك المحبة والوطنية التي انتظرتها 26 سنة  بنت لبنان العظيم', 'I liked a @user video from @user http للمرة الأولى ميريام فارس تنشر فيديو', '"@user: @user “بدك تسأل عليي” جديد نوال الزغبي قريباً http', 'ما في اجمل من نوال الزغبي وما بيخلق 😂#غرد_كانك_في_التسعينيات', '@user - ﺑﻴﻦ گُل " ﻧِﻔﺲ ﻓﻴّﻨﻲ " صُوتگگ"  #نوآل_آلزغبي #آغلى_آلحبآيب 💙💗💗😍❤ http', 'ملكة الصحراء والجمال والدراما#نادين_نسيب_نجيمفي كواليس جلسة تصوير مجلة #ماري_كلير العربية في #دبي@nadinenjeim ❤👑 http', 'لبنان | الرئيس ميشال عون يستق

In [None]:
from transformers import AutoTokenizer
from datasets import load_dataset
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support
from tqdm import tqdm
import google.generativeai as genai
import openai

def query_gpt(prompts):
    openai.api_key = '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'
    predicted_labels = []

    for i, prompt in tqdm(enumerate(prompts), total=len(prompts), desc="Processing prompts with GPT-3"):
        messages = [
            {"role": "system", "content": f"You are an expert in predicting the sentiment of the given prompt. You will exactly return one of 'positive' or 'negative' in lowercases without any prefix."},
            {"role": "user", "content": f"You must predict for this prompt {i+1} of {len(prompts)}: '{prompt}'."}
        ]

        try:
            response = openai.chat.completions.create(
                model="gpt-3.5-turbo-0125",
                messages=messages
            )
            predicted_sentiment = response.choices[0].message.content.split(":")[-1].strip()
            predicted_labels.append(predicted_sentiment)

        except Exception as e:
            print(f"An error occurred during the OpenAI API request: {e}")

    return predicted_labels

def query_gemini(prompts):
    GOOGLE_API_KEY = 'AIzaSyA5nXUXPmxS9SlIC9NhexJCOkyavPTvbWY'
    genai.configure(api_key=GOOGLE_API_KEY)
    model = genai.GenerativeModel('gemini-pro')
    predicted_labels = []

    for i, prompt in tqdm(enumerate(prompts), total=len(prompts), desc="Processing prompts with Gemini"):
        messages = [
            f"You are an expert in predicting the sentiment of the given prompt without missing. You will exactly return one of 'positive' or 'negative' in lowercase without any prefix.",
            f"You must predict for this prompt {i+1} of {len(prompts)}: '{prompt}'."
        ]

        try:
            response = model.generate_content(
                messages,
                safety_settings=[
                    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}
                ])
            if response:
                predicted_sentiment = response.text.strip()
                predicted_labels.append(predicted_sentiment)
            else:
                print(f"Warning: No response received from Gemini API for prompt {i+1}.")

        except Exception as e:
            print(f"An error occurred during the Gemini API request for prompt {i+1}:{prompt} -> {e}")

    return predicted_labels


#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "english", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "arabic", split='test')
dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "french", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "german", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "hindi", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "italian", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "portuguese", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "spanish", split='test')

labels = ["negative", "neutral", "positive"]

true_labels = []
predicted_labels = []
N = 30

positive_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'positive'][:N]
negative_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'negative'][:N]
prompts = positive_samples + negative_samples

true_labels.extend(['positive'] * N + ['negative'] * N)

print(f"Prompts: {prompts}")
print(f"True Labels: {true_labels},{len(true_labels)}")
gemini_predicted_labels = query_gemini(prompts)
print(f"Gemini Predicted Labels: {gemini_predicted_labels},{len(gemini_predicted_labels)}")
gpt_predicted_labels = query_gpt(prompts)
print(f"GPT-3 Predicted Labels: {gpt_predicted_labels},{len(gpt_predicted_labels)}")

if gemini_predicted_labels and true_labels:
      gemini_classification_report_output = classification_report(true_labels, gemini_predicted_labels, target_names=['positive', 'negative'])
      print(f"Gemini Classification Report:")
      print(gemini_classification_report_output)
else:
    print("Error: No predicted labels were returned by Gemini.")

if gpt_predicted_labels and true_labels:
      gpt_classification_report_output = classification_report(true_labels, gpt_predicted_labels, target_names=['positive', 'negative'])
      print(f"GPT-3 Classification Report:")
      print(gpt_classification_report_output)
else:
    print("Error: No predicted labels were returned by GPT-3.")


In [59]:
from transformers import AutoTokenizer
from datasets import load_dataset
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support
from tqdm import tqdm
import openai

def extract_alone_sentiment(response):
    sentiment = response.choices[0].message.content.split("\n")[1].strip()
    return sentiment

def query_gpt(prompts):
    openai.api_key = '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'
    messages = [{"role": "system", "content": "You are an expert in predicting the sentiment of a given prompt"}]

    user_prompts = []
    for i, prompt in enumerate(prompts):
        user_prompt = f"Predict the sentiment of the following prompt ({i+1}/{len(prompts)}): '{prompt}' into one of the following three categories: neutral, negative, positive. The sentiment shouldn't be blank or ''. For example, your output should be in this format 'positive'."
        user_prompts.append(user_prompt)

    messages.append({"role": "user", "content": "\n".join(user_prompts)})

    try:
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            messages=messages
        )

        print(f"Response:{response}")
        predicted_labels = []

        # Extracting predicted sentiments
        for i, sentiment in enumerate(response.choices[0].message.content.split("\n")[1:], 1):
            print(f"Sentiment:{sentiment}")
            sentiment = sentiment.split(".")[1]
            print(f"Modified Sentiment:{sentiment}")
            if sentiment:  # Exclude empty lines
                predicted_labels.append(sentiment.strip())
        print(f"predicted_labels:{predicted_labels}")
        return predicted_labels

    except Exception as e:
        print(f"An error occurred during the OpenAI API request: {e}")
        return ""

dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "english", split='test')
labels = ["negative", "neutral", "positive"]

true_labels = []
predicted_labels = []

positive_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'positive'][:25]
negative_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'negative'][:25]
prompts = positive_samples + negative_samples

true_labels.extend(['positive'] * 25 + ['negative'] * 25)

print(f"Prompts: {prompts}")
predicted_labels = query_gpt(prompts)
print(f"True Labels: {true_labels},{len(true_labels)}")
print(f"Predicted Labels: {predicted_labels},{len(predicted_labels)}")

if predicted_labels and true_labels:
    accuracy = accuracy_score(true_labels, predicted_labels)
    metrics = precision_recall_fscore_support(true_labels, predicted_labels, average='weighted')
    classification_report_output = classification_report(true_labels, predicted_labels, target_names=['positive', 'negative'])

    print(f"Accuracy: {accuracy}")
    print(f"Precision: {metrics[0]}")
    print(f"Recall: {metrics[1]}")
    print(f"F1-score: {metrics[2]}")
    print("Classification Report:")
    print(classification_report_output)
else:
    print("Error: No predicted labels were returned.")

Prompts: ['@user You are a stand up guy and a Gentleman Vice President Pence ', "i'm not even catholic, but pope francis is my dude. like i just need him to hug me and tell me everything is okay. ", 'Samsung to Bring Android 7.0 Nougat to Galaxy S6, S6 edge, Note 5, and Tab S2 - Softpedia News ', "I will go so far to say s1 of westworld isn't just good, it's brilliant. A story within a story within a story about storytelling ", '#NationalFastFoodDay Would love to live there. Chick-fil-A 😍 ', 'The Reputation Doctor weighs in on Tony Romo #NFL @user joins @user on #TheMorningRush LISTEN: ', 'So proud of way @user & #trumpTransitionteam are molding strong leadership group for #America #TeamTrump #MakeAmericaGreatAgain ', '@user @user - #ScreamQueens so lucky to get to work with TK. ', 'This is a big deal and a smart move by Microsoft: ', 'Digesting while watching #ScreamQueens s1 Thanksgiving & Black Friday eps. SO SO GOOD!! ', "I've been listening to Leonard Cohen nonstop for the past mo

In [101]:
from transformers import AutoTokenizer
from datasets import load_dataset
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support
from tqdm import tqdm
import openai

def extract_alone_sentiment(response):
    sentiment = response.choices[0].message.content.split("\n")[1].strip()
    return sentiment

def query_gpt(prompts):
    openai.api_key = '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'
    messages = [{"role": "system", "content": f"You are an expert in predicting the sentiment of all the given '{len(prompts)}' prompts. You will exactly return one of 'positive' or 'negative' in lowercases for each of the prompt without any prefix."}]

    user_prompts = []
    for i, prompt in enumerate(prompts):
        user_prompt = f"Predict for this prompt: '{prompt}'."
        user_prompts.append(user_prompt)

    messages.append({"role": "user", "content": "\n".join(user_prompts)})

    try:
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            messages=messages
        )

        print(f"Response:{response}")
        predicted_labels = []

        # Extracting predicted sentiments
        for sentiment in response.choices[0].message.content.split("\n"):
            if sentiment:  # Exclude empty lines
                # Split by ":" and take the last part
                print(f"Predicted Sentiment: {sentiment}")
                sentiment = sentiment.split(": ")[-1].strip()
                print(f"Modified Sentiment: {sentiment}")
                predicted_labels.append(sentiment)
        print(f"predicted_labels: {predicted_labels}")
        return predicted_labels

    except Exception as e:
        print(f"An error occurred during the OpenAI API request: {e}")
        return ""

dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "english", split='test')
labels = ["negative", "neutral", "positive"]

true_labels = []
predicted_labels = []
N = 5

positive_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'positive'][:N]
negative_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'negative'][:N]
prompts = positive_samples + negative_samples

true_labels.extend(['positive'] * N + ['negative'] * N)

print(f"Prompts: {prompts}")
predicted_labels = query_gpt(prompts)
print(f"True Labels: {true_labels},{len(true_labels)}")
print(f"Predicted Labels: {predicted_labels},{len(predicted_labels)}")

if predicted_labels and true_labels:
    accuracy = accuracy_score(true_labels, predicted_labels)
    metrics = precision_recall_fscore_support(true_labels, predicted_labels, average='weighted')
    classification_report_output = classification_report(true_labels, predicted_labels, target_names=['positive', 'negative'])

    print(f"Accuracy: {accuracy}")
    print(f"Precision: {metrics[0]}")
    print(f"Recall: {metrics[1]}")
    print(f"F1-score: {metrics[2]}")
    print("Classification Report:")
    print(classification_report_output)
else:
    print("Error: No predicted labels were returned.")

Prompts: ['@user You are a stand up guy and a Gentleman Vice President Pence ', "i'm not even catholic, but pope francis is my dude. like i just need him to hug me and tell me everything is okay. ", 'Samsung to Bring Android 7.0 Nougat to Galaxy S6, S6 edge, Note 5, and Tab S2 - Softpedia News ', "I will go so far to say s1 of westworld isn't just good, it's brilliant. A story within a story within a story about storytelling ", '#NationalFastFoodDay Would love to live there. Chick-fil-A 😍 ', 'Trying to have a conversation with my dad about vegetarianism is the most pointless infuriating thing ever #caveman ', "@user @user @user Looks like Flynn isn't too pleased with me, he blocked me. You blocked by Flynn too @user ", '@user for al the crying you do about how middle America is left out-they have twice as much voting power ', 'Israel deems comatose Gaza man who needs treatment in West Bank  a security threat. #Palestine  via @user ', 'Iraqi Forces set to storm 3 areas of #Mosul, #AlQah

In [6]:
from transformers import AutoTokenizer
from datasets import load_dataset
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support
from tqdm import tqdm
import openai

def extract_alone_sentiment(response):
    sentiment = response.choices[0].message.content.split("\n")[1].strip()
    return sentiment

def query_gpt(prompts):
    openai.api_key = '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'
    messages = [{"role": "system", "content": f"You are an expert in predicting the sentiment of all the given '{len(prompts)}' prompts. You will exactly return one of 'positive' or 'negative' in lowercases for each of the prompt without any prefix."}]

    user_prompts = []
    for i, prompt in enumerate(prompts):
        user_prompt = f"Predict for this prompt: '{prompt}'."
        user_prompts.append(user_prompt)

    messages.append({"role": "user", "content": "\n".join(user_prompts)})

    try:
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            messages=messages
        )

        print(f"Response:{response}")
        predicted_labels = []

        # Extracting predicted sentiments
        for sentiment in response.choices[0].message.content.split("\n"):
            if sentiment:  # Exclude empty lines
                # Split by ":" and take the last part
                print(f"Predicted Sentiment: {sentiment}")
                sentiment = sentiment.split(": ")[-1].strip()
                print(f"Modified Sentiment: {sentiment}")
                predicted_labels.append(sentiment)
        print(f"predicted_labels: {predicted_labels}")
        return predicted_labels

    except Exception as e:
        print(f"An error occurred during the OpenAI API request: {e}")
        return ""

dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "english", split='test')
labels = ["negative", "neutral", "positive"]

true_labels = []
predicted_labels = []
N = 15

positive_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'positive'][:N]
negative_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'negative'][:N]
prompts = positive_samples + negative_samples

true_labels.extend(['positive'] * N + ['negative'] * N)

print(f"Prompts: {prompts}")
predicted_labels = query_gpt(prompts)
print(f"True Labels: {true_labels},{len(true_labels)}")
print(f"Predicted Labels: {predicted_labels},{len(predicted_labels)}")

if predicted_labels and true_labels:
    accuracy = accuracy_score(true_labels, predicted_labels)
    metrics = precision_recall_fscore_support(true_labels, predicted_labels, average='weighted')
    classification_report_output = classification_report(true_labels, predicted_labels, target_names=['positive', 'negative'])

    print(f"Accuracy: {accuracy}")
    print(f"Precision: {metrics[0]}")
    print(f"Recall: {metrics[1]}")
    print(f"F1-score: {metrics[2]}")
    print("Classification Report:")
    print(classification_report_output)
else:
    print("Error: No predicted labels were returned.")

Downloading data:   0%|          | 0.00/155k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/29.1k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/64.8k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1839 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/324 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/870 [00:00<?, ? examples/s]

Prompts: ['@user You are a stand up guy and a Gentleman Vice President Pence ', "i'm not even catholic, but pope francis is my dude. like i just need him to hug me and tell me everything is okay. ", 'Samsung to Bring Android 7.0 Nougat to Galaxy S6, S6 edge, Note 5, and Tab S2 - Softpedia News ', "I will go so far to say s1 of westworld isn't just good, it's brilliant. A story within a story within a story about storytelling ", '#NationalFastFoodDay Would love to live there. Chick-fil-A 😍 ', 'The Reputation Doctor weighs in on Tony Romo #NFL @user joins @user on #TheMorningRush LISTEN: ', 'So proud of way @user & #trumpTransitionteam are molding strong leadership group for #America #TeamTrump #MakeAmericaGreatAgain ', '@user @user - #ScreamQueens so lucky to get to work with TK. ', 'This is a big deal and a smart move by Microsoft: ', 'Digesting while watching #ScreamQueens s1 Thanksgiving & Black Friday eps. SO SO GOOD!! ', "I've been listening to Leonard Cohen nonstop for the past mo

In [116]:
from transformers import AutoTokenizer
from datasets import load_dataset
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support
from tqdm import tqdm
import openai

def extract_alone_sentiment(response):
    sentiment = response.choices[0].message.content.split("\n")[1].strip()
    return sentiment

def query_gpt(prompts):
    openai.api_key = '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'
    messages = [{"role": "system", "content": f"You are an expert in predicting the sentiment of all the given '{len(prompts)}' prompts. You will exactly return one of 'positive' or 'negative' in lowercases for each of the prompt without any prefix."}]

    user_prompts = []
    for i, prompt in enumerate(prompts):
        user_prompt = f"Predict for this {i}/{len(prompts)} prompt: '{prompt}'."
        user_prompts.append(user_prompt)

    messages.append({"role": "user", "content": "\n".join(user_prompts)})

    try:
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            messages=messages
        )

        print(f"Response:{response}")
        predicted_labels = []

        # Extracting predicted sentiments
        for sentiment in response.choices[0].message.content.split("\n"):
            if sentiment:  # Exclude empty lines
                # Split by ":" and take the last part
                print(f"Predicted Sentiment: {sentiment}")
                sentiment = sentiment.split(": ")[-1].strip()
                print(f"Modified Sentiment: {sentiment}")
                predicted_labels.append(sentiment)
        print(f"predicted_labels: {predicted_labels}")
        return predicted_labels

    except Exception as e:
        print(f"An error occurred during the OpenAI API request: {e}")
        return ""

dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "english", split='test')
labels = ["negative", "neutral", "positive"]

true_labels = []
predicted_labels = []
N = 20

positive_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'positive'][:N]
negative_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'negative'][:N]
prompts = positive_samples + negative_samples

true_labels.extend(['positive'] * N + ['negative'] * N)

print(f"Prompts: {prompts}")
predicted_labels = query_gpt(prompts)
print(f"True Labels: {true_labels},{len(true_labels)}")
print(f"Predicted Labels: {predicted_labels},{len(predicted_labels)}")

if predicted_labels and true_labels:
    accuracy = accuracy_score(true_labels, predicted_labels)
    metrics = precision_recall_fscore_support(true_labels, predicted_labels, average='weighted')
    classification_report_output = classification_report(true_labels, predicted_labels, target_names=['positive', 'negative'])

    print(f"Accuracy: {accuracy}")
    print(f"Precision: {metrics[0]}")
    print(f"Recall: {metrics[1]}")
    print(f"F1-score: {metrics[2]}")
    print("Classification Report:")
    print(classification_report_output)
else:
    print("Error: No predicted labels were returned.")

Prompts: ['@user You are a stand up guy and a Gentleman Vice President Pence ', "i'm not even catholic, but pope francis is my dude. like i just need him to hug me and tell me everything is okay. ", 'Samsung to Bring Android 7.0 Nougat to Galaxy S6, S6 edge, Note 5, and Tab S2 - Softpedia News ', "I will go so far to say s1 of westworld isn't just good, it's brilliant. A story within a story within a story about storytelling ", '#NationalFastFoodDay Would love to live there. Chick-fil-A 😍 ', 'The Reputation Doctor weighs in on Tony Romo #NFL @user joins @user on #TheMorningRush LISTEN: ', 'So proud of way @user & #trumpTransitionteam are molding strong leadership group for #America #TeamTrump #MakeAmericaGreatAgain ', '@user @user - #ScreamQueens so lucky to get to work with TK. ', 'This is a big deal and a smart move by Microsoft: ', 'Digesting while watching #ScreamQueens s1 Thanksgiving & Black Friday eps. SO SO GOOD!! ', "I've been listening to Leonard Cohen nonstop for the past mo

In [117]:
from transformers import AutoTokenizer
from datasets import load_dataset
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support
from tqdm import tqdm
import openai

def extract_alone_sentiment(response):
    sentiment = response.choices[0].message.content.split("\n")[1].strip()
    return sentiment

def query_gpt(prompts):
    openai.api_key = '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'
    messages = [{"role": "system", "content": f"You are an expert in predicting the sentiment of all the given '{len(prompts)}' prompts. You will exactly return one of 'positive' or 'negative' in lowercases for each of the prompt without any prefix."}]

    user_prompts = []
    for i, prompt in enumerate(prompts):
        user_prompt = f"Predict for this {i}/{len(prompts)} prompt: '{prompt}'."
        user_prompts.append(user_prompt)

    messages.append({"role": "user", "content": "\n".join(user_prompts)})

    try:
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            messages=messages
        )

        print(f"Response:{response}")
        predicted_labels = []

        # Extracting predicted sentiments
        for sentiment in response.choices[0].message.content.split("\n"):
            if sentiment:  # Exclude empty lines
                # Split by ":" and take the last part
                print(f"Predicted Sentiment: {sentiment}")
                sentiment = sentiment.split(": ")[-1].strip()
                print(f"Modified Sentiment: {sentiment}")
                predicted_labels.append(sentiment)
        print(f"predicted_labels: {predicted_labels}")
        return predicted_labels

    except Exception as e:
        print(f"An error occurred during the OpenAI API request: {e}")
        return ""

dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "english", split='test')
labels = ["negative", "neutral", "positive"]

true_labels = []
predicted_labels = []
N = 25

positive_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'positive'][:N]
negative_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'negative'][:N]
prompts = positive_samples + negative_samples

true_labels.extend(['positive'] * N + ['negative'] * N)

print(f"Prompts: {prompts}")
predicted_labels = query_gpt(prompts)
print(f"True Labels: {true_labels},{len(true_labels)}")
print(f"Predicted Labels: {predicted_labels},{len(predicted_labels)}")

if predicted_labels and true_labels:
    accuracy = accuracy_score(true_labels, predicted_labels)
    metrics = precision_recall_fscore_support(true_labels, predicted_labels, average='weighted')
    classification_report_output = classification_report(true_labels, predicted_labels, target_names=['positive', 'negative'])

    print(f"Accuracy: {accuracy}")
    print(f"Precision: {metrics[0]}")
    print(f"Recall: {metrics[1]}")
    print(f"F1-score: {metrics[2]}")
    print("Classification Report:")
    print(classification_report_output)
else:
    print("Error: No predicted labels were returned.")

Prompts: ['@user You are a stand up guy and a Gentleman Vice President Pence ', "i'm not even catholic, but pope francis is my dude. like i just need him to hug me and tell me everything is okay. ", 'Samsung to Bring Android 7.0 Nougat to Galaxy S6, S6 edge, Note 5, and Tab S2 - Softpedia News ', "I will go so far to say s1 of westworld isn't just good, it's brilliant. A story within a story within a story about storytelling ", '#NationalFastFoodDay Would love to live there. Chick-fil-A 😍 ', 'The Reputation Doctor weighs in on Tony Romo #NFL @user joins @user on #TheMorningRush LISTEN: ', 'So proud of way @user & #trumpTransitionteam are molding strong leadership group for #America #TeamTrump #MakeAmericaGreatAgain ', '@user @user - #ScreamQueens so lucky to get to work with TK. ', 'This is a big deal and a smart move by Microsoft: ', 'Digesting while watching #ScreamQueens s1 Thanksgiving & Black Friday eps. SO SO GOOD!! ', "I've been listening to Leonard Cohen nonstop for the past mo

# **Gemini**

In [2]:
!pip install datasets
!pip install -q -U google-generativeai

Collecting datasets
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: xxhash, dill, multiprocess, datasets
Successfully installed dataset

In [20]:
from transformers import AutoTokenizer
from datasets import load_dataset
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support
from tqdm import tqdm
import google.generativeai as genai


def query_gemini(prompts):
    # Replace with your Gemini API key
    GOOGLE_API_KEY= 'AIzaSyA5nXUXPmxS9SlIC9NhexJCOkyavPTvbWY'

    genai.configure(api_key=GOOGLE_API_KEY)
    model = genai.GenerativeModel('gemini-pro')

    messages = [f"You are an expert in predicting the sentiment of all the given '{len(prompts)}' prompts. You will exactly return one of 'positive' or 'negative' in lowercases for each of the prompt without any prefix."]
    user_prompts = []
    for i, prompt in enumerate(prompts):
        user_prompt = f"Predict for this {i}/{len(prompts)} prompt: '{prompt}'"
        user_prompts.append(user_prompt)
    messages.append("\n".join(user_prompts))

    try:
        response = model.generate_content(messages)
        print(f"Response:{response}")
        predicted_labels = []

        # Access response text directly (assuming predictions are comma-separated)
        if response:
            predicted_labels = response.text.split("\n")
            #predicted_labels = [label.strip() for label in predicted_labels if label.strip()]  # Clean and filter empty labels

        else:
            print("Warning: No response received from Gemini API.")

        print(f"predicted_labels: {predicted_labels}")
        return predicted_labels

    except Exception as e:
        print(f"An error occurred during the Gemini API request: {e}")
        return ""


dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "english", split='test')
labels = ["negative", "neutral", "positive"]

true_labels = []
predicted_labels = []
N = 5

positive_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'positive'][:N]
negative_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'negative'][:N]
prompts = positive_samples + negative_samples

true_labels.extend(['positive'] * N + ['negative'] * N)

print(f"Prompts: {prompts}")
predicted_labels = query_gemini(prompts)
print(f"True Labels: {true_labels},{len(true_labels)}")
print(f"Predicted Labels: {predicted_labels},{len(predicted_labels)}")

if predicted_labels and true_labels:
    accuracy = accuracy_score(true_labels, predicted_labels)
    metrics = precision_recall_fscore_support(true_labels, predicted_labels, average='weighted')
    classification_report_output = classification_report(true_labels, predicted_labels, target_names=['positive', 'negative'])

    print(f"Accuracy: {accuracy}")
    print(f"Precision: {metrics[0]}")
    print(f"Recall: {metrics[1]}")
    print(f"F1-score: {metrics[2]}")
    print("Classification Report:")
    print(classification_report_output)
else:
    print("Error: No predicted labels were returned.")

Prompts: ['@user You are a stand up guy and a Gentleman Vice President Pence ', "i'm not even catholic, but pope francis is my dude. like i just need him to hug me and tell me everything is okay. ", 'Samsung to Bring Android 7.0 Nougat to Galaxy S6, S6 edge, Note 5, and Tab S2 - Softpedia News ', "I will go so far to say s1 of westworld isn't just good, it's brilliant. A story within a story within a story about storytelling ", '#NationalFastFoodDay Would love to live there. Chick-fil-A 😍 ', 'Trying to have a conversation with my dad about vegetarianism is the most pointless infuriating thing ever #caveman ', "@user @user @user Looks like Flynn isn't too pleased with me, he blocked me. You blocked by Flynn too @user ", '@user for al the crying you do about how middle America is left out-they have twice as much voting power ', 'Israel deems comatose Gaza man who needs treatment in West Bank  a security threat. #Palestine  via @user ', 'Iraqi Forces set to storm 3 areas of #Mosul, #AlQah

In [25]:
from transformers import AutoTokenizer
from datasets import load_dataset
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support
from tqdm import tqdm
import google.generativeai as genai


def query_gemini(prompts):
    # Replace with your Gemini API key
    GOOGLE_API_KEY= 'AIzaSyA5nXUXPmxS9SlIC9NhexJCOkyavPTvbWY'

    genai.configure(api_key=GOOGLE_API_KEY)
    model = genai.GenerativeModel('gemini-pro')

    messages = [f"You are an expert in predicting the sentiment of all the given '{len(prompts)}' prompts without missing. You will exactly return one of 'positive' or 'negative' in lowercases for each of the prompt without any prefix."]
    user_prompts = []
    for i, prompt in enumerate(prompts):
        user_prompt = f"You must Predict for prompt: '{prompt}'"
        user_prompts.append(user_prompt)
    messages.append("\n".join(user_prompts))

    try:
        response = model.generate_content(messages)
        print(f"Response:{response}")
        predicted_labels = []

        # Access response text directly (assuming predictions are comma-separated)
        if response:
            predicted_labels = response.text.split("\n")
            #predicted_labels = [label.strip() for label in predicted_labels if label.strip()]  # Clean and filter empty labels

        else:
            print("Warning: No response received from Gemini API.")

        print(f"predicted_labels: {predicted_labels}")
        return predicted_labels

    except Exception as e:
        print(f"An error occurred during the Gemini API request: {e}")
        return ""


dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "english", split='test')
labels = ["negative", "neutral", "positive"]

true_labels = []
predicted_labels = []
N = 10

positive_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'positive'][:N]
negative_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'negative'][:N]
prompts = positive_samples + negative_samples

true_labels.extend(['positive'] * N + ['negative'] * N)

print(f"Prompts: {prompts}")
gemini_predicted_labels = query_gemini(prompts)
print(f"True Labels: {true_labels},{len(true_labels)}")
print(f"Gemini Predicted Labels: {gemini_predicted_labels},{len(gemini_predicted_labels)}")

if predicted_labels and true_labels:
    accuracy = accuracy_score(true_labels, gemini_predicted_labels)
    metrics = precision_recall_fscore_support(true_labels, gemini_predicted_labels, average='weighted')
    classification_report_output = classification_report(true_labels, gemini_predicted_labels, target_names=['positive', 'negative'])

    print(f"Accuracy: {accuracy}")
    print(f"Precision: {metrics[0]}")
    print(f"Recall: {metrics[1]}")
    print(f"F1-score: {metrics[2]}")
    print("Classification Report:")
    print(classification_report_output)
else:
    print("Error: No predicted labels were returned.")

Prompts: ['@user You are a stand up guy and a Gentleman Vice President Pence ', "i'm not even catholic, but pope francis is my dude. like i just need him to hug me and tell me everything is okay. ", 'Samsung to Bring Android 7.0 Nougat to Galaxy S6, S6 edge, Note 5, and Tab S2 - Softpedia News ', "I will go so far to say s1 of westworld isn't just good, it's brilliant. A story within a story within a story about storytelling ", '#NationalFastFoodDay Would love to live there. Chick-fil-A 😍 ', 'The Reputation Doctor weighs in on Tony Romo #NFL @user joins @user on #TheMorningRush LISTEN: ', 'So proud of way @user & #trumpTransitionteam are molding strong leadership group for #America #TeamTrump #MakeAmericaGreatAgain ', '@user @user - #ScreamQueens so lucky to get to work with TK. ', 'This is a big deal and a smart move by Microsoft: ', 'Digesting while watching #ScreamQueens s1 Thanksgiving & Black Friday eps. SO SO GOOD!! ', 'Trying to have a conversation with my dad about vegetarianis

## **GPT-3 and Gemini Integrated**

In [67]:
from transformers import AutoTokenizer
from datasets import load_dataset
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support
from tqdm import tqdm
import google.generativeai as genai
import openai

def query_gpt(prompts):
    openai.api_key = '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'
    messages = [{"role": "system", "content": f"You are an expert in predicting the sentiment of all the given '{len(prompts)}' prompts. You will exactly return one of 'positive' or 'negative' in lowercases for each of the prompt without any prefix."}]

    user_prompts = []
    for i, prompt in enumerate(prompts):
        #user_prompt = f" Predict for this {i}/{len(prompts)} prompt: '{prompt}'."
        user_prompt = f" You must Predict for this {i} of {len(prompts)}prompt: '{prompt}'."
        user_prompts.append(user_prompt)

    messages.append({"role": "user", "content": "\n".join(user_prompts)})

    try:
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            messages=messages
        )

        print(f"Response:{response}")
        predicted_labels = []

        # Extracting predicted sentiments
        for sentiment in response.choices[0].message.content.split("\n"):
            if sentiment:  # Exclude empty lines
                # Split by ":" and take the last part
                #print(f"Predicted Sentiment: {sentiment}")
                sentiment = sentiment.split(": ")[-1].strip()
                #print(f"Modified Sentiment: {sentiment}")
                predicted_labels.append(sentiment)
        #print(f"predicted_labels: {predicted_labels}")
        return predicted_labels

    except Exception as e:
        print(f"An error occurred during the OpenAI API request: {e}")
        return ""

def query_gemini(prompts):
    # Replace with your Gemini API key
    GOOGLE_API_KEY= 'AIzaSyA5nXUXPmxS9SlIC9NhexJCOkyavPTvbWY'

    genai.configure(api_key=GOOGLE_API_KEY)
    model = genai.GenerativeModel('gemini-pro')

    messages = [f"You are an expert in predicting the sentiment of all the given '{len(prompts)}' prompts without missing. You will exactly return one of 'positive' or 'negative' in lowercases for each of the prompt without any prefix."]
    user_prompts = []
    for i, prompt in enumerate(prompts):
        #user_prompt = f"You must Predict for prompt: '{prompt}'"
        user_prompt = f" You must Predict for this {i} of {len(prompts)}prompt: '{prompt}'."
        user_prompts.append(user_prompt)
    messages.append("\n".join(user_prompts))

    try:
        response = model.generate_content(messages)
        print(f"Response:{response}")
        predicted_labels = []

        # Access response text directly (assuming predictions are comma-separated)
        if response:
            predicted_labels = response.text.split("\n")
            #predicted_labels = [label.strip() for label in predicted_labels if label.strip()]  # Clean and filter empty labels

        else:
            print("Warning: No response received from Gemini API.")

        #print(f"predicted_labels: {predicted_labels}")
        return predicted_labels

    except Exception as e:
        print(f"An error occurred during the Gemini API request: {e}")
        return ""


dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "english", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "arabic", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "french", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "german", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "hindi", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "italian", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "portuguese", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "spanish", split='test')

labels = ["negative", "neutral", "positive"]

true_labels = []
predicted_labels = []
N = 10

positive_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'positive'][:N]
negative_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'negative'][:N]
prompts = positive_samples + negative_samples

true_labels.extend(['positive'] * N + ['negative'] * N)

print(f"Prompts: {prompts}")
gpt_predicted_labels = query_gpt(prompts)
gemini_predicted_labels = query_gemini(prompts)
print(f"True Labels: {true_labels},{len(true_labels)}")
print(f"GPT-3 Predicted Labels: {gpt_predicted_labels},{len(gpt_predicted_labels)}")
print(f"Gemini Predicted Labels: {gemini_predicted_labels},{len(gemini_predicted_labels)}")

if gpt_predicted_labels and true_labels:
      gpt_classification_report_output = classification_report(true_labels, gpt_predicted_labels, target_names=['positive', 'negative'])
      print(f"GPT-3 Classification Report:")
      print(gpt_classification_report_output)
else:
    print("Error: No predicted labels were returned by GPT-3.")

if gemini_predicted_labels and true_labels:
      gemini_classification_report_output = classification_report(true_labels, gemini_predicted_labels, target_names=['positive', 'negative'])
      print(f"Gemini Classification Report:")
      print(gemini_classification_report_output)
else:
    print("Error: No predicted labels were returned by Gemini.")

Prompts: ['@user You are a stand up guy and a Gentleman Vice President Pence ', "i'm not even catholic, but pope francis is my dude. like i just need him to hug me and tell me everything is okay. ", 'Samsung to Bring Android 7.0 Nougat to Galaxy S6, S6 edge, Note 5, and Tab S2 - Softpedia News ', "I will go so far to say s1 of westworld isn't just good, it's brilliant. A story within a story within a story about storytelling ", '#NationalFastFoodDay Would love to live there. Chick-fil-A 😍 ', 'The Reputation Doctor weighs in on Tony Romo #NFL @user joins @user on #TheMorningRush LISTEN: ', 'So proud of way @user & #trumpTransitionteam are molding strong leadership group for #America #TeamTrump #MakeAmericaGreatAgain ', '@user @user - #ScreamQueens so lucky to get to work with TK. ', 'This is a big deal and a smart move by Microsoft: ', 'Digesting while watching #ScreamQueens s1 Thanksgiving & Black Friday eps. SO SO GOOD!! ', 'Trying to have a conversation with my dad about vegetarianis

ValueError: Found input variables with inconsistent numbers of samples: [20, 19]

Separate Calls:


In [2]:
from transformers import AutoTokenizer
from datasets import load_dataset
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support
from tqdm import tqdm
import google.generativeai as genai
import openai

def query_gpt(prompts):
    openai.api_key = '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'
    predicted_labels = []

    for i, prompt in tqdm(enumerate(prompts), total=len(prompts), desc="Processing prompts with GPT-3"):
        messages = [
            {"role": "system", "content": f"You are an expert in predicting the sentiment of the given prompt. You will exactly return one of 'positive' or 'negative' in lowercases without any prefix."},
            {"role": "user", "content": f"You must predict for this prompt {i+1} of {len(prompts)}: '{prompt}'."}
        ]

        try:
            response = openai.chat.completions.create(
                model="gpt-3.5-turbo-0125",
                messages=messages
            )
            predicted_sentiment = response.choices[0].message.content.split(":")[-1].strip()
            predicted_labels.append(predicted_sentiment)

        except Exception as e:
            print(f"An error occurred during the OpenAI API request: {e}")

    return predicted_labels

def query_gemini(prompts):
    GOOGLE_API_KEY = 'AIzaSyA5nXUXPmxS9SlIC9NhexJCOkyavPTvbWY'
    genai.configure(api_key=GOOGLE_API_KEY)
    model = genai.GenerativeModel('gemini-pro')
    predicted_labels = []

    for i, prompt in tqdm(enumerate(prompts), total=len(prompts), desc="Processing prompts with Gemini"):
        messages = [
            f"You are an expert in predicting the sentiment of the given prompt without missing. You will exactly return one of 'positive' or 'negative' in lowercase without any prefix.",
            f"You must predict for this prompt {i+1} of {len(prompts)}: '{prompt}'."
        ]

        try:
            response = model.generate_content(messages)
            if response:
                predicted_sentiment = response.text.strip()
                predicted_labels.append(predicted_sentiment)
            else:
                print(f"Warning: No response received from Gemini API for prompt {i+1}.")

        except Exception as e:
            print(f"An error occurred during the Gemini API request for prompt {i+1}:{prompt} -> {e}")

    return predicted_labels


dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "english", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "arabic", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "french", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "german", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "hindi", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "italian", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "portuguese", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "spanish", split='test')

labels = ["negative", "neutral", "positive"]

true_labels = []
predicted_labels = []
N = 25

positive_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'positive'][:N]
negative_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'negative'][:N]
prompts = positive_samples + negative_samples

true_labels.extend(['positive'] * N + ['negative'] * N)

print(f"Prompts: {prompts}")
print(f"True Labels: {true_labels},{len(true_labels)}")
gemini_predicted_labels = query_gemini(prompts)
print(f"Gemini Predicted Labels: {gemini_predicted_labels},{len(gemini_predicted_labels)}")
if gemini_predicted_labels and true_labels:
      gemini_classification_report_output = classification_report(true_labels, gemini_predicted_labels, target_names=['positive', 'negative'])
      print(f"Gemini Classification Report:")
      print(gemini_classification_report_output)
else:
    print("Error: No predicted labels were returned by Gemini.")


'''gpt_predicted_labels = query_gpt(prompts)
print(f"GPT-3 Predicted Labels: {gpt_predicted_labels},{len(gpt_predicted_labels)}")
if gpt_predicted_labels and true_labels:
      gpt_classification_report_output = classification_report(true_labels, gpt_predicted_labels, target_names=['positive', 'negative'])
      print(f"GPT-3 Classification Report:")
      print(gpt_classification_report_output)
else:
    print("Error: No predicted labels were returned by GPT-3.")'''


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading data:   0%|          | 0.00/155k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/29.1k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/64.8k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1839 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/324 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/870 [00:00<?, ? examples/s]

Prompts: ['@user You are a stand up guy and a Gentleman Vice President Pence ', "i'm not even catholic, but pope francis is my dude. like i just need him to hug me and tell me everything is okay. ", 'Samsung to Bring Android 7.0 Nougat to Galaxy S6, S6 edge, Note 5, and Tab S2 - Softpedia News ', "I will go so far to say s1 of westworld isn't just good, it's brilliant. A story within a story within a story about storytelling ", '#NationalFastFoodDay Would love to live there. Chick-fil-A 😍 ', 'The Reputation Doctor weighs in on Tony Romo #NFL @user joins @user on #TheMorningRush LISTEN: ', 'So proud of way @user & #trumpTransitionteam are molding strong leadership group for #America #TeamTrump #MakeAmericaGreatAgain ', '@user @user - #ScreamQueens so lucky to get to work with TK. ', 'This is a big deal and a smart move by Microsoft: ', 'Digesting while watching #ScreamQueens s1 Thanksgiving & Black Friday eps. SO SO GOOD!! ', "I've been listening to Leonard Cohen nonstop for the past mo

Processing prompts with Gemini:  90%|█████████ | 45/50 [01:19<00:08,  1.69s/it]

An error occurred during the Gemini API request for prompt 45:"Are we out of touch, sexually deviant, decadent dilettantes who can't own up to our misrule?No it is the deplora…  -> The `response.parts` quick accessor only works for a single candidate, but none were returned. Check the `response.prompt_feedback` to see if the prompt was blocked.


Processing prompts with Gemini:  96%|█████████▌| 48/50 [01:24<00:03,  1.68s/it]

An error occurred during the Gemini API request for prompt 48:#TV #Bullshit They are saying #Brexit was not about Closing borders and leaving the single market?? FUNNY THAT ?? STOP BULLSHITING US  -> The `response.parts` quick accessor only works for a single candidate, but none were returned. Check the `response.prompt_feedback` to see if the prompt was blocked.


Processing prompts with Gemini: 100%|██████████| 50/50 [01:27<00:00,  1.76s/it]

Gemini Predicted Labels: ['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'negative', 'negative', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'negative', 'negative', 'positive', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'positive', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative'],48





ValueError: Found input variables with inconsistent numbers of samples: [50, 48]

In [8]:
from transformers import AutoTokenizer
from datasets import load_dataset
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support
from tqdm import tqdm
import google.generativeai as genai
import openai

def query_gpt(prompts):
    openai.api_key = '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'
    predicted_labels = []

    for i, prompt in tqdm(enumerate(prompts), total=len(prompts), desc="Processing prompts with GPT-3"):
        messages = [
            {"role": "system", "content": f"You are an expert in predicting the sentiment of the given prompt. You will exactly return one of 'positive' or 'negative' in lowercases without any prefix."},
            {"role": "user", "content": f"You must predict for this prompt {i+1} of {len(prompts)}: '{prompt}'."}
        ]

        try:
            response = openai.chat.completions.create(
                model="gpt-3.5-turbo-0125",
                messages=messages
            )
            predicted_sentiment = response.choices[0].message.content.split(":")[-1].strip()
            predicted_labels.append(predicted_sentiment)

        except Exception as e:
            print(f"An error occurred during the OpenAI API request: {e}")

    return predicted_labels

def query_gemini(prompts):
    GOOGLE_API_KEY = 'AIzaSyA5nXUXPmxS9SlIC9NhexJCOkyavPTvbWY'
    genai.configure(api_key=GOOGLE_API_KEY)
    model = genai.GenerativeModel('gemini-pro')
    predicted_labels = []

    for i, prompt in tqdm(enumerate(prompts), total=len(prompts), desc="Processing prompts with Gemini"):
        messages = [
            f"You are an expert in predicting the sentiment of the given prompt without missing. You will exactly return one of 'positive' or 'negative' in lowercase without any prefix.",
            f"You must predict for this prompt {i+1} of {len(prompts)}: '{prompt}'."
        ]

        try:
            response = model.generate_content(
                messages,
                safety_settings=[
                    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}
                ])
            if response:
                predicted_sentiment = response.text.strip()
                predicted_labels.append(predicted_sentiment)
            else:
                print(f"Warning: No response received from Gemini API for prompt {i+1}.")

        except Exception as e:
            print(f"An error occurred during the Gemini API request for prompt {i+1}:{prompt} -> {e}")

    return predicted_labels


dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "english", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "arabic", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "french", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "german", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "hindi", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "italian", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "portuguese", split='test')
#dataset = load_dataset("cardiffnlp/tweet_sentiment_multilingual", "spanish", split='test')

labels = ["negative", "neutral", "positive"]

true_labels = []
predicted_labels = []
N = 25

positive_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'positive'][:N]
negative_samples = [sample for sample, label_id in zip(dataset['text'], dataset['label']) if labels[label_id] == 'negative'][:N]
prompts = positive_samples + negative_samples

true_labels.extend(['positive'] * N + ['negative'] * N)

print(f"Prompts: {prompts}")
print(f"True Labels: {true_labels},{len(true_labels)}")
gemini_predicted_labels = query_gemini(prompts)
print(f"Gemini Predicted Labels: {gemini_predicted_labels},{len(gemini_predicted_labels)}")
if gemini_predicted_labels and true_labels:
      gemini_classification_report_output = classification_report(true_labels, gemini_predicted_labels, target_names=['positive', 'negative'])
      print(f"Gemini Classification Report:")
      print(gemini_classification_report_output)
else:
    print("Error: No predicted labels were returned by Gemini.")


'''gpt_predicted_labels = query_gpt(prompts)
print(f"GPT-3 Predicted Labels: {gpt_predicted_labels},{len(gpt_predicted_labels)}")
if gpt_predicted_labels and true_labels:
      gpt_classification_report_output = classification_report(true_labels, gpt_predicted_labels, target_names=['positive', 'negative'])
      print(f"GPT-3 Classification Report:")
      print(gpt_classification_report_output)
else:
    print("Error: No predicted labels were returned by GPT-3.")'''


Prompts: ['@user You are a stand up guy and a Gentleman Vice President Pence ', "i'm not even catholic, but pope francis is my dude. like i just need him to hug me and tell me everything is okay. ", 'Samsung to Bring Android 7.0 Nougat to Galaxy S6, S6 edge, Note 5, and Tab S2 - Softpedia News ', "I will go so far to say s1 of westworld isn't just good, it's brilliant. A story within a story within a story about storytelling ", '#NationalFastFoodDay Would love to live there. Chick-fil-A 😍 ', 'The Reputation Doctor weighs in on Tony Romo #NFL @user joins @user on #TheMorningRush LISTEN: ', 'So proud of way @user & #trumpTransitionteam are molding strong leadership group for #America #TeamTrump #MakeAmericaGreatAgain ', '@user @user - #ScreamQueens so lucky to get to work with TK. ', 'This is a big deal and a smart move by Microsoft: ', 'Digesting while watching #ScreamQueens s1 Thanksgiving & Black Friday eps. SO SO GOOD!! ', "I've been listening to Leonard Cohen nonstop for the past mo

Processing prompts with Gemini: 100%|██████████| 50/50 [01:21<00:00,  1.62s/it]

Gemini Predicted Labels: ['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'negative', 'positive', 'negative', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'negative', 'negative', 'positive', 'negative', 'positive', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'positive', 'negative', 'negative', 'negative', 'negative'],50
Gemini Classification Report:
              precision    recall  f1-score   support

    positive       0.92      0.88      0.90        25
    negative       0.88      0.92      0.90        25

    accuracy                           0.90        50
   macro avg       0.90      0.90      0.90        50
weighted avg       0.90      0.90      0.90        50






'gpt_predicted_labels = query_gpt(prompts) \nprint(f"GPT-3 Predicted Labels: {gpt_predicted_labels},{len(gpt_predicted_labels)}")\nif gpt_predicted_labels and true_labels:\n      gpt_classification_report_output = classification_report(true_labels, gpt_predicted_labels, target_names=[\'positive\', \'negative\'])\n      print(f"GPT-3 Classification Report:")\n      print(gpt_classification_report_output)\nelse:\n    print("Error: No predicted labels were returned by GPT-3.")'