# Set Up

In [1]:
import getpass
import os
import sys
sys.path.append('../../Modules/')
sys.path.append('../../Modules/Processors from Prof')
from Packages import *
from My_CSV_processor import *
from Semantic_functions import *
from Wordcloud import *

# Import processors from Prof
from ipynb.fs.full.Utilities import *
from ipynb.fs.full.Json_Processor import *
from ipynb.fs.full.CSV_Processor import *

In [2]:
import google.generativeai as genai

# Never store API keys in the notebook: read the key from the environment and
# fall back to a hidden prompt when it is not set.
# NOTE(review): the key that used to be hard-coded in this cell must be treated
# as compromised and revoked.
if not os.environ.get("GEMINI_API_KEY"):
    os.environ["GEMINI_API_KEY"] = getpass.getpass("GEMINI_API_KEY: ")
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

In [3]:
import os
from groq import Groq

# Get Data

In [7]:
farel_bench = FarelBench_CSV_Processor_my('Farel-Bench', 'farel_bench.csv').convert_df()

In [8]:
farel_bench['topic'].unique()

array(['child', 'parent', 'grandchild', 'sibling', 'grandparent',
       'great grandchild', 'niece or nephew', 'aunt or uncle',
       'great grandparent', 'great great grandchild',
       'grand-niece or grand-nephew', '1st cousin',
       'grand-aunt or grand-uncle', 'great great grandparent',
       '3rd great grandchild', 'great grand-niece or great grand-nephew',
       '1st cousin 1x removed', 'great grand-aunt or great grand-uncle',
       '3rd great grandparent'], dtype=object)

# FUNCTIONS

In [4]:
"""
GEMINI

The function is to generate Gemini answers for the given data.

@params: data(initial dataframe), system_message(message for Gemini system)
@return: sample(dataframe with gemini_output column)
"""

def gemini_generator(data, system_message, max_retries=3, delay=5, model_name="gemini-1.5-flash"):
    """
    Generate a Gemini answer for every row of the given data.

    Rows are addressed by position, so `data` may carry any index (filtered,
    shuffled, non-unique); the returned frame keeps that index unchanged.

    @params:
        data: initial dataframe with an 'input' column holding the prompts
        system_message: system instruction passed to the Gemini model
        max_retries: attempts per row before giving up (default 3)
        delay: seconds to sleep after every attempt (default 5)
        model_name: Gemini model identifier (default "gemini-1.5-flash")
    @return: sample (copy of data with a 'gemini_output' column; rows whose
        attempts all failed keep None)
    """
    import time

    total_requests = 0
    successful_requests = 0

    model = genai.GenerativeModel(
        model_name=model_name,
        system_instruction=system_message,
    )

    sample = data.copy()
    sample['gemini_output'] = None
    # Positional column/row access keeps the function independent of the index labels.
    output_col = sample.columns.get_loc('gemini_output')
    prompts = sample['input'].tolist()

    for row_pos, prompt in enumerate(prompts):
        last_error = None

        for _attempt in range(max_retries):
            # Count each attempt exactly once, whether it succeeds or fails.
            total_requests += 1
            try:
                response = model.generate_content(prompt)
                sample.iat[row_pos, output_col] = response.text.strip()
            except Exception as e:
                # Best effort: remember the error, wait, and try again.
                last_error = e
                time.sleep(delay)
            else:
                successful_requests += 1
                last_error = None
                time.sleep(delay)
                break

        if last_error is not None:
            # Report rows that stay empty instead of failing silently.
            print(f"Row {row_pos}: no answer after {max_retries} attempts ({last_error!r})")

    print(f"Total requests made: {total_requests}")
    print(f"Successful requests: {successful_requests}")

    return sample

In [None]:
def gemini_line_generator(input_df, input_idx, output_df, message):
    """
    Generate the Gemini answer for a single row and store it in output_df.

    The raw answer is printed; the stripped answer is written in place to
    output_df at row label input_idx, column "gemini_output".

    @params: input_df(dataframe with an "input" column), input_idx(row label to query),
             output_df(dataframe that receives the answer), message(system instruction for Gemini)
    @return: None
    """
    gemini = genai.GenerativeModel(model_name="gemini-1.5-flash", system_instruction=message)

    prompt = input_df.loc[input_idx, "input"]
    answer = gemini.generate_content(prompt).text

    print(answer)

    output_df.loc[input_idx, "gemini_output"] = answer.strip()

In [6]:
from groq import Groq

# Never store API keys in the notebook: read the key from the environment and
# fall back to a hidden prompt when it is not set.
# NOTE(review): the key that used to be hard-coded in this cell must be treated
# as compromised and revoked.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass.getpass("GROQ_API_KEY: ")
client = Groq(
    api_key=os.environ['GROQ_API_KEY'],
)

"""
GROQ

The function is to generate GROQ answers for the given data.

@params: data(initial dataframe), system_message(system message for the Groq model), model_name(Groq model id, also used as the output column name)
@return: sample(dataframe with an added column named after model_name)
"""

def groq(data, system_message, model_name, max_retries=3, delay=5):
    """
    Generate a Groq chat-completion answer for every row of the given data.

    Rows are addressed by position, so `data` may carry any index; the
    returned frame keeps that index unchanged. The API key is read from the
    GROQ_API_KEY environment variable.

    @params:
        data: initial dataframe with an 'input' column holding the prompts
        system_message: system message sent ahead of each user prompt
        model_name: Groq model identifier; also the name of the output column
        max_retries: attempts per row before giving up (default 3)
        delay: seconds to sleep after every attempt (default 5)
    @return: sample (copy of data with a column named model_name; rows whose
        attempts all failed keep None)
    """
    import time

    total_requests = 0
    successful_requests = 0
    client = Groq(api_key=os.environ['GROQ_API_KEY'])

    sample = data.copy()
    sample[model_name] = None
    # Positional column/row access keeps the function independent of the index labels.
    output_col = sample.columns.get_loc(model_name)
    prompts = sample['input'].tolist()

    for row_pos, prompt in enumerate(prompts):
        last_error = None

        for _attempt in range(max_retries):
            # Count each attempt exactly once, whether it succeeds or fails.
            total_requests += 1
            try:
                # The system message goes first so it frames the user prompt.
                chat_completion = client.chat.completions.create(
                    messages=[
                        {'role': 'system', 'content': system_message},
                        {'role': 'user', 'content': prompt},
                    ],
                    model=model_name,
                )
                response = chat_completion.choices[0].message.content
                sample.iat[row_pos, output_col] = response.strip()
            except Exception as e:
                # Best effort: remember the error, wait, and try again.
                last_error = e
                time.sleep(delay)
            else:
                successful_requests += 1
                last_error = None
                time.sleep(delay)
                break

        if last_error is not None:
            # Report rows that stay empty instead of failing silently.
            print(f"Row {row_pos}: no answer after {max_retries} attempts ({last_error!r})")

    print(f"Total requests made: {total_requests}")
    print(f"Successful requests: {successful_requests}")

    return sample


"""
GROQ

The function is to generate GROQ answers for the given data ROWS.

"""

def groq_line_generate(raw_dataset, output_dataset, start_idx, end_idx, system_message, model_name,
                       output_column="llama_output"):
    """
    Generate Groq answers for a range of rows and store them in output_dataset.

    Rows start_idx..end_idx of raw_dataset are selected with a label-based
    `.loc` slice (both ends included), answered through `groq`, and written in
    place to the same label range of output_dataset.

    NOTE(review): assumes raw_dataset and output_dataset share the same index
    labels, so both slices have the same length - confirm against callers.

    @params:
        raw_dataset: dataframe with an 'input' column holding the prompts
        output_dataset: dataframe that receives the answers (modified in place)
        start_idx, end_idx: first and last row label of the range
        system_message: system message forwarded to `groq`
        model_name: Groq model identifier forwarded to `groq`
        output_column: column of output_dataset to fill (default "llama_output")
    @return: output_dataset (the same object that was passed in)
    """
    # `groq` gets a fresh 0..n-1 index; the original labels are only needed for the write-back.
    sample = raw_dataset.loc[start_idx:end_idx].reset_index(drop=True)
    groq_sample = groq(sample, system_message, model_name)
    output_dataset.loc[start_idx:end_idx, output_column] = groq_sample[model_name].values
    return output_dataset

# GEMINI

In [9]:
farel_bench.head()

Unnamed: 0,topic,input,target
0,child,"Given the family relationships:\n* Ralph is Anthony's parent.\n* Albert is Ralph's parent.\nWhat is Anthony's relationship to Ralph?\nSelect the correct answer:\n1. Anthony is Ralph's child.\n2. Anthony is Ralph's parent.\nEnclose the selected answer number in the <ANSWER> tag, for example: <ANSWER>1</ANSWER>.",1
1,child,"Given the family relationships:\n* Jessica is John's parent.\n* John is Lawrence's parent.\nWhat is Lawrence's relationship to John?\nSelect the correct answer:\n1. Lawrence is John's parent.\n2. Lawrence is John's child.\nEnclose the selected answer number in the <ANSWER> tag, for example: <ANSWER>1</ANSWER>.",1
2,child,"Given the family relationships:\n* Raymond is William's parent.\n* Denise is Raymond's parent.\nWhat is William's relationship to Raymond?\nSelect the correct answer:\n1. William is Raymond's parent.\n2. William is Raymond's child.\nEnclose the selected answer number in the <ANSWER> tag, for example: <ANSWER>1</ANSWER>.",1
3,child,"Given the family relationships:\n* Samantha is Elijah's parent.\n* Elijah is Joshua's parent.\nWhat is Joshua's relationship to Elijah?\nSelect the correct answer:\n1. Joshua is Elijah's parent.\n2. Joshua is Elijah's child.\nEnclose the selected answer number in the <ANSWER> tag, for example: <ANSWER>1</ANSWER>.",1
4,child,"Given the family relationships:\n* Anna is Charlotte's parent.\n* Marie is Anna's parent.\nWhat is Charlotte's relationship to Anna?\nSelect the correct answer:\n1. Charlotte is Anna's parent.\n2. Charlotte is Anna's child.\nEnclose the selected answer number in the <ANSWER> tag, for example: <ANSWER>1</ANSWER>.",1


In [10]:
# Working copy of the benchmark; the empty `gemini_output` column is filled in
# row by row by gemini_line_generator.
gemini_farel_bench = farel_bench.copy()
gemini_farel_bench['gemini_output'] = None

In [39]:
# Query Gemini for rows 60-69, one request per row; each answer is printed and
# stored in gemini_farel_bench.
for row_idx in range(60, 70):
    gemini_line_generator(farel_bench, row_idx, gemini_farel_bench, message='Provide answers only.')

<ANSWER>1</ANSWER>

<ANSWER>1</ANSWER>

<ANSWER>2</ANSWER>

<ANSWER>2</ANSWER>

<ANSWER>1</ANSWER>

<ANSWER>2</ANSWER>

<ANSWER>2</ANSWER>

<ANSWER>1</ANSWER>

<ANSWER>1</ANSWER>

<ANSWER>1</ANSWER>



In [40]:
gemini_farel_bench.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   topic          1000 non-null   object
 1   input          1000 non-null   object
 2   target         1000 non-null   object
 3   gemini_output  70 non-null     object
dtypes: object(4)
memory usage: 31.4+ KB


# WordCloud

In [None]:
text = build_text_from_questions(farel_bench, write_to = None, remove_stopwords = True)

In [None]:
wc = Wordcloud()
wc.generate01(benchmark_name="Farel-Bench", text=text, myStopwords = None, file_name = "wordcloud")

# Word Sense Disambiguation (WSD)

In [None]:
text = build_text_from_questions(farel_bench)

In [None]:
wsd_farel_bench = auto_wsd(farel_bench)

In [None]:
wsd_farel_bench

In [None]:
len(wsd_farel_bench)

In [None]:
get_synset_def('solution.n.02')

# Semantic Role Labeling (SRL)
Assigns semantic roles to the words in a sentence (who did what to whom, when, and how).

Example: "John gave Mary a book."
- Agent (Who?): John
- Action (What happened?): gave
- Recipient (To whom?): Mary
- Object (What?): a book


# Latent Semantic Analysis (LSA)
Captures hidden relationships between words in a large text corpus using Singular Value Decomposition (SVD).

- Use case: document similarity, topic modeling.
- Example: "car" and "automobile" are grouped as related words because they occur in similar contexts across documents.
- Libraries: scikit-learn, gensim

# Word Embeddings (Word2Vec, GloVe, FastText)
- Represents words as dense vectors, capturing meaning based on usage.

- Libraries: gensim, spaCy

Example:
Word2Vec captures similarity:
vec("king") - vec("man") + vec("woman") ≈ vec("queen")