## Setup

In [2]:
import sys
sys.path.append('../../Modules/')
sys.path.append('../../Modules/Processors from Prof')
from Packages import *
from My_Json_processor import *
# from My_Utilities_processor import *

# Import processors from Prof
from ipynb.fs.full.Utilities import *
from ipynb.fs.full.Json_Processor import *
from ipynb.fs.full.CSV_Processor import *

## GROQ API - Testing

In [6]:
import os
from groq import Groq

In [None]:
# Confirm the key is configured WITHOUT echoing it: anything printed here is
# stored in the notebook's saved output and leaks when the file is shared.
print("GROQ_API_KEY is set" if os.environ.get('GROQ_API_KEY') else "GROQ_API_KEY is missing")

In [7]:
# Groq client authenticated with the key taken from the environment.
client = Groq(api_key=os.environ['GROQ_API_KEY'])

In [None]:
# Smoke test: send one multiple-choice prompt to Groq and print the reply.
# NOTE(review): `main_df` is not assigned in any cell shown in this notebook -
# presumably it comes from a star import or a removed cell; confirm, otherwise
# this cell fails on a fresh kernel.
chat_completion = client.chat.completions.create(
    messages=[
        # The system instruction goes first so it frames the user turn.
        {
            'role': 'system',
            'content': 'You only answer A, B, C, or D from the multiple choices.'
        },
        {
            "role": "user",
            "content": main_df.iloc[20]['full_input']
        },
    ],
    model="llama3-8b-8192"
)

print(chat_completion.choices[0].message.content)

## Functions

### Main

In [3]:
"""
GEMINI

The function is to generate Gemini answers for the given data.

@params: data(initial dataframe), system_message(message for Gemini system)
@return: sample(dataframe with gemini_output column)
"""

def gemini_generator(data, system_message):

    import time
    total_requests = 0
    successful_requests = 0


    model=genai.GenerativeModel(
    model_name="gemini-1.5-flash",
    system_instruction=system_message,
    )

    sample = data.copy()
    sample['gemini_output'] = None

    for i in range(len(sample)):
        success = False
        retries = 3

        while not success and retries > 0:
            try:
                total_requests += 1

                # Make API request
                response = model.generate_content(sample['input'][i])
                # print(response.text)
                sample.loc[i, 'gemini_output'] = response.text.strip()
                success = True
                successful_requests += 1
                time.sleep(5)

            except Exception as e:
                # print(f"Error: {e}")
                retries -= 1
                time.sleep(5)
                total_requests += 1

    print(f"Total requests made: {total_requests}")
    print(f"Successful requests: {successful_requests}")

    return sample

In [4]:
"""
The function is to export the dataframe to a csv file.

@params: folder_name(name of the folder), df(dataframe)
@return: True if the dataframe is exported successfully, False otherwise
"""

def df_to_csv(folder_name, df, file_name, output_col):
    base_path = r'/Users/ezishr/Library/CloudStorage/OneDrive-UniversityofCincinnati/Undergraduate Research/Check points'
    if (df[output_col].isnull().sum() == 0) or (df[output_col].isnull().sum() == 0):
        file_path = os.path.join(base_path, folder_name, f'{file_name}.csv')
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        df.to_csv(file_path, index=False)
        print(f"DataFrame exported successfully to {file_path}!")
    else:
        print("Export failed: DataFrame contains null values in 'gemini_output'.")
        return False

## My Testing - ARC-Challenge-Test.jsonl

### Get Dataset

In [5]:
arc_challenge = ARC_Challenge_Processor_my('ARC-Challenge','ARC-Challenge-Test.jsonl').convert_df()

In [6]:
# Build the full prompt (question text + answer choices) and keep it as the
# only text column, under the name 'input'.
arc_challenge = (
    arc_challenge
    .assign(full_input=arc_challenge['input'] + " " + arc_challenge['choices'])
    .drop(columns=['input', 'choices'])
    .rename(columns={'full_input': 'input'})
)

In [None]:
arc_challenge.head()

In [None]:
arc_challenge.info()

In [None]:
# Rows whose answer key is a digit ('1'-'4') rather than a letter.
arc_challenge.loc[arc_challenge['target'].isin(['1', '2', '3', '4'])]

### GEMINI Applied

In [None]:
# Ask Gemini to answer every ARC-Challenge test question.
gemini_arc_challenge = gemini_generator(
    arc_challenge,
    "You only answer A, B, C, D, or 1,2,3,4 from the provided multiple choices.",
)

In [None]:
gemini_arc_challenge.info()

In [None]:
gemini_arc_challenge

In [None]:
gemini_arc_challenge.info()

In [None]:
gemini_arc_challenge[gemini_arc_challenge['gemini_output'].isnull()]

In [14]:
# Remove the row labelled 1171 in place.
# NOTE(review): presumably the row the previous cell listed with a null
# gemini_output - confirm. The label is hard-coded, so re-running this cell
# raises KeyError once the row is gone.
gemini_arc_challenge.drop(index = 1171, inplace = True)

In [None]:
gemini_arc_challenge.info()

In [None]:
# Checkpoint the test-set answers in the same 'ARC-Challenge' folder that the
# dev/train exports use (this call previously wrote to 'ARC Challenge').
df_to_csv('ARC-Challenge', gemini_arc_challenge, 'gemini_arc_challenge', 'gemini_output')

## My Testing - ARC-Challenge-Dev.jsonl

### Get Dataset

In [23]:
arc_dev = ARC_Challenge_Processor_my('ARC-Challenge','ARC-Challenge-Dev.jsonl').convert_df()

In [None]:
arc_dev.info()
arc_dev.head()

In [25]:
# Build the full prompt (question text + answer choices) and keep it as the
# only text column, under the name 'input'.
arc_dev = (
    arc_dev
    .assign(full_input=arc_dev['input'] + " " + arc_dev['choices'])
    .drop(columns=['input', 'choices'])
    .rename(columns={'full_input': 'input'})
)

### GEMINI Applied

In [None]:
gemini_arc_dev = gemini_generator(arc_dev, "You only answer A, B, C, D, or 1,2,3,4 from the provided multiple choices.")

gemini_arc_dev.info()

In [38]:
# Remove the row labelled 298 in place.
# NOTE(review): the reason is not recorded in this notebook - presumably a row
# left without a gemini_output; confirm. Hard-coded label: re-running this cell
# raises KeyError once the row is gone.
gemini_arc_dev.drop(index=298, inplace=True)

In [None]:
df_to_csv('ARC-Challenge', gemini_arc_dev, 'gemini_arc_dev', 'gemini_output')

## My Testing - ARC-Challenge-Train.jsonl

### Get Dataset

In [5]:
arc_train = ARC_Challenge_Processor_my('ARC-Challenge','ARC-Challenge-Train.jsonl').convert_df()

In [6]:
# Remove the row labelled 1118 in place, before the prompts are built.
# NOTE(review): the reason this row is excluded is not recorded here - TODO
# document it. Hard-coded label: re-running this cell raises KeyError.
arc_train.drop(index = 1118, inplace = True)

In [7]:
# Build the full prompt (question text + answer choices) and keep it as the
# only text column, under the name 'input'.
arc_train = (
    arc_train
    .assign(full_input=arc_train['input'] + " " + arc_train['choices'])
    .drop(columns=['input', 'choices'])
    .rename(columns={'full_input': 'input'})
)

### GEMINI Applied

In [8]:
arc_train['target'].unique()

array(['A', 'B', 'D', 'C', '2', '4', '1', '3'], dtype=object)

In [None]:
gemini_arc_train = gemini_generator(arc_train, "You only answer A, B, C, D, or 1,2,3,4 from the provided multiple choices.")

df_to_csv('ARC-Challenge', gemini_arc_train, 'gemini_arc_train', 'gemini_output')

## NOT DONE YET - the full train-set run is unfinished; the cells below process it in batches

In [10]:
arc_train.head()

Unnamed: 0,target,input
0,A,George wants to warm his hands quickly by rubbing them. Which skin surface will produce the most heat? A: dry palms\nB: wet palms\nC: palms covered with oil\nD: palms covered with lotion\n
1,B,Which of the following statements best explains why magnets usually stick to a refrigerator door? A: The refrigerator door is smooth.\nB: The refrigerator door contains iron.\nC: The refrigerator door is a good conductor.\nD: The refrigerator door has electric wires in it.\n
2,B,A fold observed in layers of sedimentary rock most likely resulted from the A: cooling of flowing magma.\nB: converging of crustal plates.\nC: deposition of river sediments.\nD: solution of carbonate minerals.\n
3,D,Which of these do scientists offer as the most recent explanation as to why many plants and animals died out at the end of the Mesozoic era? A: worldwide disease\nB: global mountain building\nC: rise of mammals that preyed upon plants and animals\nD: impact of an asteroid created dust that blocked the sunlight\n
4,B,A boat is acted on by a river current flowing north and by wind blowing on its sails. The boat travels northeast. In which direction is the wind most likely applying force to the sails of the boat? A: west\nB: east\nC: north\nD: south\n


In [11]:
arc_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1118 entries, 0 to 1117
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   target  1118 non-null   object
 1   input   1118 non-null   object
dtypes: object(2)
memory usage: 17.6+ KB


In [13]:
# First batch of the train set. `.loc` slices by label and includes BOTH ends,
# so this holds rows 0..250, i.e. 251 rows despite the name.
top250 = arc_train.loc[0:250,].copy()

In [14]:
top250.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 251 entries, 0 to 250
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   target  251 non-null    object
 1   input   251 non-null    object
dtypes: object(2)
memory usage: 4.1+ KB


In [15]:
gemini_arc_train_top250 = gemini_generator(top250, "You only answer A, B, C, D, or 1,2,3,4 from the provided multiple choices.")

Total requests made: 264
Successful requests: 250


In [16]:
gemini_arc_train_top250.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 251 entries, 0 to 250
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   target         251 non-null    object
 1   input          251 non-null    object
 2   gemini_output  250 non-null    object
dtypes: object(3)
memory usage: 6.0+ KB


In [18]:
gemini_arc_train_top250[gemini_arc_train_top250['gemini_output'].isnull()]

Unnamed: 0,target,input,gemini_output
191,B,"When people exercise, they often feel thirsty and begin to sweat. It is important for people to feel thirsty when exercising because it makes them realize that they should A: take a break\nB: consume liquids\nC: slow their breathing\nD: stop to eat something\n",


In [19]:
# Retry the one question (row 191) that came back without an answer, using the
# same model name and system instruction as the batch run.
model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",
    system_instruction="You only answer A, B, C, D, or 1,2,3,4 from the provided multiple choices.",
)
response = model.generate_content(gemini_arc_train_top250.loc[191, 'input'])

In [21]:
# Write the retried answer (whitespace-trimmed) into row 191 so the batch has
# no null gemini_output left before it is exported.
gemini_arc_train_top250.loc[191, 'gemini_output'] = response.text.strip()

In [22]:
df_to_csv("ARC-Challenge", gemini_arc_train_top250, "gemini_arc_train_top250", "gemini_output")

DataFrame exported successfully to /Users/ezishr/Library/CloudStorage/OneDrive-UniversityofCincinnati/Undergraduate Research/Check points/ARC-Challenge/gemini_arc_train_top250.csv!


In [26]:
# Second batch of the train set: label slice 251..350, both ends included.
# The copy keeps the original index labels (251..350); it is not renumbered
# from 0.
top251_350 = arc_train.loc[251:350,].copy()

In [27]:
gemini_arc_train_top251_350 = gemini_generator(top251_350, "You only answer A, B, C, D, or 1,2,3,4 from the provided multiple choices.")

## NOT DONE - batch 251-350 produced no answers (0 successful requests) and must be re-run

Total requests made: 600
Successful requests: 0
