# Setup

In [1]:
import sys
sys.path.append('../../Modules/')
sys.path.append('../../Modules/Processors from Prof')
from Packages import *
from My_CSV_processor import *
from My_Json_processor import *
from Semantic_functions import *
from Wordcloud import *

# Import processors from Prof
from ipynb.fs.full.Utilities import *
from ipynb.fs.full.Json_Processor import *
from ipynb.fs.full.CSV_Processor import *

In [10]:
import os
from groq import Groq

In [11]:
import google.generativeai as genai
# Never embed the API key in the notebook: it ends up in version control and in
# every shared copy. Use the GEMINI_API_KEY environment variable when it is set,
# otherwise prompt for it without echoing the value.
if not os.environ.get("GEMINI_API_KEY"):
    from getpass import getpass
    os.environ["GEMINI_API_KEY"] = getpass("GEMINI_API_KEY: ")
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

# Functions

In [2]:
"""
GEMINI

The function is to generate Gemini answers for the given data.

@params: data(initial dataframe), system_message(message for Gemini system)
@return: sample(dataframe with gemini_output column)
"""

def gemini_generator(data, system_message, retries=3, delay=5):
    """Generate a Gemini answer for every row of ``data``.

    Each value of the ``input`` column is sent to the ``gemini-1.5-flash``
    model configured with ``system_message`` as its system instruction. The
    stripped reply text is stored in a new ``gemini_output`` column.

    Args:
        data: DataFrame with an ``input`` column holding the prompts. It is
            not modified; a copy is returned.
        system_message: System instruction passed to the Gemini model.
        retries: Maximum number of attempts per row (default 3).
        delay: Seconds to sleep after every attempt, successful or not
            (default 5).

    Returns:
        A copy of ``data`` with a ``gemini_output`` column. Rows whose attempts
        all failed keep ``None``.

    Raises:
        KeyError: If ``data`` has no ``input`` column.
    """
    import time

    total_requests = 0
    successful_requests = 0

    model = genai.GenerativeModel(
        model_name="gemini-1.5-flash",
        system_instruction=system_message,
    )

    sample = data.copy()
    sample['gemini_output'] = None

    # Address cells by position so the function also works on frames whose
    # index is not 0..n-1 (for example a filtered subset of the dataset).
    input_col = sample.columns.get_loc('input')
    output_col = sample.columns.get_loc('gemini_output')

    for row_pos in range(len(sample)):
        prompt = sample.iat[row_pos, input_col]

        for attempt in range(1, retries + 1):
            # Count each attempt exactly once, whether it succeeds or fails.
            total_requests += 1
            try:
                response = model.generate_content(prompt)
                answer = response.text.strip()
            except Exception as exc:
                # Best effort: report the failure and retry instead of aborting the run.
                print(f"Row {row_pos}: attempt {attempt}/{retries} failed: {exc}")
                time.sleep(delay)
                continue

            sample.iat[row_pos, output_col] = answer
            successful_requests += 1
            time.sleep(delay)
            break

    print(f"Total requests made: {total_requests}")
    print(f"Successful requests: {successful_requests}")

    return sample

In [3]:
def gemini_line_generator(input_df, input_idx, output_df, message):
    """Query Gemini for one row and record the reply in ``output_df``.

    The ``input`` value at label ``input_idx`` of ``input_df`` is sent to the
    ``gemini-1.5-flash`` model, using ``message`` as the system instruction.
    The raw reply is printed, and its stripped text is written to the
    ``gemini_output`` column of ``output_df`` at the same label. Nothing is
    returned; ``output_df`` is modified in place. Errors raised by the lookup
    or the request propagate to the caller.
    """
    gemini = genai.GenerativeModel(
        model_name="gemini-1.5-flash",
        system_instruction=message,
    )

    prompt = input_df.loc[input_idx, "input"]
    reply = gemini.generate_content(prompt)

    reply_text = reply.text
    print(reply_text)

    output_df.loc[input_idx, "gemini_output"] = reply_text.strip()

In [4]:
from groq import Groq
# Never embed the API key in the notebook: it ends up in version control and in
# every shared copy. Use the GROQ_API_KEY environment variable when it is set,
# otherwise prompt for it without echoing the value.
if not os.environ.get("GROQ_API_KEY"):
    from getpass import getpass
    os.environ["GROQ_API_KEY"] = getpass("GROQ_API_KEY: ")

# Module-level client, kept for interactive use in later cells.
client = Groq(
    api_key=os.environ['GROQ_API_KEY'],
)

"""
GROQ

The function is to generate GROQ answers for the given data.

@params: data(initial dataframe), system_message(system message for the Groq model), model_name(Groq model id, also used as the output column name)
@return: sample(dataframe with an output column named after model_name)
"""

def groq(data, system_message, model_name, retries=3, delay=5):
    """Generate a Groq chat-completion answer for every row of ``data``.

    Each value of the ``input`` column is sent as the user message, preceded
    by ``system_message`` as the system message, to the Groq model
    ``model_name``. The stripped reply is stored in a new column that is named
    after ``model_name``.

    Args:
        data: DataFrame with an ``input`` column holding the prompts. It is
            not modified; a copy is returned.
        system_message: Content of the system message.
        model_name: Groq model identifier; also the name of the output column.
        retries: Maximum number of attempts per row (default 3).
        delay: Seconds to sleep after every attempt, successful or not
            (default 5).

    Returns:
        A copy of ``data`` with a ``model_name`` column. Rows whose attempts
        all failed keep ``None``.

    Raises:
        KeyError: If ``data`` has no ``input`` column or GROQ_API_KEY is unset.
    """
    import time

    total_requests = 0
    successful_requests = 0
    client = Groq(api_key=os.environ['GROQ_API_KEY'])

    sample = data.copy()
    sample[model_name] = None

    # Address cells by position so the function also works on frames whose
    # index is not 0..n-1 (for example a filtered subset of the dataset).
    input_col = sample.columns.get_loc('input')
    output_col = sample.columns.get_loc(model_name)

    for row_pos in range(len(sample)):
        # System message first, then the user turn: the conventional order
        # for chat-completion requests.
        messages = [
            {'role': 'system', 'content': system_message},
            {'role': 'user', 'content': sample.iat[row_pos, input_col]},
        ]

        for attempt in range(1, retries + 1):
            # Count each attempt exactly once, whether it succeeds or fails.
            total_requests += 1
            try:
                chat_completion = client.chat.completions.create(
                    messages=messages,
                    model=model_name,
                )
                answer = chat_completion.choices[0].message.content.strip()
            except Exception as exc:
                # Best effort: report the failure and retry instead of aborting the run.
                print(f"Row {row_pos}: attempt {attempt}/{retries} failed: {exc}")
                time.sleep(delay)
                continue

            sample.iat[row_pos, output_col] = answer
            successful_requests += 1
            time.sleep(delay)
            break

    print(f"Total requests made: {total_requests}")
    print(f"Successful requests: {successful_requests}")

    return sample


"""
GROQ

The function is to generate GROQ answers for the given data ROWS.

"""

def groq_line_generate(raw_dataset, output_dataset, start_idx, end_idx, system_message, model_name,
                       output_column="llama_output"):
    """Generate Groq answers for a label range of rows and store them.

    Rows ``start_idx`` through ``end_idx`` (label-based, both inclusive) of
    ``raw_dataset`` are passed to ``groq``. The answers are written to the
    same label range of ``output_dataset``.

    Args:
        raw_dataset: DataFrame with the ``input`` column to query.
        output_dataset: DataFrame that receives the answers; modified in
            place. Its ``start_idx:end_idx`` slice must select as many rows as
            the same slice of ``raw_dataset``.
        start_idx: First row label of the range.
        end_idx: Last row label of the range (inclusive).
        system_message: System message forwarded to ``groq``.
        model_name: Groq model identifier forwarded to ``groq``.
        output_column: Column of ``output_dataset`` to fill. Defaults to
            ``"llama_output"``, the name that used to be hard-coded.

    Returns:
        ``output_dataset`` (the same object that was passed in).
    """
    # groq() is handed a fresh 0..n-1 index, matching how it was always called here.
    sample = raw_dataset.loc[start_idx:end_idx].copy().reset_index(drop=True)
    groq_sample = groq(sample, system_message, model_name)
    # .values drops the index so the answers are assigned by position, not by label.
    output_dataset.loc[start_idx:end_idx, output_column] = groq_sample[model_name].values
    return output_dataset

# abstract_algebra

## Get Dataset

In [5]:
# Load the abstract_algebra test CSV through the project's MMLU processor.
# convert_df() yields the DataFrame inspected in the next cells
# (100 rows with `input` and `target` columns per the info() output).
abstract_algebra = MMLU_CSV_Processor_my('MMLU','abstract_algebra_test.csv').convert_df()

In [6]:
# Preview the first rows: `input` holds the question followed by its options, `target` the answer letter.
abstract_algebra.head()

Unnamed: 0,input,target
0,"Find the degree for the given field extension Q(sqrt(2), sqrt(3), sqrt(18)) over Q. \n0 \n4 \n2 \n6",B
1,"Let p = (1, 2, 5, 4)(2, 3) in S_5 . Find the index of <p> in S_5. \n8 \n2 \n24 \n120",C
2,"Find all zeros in the indicated finite field of the given polynomial with coefficients in that field. x^5 + 3x^3 + x^2 + 2x in Z_5 \n0 \n1 \n0,1 \n0,4",D
3,"Statement 1 | A factor group of a non-Abelian group is non-Abelian. Statement 2 | If K is a normal subgroup of H and H is a normal subgroup of G, then K is a normal subgroup of G. \nTrue, True \nFalse, False \nTrue, False \nFalse, True",B
4,"Find the product of the given polynomials in the given polynomial ring. f(x) = 4x - 5, g(x) = 2x^2 - 4x + 2 in Z_8[x]. \n2x^2 + 5 \n6x^2 + 4x + 6 \n0 \nx^2 + 1",B


In [7]:
# Check row count, column names and null counts before generating answers.
abstract_algebra.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   input   100 non-null    object
 1   target  100 non-null    object
dtypes: object(2)
memory usage: 1.7+ KB


## GEMINI

In [None]:
# Working copy of the dataset with an empty `gemini_output` column to be filled row by row.
gemini_abstract_algebra = abstract_algebra.assign(gemini_output=None)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   input          100 non-null    object
 1   target         100 non-null    object
 2   gemini_output  0 non-null      object
dtypes: object(3)
memory usage: 2.5+ KB


In [35]:
# Progress check: the non-null count of `gemini_output` shows how many rows have an answer.
gemini_abstract_algebra.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   input          100 non-null    object
 1   target         100 non-null    object
 2   gemini_output  100 non-null    object
dtypes: object(3)
memory usage: 2.5+ KB


In [None]:
# Answer rows 90-99 one request at a time; an exception stops the loop at the failing row.
for row_label in range(90, 100):
    gemini_line_generator(abstract_algebra, row_label, gemini_abstract_algebra, message='Provide answers only')

In [28]:
# Rows that still have no Gemini answer.
gemini_abstract_algebra.loc[gemini_abstract_algebra['gemini_output'].isna()]

Unnamed: 0,input,target,gemini_output
70,"(Z,*) is a group with a*b = a+b+1 for all a, b in Z. The inverse of a is \n0 \n-2 \na-2 \n(2+a)*-1",D,
71,Find the degree for the given field extension Q(sqrt(2)*sqrt(3)) over Q. \n0 \n4 \n2 \n6,C,
72,"Using Fermat's theorem, find the remainder of 3^47 when it is divided by 23. \n1 \n2 \n3 \n4",D,
73,Find the characteristic of the ring Z_3 x Z_3. \n0 \n3 \n12 \n30,B,
74,"Statement 1 | If a and b are elements of a group and a is not equal to b, then a^2 is not equal to b^2. Statement 2 | The set of rotations in a dihedral group form a cyclic subgroup. \nTrue, True \nFalse, False \nTrue, False \nFalse, True",D,
...,...,...,...
95,"Statement 1 | If H is a subgroup of G and a belongs to G then aH is a subgroup of G if and only if a is in H. Statement 2 | If H is a subgroup of G and a and b belong to G then aH = bH if and only if ab is in H. \nTrue, True \nFalse, False \nTrue, False \nFalse, True",C,
96,"Find all zeros in the indicated finite field of the given polynomial with coefficients in that field. x^2 + 1 in Z_2 \n0 \n1 \n0,1 \n2",B,
97,Find the number of elements in the indicated cyclic group: The cyclic subgroup of Z_30 generated by 25. \n25 \n5 \n6 \n30,C,
98,"The element (4, 2) of Z_12 x Z_8 has order \n4 \n8 \n12 \n6",C,


In [36]:
# Save the answered frame next to the notebook; index=False keeps the row labels out of the CSV.
gemini_abstract_algebra.to_csv('gemini_abstract_algebra.csv', index=False)

# anatomy_test

## Get Dataset

In [37]:
# Load the anatomy test CSV through the project's MMLU processor (135 rows per the info() output).
anatomy_test = MMLU_CSV_Processor_my('MMLU','anatomy_test.csv').convert_df()

In [38]:
# Check row count, column names and null counts before generating answers.
anatomy_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 135 entries, 0 to 134
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   input   135 non-null    object
 1   target  135 non-null    object
dtypes: object(2)
memory usage: 2.2+ KB


In [42]:
# Show one full prompt: the question followed by its options, separated by " \n".
anatomy_test.loc[0, 'input']

'A lesion causing compression of the facial nerve at the stylomastoid foramen will cause ipsilateral \nparalysis of the facial muscles. \nparalysis of the facial muscles and loss of taste. \nparalysis of the facial muscles, loss of taste and lacrimation. \nparalysis of the facial muscles, loss of taste, lacrimation and decreased salivation.'

## GEMINI

In [43]:
# Working copy of the dataset with an empty `gemini_output` column to be filled row by row.
gemini_anatomy = anatomy_test.assign(gemini_output=None)

In [94]:
# Progress check: the non-null count of `gemini_output` shows how many rows have an answer.
gemini_anatomy.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 135 entries, 0 to 134
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   input          135 non-null    object
 1   target         135 non-null    object
 2   gemini_output  135 non-null    object
dtypes: object(3)
memory usage: 3.3+ KB


In [93]:
# Rows that still have no Gemini answer (an empty frame means every row is done).
gemini_anatomy.loc[gemini_anatomy['gemini_output'].isna()]

Unnamed: 0,input,target,gemini_output


In [None]:
# Answer the last rows (labels 130-134) one request at a time.
# anatomy_test has 135 rows, so labels from 135 upward do not exist and raise KeyError.
for row_label in range(130, 135):
    gemini_line_generator(anatomy_test, row_label, gemini_anatomy, message='Treat each options as A, B, C, D, etc. and provide answers only')

KeyError: 135

In [95]:
# Save the answered frame next to the notebook; index=False keeps the row labels out of the CSV.
gemini_anatomy.to_csv('gemini_anatomy.csv', index=False)

# astronomy_test

## Get Dataset

In [96]:
# Load the astronomy test CSV through the project's MMLU processor.
astronomy_test = MMLU_CSV_Processor_my('MMLU','astronomy_test.csv').convert_df()
# Check row count, column names and null counts before generating answers.
astronomy_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 152 entries, 0 to 151
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   input   152 non-null    object
 1   target  152 non-null    object
dtypes: object(2)
memory usage: 2.5+ KB


In [97]:
# Preview the first rows: `input` holds the question followed by its options, `target` the answer letter.
astronomy_test.head()

Unnamed: 0,input,target
0,"What is true for a type-Ia (""type one-a"") supernova? \nThis type occurs in binary systems. \nThis type occurs in young galaxies. \nThis type produces gamma-ray bursts. \nThis type produces high amounts of X-rays.",A
1,If you know both the actual brightness of an object and its apparent brightness from your location then with no other information you can estimate: \nIts speed relative to you \nIts composition \nIts size \nIts distance from you,D
2,Why is the sky blue? \nBecause the molecules that compose the Earth's atmosphere have a blue-ish color. \nBecause the sky reflects the color of the Earth's oceans. \nBecause the atmosphere preferentially scatters short wavelengths. \nBecause the Earth's atmosphere preferentially absorbs all other colors.,C
3,You’ve made a scientific theory that there is an attractive force between all objects. When will your theory be proven to be correct? \nThe first time you drop a bowling ball and it falls to the ground proving your hypothesis. \nAfter you’ve repeated your experiment many times. \nYou can never prove your theory to be correct only “yet to be proven wrong”. \nWhen you and many others have tested the hypothesis.,C
4,Which of the following is/are true? \nTitan is the only outer solar system moon with a thick atmosphere \nTitan is the only outer solar system moon with evidence for recent geologic activity \nTitan's atmosphere is composed mostly of hydrocarbons \nA and D,D


## GEMINI

In [98]:
# Working copy of the dataset with an empty `gemini_output` column to be filled row by row.
gemini_astronomy = astronomy_test.assign(gemini_output=None)

In [145]:
# Progress check: the non-null count of `gemini_output` shows how many rows have an answer.
gemini_astronomy.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 152 entries, 0 to 151
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   input          152 non-null    object
 1   target         152 non-null    object
 2   gemini_output  152 non-null    object
dtypes: object(3)
memory usage: 3.7+ KB


In [144]:
# Rows that still have no Gemini answer (an empty frame means every row is done).
gemini_astronomy.loc[gemini_astronomy['gemini_output'].isna()]

Unnamed: 0,input,target,gemini_output


In [None]:
# Answer rows 140-150 one request at a time; an exception stops the loop at the failing row.
for row_label in range(140, 151):
    gemini_line_generator(astronomy_test, row_label, gemini_astronomy, message='Treat each options as A, B, C, D, etc. and provide answers only')

B



In [146]:
# Save the answered frame next to the notebook; index=False keeps the row labels out of the CSV.
gemini_astronomy.to_csv('gemini_astronomy.csv', index=False)

# business_ethics_test

## Get Dataset

In [147]:
# Load the business_ethics test CSV through the project's MMLU processor.
business_ethics_test = MMLU_CSV_Processor_my('MMLU','business_ethics_test.csv').convert_df()
# Check row count, column names and null counts before generating answers.
business_ethics_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   input   100 non-null    object
 1   target  100 non-null    object
dtypes: object(2)
memory usage: 1.7+ KB


## GEMINI

In [148]:
# Working copy of the dataset with an empty `gemini_output` column to be filled row by row.
gemini_business_ethics_test = business_ethics_test.assign(gemini_output=None)

In [149]:
# Preview the working copy; `gemini_output` is still empty at this point.
gemini_business_ethics_test.head()

Unnamed: 0,input,target,gemini_output
0,"_______ such as bitcoin are becoming increasingly mainstream and have a whole host of associated ethical implications, for example, they are______ and more ______. However, they have also been used to engage in _______. \nCryptocurrencies, Expensive, Secure, Financial Crime \nTraditional currency, Cheap, Unsecure, Charitable giving \nCryptocurrencies, Cheap, Secure, Financial crime \nTraditional currency, Expensive, Unsecure, Charitable giving",C,
1,"Typical advertising regulatory bodies suggest, for example that adverts must not: encourage _________, cause unnecessary ________ or _____, and must not cause _______ offence. \nUnsafe practices, Wants, Fear, Trivial \nUnsafe practices, Distress, Fear, Serious \nSafe practices, Wants, Jealousy, Trivial \nSafe practices, Distress, Jealousy, Serious",B,
2,"______ are the obligations of workers towards their employer, based on individual contracts and wider employment laws. \nEmployee rights \nEmployee rights \nEmployer duties \nEmployee duties",D,
3,"______ is an employee's preferred ratio between work-related and non-work-related activities which, due to intensification of work and technological shifts, has become a hotly contested issue in recent years. \nPresenteeism \nAbsenteeism \nWork-play balance \nWork-life balance",D,
4,"_______ can be a likened to their natural counterparts, comprising of a balanced network of interdependent organisms and their environments thus adding value to sustainability thinking due to the consideration of companies and industries as being bound together, and interdependent due to all kinds of resources and wastes. \nIndustrial supply loops \nIndustrial ecosystems \nEcological ecosystems \nCorporate ecosystems",B,


In [165]:
# Rows that still have no Gemini answer.
gemini_business_ethics_test.loc[gemini_business_ethics_test['gemini_output'].isna()]

Unnamed: 0,input,target,gemini_output
30,"Ethical issues related to marketing communications include the fact that they are _________ and unavoidable, they create ________ needs, reinforce _________ and perpetuate _________ and ______. \nIntrusive, Artificial, Consumerism, Insecurity, Social Stereotypes \nIntrusive, Artificial, De-materialism, Security, Happiness \nIntrusive, Real, Consumerism, Insecurity, Happiness \nIntrusive, Real, De-materialism, Security, Social stereotypes",A,
31,"In order to try and align the interest of shareholders and senior management of a company, _________ are often made available to _______ of a company, to ensure that it is in their interests to increase _______ and dividend performance. \nShare options, Senior managers, Share Price \nShare options, All employees, Share Price \nNon-financial remunerations, Senior managers, Employee Satisfaction \nNon-financial remunerations, All employees, Employee Satisfaction",A,
32,"Predatory pricing. \n1,2,4 \n1,2,3,4 \n1,2 \n1,4",D,
33,"There are a number of means by which can influence _______. These include _________to decision maker, varying from very direct modes to indirect modes, _________, being either public or private and ________ of communication. \nGovernment, Avenue of approach, Breadth of transmission, Content \nConsumers, Avenue of approach, Height of transmission, Content \nGovernment, Breath of transmission, Avenue of approach, Content \nEmployees, Breadth of transmission, Content, Avenue of approach",A,
34,Pick the correct description of the following term: Utilitarianism is… \nA theory which states that an action is morally incorrect if it promotes the greatest good for the greatest number \nA theory which states that an action is morally right if it promotes the greatest good for the decision maker \nA theory which states that an action is morally right if it promotes the greatest good for the smallest number \nA theory which states that an action is morally right if it promotes the greatest good for the greatest number,D,
...,...,...,...
95,"According to Evan and Freeman (1993), the Principle of ________ and the Principle of _______ can be used in a specific situation to determine who constitutes a _______. \nCorporate rights, Corporate effect, Stakeholder \nCorporate deniability, Corporate effect, Stakeholder \nCorporate rights, Corporate responsibility, Stakeholder \nCorporate rights, Corporate effect, Shareholder",A,
96,"The relationship between ethics and the law can be described as… \nIncredibly close, in fact, in all instances what is ethical is also what is legal and vice versa. \nNot close, ethics and the law share a small amount of overlap, but fundamentally address differing questions and themes. \nClose, as there is considerable overlap between ethics and law, but the two are not equivalent. \nNot at all close, ethics and the law are virtually incompatible.",C,
97,"In a business to business context, conflicts of interest can arise in two main ways: conflict of _______ and _______ interests, which might arise, for example, when a firm is hired as a supplier of professional services by another firm or conflict of _________ and ________ interests, such as where an individual's interests may conflict with that of their employer. \nProfessional, Organizational, Personal, Organizational \nProfessional, Individual, Personal, Organizational \nIndividual, Organizational, Organizational, Professional \nIndividual, Professional, Organizational, Personal",A,
98,"________ are a business ethics management tool that have grown over the last few decades. Areas which these typically cover are compliance, corruption and environmental issues. However, they are constantly evolving, with for example, the onus on __________ having grown in recent years. \nAnnual reports, Company financial performance \nAnnual report, Internet - social media and networking \nCodes of ethics, Company financial performance \nCodes of ethics, Internet - social media and networking",D,


In [166]:
# Answer rows 30-39 one request at a time; an exception (such as a 429 quota error)
# stops the loop at the failing row and leaves the remaining rows unanswered.
for row_label in range(30, 40):
    gemini_line_generator(business_ethics_test, row_label, gemini_business_ethics_test, message='Treat each options as A, B, C, D, etc. and provide answers only')

A

A



ResourceExhausted: 429 Resource has been exhausted (e.g. check quota).

In [167]:
# Save the partial result: the previous cell stopped on a 429 ResourceExhausted error,
# so some rows still have no answer (hence the "_not_done" file name).
gemini_business_ethics_test.to_csv('gemini_business_ethics_test_not_done.csv', index=False)