<a href="https://colab.research.google.com/github/athakur36/LLMs-and-Ego-Development/blob/main/LLMs_ego_dev.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Introduction and API keys

This notebook is taken from the following preprint: https://arxiv.org/abs/2302.02083

We have modified the code to suit our own study. The original code can be found in: https://colab.research.google.com/drive/1ZRtmw87CdA4xp24DNS_Ik_uA2ypaRnoU?usp=sharing

Please let me know if you spot any issues or can think of any improvements.

To run this code, you will need to get:
1. Your own API key from OpenAI account settings (https://platform.openai.com/account/api-keys)
2. Your own API key from HuggingFace account settings (https://huggingface.co/docs/huggingface_hub/how-to-inference)

## Set up the environment

In [None]:
!pip install openai==0.28

In [None]:
!pip install requests

In [4]:
import os
import openai
import numpy as np
from random import shuffle
import pandas as pd
import requests
from google.colab import userdata

# Fetch both API credentials through google.colab's `userdata.get`, by secret name.
openai_key = userdata.get('openai_key')
hugging_face_key = userdata.get('hugging_face_key')

# The openai 0.28 client takes its credential from `openai.api_key` (or the
# OPENAI_API_KEY environment variable); without this assignment every
# openai.Completion / openai.ChatCompletion call fails authentication.
openai.api_key = openai_key

In [5]:
#### Function for querying the Hugging Face Inference API for different versions of GPT-2
def hf(prompt, engine="gpt2", temperature=.01, max_tokens=50, top_p=0.95):
    """
    Ask a model hosted on the Hugging Face Inference API to continue `prompt`.

    Args:
        prompt (str): Text the model should continue.
        engine (str): Model id appended to the inference endpoint URL.
        temperature (float): Sampling temperature sent to the API.
        max_tokens (int): Sent to the API as `max_new_tokens`.
        top_p (float): Nucleus-sampling threshold sent to the API.

    Returns:
        str: The generated text with the leading prompt removed, or the string
        "Error in API response" when the reply contains no generated text.
    """
    query = {"inputs": prompt,
             "parameters": {"temperature": temperature, "do_sample": True, "top_p": top_p,
                            "max_new_tokens": max_tokens, "max_time": 120},
             "options": {"wait_for_model": True}
             }
    API_URL = "https://api-inference.huggingface.co/models/" + engine
    headers = {"Authorization": "Bearer " + hugging_face_key}
    response = requests.post(API_URL, headers=headers, json=query)
    print(temperature)

    # Decode the body exactly once; a non-JSON body makes .json() raise ValueError.
    try:
        out = response.json()
    except ValueError:
        print(response.text)
        return "Error in API response"
    print(out)

    # A successful reply is a list whose first item holds "generated_text".
    # Error replies are dicts (KeyError on [0]); an empty list raises IndexError;
    # any other shape raises TypeError. All map to the same sentinel.
    try:
        generated = out[0]["generated_text"]
    except (KeyError, IndexError, TypeError):
        return "Error in API response"
    if not isinstance(generated, str):
        return "Error in API response"

    # Strip the echoed prompt only when it is really there, so a reply that
    # contains just the continuation is not truncated.
    if generated.startswith(prompt):
        return generated[len(prompt):]
    return generated

In [6]:

# Function to generate responses using the OpenAI Completion / ChatCompletion APIs
def generate_response(prompt, max_tokens=50, engine="davinci", temperature= 0.02):
    """
    Generates a response based on the provided prompt.

    Chat models are called through `openai.ChatCompletion` with a fixed system
    message asking the model to complete the sentence; every other engine is
    called through `openai.Completion` with the bare prompt.

    Args:
        prompt (str): The input prompt for text generation.
        max_tokens (int): The maximum number of tokens in the generated response.
        engine (str): The OpenAI engine to use for text generation.
        temperature (float): Sampling temperature passed to the API.

    Returns:
        str: The generated text response, stripped of surrounding whitespace.
    """
    print(temperature)

    # Every gpt-4* and gpt-3.5-turbo* model is a chat model, except the
    # "-instruct" variants, which are served by the Completion endpoint.
    is_chat_model = (
        engine.startswith(("gpt-4", "gpt-3.5-turbo")) and "instruct" not in engine
    )

    if is_chat_model:
        completion = openai.ChatCompletion.create(
            model=engine,
            temperature=temperature,
            messages=[{"role": "system", "content": "Complete the following sentences:"}, {"role": "user", "content": prompt}],
            max_tokens=max_tokens
        )
        response = completion.choices[0].message["content"].strip()
    else:
        completion = openai.Completion.create(
            engine=engine,
            temperature=temperature,
            prompt=prompt,
            max_tokens=max_tokens
        )
        response = completion.choices[0].text.strip()

    return response



In [None]:
# Print the id of every model returned by OpenAI's /v1/models endpoint
openai_api_key = openai_key
url = "https://api.openai.com/v1/models"
headers = {"Authorization": f"Bearer {openai_api_key}"}

response = requests.get(url, headers=headers)
if response.status_code != 200:
    # Anything other than 200: report the status code and body
    print("Error in API request:", response.status_code, response.text)
else:
    models = response.json()
    # Only the 'id' field of each entry is printed
    for model in models['data']:
        print(model['id'])



In [8]:
# Sentence stems used as prompts; the models are asked to continue each one.
prompts = [
    "Raising a family", "When I am criticized", "Change is",
    "Education", "When people are helpless", "The past",
    "I just can’t stand people who", "Rules", "At times I worry about",
    "Privacy", "Sometimes I wish that", "A good boss",
]


In [None]:

# Sweep settings for the OpenAI run: sampling temperatures and response-length caps
temp_values = [0.0001]
max_lengths = [50]
data = []

# supported engines list can be found at https://platform.openai.com/docs/guides/gpt
# Alternative, longer engine list (disabled):
#engines_openai = ["babbage-002", "text-ada-001", "ada", "text-babbage-001", "babbage", "curie", "text-curie-001", "davinci-002", "davinci","gpt-4", "gpt-3.5-turbo", "gpt-4-1106-preview", "gpt-3.5-turbo-instruct"]
engines_openai = [
    "babbage-002",
    "davinci-002",
    "gpt-4",
    "gpt-3.5-turbo",
    "gpt-4-1106-preview",
    "gpt-3.5-turbo-instruct",
]

# Ask every engine to complete every prompt, for each (temperature, max_length) pair
for temp in temp_values:
    for max_length in max_lengths:
        for prompt in prompts:
            for engine in engines_openai:
                response = generate_response(
                    prompt,
                    max_tokens=max_length,
                    engine=engine,
                    temperature=temp,
                )
                row = {
                    "Prompt": prompt,
                    "Engine": engine,
                    "Response": response,
                    "Temperature": temp,
                    "max_length": max_length,
                }
                data.append(row)

# Write all rows to a single workbook, then keep only an empty frame with the same columns
response_df = pd.DataFrame(data)
file_name = f"GPT_responses_len_v1.4.xlsx"
response_df.to_excel(file_name, index=False)
response_df = response_df.iloc[0:0]



In [None]:
# Sweep settings for the Hugging Face GPT-2 run
temp_values = [0.2]
max_lengths = [50]
data = []
engines_hf = ["gpt2-medium", "gpt2", "gpt2-large", "gpt2-xl"]

# Query each GPT-2 variant with each prompt through hf()
for temp in temp_values:
    for max_length in max_lengths:
        for prompt in prompts:
            for engine in engines_hf:
                response = hf(prompt, engine=engine, temperature=temp, max_tokens=max_length)
                row = {
                    "Prompt": prompt,
                    "Engine": engine,
                    "Response": response,
                    "Temperature": temp,
                    "max_length": max_length,
                }
                data.append(row)

# Save the collected rows to an Excel file, then keep only an empty frame with the same columns
response_df = pd.DataFrame(data)
file_name = f"GPT2_hf_responses_v1.4.xlsx"
response_df.to_excel(file_name, index=False)
response_df = response_df.iloc[0:0]

In [None]:
# Build a transformers text-generation pipeline for EleutherAI/gpt-j-6b and sample
# one continuation of a single stem (the cell's last expression is its output).
# Other model ids noted for this cell: EleutherAI/gpt-neo-125M, EleutherAI/gpt-neo-2.7B, AI21Labs/jurassic-1-jumbo
from transformers import pipeline

generator = pipeline("text-generation", model="EleutherAI/gpt-j-6b")
generator(
    "Raising a family",
    max_length=50,
    temperature=0.7,
    do_sample=True,
)


In [None]:
!pip install sentencepiece

Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.99


In [None]:
from transformers import GPTNeoForCausalLM, GPT2Tokenizer



In [None]:
model_name = "EleutherAI/gpt-neo-2.7B"
model = GPTNeoForCausalLM.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Set the pad token to be the same as the eos token, on both tokenizer and model config
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id

prompt = "Raising a family"
# Encode the single prompt without padding. Padding it to 50 tokens would use up
# the whole max_length=50 budget passed to generate() below (max_length counts
# prompt tokens too), leaving no room for new text, and would place pad tokens
# between the prompt and the position where generation has to continue.
inputs = tokenizer(prompt, return_tensors="pt")

# Attention mask produced by the tokenizer for the encoded prompt
attention_mask = inputs.attention_mask

output_sequences = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=attention_mask,
    max_length=50,  # total length: prompt tokens + generated tokens
    temperature=0.7,
    do_sample=True,
    top_k=50,
    top_p=0.95,
    num_return_sequences=1
)

generated_text = tokenizer.decode(output_sequences[0], skip_special_tokens=True)
print(generated_text)

In [None]:
import requests

temp_values = [0.0001]
max_lengths = [200, 500]

MODEL_ID = "EleutherAI/polyglot-ko-3.8b"
API_URL = "https://api-inference.huggingface.co/models/" + MODEL_ID
headers = {"Authorization": f"Bearer {hugging_face_key}"}

def query(payload):
    """POST `payload` as JSON to API_URL and return the decoded JSON reply."""
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

for max_length in max_lengths:
    # Rows are collected per max_length so each length gets its own workbook and
    # the cell does not depend on a `response_df` left behind by an earlier cell.
    rows = []
    for temp in temp_values:
        for prompt in prompts:
            output = query({
                "inputs": prompt,
                "parameters": {"temperature": temp, "max_new_tokens": max_length},
            })
            print(output)
            # A successful reply is a list whose first item holds "generated_text";
            # anything else is recorded with the same sentinel string hf() returns.
            try:
                generated_text = output[0]['generated_text']
            except (KeyError, IndexError, TypeError):
                generated_text = "Error in API response"
            # Label rows with the model actually queried.
            rows.append({"Prompt": prompt, "Engine": MODEL_ID, "Response": generated_text, "Temperature": temp, "max_length": max_length})
    # Build the frame once from the row list (DataFrame.append no longer exists in
    # pandas 2.x) and save inside the loop so the file name matches its contents.
    response_df = pd.DataFrame(rows)
    file_name = f"GPT_responses__polyglot_len_{max_length}_v1.4.xlsx"
    response_df.to_excel(file_name, index=False)