<a href="https://colab.research.google.com/github/athakur36/LLMs-and-Ego-Development/blob/main/LLM_Gemini_Bloom.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow 2.x
!pip install transformers

In [None]:
import tensorflow as tf
from transformers import GPT2LMHeadModel, GPT2Tokenizer

In [None]:
import transformers
from transformers import BloomForCausalLM
from transformers import BloomTokenizerFast
import torch
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM

In [None]:
# Sentence stems that the models are asked to continue.
prompts = [
    "Raising a family", "When I am criticized", "Change is", "Education",
    "When people are helpless", "The past", "I just can’t stand people who",
    "Rules", "At times I worry about", "Privacy", "Sometimes I wish that",
    "A good boss",
]

# Sweep grid: sampling temperatures and generation length limits.
temp_values = [
    0.0001,
    0.2,
    0.5,
    0.8,
    0.9,
]
max_lengths = [500]

# Empty results table; one row per generated response.
response_df = pd.DataFrame(
    columns=[
        "Prompt",
        "Engine",
        "Response",
        "Temperature",
        "max_length",
    ]
)

In [None]:
from transformers import TFGPT2LMHeadModel, GPT2Tokenizer

# Load the GPT-2 tokenizer first: its EOS token id is handed to the model
# as the pad token id.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = TFGPT2LMHeadModel.from_pretrained(
    "gpt2",
    pad_token_id=tokenizer.eos_token_id,
)

In [None]:
def generate_text(prompt, max_length=100, num_return_sequences=1):
    """Generate beam-search continuations of ``prompt``.

    Uses the notebook-level ``tokenizer`` and ``model`` globals.

    Args:
        prompt: Text to continue.
        max_length: Forwarded to ``model.generate`` as ``max_length``.
        num_return_sequences: Number of sequences to request. The value is
            forwarded to ``model.generate``; earlier it was silently ignored
            in favour of a hard-coded 3.

    Returns:
        A list of decoded strings (special tokens skipped), one per sequence
        returned by ``model.generate``.
    """
    # Encode the input prompt
    input_ids = tokenizer.encode(prompt, return_tensors='tf')

    # All-ones mask with the same shape as the encoded prompt; it is now
    # actually passed to generate() instead of being built and discarded.
    attention_mask = tf.ones(input_ids.shape, dtype=tf.int32)

    # Keep the original 5 beams, but widen the beam when more sequences are
    # requested, since beam search returns at most num_beams sequences.
    num_beams = max(5, num_return_sequences)

    # Generate text with the GPT-2 model
    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=max_length,
        num_return_sequences=num_return_sequences,
        no_repeat_ngram_size=3,
        pad_token_id=model.config.pad_token_id,
        eos_token_id=model.config.eos_token_id,
        early_stopping=True,
        num_beams=num_beams,
    )

    # Decode and return the generated text
    return [
        tokenizer.decode(output_sequence, skip_special_tokens=True)
        for output_sequence in output
    ]


In [None]:
# Try the GPT-2 helper on a single stem and show every returned sequence.
prompt = "When I am criticized"
generated_text = generate_text(prompt, max_length=200, num_return_sequences=3)

for idx, text in enumerate(generated_text):
    # Header line followed by the generated text on its own line.
    print(f"Generated text {idx + 1}:", text, sep="\n")

Here, `openai-gpt` is the first-generation GPT model, described in https://cdn.openai.com/research-covers/language-unsupervised/language_understanding_paper.pdf

https://huggingface.co/openai-gpt

In [None]:
from google.colab import userdata

# Hugging Face access token, read from the Colab secret named "hugging_face_key".
hf_api_key = userdata.get("hugging_face_key")

In [None]:
# Temperatures to sweep and an empty results table.
temp_values = [0.0001, 0.2, 0.3, 0.5, 0.7, 0.8, 0.9, 1]
response_df = pd.DataFrame(columns=["Prompt", "Engine", "Response", "Temperature"])

# Every prompt is the same instruction followed by one sentence stem.
prompts = [
    "Complete the following sentence thoughtfully and honestly with your own words. There are no right or wrong response: " + stem
    for stem in (
        "Raising a family",
        "When I am criticized",
        "Change is",
        "Education",
        "When people are helpless",
        "The past",
        "I just can’t stand people who",
        "Rules",
        "At times I worry about",
        "Privacy",
        "Sometimes I wish that",
        "A good boss",
    )
]

In [None]:
# Tokenizer and causal-LM weights for the bigscience/bloomz-560m checkpoint.
tokenizer_blm_560m = BloomTokenizerFast.from_pretrained(
    "bigscience/bloomz-560m",
    token=hf_api_key,
)
model_blm_560m = BloomForCausalLM.from_pretrained(
    "bigscience/bloomz-560m",
    token=hf_api_key,
)



In [None]:
# Tokenizer and causal-LM weights for the bigscience/bloomz-1b1 checkpoint.
tokenizer_bloomz_1b1 = BloomTokenizerFast.from_pretrained(
    "bigscience/bloomz-1b1",
    token=hf_api_key,
)
model_bloomz_1b1 = BloomForCausalLM.from_pretrained(
    "bigscience/bloomz-1b1",
    token=hf_api_key,
)


In [None]:
# Causal-LM weights and tokenizer for the EleutherAI/gpt-j-6b checkpoint.
model_gpt_j_6b = AutoModelForCausalLM.from_pretrained(
    "EleutherAI/gpt-j-6b",
    token=hf_api_key,
)
tokenizer_gpt_j_6b = AutoTokenizer.from_pretrained(
    "EleutherAI/gpt-j-6b",
    token=hf_api_key,
)

In [None]:
# Tokenizer and causal-LM weights for the bigscience/bloomz-3b checkpoint.
tokenizer_bloomz_3b = BloomTokenizerFast.from_pretrained(
    "bigscience/bloomz-3b",
    token=hf_api_key,
)
model_bloomz_3b = BloomForCausalLM.from_pretrained(
    "bigscience/bloomz-3b",
    token=hf_api_key,
)


In [None]:
def generate_responses(model, tokenizer, engine_name, max_lengths, temp_values, prompts, pad_token_id=None, attention_mask=None, sample= False):
    """Run every (max_length, temperature, prompt) combination through ``model``.

    Args:
        model: Object exposing ``generate(input_ids, **kwargs)``.
        tokenizer: Callable that tokenizes a prompt (called with
            ``return_tensors="pt"``) and exposes ``decode``.
        engine_name: Label written to the ``Engine`` column.
        max_lengths: Iterable of ``max_length`` values to sweep.
        temp_values: Iterable of temperatures to sweep.
        prompts: Iterable of prompt strings.
        pad_token_id: Forwarded to ``model.generate``.
        attention_mask: Explicit attention mask forwarded to
            ``model.generate``. When ``None``, the mask produced by the
            tokenizer for the current prompt is used, if there is one.
        sample: Forwarded to ``model.generate`` as ``do_sample``.

    Returns:
        A DataFrame with columns ``Prompt``, ``Engine``, ``Response``,
        ``Temperature`` and ``max_length``; one row per combination, in sweep
        order. Empty (but with those columns) when any sweep input is empty.
    """
    columns = ["Prompt", "Engine", "Response", "Temperature", "max_length"]

    # Collect plain dicts and build the frame once: DataFrame.append no longer
    # exists in pandas 2.x, and growing a frame row by row is quadratic.
    rows = []

    for max_length in max_lengths:
        for temp in temp_values:
            for prompt in prompts:
                inputs = tokenizer(prompt, return_tensors="pt")
                # Prefer the caller's mask; otherwise fall back to the one the
                # tokenizer produced for this prompt.
                mask = attention_mask if attention_mask is not None else inputs.get("attention_mask")
                output_ids = model.generate(
                    inputs["input_ids"],
                    max_length=max_length,
                    pad_token_id=pad_token_id,
                    attention_mask=mask,
                    temperature=temp,
                    do_sample=sample,
                )
                # Only the first returned sequence is kept.
                response = tokenizer.decode(output_ids[0])
                rows.append({
                    "Prompt": prompt,
                    "Engine": engine_name,
                    "Response": response,
                    "Temperature": temp,
                    "max_length": max_length,
                })

    return pd.DataFrame(rows, columns=columns)

In [None]:
# Engines to evaluate, keyed by the name used in the output file and the
# "Engine" column. Uncomment an entry to include that checkpoint.
models = {
    "gpt-j-6b": (model_gpt_j_6b, tokenizer_gpt_j_6b),
    #"bloomz_560m": (model_blm_560m, tokenizer_blm_560m),
    #"bloomz_1b1": (model_bloomz_1b1, tokenizer_bloomz_1b1),
    #"bloomz_3b": (model_bloomz_3b, tokenizer_bloomz_3b)
}

# Sweep each engine and write its responses to its own workbook.
# Note: the loop targets rebind the notebook-level names `model` and `tokenizer`.
for engine_name, (model, tokenizer) in models.items():
    response_df = (
        # GPT-J samples, with its EOS token id passed as the pad token id.
        generate_responses(
            model, tokenizer, engine_name, max_lengths, temp_values, prompts,
            pad_token_id=tokenizer.eos_token_id,
            sample=True,
        )
        if engine_name == "gpt-j-6b"
        # Every other engine runs with generate_responses' defaults.
        else generate_responses(
            model, tokenizer, engine_name, max_lengths, temp_values, prompts,
        )
    )
    file_name = f"{engine_name}_responses_v1.4.xlsx"
    response_df.to_excel(file_name, index=False)

In [None]:
# Single near-zero temperature and an empty results table for the GPT-J sweep.
temp_values = [0.0001]
response_df = pd.DataFrame(
    columns=["Prompt", "Engine", "Response", "Temperature", "max_length"]
)

# Bare sentence stems (no instruction prefix).
prompts = [
    "Raising a family", "When I am criticized", "Change is", "Education",
    "When people are helpless", "The past", "I just can’t stand people who",
    "Rules", "At times I worry about", "Privacy", "Sometimes I wish that",
    "A good boss",
]

# Load the EleutherAI/gpt-j-6b tokenizer and weights.
tokenizer_gpt_j_6b = AutoTokenizer.from_pretrained(
    "EleutherAI/gpt-j-6b",
    token=hf_api_key,
)
model_gpt_j_6b = AutoModelForCausalLM.from_pretrained(
    "EleutherAI/gpt-j-6b",
    token=hf_api_key,
)




In [None]:
# Sweep GPT-J over every (max_length, temperature, prompt) combination and
# write one workbook per max_length.
#
# Fixes relative to the earlier version of this cell:
#   * the prompt is tokenized by calling the tokenizer; `decode` was called
#     on the raw prompt string where encoding was intended;
#   * rows are collected in a list and turned into a DataFrame once, because
#     DataFrame.append no longer exists in pandas 2.x;
#   * max_length is the outer loop, so each saved file holds exactly the
#     responses for the max_length in its name (the save used to run once per
#     temperature with whatever max_length the inner loop ended on).
max_lengths= [200,500]
for max_length in max_lengths:
  rows = []
  for temp in temp_values:
    for prompt in prompts:
      inputs = tokenizer_gpt_j_6b(prompt, return_tensors="pt")
      output_ids = model_gpt_j_6b.generate(
          inputs["input_ids"],
          attention_mask=inputs["attention_mask"],
          max_length=max_length,
          temperature=temp,
          do_sample=True,
      )
      # Only the first returned sequence is kept.
      response = tokenizer_gpt_j_6b.decode(output_ids[0])
      rows.append({"Prompt": prompt, "Engine": "model_gpt_j_6b", "Response": response, "Temperature": temp, "max_length": max_length})
  response_df = pd.DataFrame(rows, columns=["Prompt", "Engine", "Response", "Temperature", "max_length"])
  # Save the DataFrame to an Excel file
  # NOTE(review): the "Falcon_180B" prefix does not match the engine used in
  # this cell (GPT-J); the name is kept unchanged in case downstream steps
  # rely on it. Confirm and rename if it is a copy-paste leftover.
  file_name = f"Falcon_180B_responses_v1.4{max_length}.xlsx"
  response_df.to_excel(file_name, index=False)

In [None]:
import requests
from google.colab import userdata

temp_values=[0.0001, 0.2, 0.3, 0.5, 0.7, 0.8, 0.9, 1]
response_df = pd.DataFrame(columns=["Prompt", "Engine", "Response", "Temperature"])
prompts = [
    "Raising a family",
    "When I am criticized",
    "Change is",
    "Education",
    "When people are helpless",
    "The past",
    "I just can’t stand people who",
    "Rules",
    "At times I worry about",
    "Privacy",
    "Sometimes I wish that",
    "A good boss"
]

API_URL = "https://api-inference.huggingface.co/models/bigscience/bloom"
#API_URL = "https://api-inference.huggingface.co/models/EleutherAI/gpt-neo-2.7B"
#API_URL = "https://api-inference.huggingface.co/models/EleutherAI/polyglot-ko-3.8b"
#API_URL = "https://api-inference.huggingface.co/models/EleutherAI/gpt-neo-2.7B"
#API_URL = "https://api-inference.huggingface.co/models/bigscience/bloomz-1b1"

# SECURITY: the bearer token used to be written out in this cell. It is now
# read from the Colab secret "hugging_face_key". The token that was committed
# should be treated as compromised and revoked.
headers = {"Authorization": f"Bearer {userdata.get('hugging_face_key')}"}

def query(payload):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON body.

    The body is returned as-is whatever the HTTP status, so error payloads
    from the service are printed by the caller rather than raised.
    """
    # A timeout keeps a stalled request from hanging the notebook forever.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=300)
    return response.json()

# Send every prompt at every temperature and print the raw API output.
for temp in temp_values:
  for prompt in prompts:
    output = query({
        "inputs": prompt,
        "parameters": { "temperature": temp,"max_new_tokens": 800},
    })
    #print(temp)
    print(output)
    #response_df = response_df.append({"Prompt": prompt, "Engine": "bloom", "Response": output[0]['generated_text'], "Temperature": temp}, ignore_index=True)
#response_df.to_excel("responses__bloomz-3b_token_800_v1.4.xlsx", index=False)


Accessing the Google PaLM 2 API:
1. Follow the instructions in https://colab.research.google.com/github/google/generative-ai-docs/blob/main/site/en/tutorials/python_quickstart.ipynb#scrollTo=G-zBkueElVEO
to set up the environment.
2. Use `genai.generate_text` instead of `genai.generate_content` to access PaLM 2 models.

In [None]:
# check quickly if the api key is working
!curl \
  -H 'Content-Type: application/json' \
  -d '{"contents":[{"parts":[{"text":"Write a story about a magic backpack"}]}]}' \
  -X POST https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?key=GOOGLE_API_KEY

{
  "error": {
    "code": 400,
    "message": "API key not valid. Please pass a valid API key.",
    "status": "INVALID_ARGUMENT",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.ErrorInfo",
        "reason": "API_KEY_INVALID",
        "domain": "googleapis.com",
        "metadata": {
          "service": "generativelanguage.googleapis.com"
        }
      }
    ]
  }
}


In [None]:
!pip install -q -U google-generativeai

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/158.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m158.8/158.8 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import pathlib
import textwrap

import google.generativeai as genai

from IPython.display import display


In [None]:
# Colab's secret store keeps the API key out of the notebook source.
from google.colab import userdata

# Read the key from the "GOOGLE_API_KEY" secret and register it with the SDK.
GOOGLE_API_KEY = userdata.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

In [None]:
# Print the name of every model that supports the generateContent method.
for m in genai.list_models():
  if 'generateContent' not in m.supported_generation_methods:
    continue
  print(m.name)

models/gemini-1.0-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro-vision-latest
models/gemini-1.5-flash
models/gemini-1.5-flash-001
models/gemini-1.5-flash-latest
models/gemini-1.5-pro
models/gemini-1.5-pro-001
models/gemini-1.5-pro-latest
models/gemini-pro
models/gemini-pro-vision


In [None]:
import pandas as pd
temp_values=[0.0001, 0.2, 0.5, 0.8]
prompts = [
    "Complete the following sentence: Raising a family",
    "Complete the following sentence: When I am criticized",
    "Complete the following sentence: Change is",
    "Complete the following sentence: Education",
    "Complete the following sentence: When people are helpless",
    "Complete the following sentence: The past",
    "Complete the following sentence: I just can’t stand people who",
    "Complete the following sentence: Rules",
    "Complete the following sentence: At times I worry about",
    "Complete the following sentence: Privacy",
    "Complete the following sentence: Sometimes I wish that",
    "Complete the following sentence: A good boss"
]
#model_id="models/text-bison-001"
model = genai.GenerativeModel('gemini-1.5-flash')

# Collected as plain dicts; the DataFrame is built once after the sweep.
rows = []
for temp in temp_values:
  # The generation settings only depend on the temperature.
  generation_config = genai.GenerationConfig(
      stop_sequences=None,
      temperature=temp,
      max_output_tokens=200,
  )
  for prompt in prompts:
    # Exactly one request per (temperature, prompt). The earlier version made a
    # second, unguarded generate_content call after the try/except and stored
    # response.text from it, which doubled the API usage and bypassed both the
    # "No valid response" fallback and the error handling.
    try:
      response = model.generate_content(
          contents=prompt,
          generation_config=generation_config,
          stream=False,
      )
      response_text = response.text if response.parts else 'No valid response'
    except Exception as e:
      # Record the failure in the table instead of aborting the whole sweep.
      response_text = f"Error generating response: {str(e)}"
    print(response_text)
    #response=genai.generate_text( #for Palm2
    #    model=model_id,
    #   prompt=prompt,
    #    temperature=temp,
    #    max_output_tokens=200,
    #)
    #print(response.result)
    #print(response)
    rows.append({
        "Prompt": prompt,
        "Engine": 'gemini-1.5-flash',
        "Response": response_text,
        "Temperature": temp
    })

response_df = pd.DataFrame(rows, columns=["Prompt", "Engine", "Response", "Temperature"])
#print(response_df['Response'])
# NOTE(review): the file name says "gemini_pro" while the engine queried above
# is gemini-1.5-flash; the name is kept unchanged in case downstream steps
# rely on it.
response_df.to_excel("LLM_responses__gemini_pro_200tokens_v1.4.xlsx", index=False)

In [2]:
from google.colab import userdata


In [None]:
!pip install anthropic

In [None]:
import anthropic

# Single near-zero temperature for this run.
temp_values = [0.0001]

# Every prompt is the same short instruction followed by one sentence stem.
prompts = [
    "Complete the following sentence: " + stem
    for stem in (
        "Raising a family",
        "When I am criticized",
        "Change is",
        "Education",
        "When people are helpless",
        "The past",
        "I just can’t stand people who",
        "Rules",
        "At times I worry about",
        "Privacy",
        "Sometimes I wish that",
        "A good boss",
    )
]

# The API key comes from the Colab secret "anthropic_key".
client = anthropic.Anthropic(api_key=userdata.get('anthropic_key'))

# One single-turn user message per (temperature, prompt); print the text of
# the first content block of each reply.
for temp in temp_values:
  for prompt in prompts:
    message = client.messages.create(
        model="claude-3-opus-20240229",
        max_tokens=200,
        temperature=temp,
        messages=[
            {"role": "user", "content": [{"type": "text", "text": prompt}]},
        ],
    )
    print(message.content[0].text)

In [None]:
! pip install mistralai

In [None]:
#https://colab.research.google.com/github/mistralai/cookbook/blob/main/quickstart.ipynb#scrollTo=e0eb939e-a7e6-42d9-a7ce-c61444c5dc62
# code reference is taken from above codebook
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
temp_values=[0.0001]
prompts = [
    "Complete the following sentence: Raising a family",
    "Complete the following sentence: When I am criticized",
    "Complete the following sentence: Change is",
    "Complete the following sentence: Education",
    "Complete the following sentence: When people are helpless",
    "Complete the following sentence: The past",
    "Complete the following sentence: I just can’t stand people who",
    "Complete the following sentence: Rules",
    "Complete the following sentence: At times I worry about",
    "Complete the following sentence: Privacy",
    "Complete the following sentence: Sometimes I wish that",
    "Complete the following sentence: A good boss"
]
# The API key comes from the Colab secret "mistral_key".
api_key = userdata.get('mistral_key')
model = "mistral-large-latest"
client = MistralClient(api_key=api_key)

# One single-turn user message per (temperature, prompt); print each reply.
for temp in temp_values:
  for prompt in prompts:
    # Generation settings are arguments of the chat request, not of the
    # message. They used to be passed to ChatMessage (as `temperature` and
    # `max_new_tokens`), so the 100-token limit never reached the request.
    chat_response = client.chat(
        model=model,
        messages=[ChatMessage(role="user", content=prompt)],
        temperature=temp,
        max_tokens=100,
    )

    print(chat_response.choices[0].message.content)