In [8]:
import requests
import json
import os
from dotenv import load_dotenv
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import MarianMTModel, MarianTokenizer
import faiss
from groq import Groq


In [2]:
# Load environment variables
load_dotenv()

# Groq API setup
GROQ_API = os.getenv("GROQ_API")

In [13]:
# Function to get response from Groq
def get_groq_response(prompt):
    client = Groq(
        api_key=GROQ_API,
    )

    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model="llama3-8b-8192",
    )

    print(chat_completion.choices[0].message.content)

In [14]:
# Example usage
prompt = "What is the NBA"
get_groq_response(prompt)

The National Basketball Association (NBA) is a professional sports organization that is home to the best basketball players in the world. It is the top-tier level of professional basketball in North America, comprising 30 teams from the United States and Canada. The NBA is considered one of the most popular professional sports leagues globally, with millions of fans worldwide.

Here are some key facts about the NBA:

1. **History**: The NBA was founded in 1946 as the Basketball Association of America (BAA). It merged with the National Basketball League (NBL) in 1949 to form the modern NBA.
2. **Season**: The NBA season typically runs from October to April, consisting of 82 games per team. The regular season is followed by the NBA playoffs, which culminate in the NBA Finals.
3. **Teams**: The league is divided into two conferences: the Eastern Conference and the Western Conference. There are three divisions within each conference, with five teams in each division.
4. **Players**: The NB

In [17]:
import json
from transformers import GPT2LMHeadModel, GPT2Tokenizer, TextDataset, DataCollatorForLanguageModeling
from transformers import Trainer, TrainingArguments

# Load and preprocess data
with open('speech_total.json', 'r') as f:
    data = json.load(f)

# Assume 'data' is a list of Elon Musk's sentences
text = "\n".join(data)

# Save preprocessed data to a file
with open('elon_data.txt', 'w') as f:
    f.write(text)

# Load pre-trained model and tokenizer
model = GPT2LMHeadModel.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Prepare dataset
dataset = TextDataset(
    tokenizer=tokenizer,
    file_path='elon_data.txt',
    block_size=128
)

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

# Set up training arguments
training_args = TrainingArguments(
    output_dir="./elon_gpt2",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    save_steps=10_000,
    save_total_limit=2,
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=dataset,
)

# Start fine-tuning
trainer.train()

# Save the fine-tuned model and tokenizer
model_path = "./elon_gpt2"
trainer.save_model()
tokenizer.save_pretrained(model_path)  # Explicitly save the tokenizer

print(f"Model and tokenizer saved to {model_path}")



  0%|          | 0/1266 [00:00<?, ?it/s]

{'loss': 0.0, 'grad_norm': nan, 'learning_rate': 3.0252764612954187e-05, 'epoch': 1.18}
{'loss': 0.0, 'grad_norm': nan, 'learning_rate': 1.0505529225908373e-05, 'epoch': 2.37}
{'train_runtime': 2067.8996, 'train_samples_per_second': 2.445, 'train_steps_per_second': 0.612, 'train_loss': 0.0, 'epoch': 3.0}
Model and tokenizer saved to ./elon_gpt2


In [22]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

def rephrase_as_elon(text, model, tokenizer):
    if text is None or text.strip() == "":
        return "I couldn't get a response to rephrase. Let's talk about Mars instead!"
    
    input_ids = tokenizer.encode(text + " In Elon Musk's style: ", return_tensors='pt')
    with torch.no_grad():
        output = model.generate(input_ids, max_length=100, num_return_sequences=1, temperature=0.7)
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Load your fine-tuned model and tokenizer
model_path = "./elon_gpt2"
try:
    model = GPT2LMHeadModel.from_pretrained(model_path)
    tokenizer = GPT2Tokenizer.from_pretrained(model_path)
    print("Successfully loaded the fine-tuned model and tokenizer.")
except Exception as e:
    print(f"Error loading fine-tuned model or tokenizer: {e}")
    print("Falling back to default GPT-2 model and tokenizer.")
    model = GPT2LMHeadModel.from_pretrained('gpt2')
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Get response from Groq API
prompt = "What is the NBA?"
groq_response = get_groq_response(prompt)

if groq_response is None or groq_response.strip() == "":
    print("Error: No response received from Groq API.")
    groq_response = "Space exploration is fascinating."  # Fallback response

# Rephrase the response
elon_style_response = rephrase_as_elon(groq_response, model, tokenizer)

print("Original response:", groq_response)
print("Elon-style response:", elon_style_response)

Successfully loaded the fine-tuned model and tokenizer.


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


The National Basketball Association (NBA) is the premier professional basketball league in the world. It is composed of 30 teams from the United States and Canada, and is widely considered to be one of the most competitive and popular professional sports leagues globally.

The NBA was founded in 1946 as the Basketball Association of America (BAA), and it merged with the National Basketball League (NBL) in 1949 to form the modern NBA. The league has since grown to become one of the most watched and followed sports leagues in the world.

Here are some key facts about the NBA:

1. Number of teams: 30 teams, divided into two conferences: Eastern Conference (15 teams) and Western Conference (15 teams).
2. Players: The league consists of professional basketball players from around the world, with many coming from the United States, Canada, and Africa.
3. Schedule: The regular season typically runs from October to April, with each team playing 82 games.
4. Playoffs: The top teams from each co