In [1]:

import os
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config, Trainer, TrainingArguments
from transformers import LineByLineTextDataset, DataCollatorForLanguageModeling
import numpy as np
from sklearn.metrics import accuracy_score


torch.cuda.empty_cache()
print(torch.cuda.memory_summary(device=None, abbreviated=False))



# Make sure CUDA operations are set for debugging. Remove in production.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Define paths
train_data_file = "./train.csv"
output_dir = "./model_output"

# Check for the existence of the training data file
if not os.path.isfile(train_data_file):
    raise ValueError(f"Training data file not found: {train_data_file}")

# Set up the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer and model
model_name = "gpt2-medium"  # You can also use "gpt2-medium", "gpt2-large", or "gpt2-xl"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Set padding token to EOS token
config = GPT2Config.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name, config=config)

# Resize model embeddings to match the new tokenizer size
model.resize_token_embeddings(len(tokenizer))

# Move model to the device
model.to(device)

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |      0 B   |      0 B   |      0 B   |      0 B   |
|       from large pool |      0 B   |      0 B   |      0 B   |      0 B   |
|       from small pool |      0 B   |      0 B   |      0 B   |      0 B   |
|---------------------------------------------------------------------------|
| Active memory         |      0 B   |      0 B   |      0 B   |      0 B   |
|       from large pool |      0 B   |      0 B   |      0 B   |      0 B   |
|       from small pool |      0 B   |      0 B   |      0 B   |      0 B   |
|---------------------------------------------------------------

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 1024)
    (wpe): Embedding(1024, 1024)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-23): 24 x GPT2Block(
        (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1024, out_features=50257, bias=False)
)

In [None]:
eval_data_file = "./eval.csv"
dataset = LineByLineTextDataset(
    tokenizer=tokenizer,
    file_path=train_data_file,
    block_size=128  # Adjusted block size, ensure it's less than the model's max input length
)

eval_dataset = LineByLineTextDataset(
    tokenizer=tokenizer,
    file_path=eval_data_file,
    block_size=128  # Make sure this is smaller than the model's max input length
)

# Prepare data collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

# Define training arguments with additional optimizations
training_args = TrainingArguments(
    output_dir=output_dir,
    overwrite_output_dir=True,
    num_train_epochs=5,  # Reduced for a small dataset
    per_device_train_batch_size=2,  # Keep the reduced batch size for memory management
    save_strategy="epoch",  # Save at the end of each epoch
    logging_steps=1,  # Log more frequently due to fewer steps per epoch
    gradient_accumulation_steps=1,  # Adjust as needed
    fp16=True,  # Enable mixed precision if your GPU supports it
    evaluation_strategy="epoch"
)


# Initialize Trainer with compute_metrics function for accuracy
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {'accuracy': accuracy_score(labels, predictions)}

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=dataset,
    eval_dataset=eval_dataset,
    #compute_metrics=compute_metrics  # Compute accuracy per epoch
)

# Train and save the model
trainer.train()
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)




Epoch,Training Loss,Validation Loss


In [31]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load pre-trained model and tokenizer
 # Replace with your actual model directory
model = GPT2LMHeadModel.from_pretrained(output_dir)
tokenizer = GPT2Tokenizer.from_pretrained(output_dir)
model.to(device)

def generate_text(prompt, max_length=50, temperature=0.5, top_k=50, top_p=0.5, num_beams=10, early_stopping=True, no_repeat_ngram_size=2, do_sample=True, num_return_sequences=1):
    # Encode the prompt
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)

    # Generate text
    generated_outputs = model.generate(
        input_ids,
        pad_token_id=tokenizer.eos_token_id,
        max_length=max_length,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        num_beams=num_beams,
        early_stopping=early_stopping,
        no_repeat_ngram_size=no_repeat_ngram_size,
        do_sample=do_sample,
        num_return_sequences=num_return_sequences
    )

    # Decode and print generated text
    for i, output in enumerate(generated_outputs):
        generated_text = tokenizer.decode(output, skip_special_tokens=True)
        print(f"Generated text {i+1}: {generated_text}")

# Text Generation Loop
while True:
    scan = input("Enter prompt (or 'Exit' to quit): ").strip()
    if scan.lower() == "exit":
        break

    # Generate and print text
    generate_text(scan)


Generated text 1: Tell me something about yourself?,"I am a software engineer by training. I am passionate about technology and have a keen interest in web technologies. My passion for technology led me to pursue a Master's in Computer Science from Northeastern University."What is


In [39]:
while True:
    scan = input("Enter prompt (or Exit' to quit): ").strip()
    if scan.lower() == "exit":
        break

    # Generate and print text
    generate_text(scan)

Generated text 1: Tell me sokmething about Priyesh?","Priyashree is a software engineer by training. He is passionate about technology and software engineering, and is dedicated to improving the quality of life for others."What is your approach to continuous


In [8]:
import requests

API_URL = "https://huggingface.co/priyesh2023/GPT"
headers = {"Authorization": "Bearer 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"}

payload = {
    "inputs": "Who is priyesh?",
    "parameters": {
        "max_length": 100,
        "temperature": 0.6,
        "top_k": 40,
        "top_p": 0.5,
        "num_beams": 5,
        "early_stopping": True,
        "no_repeat_ngram_size": 2,
        "do_sample": True,
        "num_return_sequences": 1
    }
}

response = requests.post(API_URL, headers=headers, json=payload)
result = response

print(result)


<Response [404]>


In [32]:
import requests

API_URL = "https://api-inference.huggingface.co/models/priyesh2023/GPT"
headers = {"Authorization": "Bearer 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"}

def query(payload):
	response = requests.post(API_URL, headers=headers, json=payload)
	return response.json()

output = query({
	"inputs": "What is Priyesh's GPA?",
     "parameters": {
        "max_length": 75,
        "temperature": 0.4,
        "top_k": 30,
        "top_p": 0.4,
        "num_beams": 5,
        "early_stopping": True,
        "no_repeat_ngram_size": 2,
        "do_sample": True,
        "num_return_sequences": 1
    }
})
print(output)

[{'generated_text': 'What is Priyesh\'s GPA? Priyaesh has a GPA of 3.9 at Northeastern University."Priyash is pursuing a Master of Science in Computer Science at Northwestern University, where he is working on projects related to machine learning and data analysis." Priyoesh is passionate about technology, and is motivated by the challenges of using technology to solve problems."What'}]
