In [1]:
import pandas as pd

In [2]:
# Load the data
df = pd.read_csv('Data/train_essays/train_essays.csv')

# Display the data
df

Unnamed: 0,id,prompt_id,text,generated
0,0059830c,0,Cars. Cars have been around since they became ...,0
1,005db917,0,Transportation is a large necessity in most co...,0
2,008f63e3,0,"""America's love affair with it's vehicles seem...",0
3,00940276,0,How often do you ride in a car? Do you drive a...,0
4,00c39458,0,Cars are a wonderful thing. They are perhaps o...,0
...,...,...,...,...
1373,fe6ff9a5,1,There has been a fuss about the Elector Colleg...,0
1374,ff669174,0,Limiting car usage has many advantages. Such a...,0
1375,ffa247e0,0,There's a new trend that has been developing f...,0
1376,ffc237e9,0,As we all know cars are a big part of our soci...,0


In [3]:
# Load the data
df_prompts = pd.read_csv('Data/train_prompts.csv')

# Display the data
df_prompts

Unnamed: 0,prompt_id,prompt_name,instructions,source_text
0,0,Car-free cities,Write an explanatory essay to inform fellow ci...,"# In German Suburb, Life Goes On Without Cars ..."
1,1,Does the electoral college work?,Write a letter to your state senator in which ...,# What Is the Electoral College? by the Office...


In [4]:
import os
import gpt_2_simple as gpt2
import time

# Check if the model is already downloaded
if not os.path.isdir('models/355M'):
    # Maximum number of download attempts
    max_attempts = 5

    for attempt in range(max_attempts):
        try:
            gpt2.download_gpt2(model_name="355M")
            break  # If the download is successful, break the loop
        except Exception as e:
            print(f"Download attempt {attempt + 1} failed. Retrying in 5 seconds...")
            time.sleep(5)  # Wait for 5 seconds before the next attempt
else:
    print("Model already downloaded.")

In [None]:
import os

model_name = "355M"  # replace with the name of the model you downloaded
model_dir = os.path.join("models", model_name)

# Check if the model directory exists
if os.path.exists(model_dir):
    print(f"The directory {model_dir} exists.")
    
    # Check if the necessary files exist
    necessary_files = ["model.ckpt.data-00000-of-00001", "model.ckpt.index", "model.ckpt.meta", "checkpoint"]
    for file in necessary_files:
        if os.path.exists(os.path.join(model_dir, file)):
            print(f"The file {file} exists.")
        else:
            print(f"The file {file} does not exist.")
else:
    print(f"The directory {model_dir} does not exist.")

The directory models\355M exists.
The file model.ckpt.data-00000-of-00001 exists.
The file model.ckpt.index exists.
The file model.ckpt.meta exists.
The file checkpoint exists.


In [None]:
import tqdm

# Load the trained GPT-2 model
print("Starting TensorFlow session...")
sess = gpt2.start_tf_sess()
print("Loading GPT-2 model...")
gpt2.load_gpt2(sess, model_name='355M')
print("Model loaded successfully.")

# Function to generate text
def generate_text(prompt):
    print(f"Generating text for prompt: {prompt}")
    generated_text = gpt2.generate(sess, model_name='355M', prefix=prompt, return_as_list=True)[0]
    print(f"Generated text: {generated_text}")
    return generated_text

# Create a new DataFrame to store the synthetic data
df_synthetic = pd.DataFrame(columns=['id', 'prompt_id', 'text', 'generated'])
print("DataFrame for synthetic data created.")

# Loop over the unique prompts
for prompt_id, prompt in tqdm.tqdm(df_prompts[['prompt_id', 'instructions']].values, desc="Generating synthetic responses"):
    # Generate a synthetic response
    text = generate_text(prompt)
    
    # Append the generated response to the DataFrame
    new_data = pd.DataFrame({'id': ['synthetic'+str(prompt_id)], 'prompt_id': [prompt_id], 'text': [text], 'generated': [1]})
    df_synthetic = pd.concat([df_synthetic, new_data], ignore_index=True)

print("Synthetic responses generated and appended to DataFrame.")

# Save the synthetic data to a CSV file
df_synthetic.to_csv('Data/synthetic_data.csv', index=False)
print("Synthetic data saved to 'Data/synthetic_data.csv'.")

Starting TensorFlow session...
Loading GPT-2 model...
Loading pretrained model models\355M\model.ckpt
INFO:tensorflow:Restoring parameters from models\355M\model.ckpt
Model loaded successfully.
DataFrame for synthetic data created.


Generating synthetic responses:   0%|          | 0/2 [00:00<?, ?it/s]

Generating text for prompt: Write an explanatory essay to inform fellow citizens about the advantages of limiting car usage. Your essay must be based on ideas and information that can be found in the passage set. Manage your time carefully so that you can read the passages; plan your response; write your response; and revise and edit your response. Be sure to use evidence from multiple sources; and avoid overly relying on one source. Your response should be in the form of a multiparagraph essay. Write your essay in the space provided.


Generating synthetic responses:  50%|█████     | 1/2 [06:39<06:39, 399.57s/it]

Generated text: Write an explanatory essay to inform fellow citizens about the advantages of limiting car usage. Your essay must be based on ideas and information that can be found in the passage set. Manage your time carefully so that you can read the passages; plan your response; write your response; and revise and edit your response. Be sure to use evidence from multiple sources; and avoid overly relying on one source. Your response should be in the form of a multiparagraph essay. Write your essay in the space provided.

Statement of Purpose

Write a statement of purpose in case the State does not provide for basic transportation. The purpose of the Statement of Purpose should be to inform the public of the benefits of the proposed policy. The purpose of the Statement of Purpose should also be to inform the public of that the State will provide for basic transportation and to help them understand how to apply the policy to their daily lives. The purpose of the Statement of Purpose s

In [1]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  0


In [None]:
import torch

if torch.cuda.is_available():
    print("GPU is available")
    device = torch.device("cuda")
else:
    print("GPU not available, using the CPU instead.")
    device = torch.device("cpu")

# create a tensor on GPU
x = torch.tensor([1.0], device=device)

GPU not available, using the CPU instead.
