# Generate Synthetic Taylor Swift Lyrics with Gretel GPT

* This notebook demonstrates how to use Gretel GPT to generate synthetic Taylor Swift lyrics.
* To run this notebook, you will need an API key from the [Gretel Console](https://console.gretel.ai/).

## Getting Started

In [None]:
%%capture
!pip install -U gretel-client

In [None]:
import pandas as pd

from gretel_client import configure_session
from gretel_client.helpers import poll
from gretel_client.projects import create_or_get_unique_project, get_project

In [None]:
# Log into Gretel.
# NOTE(review): api_key="prompt" presumably makes the client ask for the key
# interactively instead of storing it in the notebook -- confirm against the
# gretel-client documentation.
configure_session(
    api_key="prompt",
    cache="yes",
    endpoint="https://api.gretel.cloud",
    validate=True,
    clear=True,
)

# Do not truncate long text columns when displaying DataFrames.
# The fully qualified option name is used because pandas warns that shorthand
# names can break if similarly named options are added in a later release.
pd.set_option('display.max_colwidth', None)

## Load and preview training data

In [None]:
# Specify a dataset to train on
DATASET_PATH = 'https://gretel-public-website.s3.us-west-2.amazonaws.com/datasets/taylor_swift_lyrics/TaylorSwiftLyrics.csv'
df = pd.read_csv(DATASET_PATH, usecols=['text'])

# Print human-friendly preview of training data.
# Series.str.len() counts characters (not bytes) and skips missing values, so
# the total is labelled "characters"; int() keeps the printed value integral
# even if the sum is promoted to float by NaN rows.
print(f"Training dataset contains {len(df)} rows.")
print(f"Total training set length {int(df['text'].str.len().sum())} characters.")
print()
print("Example row:")
# .iloc[0] selects the first row by position, independent of index labels.
print(df['text'].iloc[0])

## Configure Model

In this example, we will fine-tune Gretel GPT, starting from the pretrained `gretelai/mpt-7b` model, to generate synthetic Taylor Swift lyrics.

In [None]:
from gretel_client.projects.models import read_model_config


def calc_epochs(num_rows: int, minutes: float = 30, rows_per_minute: float = 102.0) -> float:
    """Estimate how many training epochs fit within a time budget.

    The estimate is the number of rows that can be processed in ``minutes``
    (``rows_per_minute * minutes``) divided by the size of the training set.

    Args:
        num_rows: Number of rows in the training dataset; must be positive.
        minutes: Training time budget in minutes.
        rows_per_minute: Assumed training throughput. The default of 102.0 is
            presumably an empirical figure for this model -- TODO confirm.

    Returns:
        The (possibly fractional) number of epochs that fit in the budget.

    Raises:
        ValueError: If ``num_rows`` is not positive, since no meaningful
            epoch count can be derived from an empty dataset.
    """
    if num_rows <= 0:
        raise ValueError(f"num_rows must be positive, got {num_rows}")
    return (rows_per_minute * minutes) / num_rows
                      

# Load the "synthetics/natural-language" model config, then override the
# gpt_x settings of its first model entry in a single step.
config = read_model_config("synthetics/natural-language")
config['models'][0]['gpt_x'].update(
    {
        'pretrained_model': "gretelai/mpt-7b",
        # Epoch count is derived from the size of the training frame.
        'epochs': calc_epochs(len(df)),
        'generate': {'num_records': 1},
    }
)
# Display the final configuration as the cell output.
config

## Train the synthetic model

In [None]:
%%time

# Fetch the Gretel project with this name, creating it if necessary
PROJECT = 'taylor-swift-lyrics'
project = create_or_get_unique_project(name=PROJECT)

# Build the model object from the config and the training frame,
# give it a readable name, and submit it to run in Gretel Cloud
model = project.create_model_obj(
    model_config=config,
    data_source=df,
)
model.name = f"{PROJECT}-mpt-7b"
model.submit_cloud()

# NOTE(review): poll() presumably blocks until the submitted job finishes -- confirm
poll(model)

## Generate Lyrics

In [None]:
%%time

# Settings passed to the record handler for this generation request
params = {
    "maximum_text_length": 1000,
    "top_p": 0.95,
    "num_records": 1,
}

# Create a record handler on the trained model and submit it to Gretel Cloud
record_handler = model.create_record_handler_obj(params=params)
record_handler.submit_cloud()
# NOTE(review): poll() presumably blocks until the submitted job finishes -- confirm
poll(record_handler)

In [None]:
# Read the gzip-compressed "data" artifact of the generation job and
# print the text of its first record
gpt_output = pd.read_csv(
    record_handler.get_artifact_link("data"),
    compression='gzip',
)
print(gpt_output.loc[0, 'text'])