In [1]:
import tensorflow as tf
from transformers import AutoTokenizer, TFBertForSequenceClassification, GPT2Tokenizer, TFGPT2LMHeadModel

In [2]:
# Load the tokenizer and the TensorFlow BERT sequence-classification model used
# for sentiment analysis. Both are loaded from the same checkpoint name.
# NOTE(review): a later cell maps the predicted class indices onto five labels
# ("very negative" ... "very positive") — confirm against model.config.id2label.
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFBertForSequenceClassification.from_pretrained(model_name)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/953 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/872k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tf_model.h5:   0%|          | 0.00/670M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

All the layers of TFBertForSequenceClassification were initialized from the model checkpoint at nlptown/bert-base-multilingual-uncased-sentiment.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.


In [3]:
# Load the GPT-2 tokenizer and TensorFlow language-model head used by
# generate_story for story generation. Both come from the "gpt2" checkpoint.
gpt_model_name = "gpt2"
gpt_tokenizer = GPT2Tokenizer.from_pretrained(gpt_model_name)
gpt_model = TFGPT2LMHeadModel.from_pretrained(gpt_model_name)

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFGPT2LMHeadModel.

All the weights of TFGPT2LMHeadModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


In [4]:
# Read stories from "stories.txt", treating each non-empty line as one story.
# The encoding is explicit so decoding does not depend on the platform default,
# and blank lines / trailing newlines are dropped so they are not classified
# as if they were stories.
with open("stories.txt", "r", encoding="utf-8") as f:
    stories = [line.strip() for line in f if line.strip()]

In [5]:
# Number of stories tokenized and classified together in one batch
# (passed to create_batches by the sentiment-analysis loop).
batch_size = 8


In [6]:
# Function to create batches and pad them
def create_batches(stories, batch_size, text_tokenizer=None):
    """Yield tokenized, padded batches of stories.

    Args:
        stories: Sequence of story strings; it is sliced into consecutive
            chunks of at most ``batch_size`` items.
        batch_size: Maximum number of stories per batch; must be >= 1.
        text_tokenizer: Optional callable used to encode each chunk. When
            omitted, the notebook-level ``tokenizer`` is looked up at call
            time, so existing two-argument calls keep working.

    Yields:
        The tokenizer's return value for each chunk, called with
        ``padding=True, truncation=True, return_tensors="tf"``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. Because this is a
            generator, the error surfaces on the first iteration.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")
    # Passing the tokenizer explicitly protects callers from a later cell
    # rebinding the global `tokenizer` to a different model's tokenizer.
    encode = tokenizer if text_tokenizer is None else text_tokenizer
    for start in range(0, len(stories), batch_size):
        chunk = list(stories[start:start + batch_size])
        # Tokenize and pad each batch to the longest story in the chunk.
        yield encode(chunk, padding=True, truncation=True, return_tensors="tf")


In [7]:
# Classify the stories batch by batch, collecting one predicted class index per story.
sentiments = []
for batch_inputs in create_batches(stories, batch_size):
    # Forward pass, then take the highest-scoring class along the last axis.
    logits = model(**batch_inputs).logits
    sentiments.extend(tf.argmax(logits, axis=-1).numpy())


# Now `sentiments` contains the sentiment labels for each story

In [8]:
# Tally how many stories were assigned to each sentiment label.
sentiment_labels = ["very negative", "negative", "neutral", "positive", "very positive"]
sentiment_counts = dict.fromkeys(sentiment_labels, 0)

for label_index in sentiments:
    label_name = sentiment_labels[label_index]
    sentiment_counts[label_name] = sentiment_counts[label_name] + 1

# Print the tally, one label per line, in label order.
print("Sentiment Counts:")
for label in sentiment_labels:
    print(f"{label}: {sentiment_counts[label]}")

Sentiment Counts:
very negative: 18
negative: 30
neutral: 17
positive: 130
very positive: 68


In [9]:
# Pick the label with the largest count; ties resolve to the earliest label.
highest_count_sentiment = max(sentiment_counts.items(), key=lambda item: item[1])[0]


In [10]:
# Prompt fed to the genre story generator. The expected format is
# "<BOS> <Genre token> Optional starter text".
input_prompt = "<BOS> <adventure> Once upon a time"

In [12]:
def create_prompt_for_sentiment(sentiment):
    """Build the instruction prompt asking for a folktale with a given sentiment.

    Args:
        sentiment: Sentiment label to embed in the prompt (e.g. "positive").

    Returns:
        str: A single-line prompt that names the sentiment in quotes and asks
        for a first-person folktale reflecting it.
    """
    # The label is quoted without padding; the earlier template produced
    # "' positive'" because of a stray space inside the quotes.
    return (f"Generate a folktale with a '{sentiment}' sentiment. "
            "The story should clearly reflect this sentiment through its tone and theme, "
            "and should be written from a first person perspective.")


In [13]:
def generate_story(prompt, max_length=250, verbose=True):
    """Generate a story for ``prompt`` with the notebook-level GPT-2 model.

    Args:
        prompt: Text used to seed generation.
        max_length: Value forwarded to ``generate`` as ``max_length``.
            NOTE(review): presumably this bound includes the prompt tokens —
            confirm against the Transformers generation docs.
        verbose: When True (the default), print the prompt and the generated
            story, as the original debug prints did.

    Returns:
        str: The first generated sequence, decoded with special tokens removed.
    """
    if verbose:
        print(f"Prompt for GPT-2: {prompt}")  # Debug print
    inputs = gpt_tokenizer(prompt, return_tensors="tf", truncation=True, max_length=512)

    # Generate story. `do_sample=True` is required for `top_p` and
    # `temperature` to take effect; without it decoding is greedy and both
    # settings are ignored.
    outputs = gpt_model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=max_length,
        num_return_sequences=1,
        # GPT-2 has no dedicated pad token, so EOS is reused for padding.
        pad_token_id=gpt_tokenizer.eos_token_id,
        no_repeat_ngram_size=2,
        do_sample=True,
        top_p=0.95,
        temperature=0.7,
    )
    # Decode once and reuse the text for both the debug print and the return value.
    story_text = gpt_tokenizer.decode(outputs[0], skip_special_tokens=True)
    if verbose:
        print(f"Generated Story: {story_text}")  # Debug print
    return story_text


In [18]:
from transformers import pipeline, TextGenerationPipeline, GPT2LMHeadModel, AutoTokenizer

# Use dedicated names here: rebinding `model_name`, `model` and `tokenizer`
# would overwrite the sentiment-analysis objects that create_batches and the
# classification loop read at call time, breaking any re-run of those cells.
story_model_name = "aspis/gpt2-genre-story-generation"
story_model = GPT2LMHeadModel.from_pretrained(story_model_name)
story_tokenizer = AutoTokenizer.from_pretrained(story_model_name)
generator = TextGenerationPipeline(model=story_model, tokenizer=story_tokenizer)

# Input should be of format "<BOS> <Genre token> Optional starter text"
story = generator(input_prompt, max_length=200, do_sample=True,
                  repetition_penalty=1.5, temperature=1.2,
                  top_p=0.95, top_k=50)
print(story)

config.json:   0%|          | 0.00/907 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/510M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/236 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/220 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/250 [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


[{'generated_text': "<BOS> <adventure> Once upon a time, you had been the luckiest man in America. You didn't like dancing around or eating all of your favorite hot food unless absolutely necessary—which you definitely did if they were on his schedule at home and your child was watching to win her share just because she wanted out...but who else would want that? Your mother seemed determined enough when it came down which way you went and this trip together with Bill got everything straight up as long old girls started talking about who he might be again! You never spoke much to Jack before joining him for dinner on Saturday night (which made breakfast even worse than he expected). In general during our friendshipmaking visit after last game having an ice cold beer while I watched them play hockey there one afternoon back-to school we talked until we weren't ready but then went outside hoping nothing more happened since what everyone told me wasn't true; my heart stopped racing often s

In [19]:
# Display the text of the first generated sequence returned by the pipeline.
story[0]["generated_text"]

"<BOS> <adventure> Once upon a time, you had been the luckiest man in America. You didn't like dancing around or eating all of your favorite hot food unless absolutely necessary—which you definitely did if they were on his schedule at home and your child was watching to win her share just because she wanted out...but who else would want that? Your mother seemed determined enough when it came down which way you went and this trip together with Bill got everything straight up as long old girls started talking about who he might be again! You never spoke much to Jack before joining him for dinner on Saturday night (which made breakfast even worse than he expected). In general during our friendshipmaking visit after last game having an ice cold beer while I watched them play hockey there one afternoon back-to school we talked until we weren't ready but then went outside hoping nothing more happened since what everyone told me wasn't true; my heart stopped racing often so why hadn't things 