In [1]:
# prompt: Generative text model to generate coherent pharagraphs on specific topics

!pip install transformers

from transformers import pipeline

generator = pipeline('text-generation', model='gpt2')

def generate_paragraph(topic, max_length=100, num_return_sequences=1):
  """Generates a paragraph on a given topic using a generative text model.

  Args:
    topic: The topic to generate text about.
    max_length: The maximum length of the generated text.
    num_return_sequences: The number of paragraphs to generate.

  Returns:
    A list of generated paragraphs.
  """
  prompt = f"Write a coherent paragraph about {topic}."
  generated_text = generator(prompt, max_length=max_length, num_return_sequences=num_return_sequences)
  return [text['generated_text'] for text in generated_text]

# Example usage:
topic = "the importance of renewable energy"
paragraphs = generate_paragraph(topic, max_length=150, num_return_sequences=2)
for i, paragraph in enumerate(paragraphs):
  print(f"Paragraph {i+1}:\n{paragraph}\n")



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=150) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Paragraph 1:
Write a coherent paragraph about the importance of renewable energy.

Petition to save energy from fossil fuels

The petition was launched last year by the International Renewable Energy Agency (IRENA) and International Energy Agency (IEA) and to include the following:

The role of the US government in combating climate change

The role of the European Union in supporting the United Nations Climate Council

The role of the United Nations to protect the environment

The role of the European Union in combating climate change

A statement that will be considered by the UN Security Council in November.

The petition will provide a forum for those who care about climate change to bring their voice to the UN.

"Climate change is an urgent and urgent threat to the existence of the planet. It is our responsibility to reduce it and to make sure that it doesn't continue – and that we do so with bold, pragmatic leadership," said Liza Le, executive director of the petition.

The UN Cl

In [2]:
# prompt: Generative text model to generate coherent pharagraphs on specific topics using LSTM

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Install TensorFlow if not already installed
try:
    import tensorflow as tf
except ImportError:
    !pip install tensorflow
    import tensorflow as tf

# Function to prepare data for LSTM
def prepare_data_for_lstm(text, sequence_length=50):
    """Prepares text data for LSTM training."""
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts([text])
    total_words = len(tokenizer.word_index) + 1

    input_sequences = []
    for line in text.split('\n'):
        token_list = tokenizer.texts_to_sequences([line])[0]
        for i in range(1, len(token_list)):
            n_gram_sequence = token_list[:i+1]
            input_sequences.append(n_gram_sequence)

    # Pad sequences
    max_sequence_len = max([len(x) for x in input_sequences])
    padded_sequences = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')

    # Create predictors and labels
    xs, labels = padded_sequences[:,:-1], padded_sequences[:,-1]
    ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)

    return xs, ys, tokenizer, max_sequence_len, total_words

# Function to build and train LSTM model
def build_and_train_lstm_model(xs, ys, total_words, max_sequence_len, epochs=50, embedding_dim=100):
    """Builds and trains an LSTM model for text generation."""
    model = Sequential()
    model.add(Embedding(total_words, embedding_dim, input_length=max_sequence_len-1))
    model.add(LSTM(150, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(100))
    model.add(Dense(total_words, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    history = model.fit(xs, ys, epochs=epochs, verbose=1)

    return model, history

# Function to generate text using the trained LSTM model
def generate_text_lstm(seed_text, next_words, model, max_sequence_len, tokenizer):
    """Generates text using a trained LSTM model."""
    output_text = seed_text
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted = np.argmax(model.predict(token_list, verbose=0), axis=-1)
        output_word = ""
        for word, index in tokenizer.word_index.items():
            if index == predicted:
                output_word = word
                break
        seed_text += " " + output_word
        output_text += " " + output_word
    return output_text

# Example of how to use the LSTM for text generation
# Replace this with your actual training data (a large text corpus related to your topics)
# For a real-world scenario, you would train this model on a large dataset of text.
sample_text = """
Renewable energy sources are essential for a sustainable future.
Solar power is generated from sunlight and is becoming increasingly affordable.
Wind energy utilizes wind turbines to convert wind into electricity.
Hydropower uses the energy of moving water to produce power.
Geothermal energy comes from the heat within the Earth.
These renewable sources help reduce greenhouse gas emissions and combat climate change.
Investing in renewable energy is crucial for protecting our planet.
Many countries are setting ambitious goals for renewable energy adoption.
The transition to clean energy creates new jobs and economic opportunities.
Battery storage technology is improving, making renewable energy more reliable.
Smart grids help manage the flow of electricity from diverse sources.
Community solar projects allow individuals to benefit from solar power.
Offshore wind farms are growing in capacity and efficiency.
Renewable energy is a key component of a resilient energy system.
Educating the public about renewable energy is important for widespread adoption.
"""

# Prepare data
xs, ys, tokenizer, max_sequence_len, total_words = prepare_data_for_lstm(sample_text)

# Build and train the model (Note: This will take time for larger datasets and more epochs)
# For a real-world application, train on a much larger corpus and potentially more epochs.
lstm_model, lstm_history = build_and_train_lstm_model(xs, ys, total_words, max_sequence_len, epochs=50)

# Function to generate a paragraph using the trained LSTM model
def generate_paragraph_lstm(seed_topic_words, num_words_to_generate=100, model=lstm_model, max_sequence_len=max_sequence_len, tokenizer=tokenizer):
    """Generates a paragraph using a trained LSTM model given a seed topic/words."""
    generated_paragraph = generate_text_lstm(seed_topic_words, num_words_to_generate, model, max_sequence_len, tokenizer)
    return generated_paragraph

# Example usage:
topic_seed = "renewable energy"
generated_paragraph = generate_paragraph_lstm(topic_seed, num_words_to_generate=150)
print(f"Generated Paragraph on '{topic_seed}':\n{generated_paragraph}\n")

topic_seed_2 = "climate change solutions"
# You would need to train the model on text related to "climate change solutions" for better results on this topic.
# For this example, we'll use the model trained on the renewable energy text, which might not be ideal.
generated_paragraph_2 = generate_paragraph_lstm(topic_seed_2, num_words_to_generate=150)
print(f"Generated Paragraph on '{topic_seed_2}':\n{generated_paragraph_2}\n")




Epoch 1/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 120ms/step - accuracy: 0.0683 - loss: 4.5842
Epoch 2/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 108ms/step - accuracy: 0.0689 - loss: 4.5541
Epoch 3/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 101ms/step - accuracy: 0.0759 - loss: 4.4574
Epoch 4/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 143ms/step - accuracy: 0.0680 - loss: 4.3603
Epoch 5/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 168ms/step - accuracy: 0.1045 - loss: 4.2087
Epoch 6/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 43ms/step - accuracy: 0.0754 - loss: 4.2011
Epoch 7/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accuracy: 0.0845 - loss: 4.1442
Epoch 8/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accuracy: 0.1043 - loss: 4.1785
Epoch 9/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[