In [3]:
import transformers
from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline, set_seed
from gtts import gTTS
import os

# Running server
from flask import Flask, request, jsonify
from flask_cors import CORS
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

### Model for creating and refining prompts

In [5]:
# Initialize the T5 model and tokenizer
t5_model = T5ForConditionalGeneration.from_pretrained('t5-small')
t5_tokenizer = T5Tokenizer.from_pretrained('t5-small')

def refine_prompt(user_input):
    # Encode the input text
    input_ids = t5_tokenizer.encode(f"refine: {user_input}", return_tensors="pt")
    # Generate refined prompt
    outputs = t5_model.generate(input_ids)
    # Decode the output text
    refined_prompt = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return refined_prompt

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


': I like coding!'

In [7]:
refine_prompt("Does the model even change this text?")

'refine: Does the model even change this text?'

### Model for Generating Textual Output, Captions

In [12]:
def substring_until_last_full_stop(text):
    last_full_stop_index = text.rfind('.')
    if last_full_stop_index != -1:
        return text[:last_full_stop_index + 1]
    return text  # Return the original text if no full stop is found

In [14]:
app = Flask(__name__)
CORS(app)

# Initialize the GPT-2 pipeline
set_seed(42)
text_generator = pipeline('text-generation', model='gpt2')

@app.route('/')
def home():
    return "Flask server is running!"

@app.route('/generate', methods=['POST'])
def generate():
    data = request.get_json()
    user_input=''
    user_input = data.get('user_input', '')

    # Create a prompt for GPT-2
    prompt = f"Generate a detailed description for the following scenario: {user_input}"
    print("This is the scenario:")
    print(prompt)

    # Generate text
    generated_text = text_generator(str(prompt), max_length=150, num_return_sequences=1)[0]['generated_text']

    print("This is the generated text from the model:")
    print(generated_text)

    print("This is the text after substringing it:")
    generated_text = substring_until_last_full_stop(generated_text)

    # Here, you can integrate video generation logic using generated_text
    # For now, we return the generated text as a placeholder
    return jsonify({'generated_text': generated_text})

if __name__ == '__main__':
    app.run(port=5000)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [08/Jun/2024 03:47:45] "OPTIONS /generate HTTP/1.1" 200 -
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


This is the scenario:
Generate a detailed description for the following scenario: Horses on a rack


127.0.0.1 - - [08/Jun/2024 03:48:01] "POST /generate HTTP/1.1" 200 -


This is the generated text from the model:
Generate a detailed description for the following scenario: Horses on a rack:

(Note that the horses are also not in any way being chased in the scene, i.e. no horses should be attacked). Also note:

The horses are not allowed to fly above the wall in any way. The same rule as in original scenario: no vehicles.

The horses may then run over each other up to the tower with both men and women.

The horses will not enter any of the following areas:


This can be avoided by attacking the entrances of the structures, or they are being blocked.

There may be no horses in any order, but with at least one (maybe two) to two (
This is the text after substringing it:
