<a href="https://colab.research.google.com/github/fede-m/Illuscoder/blob/main/IllusCoder_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Things to install

# 1) Install pyngrok to expose the Flask application through a public ngrok URL
!pip install pyngrok

# 2) Summarize text
!pip install sentencepiece

# 3) Named Entity Recognition
!python -m spacy download en_core_web_md

# 4) Generate images
!pip install diffusers==0.8.0 transformers ftfy
!pip install accelerate

# 5) Modify images
!pip install Pillow

In [None]:
# Imports
import json

# Text summarizer
from transformers import pipeline
from nltk.tokenize import sent_tokenize
import nltk
# sent_tokenize needs the Punkt sentence-splitting data. Newer NLTK releases load it
# from the 'punkt_tab' resource while older ones use 'punkt', so fetch both to keep
# the notebook working whichever NLTK version the runtime ships.
nltk.download('punkt')
nltk.download('punkt_tab')
import random

# Named Entity Recognition
import spacy

# Stable Diffusion Models
from diffusers import DiffusionPipeline
from diffusers import DPMSolverMultistepScheduler
from transformers import pipeline
import torch

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU
device_name = torch.device("cuda" if torch.cuda.is_available() else "cpu")


#Image modification
from PIL import Image, ImageFont, ImageDraw
import textwrap

# Mount Google Drive to have access to the HTML, CSS and JavaScript files
from google.colab import drive
drive.mount('/content/drive')



In [None]:
# Text summarizer (BART)
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Named Entity Recognition
nlp = spacy.load('en_core_web_md')


# Image Generation
# (alternative checkpoint that can be swapped in: "eimiss/EimisAnimeDiffusion_1.0v")
pipe = DiffusionPipeline.from_pretrained("naclbit/trinart_characters_19.2m_stable_diffusion_v1")

# Scheduler for Diffusion Model ---> fastest by now is DPMSolverMultistepScheduler (Stable_Diffusions_2)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

# Run the diffusion model on the detected device (GPU when available); without this
# the pipeline stays on the CPU even though a GPU was found.
pipe = pipe.to(device_name)

# Generator --> a fixed seed keeps the style of the outputs coherent between runs.
# It is created on the same device as the pipeline because the initial noise is
# sampled there and torch rejects a generator that lives on another device.
generator = torch.Generator(device=device_name).manual_seed(1)



In [None]:
# Map each supported genre to a colour palette so all images of a story share one look
def set_mood(genre):
  """Return the colour palette (comma-separated colour names) for `genre`.

  Supported genres are "fantasy", "adventure", "romantic" and "horror";
  any other value raises KeyError.
  """
  genre_palettes = dict(
    fantasy="light blue, grey and white",
    adventure="brown and orange",
    romantic="red, rose, bordeaux",
    horror="black, red, gray, white",
  )
  return genre_palettes[genre]

In [None]:
# Summarize the text of the chapter to get only the most important parts of the story

def summarize_text(chapter, summarizer):

  '''
  Condense a chapter into at most three sentences (one per image to generate).

  param: chapter --> string with the current chapter text
  param: summarizer --> summarization model (BART); called as summarizer(text, ...)
                        and expected to return [{'summary_text': <string>}]

  output: list of max 3 summarized sentences, kept in the order in which
          they appear in the summary

  '''

  # Clean text. str.replace returns a new string, so the result has to be kept.
  # Newlines become spaces so that words on adjacent lines are not glued together.
  chapter = chapter.replace("\n", " ")

  # Generate summary of the chapter. truncation=True cuts chapters that are longer
  # than the model's maximum input instead of failing on them.
  summary = summarizer(chapter, max_length=130, min_length=30, do_sample=False, truncation=True)
  summary_text = summary[0]['summary_text']

  # Pick only 3 sentences (to generate only 3 images for every chapter).
  # Indices are sampled and then sorted so the chosen sentences keep the story order.
  sentences = sent_tokenize(summary_text)
  if len(sentences) > 3:
    kept = sorted(random.sample(range(len(sentences)), 3))
    sentences = [sentences[i] for i in kept]

  return sentences


In [None]:
# Find people involved in the story, places and actions
def named_entity_recognition(sentence, nlp):
  '''
  Extract the subjects, the locations and the main action of a sentence.

  param: sentence --> string with one summarized sentence
  param: nlp --> spaCy pipeline; nlp(sentence) must return a doc exposing `.ents`
                 (entities with `.label_`) and tokens with `.dep_` and `.lemma_`

  output: tuple (per_entities, loc_entities, actions)
    per_entities --> one entry per subject ("nsubj") token: the PERSON entity
                     containing the token if there is one, the token itself otherwise
    loc_entities --> entities labelled LOC, GPE or FAC
    actions --> list of 4 slots, "" when unused:
                [negation, root verb, adjectival complement of "be",
                 direct object / attribute of "be"]
  '''

  doc = nlp(sentence)
  per_entities = []
  loc_entities = []
  actions = ["", "", "", ""]      #(negation, root verb, adjectival complement, direct object / attribute)
  persons = []

  print(sentence)

  # extract characters and locations
  for ent in doc.ents:
    if ent.label_ == "PERSON":
      persons.append(ent)
    # Membership test: comparing against ("LOC" or "GPE" or "FAC") would only match "LOC"
    if ent.label_ in ("LOC", "GPE", "FAC"):
      loc_entities.append(ent)
  print(persons)

  # extract subjects and actions
  for token in doc:
    dep = token.dep_
    if dep == "nsubj":
      # Use the full PERSON entity when the subject token is part of one
      owners = [p for p in persons if token in p]
      per_entities.extend(owners if owners else [token])
    elif dep == "neg":
      actions[0] = token
    elif dep == "ROOT":
      actions[1] = token
    elif dep == "dobj":
      actions[3] = token
    elif dep in ("acomp", "attr"):
      # Complements only count when the root verb is "be". The root slot still holds
      # the "" placeholder until a ROOT token has been seen, hence the getattr guard.
      if getattr(actions[1], "lemma_", None) == "be":
        actions[2 if dep == "acomp" else 3] = token

  return per_entities, loc_entities, actions


In [None]:
# Prepare the prompts to feed the model with
def _describe_character(char):
  """Return a character's look (gender, hair colour, eye colour) as a prompt fragment."""
  return char["gender"] + ", " + char["hairColor"] + ", " + char["eyesColor"] + ", "


def _mentions_character(text, name):
  """Tell whether the words of `text` appear, whole and in order, inside `name`."""
  words = text.split()
  name_words = name.split()
  if not words:
    return False
  span = len(words)
  return any(name_words[i:i + span] == words for i in range(len(name_words) - span + 1))


def _prompt_fragment(item, characters, separator):
  """Return the description of every character `item` names, or `item` itself plus `separator`."""
  text = str(item)
  named = [char for char in characters if _mentions_character(text, char["name"])]
  if named:
    return "".join(_describe_character(char) for char in named)
  return text + separator


def prepare_prompts(sentences, nlp, characters, time, genre):
  """
  Build one image-generation prompt per sentence.

  param: sentences --> list of summarized sentences
  param: nlp --> spaCy pipeline forwarded to named_entity_recognition
  param: characters --> list of dicts with the keys "name", "gender",
                        "hairColor" and "eyesColor"
  param: time --> string appended to every prompt
  param: genre --> genre name used to look up the colour palette (see set_mood)

  output: list of prompt strings, one per sentence, in the same order

  Subjects and action words that name a known character are replaced by that
  character's description. Names are compared word by word, so a pronoun such
  as "he" is not mistaken for a character called "Catherine".
  """
  prompts = []
  # Set the colors for the mood
  palette = set_mood(genre)
  for sent in sentences:
    curr_prompt = ""
    char_entities, loc_entities, actions = named_entity_recognition(sent, nlp)

    # Add characters to prompt
    for character in char_entities:
      curr_prompt += _prompt_fragment(character, characters, ", ")

    # Add actions to prompt (empty slots contribute a single space)
    for action in actions:
      curr_prompt += _prompt_fragment(action, characters, " ")

    # Add places to prompt
    for place in loc_entities:
      curr_prompt += ", " + str(place)
    curr_prompt += ", " + time

    # Add palette
    curr_prompt += ", " + palette

    prompts.append(curr_prompt)
  return prompts


In [None]:
def generate_images(prompts, max_retries=5):
  """
  Generate one image per prompt with the module-level diffusion pipeline.

  param: prompts --> list of prompt strings
  param: max_retries --> how many times a prompt is re-generated when the result
                         is completely black (NSFW content) before giving up

  output: list of PIL images, one per prompt, in the same order

  Uses the module-level `pipe` and `generator`. The generator keeps its state
  between calls, so every retry starts from different noise.
  """
  n_prompt = "lowres, NSFW, bad anatomy, bad hands, text, error, missing fingers, cropped, jpeg artifacts, worst quality, low quality, signature, watermark, blurry, deformed, extra ears, deformed, disfigured, mutation, censored"

  def run_pipeline(prompt):
    return pipe(prompt=prompt, negative_prompt=n_prompt, generator=generator, num_inference_steps=7).images[0]

  images = []
  # Generate an image for each prompt
  for p in prompts:
    img = run_pipeline(p)

    # getbbox() is None for a completely black image, which is how NSFW results show up.
    # The number of retries is bounded so a prompt that is always flagged cannot
    # keep the request waiting forever.
    retries = 0
    while img.getbbox() is None and retries < max_retries:
      print("NSFW content")
      img = run_pipeline(p)
      retries += 1
    if img.getbbox() is None:
      print("Giving up on prompt after", max_retries, "retries, keeping the black image")

    img.show()
    images.append(img)
  return images


In [None]:
def add_text(img, sent):
  """Adds a sentence on a white space at the bottom of the picture.

  param: img --> PIL image (left unchanged)
  param: sent --> caption text

  output: new RGB image made of `img` with a white strip below it that
          contains the wrapped caption in black
  """
  import math

  draw = ImageDraw.Draw(img)

  # Wrap the caption to the picture width, assuming roughly 6 px per character.
  # textwrap rejects a width of 0, so very narrow pictures get a width of 1.
  sent = textwrap.fill(sent, max(1, img.size[0] // 6))

  # Height of the caption. ImageDraw.textsize was removed in Pillow 10;
  # multiline_textbbox is its replacement and is used whenever it exists.
  if hasattr(draw, "multiline_textbbox"):
    # Bottom edge of the box measured from the drawing origin, so the text
    # fits completely when it is drawn at the top of the white strip.
    text_h = int(math.ceil(draw.multiline_textbbox((0, 0), sent)[3]))
  else:
    text_h = draw.textsize(sent)[1]

  # White canvas that is taller than the picture by the caption height
  width, height = img.size
  new_image = Image.new('RGB', (width, height + text_h), (255, 255, 255))
  new_image.paste(img)

  # Write the caption into the white strip, right below the picture
  ImageDraw.Draw(new_image).text((0, height), sent, (0, 0, 0))

  return new_image

def make_gif(images, curr_chapter, output_dir="/content/drive/MyDrive/app/static/images"):
  """Takes a list of images and saves them as a looping GIF.

  param: images --> non-empty list of PIL images; all frames are resized to the
                    size of the first one
  param: curr_chapter --> chapter identifier used in the file name (converted with str())
  param: output_dir --> folder in which the file img-<curr_chapter>.gif is written

  raises: ValueError when `images` is empty
  """
  if not images:
    raise ValueError("make_gif needs at least one image")

  target_size = images[0].size
  frames = [image.resize(target_size) for image in images]
  gif_path = output_dir + "/img-" + str(curr_chapter) + ".gif"

  # Only the frames after the first one are appended; passing the whole list
  # would put the first frame into the GIF twice.
  frames[0].save(gif_path, format="GIF", append_images=frames[1:], save_all=True, duration=5000, loop=0)

  return






In [None]:
def handler(story_prompt):
  """Turn one chapter into an illustrated GIF and return the GIF file name.

  `story_prompt` is a dict from which the keys "chapter", "characters", "time",
  "genre" and "currChapter" are read. The steps are: summarize the chapter,
  build one prompt per summary sentence, generate one image per prompt, write
  each sentence under its image and save all frames as a GIF via make_gif.
  """
  # 1) summary sentences of the chapter
  sentences = summarize_text(story_prompt["chapter"], summarizer)

  # 2) one prompt per sentence (character descriptions, places, time, palette)
  prompts = prepare_prompts(
    sentences,
    nlp,
    story_prompt["characters"],
    story_prompt["time"],
    story_prompt["genre"],
  )

  # 3) + 4) one image per prompt, each captioned with its sentence
  captioned = [
    add_text(picture, caption)
    for picture, caption in zip(generate_images(prompts), sentences)
  ]

  # 5) store the frames as a GIF in the images folder on Google Drive
  make_gif(captioned, story_prompt["currChapter"])

  # File name of the stored GIF (the caller builds the path used by the page)
  return "img-" + story_prompt["currChapter"] + ".gif"




In [None]:
from flask import Flask, render_template, request, jsonify, redirect
from pyngrok import ngrok
import os
import io

# Relative folder (static/images) under which the generated GIFs are referenced
IMAGES_FOLDER = os.path.join('static', 'images')

# Flask app whose templates and static files (CSS, JavaScript, images) are read from Google Drive
app = Flask(__name__, template_folder="/content/drive/MyDrive/app/templates", static_folder="/content/drive/MyDrive/app/static")
app.config['UPLOAD_FOLDER'] = IMAGES_FOLDER
# Authenticate ngrok with the token read from the NGROK_TOKEN environment variable
# (a KeyError is raised here when the variable is not set)
ngrok_token = os.environ["NGROK_TOKEN"]
ngrok.set_auth_token(ngrok_token)

# Open an ngrok tunnel to local port 5000 and print the public URL that external users can open
public_url = ngrok.connect(5000).public_url
print(public_url)

# Landing page: render the HTML template of the app
@app.route('/')
def index():
    """Serve the index.html template."""
    page = render_template('index.html')
    return page

# Route to generate the image
@app.route('/generate_image', methods=['GET', 'POST'])
def generate_image():
  """Generate the GIF for the chapter described in the JSON body and render the page with it.

  The body must be a JSON object; it is passed to handler(), which reads the keys
  "chapter", "characters", "time", "genre" and "currChapter". Responds with a
  JSON error and status 400 when the body is not an object or when "currChapter"
  is missing or contains a path separator; otherwise renders index.html with
  `gif_name` set to the relative path of the generated GIF.
  """
  unpacked_json = request.get_json(force=True) # unpack the request in a python dictionary

  if not isinstance(unpacked_json, dict):
    return jsonify(success=False, error="expected a JSON object"), 400

  # currChapter becomes part of the GIF file name, so it has to be a plain name.
  # It is normalised to a string so that numeric chapter ids work as well.
  chapter_id = str(unpacked_json.get("currChapter", ""))
  if not chapter_id or "/" in chapter_id or "\\" in chapter_id:
    return jsonify(success=False, error="invalid or missing currChapter"), 400
  unpacked_json["currChapter"] = chapter_id

  img_name = handler(unpacked_json) # Start the process; returns the GIF file name
  image_path = os.path.join(app.config['UPLOAD_FOLDER'], img_name)

  return render_template('index.html', gif_name=image_path)


# Start the Flask development server; this call blocks the cell while the app is being served
app.run()