# GPT for surname creation

https://s3-us-west-2.amazonaws.com/openai-assets/research-covers/language-unsupervised/language_understanding_paper.pdf

In [2]:
import copy
import random
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

import gradio as gr

from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.loggers.wandb import WandbLogger
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint

import wandb

# Model

In [3]:
class NewGELU(nn.Module):
    """
    Gaussian Error Linear Units (GELU) paper: https://arxiv.org/abs/1606.08415
    """
    def forward(self, x):
        return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))

class GptAttention(nn.Module):
    """
    For this attention module k = v = q are all the same.
    It's for encoder/decoder only transfomers.
    """
    def __init__(self, config):
        super(GptAttention, self).__init__()
        self.config = config

        assert self.config["d_model"] % self.config["heads"] == 0
        self.heads = self.config["heads"]

        self.w_attn = nn.Linear(self.config["d_model"], 3*self.config["d_model"])
        self.head = nn.Linear(self.config["d_model"], self.config["d_model"])

        self.attn_dropout = nn.Dropout(config["attn_pdrop"])
        self.resid_dropout = nn.Dropout(config["resid_pdrop"])

        # causal mask to ensure that attention is only applied to the left in the input sequence
        self.register_buffer(
            "bias", 
            torch.tril(
                torch.ones(
                    self.config["window"], 
                    self.config["window"])
                ).view(1, 1, self.config["window"], self.config["window"])
        )
    
    def forward(self, x):
        B, window, embs = x.shape

        q, v, k = self.w_attn(x).split(self.config["d_model"], dim=2)

        # (B, heads, window, embs)
        q = q.view(
            B, 
            window, 
            self.config["heads"], 
            embs // self.config["heads"]
        ).transpose(1, 2)
        k = k.view(
            B, 
            window, 
            self.config["heads"], 
            embs // self.config["heads"]
        ).transpose(1, 2)
        v = v.view(
            B, 
            window, 
            self.config["heads"], 
            embs // self.config["heads"]
        ).transpose(1, 2)
        
        # Self-attend: (B, heads, window, embs) x (B, heads, embs, window) -> (B, heads, window, window)
        scores = q @ k.transpose(-2, -1) / math.sqrt(k.size(-1))
        mask = scores.masked_fill(self.bias[:,:,:window,:window] == 0, float('-inf'))
        probs = F.softmax(mask, dim=-1)
        attn = self.attn_dropout(probs)
        attn = probs @ v
        attn = attn.transpose(1, 2).contiguous().view(B, window, embs)

        return self.resid_dropout(self.head(attn))

class FeedForward(nn.Module):
    def __init__(self, config):
        super(FeedForward, self).__init__()
        self.l1 = nn.Linear(config["d_model"], 4*config["d_model"])
        self.l2 = nn.Linear(4*config["d_model"], config["d_model"])
        self.dropout = nn.Dropout(config["resid_pdrop"])

    def forward(self, x):
        x = NewGELU()(self.l1(x))
        return self.dropout(self.l2(x))

class Block(nn.Module):
    def __init__(self, config):
        super(Block, self).__init__()
        self.attn = GptAttention(config)
        self.norm1 = nn.LayerNorm(config["d_model"])
        self.ff = FeedForward(config)
        self.norm2 = nn.LayerNorm(config["d_model"])

    def forward(self, x):
        x = self.norm1(x + self.attn(x))
        x = self.norm2(x + self.ff(x))
        return x

# gpt_attn = GptAttention(heads, d_model)
# out = gpt_attn(enc_prompt)
# print(out.shape)

# b = Block(d_model, 3)
# out = b(emb_prompts)
# print(out.shape)

In [4]:
class GPT(nn.Module):
    def __init__(self, config):
        super(GPT, self).__init__()
        self.config = config

        self.vocab_emb = nn.Embedding(self.config["vocab"], self.config["d_model"])
        self.pos_emb = nn.Embedding(self.config["window"], self.config["d_model"])
        self.emb_dropout = nn.Dropout(config["embd_pdrop"])

        self.blocks = nn.ModuleList([Block(self.config) for _ in range(self.config["blocks"])])
        self.head_layer_norm = nn.LayerNorm(config["d_model"])
        self.head = nn.Linear(self.config["d_model"], self.config["vocab"])

        # init all weights, and apply a special scaled init to the residual projections, per GPT-2 paper
        self.apply(self._init_weights)
        for pn, p in self.named_parameters():
            if pn.endswith('c_proj.weight'):
                torch.nn.init.normal_(p, mean=0.0, std=0.02/math.sqrt(2 * config["n_layer"]))

        # report number of parameters (note we don't count the decoder parameters in lm_head)
        n_params = sum(p.numel() for p in self.parameters())
        print("number of parameters: %.2fM" % (n_params/1e6,))

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
        elif isinstance(module, nn.LayerNorm):
            torch.nn.init.zeros_(module.bias)
            torch.nn.init.ones_(module.weight)

    def forward(self, x):
        vocab_emb = self.vocab_emb(x)
        pos_emb = self.pos_emb(torch.arange(0, x.shape[1], dtype=torch.long, device=x.device))

        x = self.emb_dropout(vocab_emb + pos_emb)

        for b in self.blocks:
            x = b(x)

        x = self.head_layer_norm(x)
        x = self.head(x)

        return x

    def configure_opt(self):
        p_decay = set()
        p_no_decay = set()
        whitelist_weight_modules = (torch.nn.Linear, )
        blacklist_weight_modules = (torch.nn.LayerNorm, torch.nn.Embedding)
        for mn, m in self.named_modules():
            for pn, p in m.named_parameters():
                fpn = '%s.%s' % (mn, pn) if mn else pn # full param name
                # random note: because named_modules and named_parameters are recursive
                # we will see the same tensors p many many times. but doing it this way
                # allows us to know which parent module any tensor p belongs to...
                if pn.endswith('bias'):
                    # all biases will not be decayed
                    p_no_decay.add(fpn)
                elif pn.endswith('weight') and isinstance(m, whitelist_weight_modules):
                    # weights of whitelist modules will be weight decayed
                    p_decay.add(fpn)
                elif pn.endswith('weight') and isinstance(m, blacklist_weight_modules):
                    # weights of blacklist modules will NOT be weight decayed
                    p_no_decay.add(fpn)

        # validate that we considered every parameter
        param_dict = {pn: p for pn, p in self.named_parameters()}
        inter_params = p_decay & p_no_decay
        union_params = p_decay | p_no_decay
        assert len(inter_params) == 0, "parameters %s made it into both decay/no_decay sets!" % (str(inter_params), )
        assert len(param_dict.keys() - union_params) == 0, "parameters %s were not separated into either decay/no_decay set!" \
                                                    % (str(param_dict.keys() - union_params), )

        # create the pytorch optimizer object
        optim_groups = [
            {"params": [param_dict[pn] for pn in sorted(list(p_decay))], "weight_decay": self.config["weight_decay"]},
            {"params": [param_dict[pn] for pn in sorted(list(p_no_decay))], "weight_decay": 0.0},
        ]
        optimizer = torch.optim.AdamW(
            optim_groups, 
            lr=self.config["lr"], 
            betas=(self.config["b1"], self.config["b2"])
        )
        return optimizer

    def sample_char(self, x):
        logits = self(x)
        probs = F.softmax(logits[:,-1,:], dim=1)
        return torch.multinomial(probs, num_samples=1).item()


# Data Loader for names

In [5]:
class NameDataLoader():
    """
    Creates a dataset based on a list of names. All letters will be shifted by one when comparing input and output.

    Examples:

    Basic usage
    - prepend_syllables=False
    - prepend_num_last_chars =0
    Name = 'jef'
    x[0] --> y[0] = .......... --> .........j
    x[1] --> y[1] = .........j --> ........je
    x[2] --> y[2] = ........je --> .......jef
    x[3] --> y[3] = .......jef --> ......jef.

    syllables
    NOTE: 'jef' has 1 syllable, and itos[1] = ' '
    - prepend_syllables=True
    - prepend_num_last_chars=0
    Name = 'jef'
    x[0] --> y[0] = ........ . --> ....... .j
    x[1] --> y[1] = ....... .j --> ...... .je
    x[2] --> y[2] = ...... .je --> ..... .jef
    x[3] --> y[3] = ..... .jef --> .... .jef.

    prepend_num_last_chars
    - prepend_syllables=True
    - prepend_num_last_chars=2
    Name = 'jef'
    x[0] --> y[0] = .....ef.1. --> ....ef.1.j
    x[1] --> y[1] = ....ef.1.j --> ...ef.1.je
    x[2] --> y[2] = ...ef.1.je --> ..ef.1.jef
    x[3] --> y[3] = ..ef.1.jef --> .ef.1.jef.

    NOTE: Data Loader can be extended with other dataloaders for extreme data augmentation!

    """
    def __init__(self, words, window, stoi, itos, prepend_syllables=False, prepend_num_last_chars=0):
        self.stoi = stoi
        self.itos = itos
        self.window = window
        self.prepend_syllables = prepend_syllables
        self.prepend_num_last_chars = prepend_num_last_chars

        self.X, self.Y = self._build_dataset(words)

    def __getitem__(self, index: int):
        return self.X[index], self.Y[index]

    def __len__(self) -> int:
        return len(self.Y)
        
    def _count_syllables(self, word):
        word = word.lower()
        count = 0
        vowels = "aeiouy"
        if word[0] in vowels:
            count += 1

        # NOTE: this will break if there are 3 vowels in a row
        for index in range(1, len(word)):
            if word[index] in vowels and word[index - 1] not in vowels:
                count += 1
        if word.endswith("e"):
            count -= 1
        if count == 0:
            count += 1
        return count

    def _build_dataset(self, words):
        x, y = [], []

        for name in words:
            ctx = [0] * self.window

            if (self.prepend_num_last_chars > 0):
                """
                Add final chars to the end of the buffer
                """
                ori_len = len(name)

                ctx.pop(0)
                ctx.append(self.stoi['.'])

                for i in reversed(range(1, min(self.prepend_num_last_chars+1, len(name)+1))):
                    ctx.pop(0)
                    ctx.append(self.stoi[name[-i]])

                ctx.pop(0)
                ctx.append(self.stoi['.'])

            if self.prepend_syllables:
                num_syllables = self._count_syllables(name)
                ctx.pop(0)
                ctx.pop(0)
                ctx.append(num_syllables)
                ctx.append(self.stoi['.'])

                # name = self.itos[num_syllables] + '.' + name

            for c in (name + '.'):
                x.append(copy.deepcopy(ctx))
                ctx.pop(0)
                ctx.append(self.stoi[c])
                y.append(copy.deepcopy(ctx))
                
        return torch.tensor(x), torch.tensor(y)

    def cat(self, new_data_loader):
        self.X = torch.cat((self.X, new_data_loader.X))
        self.Y = torch.cat((self.Y, new_data_loader.Y))

    def debug_print(self, i_start, i_end):
        for i in range(i_start, i_end):
            print("".join([self.itos[c.item()] for c in self.X[i]]) + " --> " + "".join([self.itos[c.item()] for c in self.Y[i]]))


class NameData():
    def __init__(self, name_txt_path, window, num_final_chars):

        self.window = window
        self.names = open(name_txt_path, 'r').read().splitlines()

        self.stoi, self.itos = self._make_stoi_and_itos(self.names)

        random.seed(42)
        random.shuffle(self.names)
        n1 = int(0.8*len(self.names))
        n2 = int(0.9*len(self.names))

        self.train, self.dev, self.test = self.make_data_loaders(n1, n2, prepend_syllables=False, prepend_num_last_chars=0)
        for i in range(1, num_final_chars + 1):
            self.cat_data_loaders(n1, n2, prepend_syllables=False, prepend_num_last_chars=i)

    def cat_data_loaders(self, n1, n2, prepend_syllables, prepend_num_last_chars):
        new_train, new_dev, new_test = self.make_data_loaders(n1, n2, prepend_syllables, prepend_num_last_chars)
        self.train.cat(new_train)
        self.dev.cat(new_dev)
        self.test.cat(new_test)

    def make_data_loaders(self, n1, n2, prepend_syllables=False, prepend_num_last_chars=0):
        train = NameDataLoader(self.names[:n1], self.window, self.stoi, self.itos, prepend_syllables, prepend_num_last_chars)
        dev = NameDataLoader(self.names[n1:n2], self.window, self.stoi, self.itos, prepend_syllables, prepend_num_last_chars)
        test = NameDataLoader(self.names[n2:], self.window, self.stoi, self.itos, prepend_syllables, prepend_num_last_chars)
        return train, dev, test

    def _make_stoi_and_itos(self, names):
        ## functions to convert chars to int and inverse
        chars = sorted(list(set(''.join(names))))
        stoi = {s:i+1 for i,s in enumerate(chars)}

        # . is both "before start" in X, and "im done" for Y
        stoi['.'] = 0
        itos = {s:i for i,s in stoi.items()}

        return stoi, itos

    def stoi(self, char):
        return self.stoi[char]
    
    def itos(self, i):
        return self.itos[i]

    def vocab(self):
        return len(self.stoi)

    def train_data_loader(self):
        return self.train

    def test_data_loader(self):
        return self.test

    def val_data_loader(self):
        return self.dev

# lightning framework

In [7]:
class LitSurnames(LightningModule):
    def __init__(self, config, data_file):
        super().__init__()

        self.config = config

        self.data = NameData(
            data_file, 
            self.config["window"], 
            self.config["num_final_chars_in_dataset"]
        )
        self.config["vocab"] = self.data.vocab()

        self.model = GPT(config)

    def forward(self, x):
        return self.model(x)

    def loss(self, batch):
        x, y = batch
        logits = self(x)
        return F.cross_entropy(logits.view(-1, logits.size(-1)), y.view(-1), ignore_index=-1)

    def training_step(self, batch, batch_idx):
        loss = self.loss(batch)
        self.log('tr_loss', loss)
        return loss

    def test_step(self, batch, batch_idx):
        loss = self.loss(batch)
        self.log('test_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        loss = self.loss(batch)
        self.log('val_loss', loss)
        return loss

    def configure_optimizers(self):
        return self.model.configure_opt()

    def generate_name(self, first_chars = "", final_chars = ""):
        self.eval()

        name = ""
        ctx = [0] * self.config['window']

        # Put final chars in context
        if len(final_chars) > self.config["num_final_chars_in_dataset"]:
            final_chars = final_chars[-self.config["num_final_chars_in_dataset"]:]
            print("Only accepts up to " + str(self.config["num_final_chars_in_dataset"]) + " final chars. Using: " + final_chars)
                
        for c in final_chars:
            ctx = ctx[1:] + [self.data.stoi[c]]
        ctx = ctx[1:] + [self.data.stoi['.']]

        # put first chars both in name and context
        for c in first_chars:
            name += c
            ctx = ctx[1:] + [self.data.stoi[c]]

        # Run inference to finish off the name!
        for _ in range(80):
            logits = self(torch.tensor(ctx).view(1, -1))
            probs = F.softmax(logits[:,-1,:], dim=1)
            ix = torch.multinomial(probs, num_samples=1).item()

            ctx = ctx[1:] + [ix]
            name += self.data.itos[ix]
            if ix == 0:
                break

        self.train()

        return name

    ####################
    # DATA RELATED HOOKS
    ####################

    def prepare_data(self):
        pass

    def setup(self, stage=None):
        pass

    def train_dataloader(self):
        return DataLoader(
            self.data.train_data_loader(), 
            batch_size=self.config["batch_size"],
            shuffle=True,
            # num_workers = config["num_workers"]
        )

    def test_dataloader(self):
        return DataLoader(
            self.data.test_data_loader(), 
            batch_size=self.config["batch_size"],
            shuffle=True,
        )

    def val_dataloader(self):
        return DataLoader(
            self.data.val_data_loader(), 
            batch_size=self.config["batch_size"],
            shuffle=True,
        )


# Hyperparameter

In [6]:
config = {
    # either model_type or (n_layer, n_head, n_embd) must be given in the config
    "model_type": 'gpt',

    # Window must remain the same for the losses to make sense!!
    "window": 32,

    ## Tiny network, for smoke testing
    # "blocks": 3,
    # "heads": 1,
    # "d_model":  4,

    ## Pico network
    "blocks": 1,
    "heads": 3,
    "d_model": 48,

    ## Nano network
    # "blocks": 3,
    # "heads": 3,
    # "d_model": 48,

    ## Micro
    # "blocks": 6,
    # "heads": 4,
    # "d_model": 128,

    ## Mini
    # "blocks": 6,
    # "heads": 6,
    # "d_model": 192,

    ## gpt
    # "blocks": 12,
    # "heads": 12,
    # "d_model":768,

    "weight_decay": 0.1,
    "lr": 3e-4,
    # "lr": 5e-4,
    "b1": 0.9,
    "b2": 0.95,

    # these options must be filled in externally
    "vocab": None,

    # Dropout hyperparameters
    "embd_pdrop": 0.1,
    "resid_pdrop": 0.1,
    "attn_pdrop": 0.1,

    # Training parameters
    "batch_size": 1024,
    "num_workers": 4,
    "epochs": 500000,

    # Dataset parameters
    "num_final_chars_in_dataset": 2,
}


# Run Training!

In [8]:
lit_surname = LitSurnames(config, 'surnames.txt')

number of parameters: 0.03M


In [10]:
print(lit_surname.data.train.X.shape)
lit_surname.data.train.debug_print(3825900, 3825924)

torch.Size([3825924, 32])
.........................hl.gohl --> ........................hl.gohl.
.............................eh. --> ............................eh.j
............................eh.j --> ...........................eh.ja
...........................eh.ja --> ..........................eh.jan
..........................eh.jan --> .........................eh.jann
.........................eh.jann --> ........................eh.janne
........................eh.janne --> .......................eh.janneh
.......................eh.janneh --> ......................eh.janneh.
.............................rd. --> ............................rd.p
............................rd.p --> ...........................rd.pa
...........................rd.pa --> ..........................rd.pai
..........................rd.pai --> .........................rd.pail
.........................rd.pail --> ........................rd.paill
........................rd.paill --> .......................rd.p

In [11]:
use_wandb = False
if use_wandb:
    run = wandb.init(project="surnamerator", reinit=True)
    logger = WandbLogger()
else:
    import os
    logger = TensorBoardLogger(save_dir=os.getcwd(), version=1, name="lightning_logs")

lr_monitor = LearningRateMonitor(logging_interval='step')
checkpoint = ModelCheckpoint(
    dirpath="./",
    filename='gpt-surnames-{epoch:02d}')

trainer = Trainer(
    accelerator="mps",
    devices=1,
    max_epochs=config["epochs"],
    logger=logger,
    callbacks=[lr_monitor, checkpoint]
)

trainer.fit(lit_surname)

if use_wandb:
    run.finish()

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")

  | Name  | Type | Params
-------------------------------
0 | model | GPT  | 32.8 K
-------------------------------
32.8 K    Trainable params
0         Non-trainable params
32.8 K    Total params
0.131     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [10]:
## Save model and params
import yaml

config["stoi"] = lit_surname.data.stoi

with open('gpt_config.yaml', 'w') as file:
    yaml.dump(config, file)
    
print(open('gpt_config.yaml').read())

# Need to save w/ cpu because spaces doesn't support device('mps')
torch.save(lit_surname.model.to(torch.device("cpu")).state_dict(), "micro_gpt_weights.pt")

attn_pdrop: 0.1
b1: 0.9
b2: 0.95
batch_size: 1024
blocks: 6
d_model: 128
embd_pdrop: 0.1
epochs: 500000
heads: 4
lr: 0.0003
model_type: gpt
num_workers: 4
resid_pdrop: 0.1
stoi:
  ' ': 1
  '''': 2
  '-': 3
  .: 0
  a: 4
  b: 5
  c: 6
  d: 7
  e: 8
  f: 9
  g: 10
  h: 11
  i: 12
  j: 13
  k: 14
  l: 15
  m: 16
  n: 17
  o: 18
  p: 19
  q: 20
  r: 21
  s: 22
  t: 23
  u: 24
  v: 25
  w: 26
  x: 27
  y: 28
  z: 29
vocab: 30
weight_decay: 0.1
window: 32



# View results

### See what the model does at each step of inference

In [67]:
name = ""
ctx = [0] * config['window']

lit_surname.eval()

itos = lit_surname.data.itos
stoi = lit_surname.data.stoi

final_chars = "la"
if final_chars != "":
    for c in final_chars:
        ctx = ctx[1:] + [stoi[c]]

    ctx = ctx[1:] + [stoi['.']]

for c in "":
    name += c
    # print(lit_surname.data.stoi[c])
    ctx = ctx[1:] + [stoi[c]]

print("".join([itos[c] for c in ctx]))

# Run inference to finish off the name!
for _ in range(80):
    logits = lit_surname(torch.tensor(ctx).view(1, -1))
    probs = F.softmax(logits[:,-1,:], dim=1)

    # samping vs expected
    # ix = torch.max(probs, dim=1).indices.squeeze().item()
    ix = torch.multinomial(probs, num_samples=1).item()

    input = ctx
    output = F.softmax(logits, dim=2)
    output = torch.max(output, dim=2).indices.squeeze()

    print("".join([itos[c] for c in ctx]) + " --> " + "".join([itos[c.item()] for c in output]))

    ctx = ctx[1:] + [ix]
    name += lit_surname.data.itos[ix]
    if ix == 0:
        break
print(name)

.............................la.
.............................la. --> ...............................b
............................la.s --> ............................e.bt
...........................la.st --> ...........................e.bca
..........................la.stl --> ..........................e.bcaa
.........................la.stla --> .........................e.bcaan
........................la.stlac --> ........................e.bcaanh
.......................la.stlaci --> .......................e.bcranhn
......................la.stlacic --> ......................e.bcranhnh
.....................la.stlacicz --> .....................e.bcranhnh.
....................la.stlacicza --> ....................e.bcranhnhe.
...................la.stlaciczak --> ...................e.bcranhnhan.
..................la.stlaciczake --> ..................enb.ranhnhani.
stlaciczake.


### app for bulk name generation

In [86]:
def generate_name(first_chars = "", final_chars = ""):
    lit_surname.eval()

    name = ""
    ctx = [0] * config['window']

    # Put final chars in context
    if len(final_chars) > config["num_final_chars_in_dataset"]:
        final_chars = final_chars[-config["num_final_chars_in_dataset"]:]
        print("Only accepts up to 3 final chars. Using: " + final_chars)
            
    for c in final_chars:
        ctx = ctx[1:] + [lit_surname.data.stoi[c]]
    ctx = ctx[1:] + [lit_surname.data.stoi['.']]

    # put first chars both in name and context
    for c in first_chars:
        name += c
        ctx = ctx[1:] + [lit_surname.data.stoi[c]]

    # Run inference to finish off the name!
    for _ in range(80):
        logits = lit_surname(torch.tensor(ctx).view(1, -1))
        probs = F.softmax(logits[:,-1,:], dim=1)
        ix = torch.multinomial(probs, num_samples=1).item()

        ctx = ctx[1:] + [ix]
        name += lit_surname.data.itos[ix]

        if ix == 0:
            break

    lit_surname.train()

    return name


def generate_names(name_start, name_end, number_of_names):
    names = ""
    for _ in range((int)(number_of_names)):
    
        # Initialize name with user input
        names += generate_name(
            name_start, name_end) + "\n"

    return names

demo = gr.Interface(
    fn=generate_names,
    inputs=[
        gr.Textbox(placeholder="Start name with..."),
        gr.Textbox(placeholder="End name with..."),
        gr.Number(value=5),
    ],
    outputs="text",
)
demo.launch()

Running on local URL:  http://127.0.0.1:7868

To create a public link, set `share=True` in `launch()`.




Only accepts up to 3 final chars. Using: aa
Only accepts up to 3 final chars. Using: aa
Only accepts up to 3 final chars. Using: aa
Only accepts up to 3 final chars. Using: aa
Only accepts up to 3 final chars. Using: aa
