In [None]:
!pip install tensorflow==2.10.0 #tensorflow 2.10.0 <-- 2.12.0 (버전 다운그레이드 필요)

# How many epochs needs to be interated to get the best result on DialoGPT-large model
# when finetuning with one million words of new dataset?
'''
[ChatGPT & BingChat] Based on the experiences shared in the GitHub repository of the Hugging-Face team,
fine-tuning the Microsoft DialoGPT-large model with one million words requires at least several hundred epochs,
typically around 2000 epochs using a batch size of 2, because the model has a large number of parameters (774 million),
or until the validation loss plateaus. For example:

train_dataset = ... # create or load your training dataset
batch_size = 2
model.compile(loss=model.compute_loss, optimizer="adam")
model.fit(train_dataset.batch(batch_size), 
          validation_data=train_dataset.batch(batch_size),
          batch_size=batch_size, 
          epochs=2000)
'''

In [9]:
# Microsoft/DialoGPT-large(file size = 3GB) supports multiple languages

# Print the current date and time in the format:
# "YYYY-MM-DD HH:MM:SS.microseconds"
import datetime
def print_current_datetime(text=""):
    datetime_string = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f")
    print("{} @ CDT({})".format(text,datetime_string))

from transformers import AutoTokenizer, TFAutoModelForCausalLM
import tensorflow as tf
from timeit import default_timer

model_path = "DialoGPT-large-finetuned-by-Microsoft-Keras"
learning_rate = 1e-5
batch_size = 2
epochs = 2000

print_current_datetime()

tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large")
model = TFAutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large")

# Load the text data
with open('한글성경(마침표제거)_정제후말뭉치_백일만단어.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Tokenize the text data
tokenized_text = tokenizer(text, return_tensors='tf')

print_current_datetime("Finetune the DialoGPT-large-by-Microsoft-Keras")

# Define the training function
@tf.function
def train_step(input_ids):
    # Truncate input sequence
    max_seq_length = 1024 # "1024" for DialoGPT-large
    input_ids = input_ids[:, :max_seq_length]
    with tf.GradientTape() as tape:
        outputs = model(input_ids, training=True)
        logits = outputs.logits[:, :-1, :]
        labels = input_ids[:, 1:]
        loss_value = loss(labels, logits)
    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    return loss_value

# Create a TensorSliceDataset from the tokenized text
dataset = tf.data.Dataset.from_tensor_slices(tokenized_text['input_ids'])
dataset = dataset.batch(batch_size)

# Define the loss function and optimizer
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

# Fine-tune the model
for epoch in range(epochs):
    start = default_timer()
    print(f'Epoch {epoch+1}/{epochs}')
    for step, batch in enumerate(dataset):
        loss_value = train_step(batch)
        if step % 50 == 0:
            print(f'Step {step} Loss {loss_value}')
    end = default_timer()
    print("Time duration(in seconds):", end - start)

print_current_datetime("Saving Fine-tuned Microsoft DialoGPT Model")

tokenizer.save_pretrained(model_path)
model.save_pretrained(model_path)

 @ CDT(2023-04-26T18:14:25.248695)


All model checkpoint layers were used when initializing TFGPT2LMHeadModel.

All the layers of TFGPT2LMHeadModel were initialized from the model checkpoint at microsoft/DialoGPT-large.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.
Token indices sequence length is longer than the specified maximum sequence length for this model (8581266 > 1024). Running this sequence through the model will result in indexing errors


Finetune the DialoGPT-large-by-Microsoft-Keras @ CDT(2023-04-26T18:14:49.050494)
Epoch 1/2000


ResourceExhaustedError: Graph execution error:

Detected at node 'tfgpt2lm_head_model_8/transformer/h_._33/attn/dropout_972/dropout/random_uniform/RandomUniform' defined at (most recent call last):
    File "c:\ProgramData\Anaconda3\lib\runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "c:\ProgramData\Anaconda3\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "c:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "c:\ProgramData\Anaconda3\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
      app.start()
    File "c:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 677, in start
      self.io_loop.start()
    File "c:\ProgramData\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "c:\ProgramData\Anaconda3\lib\asyncio\base_events.py", line 601, in run_forever
      self._run_once()
    File "c:\ProgramData\Anaconda3\lib\asyncio\base_events.py", line 1905, in _run_once
      handle._run()
    File "c:\ProgramData\Anaconda3\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "c:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 471, in dispatch_queue
      await self.process_one()
    File "c:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 460, in process_one
      await dispatch(*args)
    File "c:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 367, in dispatch_shell
      await result
    File "c:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 662, in execute_request
      reply_content = await reply_content
    File "c:\ProgramData\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 360, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "c:\ProgramData\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 532, in run_cell
      return super().run_cell(*args, **kwargs)
    File "c:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2863, in run_cell
      result = self._run_cell(
    File "c:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2909, in _run_cell
      return runner(coro)
    File "c:\ProgramData\Anaconda3\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "c:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3106, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "c:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3309, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "c:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3369, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Windows\Temp\ipykernel_11040\3663138582.py", line 61, in <cell line: 57>
      loss_value = train_step(batch)
    File "C:\Windows\Temp\ipykernel_11040\3663138582.py", line 40, in train_step
      outputs = model(input_ids, training=True)
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\engine\training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\transformers\models\gpt2\modeling_tf_gpt2.py", line 727, in call
      transformer_outputs = self.transformer(
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\transformers\models\gpt2\modeling_tf_gpt2.py", line 373, in call
      for i, (block, layer_past) in enumerate(zip(self.h, inputs["past"])):
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\transformers\models\gpt2\modeling_tf_gpt2.py", line 377, in call
      outputs = block(
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\transformers\models\gpt2\modeling_tf_gpt2.py", line 199, in call
      output_attn = self.attn(
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\transformers\models\gpt2\modeling_tf_gpt2.py", line 160, in call
      attn_outputs = self._attn(query, key, value, attention_mask, head_mask, output_attentions, training=training)
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\transformers\models\gpt2\modeling_tf_gpt2.py", line 120, in _attn
      w = self.attn_dropout(w, training=training)
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\layers\regularization\dropout.py", line 116, in call
      output = control_flow_util.smart_cond(
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\utils\control_flow_util.py", line 108, in smart_cond
      return tf.__internal__.smart_cond.smart_cond(
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\layers\regularization\dropout.py", line 112, in dropped_inputs
      return self._random_generator.dropout(
    File "C:\Users\A\AppData\Roaming\Python\Python39\site-packages\keras\backend.py", line 2162, in dropout
      return tf.nn.dropout(
Node: 'tfgpt2lm_head_model_8/transformer/h_._33/attn/dropout_972/dropout/random_uniform/RandomUniform'
OOM when allocating tensor with shape[1,20,1024,1024] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator cpu
	 [[{{node tfgpt2lm_head_model_8/transformer/h_._33/attn/dropout_972/dropout/random_uniform/RandomUniform}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_step_202259]

In [None]:
# Print the current date and time in the format:
# "YYYY-MM-DD HH:MM:SS.microseconds"
import datetime
def print_current_datetime(text=""):
    datetime_string = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f")
    print("{} @ CDT({})".format(text,datetime_string))

from transformers import AutoTokenizer, TFAutoModelForCausalLM
from timeit import default_timer

model_path = "DialoGPT-large-finetuned-by-Microsoft-Keras"

print_current_datetime("Loading Fine-tuned Microsoft DialoGPT Model")

tokenizer_loaded = AutoTokenizer.from_pretrained(model_path)
model_loaded = TFAutoModelForCausalLM.from_pretrained(model_path)

print_current_datetime()

def generate_ouput(prompt=""):

    start = default_timer()

    input_ids = tokenizer_loaded.encode(prompt, return_tensors="tf")
    output_ids = model_loaded.generate(input_ids=input_ids,
                                       max_length=1024+input_ids.shape[1],
                                       temperature=0.7,
                                       top_p=0.9,
                                       do_sample=True,
                                       num_return_sequences=5, # The model will generate five different responses to the prompt.
                                       pad_token_id=tokenizer_loaded.eos_token_id)
    generated_text = tokenizer_loaded.decode(output_ids[0], skip_special_tokens=True)

    end = default_timer()

    # num_return_sequences=5, which means the model will generate 5 different responses to the prompt.
    # The below code loops through the generated responses and print them out with a response number.
    for i, return_sequence in enumerate(output_ids):
        print(f'Response {i+1}: {tokenizer_loaded.decode(return_sequence, skip_special_tokens=True)}')

    print("Time duration(in seconds):", end - start)
    return generated_text

# Let's chat for 10 lines
for step in range(10):
    prompt = input(">> User:")
    if prompt.lower() == "bye": break

    generated_text = generate_ouput(prompt)
    split_generated_text = generated_text.split(prompt)
    if len(split_generated_text) > 1:
        generated_text = split_generated_text[1]
    # Trim the sentences after the last period(.)
    text_to_remove = generated_text.split('.')[-1]
    generated_text = generated_text.replace(text_to_remove,'')

    print(">> GPT: {}".format( generated_text ))
    print_current_datetime()