In [None]:
!pip install datasets transformers[torch]



In [None]:
from datasets import load_dataset
# Load the card descriptions from a CSV file, given as a path relative to the
# working directory. The result is a DatasetDict with a single 'train' split.
dataset = load_dataset('csv', data_files='yugioh_card_info.csv')

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
# Display the loaded DatasetDict (splits, column names and row counts).
dataset

DatasetDict({
    train: Dataset({
        features: ['card info'],
        num_rows: 12487
    })
})

In [None]:
# Hold out 10% of the cards as a test split. The fixed seed makes the shuffle,
# and therefore the train/test partition, identical on every run.
# The variable name `datasets` is kept because later cells refer to it.
datasets = dataset['train'].train_test_split(test_size=0.1, seed=42)

In [None]:
from transformers import AutoTokenizer
# Checkpoint name shared by the tokenizer here and the model loaded later.
model_checkpoint = "distilgpt2"
# Load the tokenizer that matches the checkpoint, requesting the fast implementation.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)

In [None]:
def tokenize_function(examples):
    """Run the notebook-level tokenizer over the 'card info' field of `examples`.

    Returns whatever the tokenizer returns for that text.
    """
    card_text = examples['card info']
    return tokenizer(card_text)

In [None]:
# Tokenize every split with 8 worker processes and drop the raw text column,
# leaving only the columns produced by the tokenizer.
tokenized_datasets = datasets.map(
    tokenize_function,
    remove_columns=["card info"],
    num_proc=8,
)

Map (num_proc=8):   0%|          | 0/11238 [00:00<?, ? examples/s]

Map (num_proc=8):   0%|          | 0/1249 [00:00<?, ? examples/s]

In [None]:
# Display the tokenized splits to confirm the columns and row counts.
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 11238
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 1249
    })
})

In [None]:
block_size = 128
def group_texts(examples):
    """Concatenate a batch of tokenized examples and cut it into fixed-size blocks.

    Args:
        examples: Mapping from column name to a list of per-example lists.
            It must contain an "input_ids" column. The total length is taken
            from the first column only and applied to every column.

    Returns:
        dict: One entry per input column, each a list of chunks holding exactly
        ``block_size`` items, plus a "labels" entry that is a shallow copy of
        the "input_ids" chunk list. Items left over after the last full block
        are dropped, so a batch shorter than ``block_size`` yields empty lists.
    """
    # Function-scope import keeps this cell self-contained.
    from itertools import chain

    # Flatten each column in linear time; sum(lists, []) re-copies the growing
    # accumulator on every addition and is quadratic in the batch length.
    concatenated_examples = {k: list(chain.from_iterable(examples[k])) for k in examples.keys()}
    total_length = len(concatenated_examples[list(examples.keys())[0]])
    # We drop the small remainder; padding could be added instead if the model
    # supported it. Customize this part to your needs.
    total_length = (total_length // block_size) * block_size
    # Split into chunks of block_size.
    result = {
        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
        for k, t in concatenated_examples.items()
    }
    result["labels"] = result["input_ids"].copy()
    return result

In [None]:
# Regroup the tokenized examples into fixed-size blocks, handing group_texts
# batches of 1000 examples and spreading the work over 4 processes.
lm_datasets = tokenized_datasets.map(group_texts, batched=True, batch_size=1000, num_proc=4)

Map (num_proc=4):   0%|          | 0/11238 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/1249 [00:00<?, ? examples/s]

In [None]:
# Decode the second grouped training block back to text to sanity-check the chunking.
tokenizer.decode(lm_datasets["train"][1]["input_ids"])

' When a "Graydle" monster you control declares a direct attack: You can target 1 monster in your opponent\'s Graveyard; Special Summon that target to their side of the field, if they control no monsters. You can only use each effect of "Graydle Parasite" once per turn -- Continuous Trap.../// Pharaoh\'s Servant -- Normal Monster -- An apparition of those said to formerly serve the Pharaoh. It has tremendous loyalty that does not waiver -- 900 ATK -- 0 DEF -- LEVEL 2 -- Normal -- DARK.../// Gladiator Lash -- Trap Card -- Activate only when you Special Summon a "Gladiator Beast" monster.'

In [None]:
from transformers import AutoModelForCausalLM
# Load the pretrained causal language model for the same checkpoint as the tokenizer.
model = AutoModelForCausalLM.from_pretrained(model_checkpoint)

Downloading model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [None]:
from transformers import Trainer, TrainingArguments

In [None]:
# Keep only the last path segment of the checkpoint id (drops any "org/" prefix).
model_name = model_checkpoint.rsplit("/", 1)[-1]
# Hyperparameters for fine-tuning; results are written to "<model_name>-yugioh".
training_args = TrainingArguments(
    output_dir=f"{model_name}-yugioh",
    num_train_epochs=10,
    learning_rate=2e-5,
    weight_decay=0.01,
    #push_to_hub=True,
)

In [None]:
# Wire the model, the hyperparameters and both grouped splits into a Trainer.
# NOTE(review): training_args sets no evaluation schedule, so eval_dataset is
# presumably unused during train() - confirm.
trainer = Trainer(
    args=training_args,
    model=model,
    eval_dataset=lm_datasets["test"],
    train_dataset=lm_datasets["train"],
)

In [None]:
# Run fine-tuning; the returned TrainOutput is shown as the cell result.
trainer.train()

Step,Training Loss
500,2.3676
1000,1.9128
1500,1.7614
2000,1.6846
2500,1.6134
3000,1.5697
3500,1.5296
4000,1.4931
4500,1.4758
5000,1.4491


TrainOutput(global_step=9710, training_loss=1.5414258868515063, metrics={'train_runtime': 2208.1433, 'train_samples_per_second': 35.161, 'train_steps_per_second': 4.397, 'total_flos': 2535884964495360.0, 'train_loss': 1.5414258868515063, 'epoch': 10.0})

In [None]:
from transformers import pipeline
# Wrap the fine-tuned model in a text-generation pipeline.
# device="cuda" means this cell needs a CUDA-capable GPU.
generator = pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    device="cuda",
)
# Ask for three continuations of the prompt; the list is shown as the cell result.
generator(
    "/// Blue eyes mad cow",
    max_length=300,
    num_return_sequences=3,
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': '/// Blue eyes mad cow -- 1500 ATK -- 1900 DEF -- LEVEL 8 -- Normal -- EARTH.../// The Grand Illusion -- Trap Card -- Discard 1 "Flower of the Dark" card. While this card is Normal, it gains 100 ATK for each "Flower of the Dark" Monster Card on the field -- Counter Trap.../// Super Horn of the Ice Barrier -- Effect Monster -- If your opponent controls a Monster Card(s): You can pay 1000 Life Points to destroy 2 monsters in your field and/or GY, and if you do, Special Summon 1 "Ice Barrier" monster from your hand or Graveyard to your zone. If this card destroys a monster by battle, you can select 1 "Super Horn of the Ice Barrier" from your Deck, and destroy it -- 1000 ATK -- 1500 DEF -- LEVEL 3 -- Continuous -- EARTH.../// Dark Dragon of the Ice Barrier -- Pendulum Effect Monster -- [ Pendulum Effect ]If a WATER monster(s) you control is destroyed by battle or card effect and a monster with 1500 or less ATK than you control and no other card in your Graveyard: This c

In [None]:
# Rebuild the text-generation pipeline (requires a CUDA-capable GPU).
generator = pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    device="cuda",
)
# Generate three continuations for a prompt that supplies only the card name.
output = generator(
    '/// Infernoble Knight Bradamante -- ',
    max_length=300,
    num_return_sequences=3,
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [None]:
# For each generated sequence, print the full text and then only the part
# before the first '...' separator.
for output_sequence in output:
    output_string = output_sequence['generated_text']
    print(output_string, output_string.partition('...')[0], sep="\n")

/// Infernoble Knight Bradamante -- .../// Wind-Up Dragon -- XYZ Monster -- 2 Level 6 monstersIf this card is Xyz Summoned: You can Normal Summon 2 Level 6 monsters with different names: 1 Level 6 monster can attack your opponent directly this turn. This card is unaffected by Trap Cards' effects, except "Wind-Up Dragon" -- 4000 ATK -- 4000 DEF -- LEVEL 6 -- Ritual -- .../// Bamboo Samurai -- Effect Monster -- When this card is Summoned: You can send 1 Plant monster from your opponent's Deck to the GY, and if the sent monster is at least 1 Plant, both players can Set this card to their field, then draw the same number of cards this card draws. Once per turn, during your End Phase: You can target 1 "Cancer King" you control; for the rest of this turn, all Plant-Type monsters of the opponent's Deck with the same name and Attribute cannot attack with that card, except the Special Summoned monster, until you activate the effect of "Bamboo Ninja" -- 600 ATK -- 2100 DEF -- LEVEL 4 -- Continuo

In [None]:
# Rebuild the text-generation pipeline (requires a CUDA-capable GPU).
generator = pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    device="cuda",
)
# Generate three continuations for a prompt giving a card name and card type.
output = generator(
    "Extra Summon -- Spell Card",
    max_length=300,
    num_return_sequences=3,
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [None]:
# For each generated sequence, print the full text and then only the part
# before the first '...' separator.
for output_sequence in output:
    output_string = output_sequence['generated_text']
    print(output_string, output_string.partition('...')[0], sep="\n")

Extra Summon -- Spell Card -- During the following Main Phase 1: You can target 1 "Pilphy" monster you control; it gains ATK equal to the number of non-Pilphy cards you control; increase its Level by the Level of the other card in your Graveyard. You can only use this effect of "Pilphy Token" once per turn -- 2000 ATK -- 2800 DEF -- LEVEL 6 -- Field -- FIRE.../// Light Dragongous -- Flip Effect Monster -- [ Pendulum Effect ]Once per turn, if a face-up "Dragong Token" monster(s), during the Battle Phase, you can activate 1 of these effects, up to the number of "Dragongous" monstersYou cannot Synchro Summon/Set, except "Light Dragongous".----------------------------------------[ Monster Effect ]Once per turn: You can destroy 1 "Dragongous" monster you control, and if it does, inflict damage to your opponent equal to the combined original ATK of the destroyed monster's current Rank. If a "Dragongous" monster you control is destroyed by battle, during the Battle Phase: Return 1 card your o

In [None]:
#from huggingface_hub import notebook_login

# Log in to the hub
#notebook_login()
# Push the trained model to my Hub account
#trainer.push_to_hub()