In [None]:
# Il faut redemarer l'environement apres l'installation des dependences
!pip install transformers
!pip install transformers[torch]

Collecting accelerate>=0.20.3 (from transformers[torch])
  Downloading accelerate-0.24.1-py3-none-any.whl (261 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.4/261.4 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.24.1


In [None]:
import json
from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config
from transformers import TextDataset, DataCollatorForLanguageModeling
from transformers import Trainer, TrainingArguments

In [None]:

train_data_path = "/content/drive/MyDrive/dataset.json"


with open(train_data_path, "r") as f:
    data = json.load(f)


model_name = "gpt2"  
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)


tokenized_datasets = TextDataset(
    tokenizer=tokenizer,
    file_path=train_data_path,
    block_size=512,  # A faire avec la memoire / plus de comprehension de cotnext quand on agrandis
    overwrite_cache=True,
)


data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)


training_args = TrainingArguments(
    output_dir="./gpt2-fine-tuned2",
    overwrite_output_dir=True,
    num_train_epochs=5,

    per_device_train_batch_size=2,
    save_steps=10_000,
    save_total_limit=2,
)


trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_datasets,
)


trainer.train()


model.save_pretrained("./gpt2-fine-tuned2")
tokenizer.save_pretrained("./gpt2-fine-tuned2")


In [2]:
import json
from transformers import GPT2LMHeadModel, GPT2Tokenizer

In [3]:


model_path = "/content/drive/MyDrive/malin_gpt"  
model = GPT2LMHeadModel.from_pretrained(model_path)
tokenizer = GPT2Tokenizer.from_pretrained(model_path)


In [4]:


# Define a function to generate replies
def generate_reply(keywords, comment, max_length=100):
    input_text = f'{{"keywords": {json.dumps(keywords)},"comment": "{comment}"}}'
    input_text_alternative = f'{{"keywords": {json.dumps(keywords)},"{comment}","comment": ""}}'
    input_ids = tokenizer.encode(input_text_alternative, return_tensors='pt')


    output = model.generate(input_ids, max_length=max_length, num_beams=5, no_repeat_ngram_size=2, top_k=50)
    #do_sample is by default set to false if yous et it to true you can change temperature and top_p

    reply = tokenizer.decode(output[0], skip_special_tokens=True)
    return reply



In [5]:
import re

def extract_reply(input_string):
    
    match = re.search(r'"reply":\s*"([^"]+)"', input_string)

    if match:
        return match.group(1)
    match = re.search(r'"reply":\s*"(.*?)"(?=[^"]*$)', input_string)
    if match:
        return match.group(1)
    else:
        return input_string


In [6]:

keywords = ["restaurant","good experience"]
comment = "If you want healthy authentic or ethic food, try this place."

generated_reply = generate_reply(keywords, comment)
print(extract_reply(generated_reply))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


We're glad you enjoyed our food! Your positive feedback is valued, and we look forward to serving you again soon.


In [None]:

keywords = ["restaurant","positive experience","good service"]
comment = "Service was fine and the waitress was friendly."

generated_reply = generate_reply(keywords, comment)
print(extract_reply(generated_reply))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


We're glad you enjoyed our service at our restaurant! Your positive feedback is appreciated, and we look forward to serving you again in the future. Thank you for bringing this to our attention.


In [None]:

keywords = ["restaurant","negative experience"]
comment = "This place is way too overpriced for mediocre food."

generated_reply = generate_reply(keywords, comment)
print(extract_reply(generated_reply))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


We're sorry to hear about your disappointment. Your feedback is important, and we'll strive to improve. We hope to have the chance to make it up to you in the future. If there's anything specific you'd like us to address, please let us know so we can address it in a timely manner.


In [None]:

keywords = ["restaurant","tasty steak"]
comment = "My ribeye steak was cooked perfectly and had great mesquite flavor."

generated_reply = generate_reply(keywords, comment)
print(extract_reply(generated_reply))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


We're thrilled you enjoyed our steak at our restaurant! Your positive feedback is valued, and we look forward to serving you again in the future. Thank you for bringing this experience to our attention.


In [None]:

keywords = ["restaurant","Good pizza"]
comment = "The best pizza I have ever had !!"

generated_reply = generate_reply(keywords, comment)
print(extract_reply(generated_reply))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


We're glad you enjoyed our pizza! Your positive feedback is appreciated, and we look forward to serving you again.


In [None]:

keywords = ["restaurant","tasty chiken","New menu"]
comment = "Very tasty chicken will be coming back for sure !"

generated_reply = generate_reply(keywords, comment)
print(extract_reply(generated_reply))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


We're delighted to hear that you enjoyed our tasty chicken! Your positive feedback is valued, and we look forward to serving you again soon.


In [None]:

keywords = ["restaurant","good waitress"]
comment = "Waitress was sweet and funny."

generated_reply = generate_reply(keywords, comment)
print(extract_reply(generated_reply))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


We're delighted to hear that you enjoyed your visit! Your positive feedback is valued, and we look forward to serving you again soon.


In [None]:

keywords = ["restaurant","Bad service"]
comment = "We've have gotten a much better service from the pizza place next door than the services we received from this restaurant."

generated_reply = generate_reply(keywords, comment)
print(extract_reply(generated_reply))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


{"keywords": ["restaurant", "Bad service"],"We've have gotten a much better service from the pizza place next door than the services we received from this restaurant.","comment": ""}",
   "reply": "We apologize for the poor service. Your feedback is important, and we'll address this issue to ensure a better experience in the future. We hope to have the chance to make it up to you by the end of the year. Thank you for bringing this to


In [9]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) y
Token is valid (permission: write).
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your terminal in case you want to set the 'stor

In [14]:
model.push_to_hub("malin")
tokenizer.push_to_hub("malin")

model.safetensors:   0%|          | 0.00/498M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/foufou26/malin/commit/2c9371f15375fa5aff7c3e00b25afceaafea5385', commit_message='Upload tokenizer', commit_description='', oid='2c9371f15375fa5aff7c3e00b25afceaafea5385', pr_url=None, pr_revision=None, pr_num=None)