# Install requirements

In [None]:
# Java runtime + JDK. Presumably required by the VnCoreNLP word segmenter, which this
# notebook later starts with a JVM heap flag ('-Xmx500m') -- confirm before removing.
!sudo apt-get update
!sudo apt install -y default-jre default-jdk

In [None]:
!pip install transformers py_vncorenlp nlpaug sentencepiece googletrans==4.0.0-rc1 polyglot pyicu pycld2 morfessor

In [None]:
import pandas as pd
import torch
from torch import optim
import re
import py_vncorenlp
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from torch.utils.data import TensorDataset, ConcatDataset, DataLoader
import tqdm
import numpy as np
from googletrans import Translator
from polyglot.text import Text
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import random
import seaborn as sns
from wordcloud import WordCloud
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
%mkdir vncorenlp
py_vncorenlp.download_model('./vncorenlp')

# Prepare

### Preprocess data

In [None]:
def replace_emojis_with_words(text):
  """Turn an emoji-only comment into a short Vietnamese sentence.

  Text that contains any ASCII letter is returned untouched. Otherwise the
  known emojis are looked up in a fixed priority order and the LAST table
  entry found in the text wins: consecutive repeats of that emoji are
  collapsed and each remaining occurrence becomes 'Mình thấy <phrase>.'.
  When none of the known emojis occurs, '' is returned.
  """
  if re.search('[a-zA-Z]', text):
    return text

  # (emoji, phrase) pairs; a later entry overrides an earlier one.
  emoji_phrases = (
    ('⭐⭐⭐⭐⭐', 'chất lượng ở đây tuyệt vời'),
    ('😍', 'rất thích đồ ăn ở đây'),
    ('❤️', 'thích đồ ăn ở đây'),
    ('👌🏻', 'đồ ăn rất ổn'),
    ('🖤', 'thích đồ ăn ở đây'),
    ('😋', 'đồ ăn rất ngon'),
    ('🤤', 'đồ ăn rất ngon'),
    ('😊', 'rất hài lòng'),
    ('🥰', 'thích đồ ăn ở đây'),
    ('💔', 'không thích đồ ăn ở đây'),
    ('🤬', 'rất bực mình'),
    ('😞', 'không hài lòng lắm'),
    ('💕', 'thích đồ ăn ở đây'),
  )

  # Scanning from the end and stopping at the first hit == "last match wins".
  chosen = next((pair for pair in reversed(emoji_phrases) if pair[0] in text), None)
  if chosen is None:
    return ''
  replace_emoj, replace_text = chosen

  # Collapse runs of the chosen emoji so the sentence is inserted once per run.
  doubled = replace_emoj * 2
  while doubled in text:
    text = text.replace(doubled, replace_emoj)

  return text.replace(replace_emoj, f'Mình thấy {replace_text}.')

In [None]:
def remove_marks(row, include_normal=False):
  """Remove special characters from a comment.

  Every character in the special-character set below is replaced by a space
  and runs of '?' are collapsed to a single '?'.

  Args:
    row: the text to clean.
    include_normal: when True, also drop every '!' and strip the trailing run
      of '.', ',' and '?' at the end of the text.

  Returns:
    The cleaned text with surrounding whitespace stripped.
  """
  # Same characters the original replaced one by one, applied in a single pass.
  special_chars = ';“:”"\'-)(*^@—#><~[]'
  row = row.translate(str.maketrans(dict.fromkeys(special_chars, ' ')))

  while '??' in row:
    row = row.replace('??', '?')

  if include_normal:
    # The previous pattern ended in '$-', which can never match, so trailing
    # punctuation was never removed. '!' is dropped first so that endings such
    # as '.!' are stripped as well; '\s*' tolerates trailing whitespace.
    row = re.sub(r"[.,?]+\s*$", "", row.replace("!", ""))
  return row.strip()

In [None]:
def remove_emojis(text):
  """Replace every run of emoji / pictograph characters in ``text`` with '. '.

  Note that some of the ranges are very broad (U+24C2..U+1F251 and
  U+10000..U+10FFFF), so CJK and all supplementary-plane characters are
  replaced as well, not only emojis.
  """
  code_point_ranges = (
    "\U0001F600-\U0001F64F",  # emoticons
    "\U0001F300-\U0001F5FF",  # symbols & pictographs
    "\U0001F680-\U0001F6FF",  # transport & map symbols
    "\U0001F1E0-\U0001F1FF",  # regional indicators (flags)
    "\U00002500-\U00002BEF",  # box drawing .. miscellaneous symbols and arrows
    "\U00002702-\U000027B0",  # dingbats
    "\U00002702-\U000027B0",  # (listed twice in the pattern; kept so the regex is unchanged)
    "\U000024C2-\U0001F251",  # broad range, includes CJK
    "\U0001f926-\U0001f937",
    "\U00010000-\U0010ffff",  # every supplementary-plane code point
    "\u2640-\u2642",
    "\u2600-\u2B55",
    "\u200d",                 # zero-width joiner
    "\u23cf",
    "\u23e9",
    "\u231a",
    "\ufe0f",                 # variation selector-16
    "\u3030",
    "\u23f0",
  )
  pattern = "[" + "".join(code_point_ranges) + "]+"
  return re.sub(pattern, '. ', text, flags=re.UNICODE)

In [None]:
# Single-token corrections: slang, abbreviations, typos and common English words
# mapped to standard Vietnamese. Keys are matched against the lower-cased token.
# Each key appears exactly once. The source used to list "m", "vs" and "view"
# twice, where the later entry silently won; the effective values are kept.
single_correct_mapping = {
    "ship": "vận chuyển",
    "shop": "cửa hàng",
    "mik": "mình",
    "ko": "không",
    "k": " không ",
    "kh": "không",
    "khong": "không",
    "kg": "không",
    "khg": "không",
    "tl": "trả lời",
    "r": "rồi",
    "fb": "mạng xã hội",
    "face": "mạng xã hội",
    "facebok": "mạng xã hội",
    "thanks": "cảm ơn",
    "thank": "cảm ơn",
    "tks": "cảm ơn",
    "tk": "cảm ơn",
    "ok": "tốt",
    "oke": "tốt",
    "dc": "được",
    "vs": "với",
    "đt": "điện thoại",
    "thjk": "thích",
    "qá": "quá",
    "qa": "qua",
    "trể": "trễ",
    "bgjo": "bao giờ",
    "god": "tốt",
    "bh": "bây giờ",
    "sale": "giảm giá",
    "ntn": "như thế này",
    "vote": "đánh giá",
    "ms": "mới",
    "hnay": "hôm nay",
    "kute": "dễ thương",
    "cute": "dễ thương",
    "bik": "biết",
    "od": "gọi món",
    "mn": "mọi người",
    "mng": "mọi người",
    "c": "chị",
    "đc": "được",
    "uk": "ừ",
    "t": "tôi",
    "tt": "thứ tự",
    "gj": "gì",
    "j": "gì",
    "đx": "được",
    # NOTE(review): an earlier duplicate mapped "m" to "mình"; this later entry was
    # the one in effect, so it is the one kept. Confirm which meaning is intended.
    "m": "mày",
    "zậy": "vậy",
    "wa": "qua",
    "zui": "vui",
    "thik": "thích",
    "ad": "thêm",
    "pko": "phải không",
    "cmt": "bình luận",
    "dt": "dễ thương",
    "ib": "inbox",
    "klq": "không liên quan",
    "nx": "nhận xét",
    "rep": "trả lời",
    "dj": "đi",
    "mog": "mong",
    "bít": "biết",
    "nc": "nước",
    "lun": "luôn",
    "hiu": "hiểu",
    "rui": "rồi",
    "thui": "thôi",
    "đg": "đang",
    "h": "giờ",
    "g": "giờ",
    "zòn": "giòn",
    "cx": "cũng",
    "kbiet": "không biết",
    "đ": "không",
    "đéo": "không",
    "mk": "mình",
    "trc": "trước",
    "bùn": "buồn",
    "iu": "yêu",
    "lua": "lừa",
    "b": "bạn",
    "mja": "mua",
    "lớm": "lắm",
    "ng": "người",
    "qly": "quản lý",
    "order": "gọi món",
    "review": "đánh giá",
    # An earlier duplicate mapped "view" to "phong cảnh"; this entry was the effective one.
    "view": "cảnh quan",
    "bminh": "bọn mình",
    "nv": "nhân viên",
    "pvu": "phục vụ",
    "matcha": "trà xanh",
    "detox": "thanh lọc",
    "free": "miễn phí",
    "sn": "sinh nhật",
    "team": "đội",
    "rv": "đánh giá",
    "nguwoif": "người",
    "siu": "siêu",
    "nhg": "nhưng",
    "cócar": "có cả",
    "xiền": "tiền",
    "cf": "cà phê",
    "hok": "không",
    "nhứt": "nhất",
    "nchung": "nói chung",
    "ncl": "nói chung là",
    "rắc": "rất",
    "shiper": "người giao hàng",
    "anw": "dù sao",
    "nqnkn": "ngon",
    "vd": "ví dụ",
    "+": " và ",
    "decor": "trang trí",
    "deco": "trang trí",
    "biêta": "biết",
    "tnao": "thế nào",
    "tn": "thế nào",
    "hnào": "hôm nào",
    "okie": "tốt",
    "size": "cỡ",
    "ps": "lưu ý",
    "p/s": "lưu ý",
    "ún": "uống",
    "lựong": "lượng",
    "luóng": "lượng",
    "chs": "chả hiểu sao",
    "oy": "ơi",
    "5stars": "rất tốt",
    "lém": "lắm",
    "cafe": "cà phê",
    "êi": "ơi",
    "trsi": "trí",
    "sdt": "số điện thoại",
    "tgian": "thời gian",
    "kgian": "không gian",
    "ctrinh": "chương trình",
    "fre": "miễn phí",
    "chug": "chung",
    "bthg": "bình thường",
    "cty": "công ty",
    "quant": "quản",
    "chụy": "chị",
    "ứ": "không",
    "í": "ý",
    "rai": "dai",
    "nhìu": "nhiều",
    "ngón": "ngon",
    "kb": "không biết",
    "mìn": "mình",
    "nồn": "nồng"
}

# Multi-token corrections, applied to the raw text (case-insensitively) before it is
# split into tokens.
multiple_correct_mapping = {
    "sì dầu": "xì dầu",
    "ú uk": "ú ụ",
    "rì viu": "đánh giá",
    "mỏng teng": "mỏng tang",
    "gợi í": "gợi ý",
    "nhà vs": "nhà vệ sinh",
    "hế lô": "chào",
    "bạc sỉu": "bạc xỉu",
    "chym ưng": "ưng",
}

def correct_word(w):
  """Return the corrected form of a single token.

  The lower-cased token is looked up in ``single_correct_mapping``. If that
  fails and the token ends with one punctuation mark, the token without that
  mark is looked up and the mark is re-attached to the correction. Tokens
  with no match, and the empty string, are returned unchanged.
  """
  if not w:
    # Guard: indexing w[-1] below would raise IndexError on an empty token.
    return w

  trailing_marks = (':', '.', ',', '!', '?', ';')
  lowered = w.lower()
  if lowered in single_correct_mapping:
    return single_correct_mapping[lowered]

  stem, last_char = lowered[:-1], w[-1]
  if last_char in trailing_marks and stem in single_correct_mapping:
    return f'{single_correct_mapping[stem]}{last_char}'
  return w

def correct_words(text):
  """Apply the phrase-level and then the token-level corrections to ``text``.

  Returns the corrected tokens joined by single spaces.
  """
  for phrase, replacement in multiple_correct_mapping.items():
    # re.escape + a function replacement: both the key and the value are treated
    # as literal text, whatever characters are added to the mapping later.
    text = re.sub(re.escape(phrase), lambda _match, fixed=replacement: fixed,
                  text, flags=re.IGNORECASE)

  return ' '.join(correct_word(word) for word in text.split())

In [None]:
def remove_special_icons(text):
  """Replace text emoticons such as ':)' or '^^' with '. '.

  Emoticons are replaced longest first, so ':((' is consumed as a whole
  instead of being matched as ':(' with a stray '(' left behind.
  """
  icons = [':3', ':D', '+)', ':P', ':O', ':))', ':)', ':(', ':((', '^^', '^.^', 'ToT']
  for icon in sorted(icons, key=len, reverse=True):
    text = text.replace(icon, ". ")
  return text

In [None]:
def remove_expression_words(text):
  """Delete laughing / crying interjections ('haha', 'hihi', 'huhu', ...) from ``text``.

  Matching is case-sensitive. Longer interjections are removed first, so
  'ahihi' is deleted as a whole instead of losing its 'hihi' part and
  leaving a stray 'a' behind.
  """
  exp_words = ['hehe', 'keke', 'haha', 'hihi', 'ahihi', 'huhu', 'hiuhiu', 'kaka', 'hoho', 'ahjhj', 'hjhj']
  for word in sorted(exp_words, key=len, reverse=True):
    text = text.replace(word, "")
  return text

In [None]:
def shorten_expression(text):
  """Collapse stretched-out letters, e.g. 'ngonnnn' -> 'ngon'.

  Any run of two or more identical characters from ``chars`` (lower-case
  letters except 'o', plus '.') is reduced to one character. Runs of three or
  more 'o' are reduced to 'oo', and the special cases 'ngoon' and 'áa' are
  rewritten to 'ngon' and 'á'.
  """
  chars = 'aáàạảbcdeéèẹẻêếềệểfghiíìịỉjklmnóòọỏôốồộổơớờợởpqrstuúùụủưứừựửvwxyz.'

  # One pass with a back-reference: a listed character followed by one or more
  # copies of itself is replaced by a single copy.
  text = re.sub('([' + chars + r'])\1+', r'\1', text)

  # special case in vietnamese: a double 'o' is legitimate, a triple one is not
  text = re.sub('o{3,}', 'oo', text)
  text = text.replace('ngoon', 'ngon')
  return text.replace('áa', 'á')

In [None]:
def remove_one_chars(text):
  """Drop single-character tokens, except a few meaningful ones and '.'.

  The text is split on whitespace and the surviving tokens are re-joined with
  single spaces.
  """
  keep_single = ['á', 'à', 'ạ', 'ô', 'ừ', 'ý', 'ở', '.']
  kept = []
  for token in text.split():
    if len(token) <= 1 and token.lower() not in keep_single:
      continue
    kept.append(token)
  return ' '.join(kept)

In [None]:
def rating_to_words(text):
  """Replace numeric ratings such as '9/10' with a Vietnamese quality phrase.

  A rating is one or two digits, '/', one or two digits, not embedded in a
  longer number. The ratio n/d selects the phrase:
  >= 0.8 'tuyệt vời', >= 0.6 'khá tốt', >= 0.4 'tạm ổn', >= 0.2 'tệ',
  otherwise 'cực kì tệ'. A rating with a zero denominator is left as is.

  Each match is replaced in place by re.sub, so one rating can no longer
  corrupt another that contains it as a substring (e.g. '1/10' in '11/10').
  """
  thresholds = ((0.8, 'tuyệt vời'), (0.6, 'khá tốt'), (0.4, 'tạm ổn'), (0.2, 'tệ'))

  def _describe(match):
    numerator, denominator = int(match.group(1)), int(match.group(2))
    if denominator == 0:
      # Not a meaningful rating; avoid ZeroDivisionError and keep the text.
      return match.group(0)
    ratio = numerator / denominator
    for lower_bound, phrase in thresholds:
      if ratio >= lower_bound:
        return phrase
    return 'cực kì tệ'

  return re.sub(r'(?<!\d)(\d{1,2})/(\d{1,2})(?!\d)', _describe, text)

In [None]:
def translate_to_vnmese(text, translator):
    """Return ``text`` in Vietnamese.

    The language is detected with polyglot's ``Text``; text detected as 'vi' is
    returned unchanged, anything else is sent to ``translator.translate`` with
    ``dest='vi'`` and the translated text is returned.
    """
    language_code = Text(text).language.code
    if language_code != 'vi':
        return translator.translate(text, dest='vi').text
    return text

In [None]:
def normalize(df):
    """Clean every comment in ``df['Comment']`` and return the cleaned strings as a list.

    Each comment has its line breaks turned into '. ', is translated to
    Vietnamese when needed and is then passed through the cleaning steps below
    in order. A comment that ends up without any ASCII letter is replaced by a
    canned sentence: a positive one when its ``image_urls`` value has length
    >= 2, a negative one otherwise.
    """
    translator = Translator()
    # Order matters: each step consumes the output of the previous one.
    cleaning_steps = (
        rating_to_words,
        remove_special_icons,
        remove_marks,
        shorten_expression,
        correct_words,
        remove_expression_words,
        replace_emojis_with_words,
        remove_emojis,
        remove_one_chars,
    )

    normalizeds = []
    for raw_comment, urls in zip(df['Comment'].values, df['image_urls'].values):
        normalized = '. '.join(str(raw_comment).split('\n'))
        normalized = translate_to_vnmese(normalized, translator)
        for step in cleaning_steps:
            normalized = step(normalized)
        normalized = ' '.join(normalized.split()).strip()

        # An empty string has no ASCII letter either, so one check covers both cases.
        if not re.search('[a-zA-Z]', normalized):
            if len(urls) >= 2:
                normalized = "Đồ ăn ngon, mình rất thích."
            else:
                normalized = "Đồ ăn không ngon, mình không thích."

        normalizeds.append(normalized)

    return normalizeds

In [None]:
def segment_words(normalized_text, segmenter):
  """Word-segment each comment and split it into cleaned sentences.

  Args:
    normalized_text: iterable of cleaned comment strings.
    segmenter: object exposing ``word_segment(str) -> list[str]``.

  Returns:
    One list of sentence strings per input comment. Every kept sentence
    contains at least one ASCII letter, has its first character upper-cased
    and ends with a space-separated token (' .' is appended when the
    second-to-last character is not a space).

  Sentences starting with 'địa_chỉ' are dropped unless they contain one of
  the opinion keywords in ``address_to_keep``.
  """
  not_to_rm = ['á', 'à', 'ạ', 'ô', 'ừ', 'ý', 'ở']
  address_to_keep = ['tốt', 'ngon', 'tệ', 'hài_lòng', 'đồ_ăn', 'giá_cả', 'cảm_giác', 'chất_lượng']
  segmenteds = []

  for line in normalized_text:
    line_segment = [segment for segment in segmenter.word_segment(line)
                    if len(segment) > 1 or segment.lower() in not_to_rm]
    sentences = [sentence
                 for segment in line_segment
                 for sentence in segment.replace(', .', ',').split(' . ')
                 if re.search('[a-zA-Z]', sentence)]

    normalized_line_segment = []
    for sentence in sentences:
      # Drop a leading punctuation token together with the space after it.
      if sentence[0] in '?!.,':
        sentence = sentence[2:]
      if not sentence:
        # Nothing left after stripping the punctuation; previously an IndexError.
        continue

      lowered = sentence.lower()
      if lowered.startswith('địa_chỉ') and not any(w in lowered for w in address_to_keep):
        continue

      # len() guard: a one-character sentence has no [-2] to inspect.
      if len(sentence) < 2 or sentence[-2] != ' ':
        sentence += ' .'

      # Upper-case only the first character. str.replace(first, first.upper())
      # used to upper-case every occurrence of that character in the sentence.
      sentence = sentence[0].upper() + sentence[1:]

      normalized_line_segment.append(sentence)

    segmenteds.append(normalized_line_segment)

  return segmenteds

In [None]:
def preprocess(df, segmenter):
    """Normalize the comments of ``df`` and word-segment them into sentences.

    Returns the output of ``segment_words``: one list of sentences per comment.
    """
    return segment_words(normalize(df), segmenter)

### Prepare data

In [None]:
def get_labels(df):
  """Return the 'Rating' column of ``df`` as a list of Python ints."""
  return [int(rating) for rating in df['Rating'].values]

In [None]:
def to_ids(segmenteds, tokenizer):
    """Encode every comment into one flat list of token ids.

    Each sentence of a comment is encoded separately with
    ``tokenizer.encode`` and the results are concatenated in sentence order.
    Returns one id list per comment.
    """
    ids = []
    for comment in segmenteds:
        comment_ids = []
        for sentence in comment:
            comment_ids.extend(tokenizer.encode(sentence))
        ids.append(comment_ids)
    return ids

In [None]:
def prepare_dataset(segmenteds, tokenizer, labels=None, maxlen=250):
    """Build a ``TensorDataset`` of padded token ids, attention masks and optional labels.

    Args:
        segmenteds: one list of sentences per comment (see ``to_ids``).
        tokenizer: tokenizer exposing ``encode``; its ``pad_token_id`` is used
            for padding when available (0 otherwise).
        labels: optional sequence of integer labels, one per comment.
        maxlen: sequences are padded / truncated (at the end) to this length.

    Returns:
        ``TensorDataset(inputs, masks, labels)`` when ``labels`` is given,
        otherwise ``TensorDataset(inputs, masks)``.
    """
    token_ids = to_ids(segmenteds, tokenizer)

    # Pad with the tokenizer's own pad id. The previous code padded with 0 and
    # derived the mask from `token_id > 0`, which also masks any real token
    # whose id is 0 -- believed to be the '<s>' start token for PhoBERT;
    # TODO confirm against the tokenizer.
    pad_id = getattr(tokenizer, 'pad_token_id', None)
    if pad_id is None:
        pad_id = 0

    padded_ids = pad_sequences(token_ids, maxlen=maxlen, dtype="long", value=pad_id,
                               truncating="post", padding="post")

    # The mask comes from the sequence lengths, never from the id values:
    # 1 for real tokens, 0 for padding positions.
    lengths = np.array([min(len(seq), maxlen) for seq in token_ids], dtype='int64').reshape(-1, 1)
    attention = (np.arange(maxlen).reshape(1, -1) < lengths).astype('int64')

    inputs = torch.tensor(padded_ids)
    masks = torch.tensor(attention)

    # `is not None` instead of truthiness, so array-like labels work as well.
    if labels is not None:
        return TensorDataset(inputs, masks, torch.tensor(labels))

    return TensorDataset(inputs, masks)

### Training functions

In [None]:
def flat_outputs(preds, labels):
    """Return (predicted class per row of ``preds``, flattened ``labels``).

    The predicted class is the argmax over axis 1.
    """
    return np.argmax(preds, axis=1).flatten(), labels.flatten()

In [None]:
def train(train_dataloader, val_dataloader, model, optimizer, device, epochs,
          save_path='/kaggle/working/model.pth'):
    """Fine-tune ``model`` and evaluate it on the validation set after every epoch.

    Args:
        train_dataloader: yields (input_ids, attention_mask, labels) batches.
        val_dataloader: same layout, used for validation only.
        model: sequence-classification model whose output is indexable as
            (loss, logits) when called with ``labels``.
        optimizer: optimizer over the parameters of ``model``.
        device: device the batches are moved to.
        epochs: maximum number of epochs.
        save_path: where the model ``state_dict`` checkpoint is written.

    Returns:
        (train_losses, val_losses, early_stop, epochs): the per-epoch average
        losses, the 1-based epoch at which the early-stop trigger fired (-1 if
        it never did) and the number of epochs actually run.

    Early stopping: the first time the validation loss rises by more than
    0.012 over the previous epoch, ``epochs`` is capped at
    ``min(epochs, epoch_i + 10)`` and checkpointing stops.
    """
    # Local import: the file only does `import tqdm`, which by itself does not
    # guarantee that the `tqdm.notebook` submodule is loaded. tqdm.auto selects
    # the notebook or console progress bar as appropriate.
    from tqdm.auto import tqdm as progress_bar

    best_training_loss = -1
    last_eval_loss = -1
    early_stop = -1

    train_losses = []
    val_losses = []
    epoch_i = 0

    while epoch_i < epochs:
        print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
        print('Training...')

        model.train()
        train_loss = 0
        train_accuracy = 0
        train_f1 = 0
        nb_train_steps = 0

        for batch in progress_bar(train_dataloader):
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)

            model.zero_grad()
            outputs = model(b_input_ids,
              token_type_ids=None,
              attention_mask=b_input_mask,
              labels=b_labels
            )

            loss = outputs[0]
            train_loss += loss.item()

            logits = outputs[1].detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()

            # Metrics are computed per batch and averaged over batches below.
            preds_flat, labels_flat = flat_outputs(logits, label_ids)
            # sklearn metrics take (y_true, y_pred).
            train_accuracy += accuracy_score(labels_flat, preds_flat)
            train_f1 += f1_score(labels_flat, preds_flat)
            nb_train_steps += 1

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

        avg_train_loss = train_loss / len(train_dataloader)
        train_losses.append(avg_train_loss)

        print(" Accuracy: {0:.4f}".format(train_accuracy/nb_train_steps))
        print(" F1 score: {0:.4f}".format(train_f1/nb_train_steps))
        print(" Average training loss: {0:.4f}".format(avg_train_loss))

        print("Running Validation...")
        model.eval()
        eval_loss = 0
        eval_accuracy = 0
        eval_f1 = 0
        nb_eval_steps = 0

        for batch in progress_bar(val_dataloader):
            b_input_ids, b_input_mask, b_labels = tuple(t.to(device) for t in batch)

            with torch.no_grad():
                outputs = model(
                    b_input_ids,
                    token_type_ids=None,
                    attention_mask=b_input_mask,
                    labels=b_labels
                )

            eval_loss += outputs[0].item()

            logits = outputs[1].detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()

            preds_flat, labels_flat = flat_outputs(logits, label_ids)
            eval_accuracy += accuracy_score(labels_flat, preds_flat)
            eval_f1 += f1_score(labels_flat, preds_flat)
            nb_eval_steps += 1

        avg_eval_loss = eval_loss / len(val_dataloader)
        val_losses.append(avg_eval_loss)

        print(" Accuracy: {0:.4f}".format(eval_accuracy/nb_eval_steps))
        print(" F1 score: {0:.4f}".format(eval_f1/nb_eval_steps))
        print(" Average validation loss: {0:.4f}".format(avg_eval_loss))

        # Checkpoint while the early-stop trigger has not fired and the training loss
        # improves. The `model` argument is saved, not the notebook-global `phobert`.
        # NOTE(review): the checkpoint is selected by TRAINING loss, not validation loss.
        if early_stop == -1 and (best_training_loss == -1 or avg_train_loss < best_training_loss):
            best_training_loss = avg_train_loss
            torch.save(model.state_dict(), save_path)

        # First noticeable rise of the validation loss: remember the epoch and cap the run.
        if early_stop == -1 and last_eval_loss != -1 and avg_eval_loss - last_eval_loss > 0.012:
            early_stop = epoch_i + 1
            epochs = min([epochs, epoch_i + 10])

        last_eval_loss = avg_eval_loss
        epoch_i += 1

    print("Training complete!")
    return train_losses, val_losses, early_stop, epochs

# Visualization functions

### Data

In [None]:
def data_distribution(df):
    """Draw a count plot of the 'Rating' column of ``df``."""
    sns.countplot(data=df, x='Rating')

In [None]:
def plot_wordcloud(text, mask=None, max_words=200, max_font_size=100, figure_size=(24.0,16.0), 
                   title = None, title_size=40):
    """Render a word cloud of ``text`` with matplotlib.

    Args:
        text: a string, or an iterable of items (list, array, Series) that are
            converted with ``str`` and joined with spaces.
        mask, max_words, max_font_size: forwarded to ``WordCloud``.
        figure_size: matplotlib figure size.
        title, title_size: figure title and its font size.
    """
    # Join the items explicitly. str() of a large numpy array is a summarized
    # repr ("[a b c ... x y z]"), so the cloud would be built from only a few items.
    if isinstance(text, str):
        corpus = text
    else:
        try:
            corpus = ' '.join(str(item) for item in text)
        except TypeError:
            # Not iterable: fall back to the previous behaviour.
            corpus = str(text)

    wordcloud = WordCloud(background_color='white',
                    max_words = max_words,
                    max_font_size = max_font_size, 
                    random_state = 42,
                    width=800, 
                    height=400,
                    mask = mask)
    wordcloud.generate(corpus)

    plt.figure(figsize=figure_size)
    plt.imshow(wordcloud)
    plt.title(title, fontdict={'size': title_size, 'color': 'black', 'verticalalignment': 'bottom'})
    plt.axis('off')
    plt.tight_layout()

### Training results

In [None]:
def plot_loss(epochs, train_loss, val_loss, stop_early=-1):
    """Plot the training and validation loss per epoch.

    ``epochs`` is the number of epochs run; both loss sequences are plotted
    against 1..epochs. When ``stop_early`` is not -1, a dashed red vertical
    line marks that epoch.
    """
    epoch_axis = range(1, epochs + 1)

    curves = ((train_loss, 'Training Loss'), (val_loss, 'Validation Loss'))
    for losses, legend_label in curves:
        plt.plot(epoch_axis, losses, label=legend_label)

    if stop_early != -1:
        plt.axvline(x=stop_early, color='r', linestyle='dashed')

    # Title and axis labels
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')

    # Legend, then render
    plt.legend(loc='best')
    plt.show()

# Main

In [None]:
# Labelled training data; later cells read its 'Comment', 'image_urls' and 'Rating' columns.
df = pd.read_csv('/kaggle/input/foodyrv/full_train.csv')

### Visualize data

In [None]:
# Count plot of the 'Rating' column, to check class balance.
data_distribution(df)

In [None]:
# Word cloud over the raw (not yet normalized) comments.
# The title is Vietnamese for "Word cloud of the reviews on Foody".
plot_wordcloud(df['Comment'].values, title="Word Cloud của các đánh giá trên Foody")

### Train

In [None]:
# Word segmentation only ('wseg'); the JVM heap is capped with '-Xmx500m'.
# NOTE(review): save_dir is absolute here while the download cell used the relative
# './vncorenlp' -- the two agree only when the working directory is /kaggle/working.
rdrsegmenter = py_vncorenlp.VnCoreNLP(annotators=['wseg'], save_dir='/kaggle/working/vncorenlp', max_heap_size='-Xmx500m')

In [None]:
# Fixed seed so the split, the batch order and the classifier-head initialisation
# are reproducible under Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")

segmenteds = preprocess(df, rdrsegmenter)
labels = get_labels(df)

# Hold out 10% of the comments for validation.
train_segmenteds, val_segmenteds, train_labels, val_labels = train_test_split(
    segmenteds, labels, test_size=0.1, random_state=SEED
)

train_data = prepare_dataset(train_segmenteds, tokenizer, train_labels)
# Reshuffle the training batches every epoch; validation order does not matter.
train_dataloader = DataLoader(train_data, batch_size=32, shuffle=True)

val_data = prepare_dataset(val_segmenteds, tokenizer, val_labels)
val_dataloader = DataLoader(val_data, batch_size=32)

# Use the GPU when there is one instead of failing on a CPU-only machine.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

phobert = AutoModelForSequenceClassification.from_pretrained(
    pretrained_model_name_or_path="vinai/phobert-base",
    # attention_probs_dropout_prob=0.2,
    # hidden_dropout_prob=0.3
)
phobert.to(device)

# No weight decay for biases and LayerNorm parameters; 0.01 for everything else.
param_optimizer = list(phobert.named_parameters())
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']

optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]

optimizer = optim.AdamW(optimizer_grouped_parameters, lr=1e-5)

In [None]:
# `epochs` is an upper bound: train() may lower it once its early-stop trigger fires and
# returns the number of epochs actually run as `real_epochs`.
epochs = 25
train_loss, val_loss, early_stop, real_epochs = train(train_dataloader, val_dataloader, phobert, optimizer, device, epochs)

In [None]:
# Loss curves over the epochs actually run; the dashed line (if any) marks the early-stop epoch.
plot_loss(real_epochs, train_loss, val_loss, early_stop)

# Generate results

In [None]:
# Unlabelled test set: same preprocessing as the training data, but no labels are passed,
# so each batch holds only (input_ids, attention_mask).
test_df = pd.read_csv('/kaggle/input/foodyrv/test.csv')
test_segmenteds = preprocess(test_df, rdrsegmenter)

test_data = prepare_dataset(test_segmenteds, tokenizer)
test_dataloader = DataLoader(test_data, batch_size=32)

In [None]:
# Rebuild the architecture, then restore the weights checkpointed by train().
phobert = AutoModelForSequenceClassification.from_pretrained(
    pretrained_model_name_or_path="vinai/phobert-base",
    # attention_probs_dropout_prob=0.2,
    # hidden_dropout_prob=0.3
)
phobert.to(device)
# map_location lets the checkpoint load on whatever device is in use, not only the one
# it was saved from.
phobert.load_state_dict(torch.load('/kaggle/working/model.pth', map_location=device))

In [None]:
# Inference over the test set: collect the argmax class of every comment, in order.
device = 'cuda'
phobert.eval()
results = []

for batch in tqdm.notebook.tqdm(test_dataloader):
    b_input_ids, b_input_mask = (t.to(device) for t in batch)

    with torch.no_grad():
        # Without labels, the first output element holds the logits.
        outputs = phobert(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)

    batch_preds = np.argmax(outputs[0].detach().cpu().numpy(), axis=1).flatten()
    results.extend(batch_preds.tolist())

In [None]:
# Submission file: one predicted 'Rating' per 'RevId', in the row order of test.csv.
# pandas raises if `results` and `test_df` differ in length.
result_df = test_df[['RevId']].copy()
result_df['Rating'] = results
result_df.to_csv('/kaggle/working/results.csv', index=False)