### model

In [None]:
import torch
from transformers import AutoModelForSequenceClassification,Trainer,TrainingArguments
from transformers import AutoTokenizer,DataCollatorWithPadding
from datasets import Dataset
from datasets import load_metric

from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
from scipy.stats import mode

#### 下載 model

兩種載入模型的方法
1. 利用 Config
  Config 可以看到模型相關超參數等細節，搭配 model 使用會初始化這個模型
2. `.from_pretrained` 顧名思義，載入預訓練好的 model

In [None]:
from transformers import DistilBertModel, DistilBertConfig

# Build a DistilBERT configuration object with no arguments (library defaults)
configuration = DistilBertConfig()

# Instantiate a model from the configuration alone; no pretrained checkpoint is loaded in this cell
model = DistilBertModel(configuration)

# Read the configuration back from the model instance
configuration = model.config

print(configuration)
# Drop the reference to this demo model; `model` is assigned again in the training section
del model

DistilBertConfig {
  "activation": "gelu",
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "transformers_version": "4.17.0",
  "vocab_size": 30522
}



In [None]:
from pathlib import Path

# Prefer the locally fine-tuned checkpoint written by an earlier training run.
# On a fresh environment that directory does not exist yet, so fall back to the
# hub model instead of failing; this keeps the notebook runnable top to bottom.
HUB_CHECKPOINT = "distilbert-base-uncased"  # huggingface checkpoint
LOCAL_CHECKPOINT = "./model/checkpoint-1800"  # local checkpoint

checkpoint = LOCAL_CHECKPOINT if Path(LOCAL_CHECKPOINT).is_dir() else HUB_CHECKPOINT
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

#### 資料處理

In [None]:
# Load the training and test tables from Excel workbooks under ./data
# NOTE(review): the test workbook is named 'df_text.xlsx' (the same name is used again in the submission section) — confirm it is not a typo for 'df_test.xlsx'
df_train = pd.read_excel('./data/df_train.xlsx')
df_test = pd.read_excel('./data/df_text.xlsx')

1. 斷詞並轉為 id
各模型會有其使用的斷詞方法，而輸入模型時會將輸入文本進行斷詞後，轉成整數 id 再送入模型，而 Tokenizer 就是在幫我們完成這件事。tokenizer 將輸入文字，根據預定義的字典，將文字轉成整數id 序列

斷詞方法
- GPT-2 BPE
- Bert: WordPiece
- sentencepiece, unigram, ...etc



In [None]:
# Load the tokenizer that belongs to `checkpoint` (the same call is also made right after `checkpoint` is defined)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

`.tokenize()` 進行斷詞

`.convert_tokens_to_ids()` 將斷詞結果轉為 id

`.decode()` 可將 id *序列還原為文字*

In [None]:
# Use the first cleaned text of the training table as a worked example
text = df_train['text_cleaned'].iloc[0]
# Step 1: split the text into tokens
tokens = tokenizer.tokenize(text)
print(f"斷詞結果：{tokens}")

# Step 2: map every token to its integer id
ids = tokenizer.convert_tokens_to_ids(tokens)
print(f"分詞 id 序列: {ids}")

# Step 3: turn the id sequence back into a string
decoded_string = tokenizer.decode(ids)
print(f'decode 還原：{decoded_string}')

斷詞結果：['our', 'deeds', 'are', 'the', 'reason', 'of', 'this', '#', 'earthquake', 'may', 'allah', 'forgive', 'us', 'all']
分詞 id 序列: [2256, 15616, 2024, 1996, 3114, 1997, 2023, 1001, 8372, 2089, 16455, 9641, 2149, 2035]
decode 還原：our deeds are the reason of this # earthquake may allah forgive us all


可以直接調用 tokenizer 進行分詞，return 分詞結果相關資訊，並且加上模型預訓練時本就會加上的 [CLS]、[SEP]

In [None]:
# Calling the tokenizer directly does tokenization and id conversion in one step;
# return_tensors="pt" asks for PyTorch tensors
result = tokenizer(text,
                   return_tensors="pt")
print(f'result: {result}')

# Decode the first (only) row of input_ids to see the text including the special tokens added by the call
print(tokenizer.decode(result['input_ids'][0]))

result: {'input_ids': tensor([[  101,  2256, 15616,  2024,  1996,  3114,  1997,  2023,  1001,  8372,
          2089, 16455,  9641,  2149,  2035,   102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}
[CLS] our deeds are the reason of this # earthquake may allah forgive us all [SEP]


In [None]:
# Truncate to the model's maximum sequence length, i.e. the config's
# max_position_embeddings (512) -- not `dim` (768), which is the hidden size.
def do_tokenizer(data: Dataset, max_length: int = 512):
    """Tokenize the ``text_cleaned`` column of a batch.

    Intended to be passed to ``Dataset.map(..., batched=True)``.

    ``max_length`` is passed explicitly because ``truncation=True`` on its own
    is a no-op when the loaded tokenizer carries no predefined maximum length
    (the tokenizer then warns and defaults to no truncation).

    Padding is deliberately not done here; it is left to the data collator so
    that each batch is padded only to its own longest sequence.

    Args:
        data: Batch of rows exposing a ``"text_cleaned"`` entry.
        max_length: Maximum number of tokens kept per example, special tokens
            included. Defaults to 512, DistilBERT's position-embedding size.

    Returns:
        The tokenizer output for the batch (``input_ids`` and
        ``attention_mask``), which ``map`` adds as new columns.
    """
    return tokenizer(
        data["text_cleaned"],
        truncation=True,
        max_length=max_length,
    )

2. 將資料轉為 Datasets，使用 map 轉換資料

In [None]:
# Hold out 20% of the labelled rows as a dev set (fixed random_state for reproducibility).
# `ds_train` must be built from the train half of the split: building it from the
# full `df_train` would put every dev row into the training data as well and
# make the dev metrics meaningless.
df_train_split, df_dev = train_test_split(df_train, test_size=0.2, random_state=42)
ds_train = Dataset.from_pandas(df_train_split)
ds_dev = Dataset.from_pandas(df_dev)
ds_test = Dataset.from_pandas(df_test)

In [None]:
# Inspect the column names and dtypes of the training Dataset
ds_train.features

{'id': Value(dtype='int64', id=None),
 'keyword': Value(dtype='string', id=None),
 'location': Value(dtype='string', id=None),
 'text': Value(dtype='string', id=None),
 'target': Value(dtype='int64', id=None),
 'text_cleaned': Value(dtype='string', id=None),
 'target_relabeled': Value(dtype='int64', id=None)}

In [None]:
# batched=True hands do_tokenizer whole batches of rows instead of single rows, which speeds up the mapping

ds_train = ds_train.map(do_tokenizer, batched=True)
ds_dev = ds_dev.map(do_tokenizer, batched=True)
ds_test = ds_test.map(do_tokenizer, batched=True)

In [None]:
# Number of token ids in each of the first 10 training examples
[len(input_ids) for input_ids in ds_train[:10]['input_ids']]

[16, 12, 27, 14, 22, 29, 20, 21, 15, 16]

可以發現每個文本的輸入長度不同，但模型會需要輸入資料長度一致，所以需要做 `padding`，有兩種做法。

1. 所有資料皆 padding
在 tokenizer 時，可以設定 `padding=True`，將所有資料依照出現的最大長度進行 padding
  
2. 每一 batch 各自依照該 batch 中最長的序列進行 padding
  也就是動態 padding，利用 data collator。

In [None]:
from transformers import DataCollatorWithPadding

# Dynamic padding: the collator pads when a batch is assembled rather than at tokenization time
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

`Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.1`


看起來是 tokenizer 沒有吃到模型的 Config

3. 其他處理
- 刪除不需要的欄位
- 重新命名欄位
- 將 label 0、1 對應到類別名稱 NOT、YES

In [None]:
# Inspect the features again after the map step to see which columns the tokenizer added
ds_train.features

{'id': Value(dtype='int64', id=None),
 'keyword': Value(dtype='string', id=None),
 'location': Value(dtype='string', id=None),
 'text': Value(dtype='string', id=None),
 'target': Value(dtype='int64', id=None),
 'text_cleaned': Value(dtype='string', id=None),
 'target_relabeled': Value(dtype='int64', id=None),
 'input_ids': Sequence(feature=Value(dtype='int32', id=None), length=-1, id=None),
 'attention_mask': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None)}

In [None]:
# Raw columns to drop from every split
col2remove = ['id', 'keyword', 'location', 'text']

# remove_columns returns a new Dataset, so each split is rebound to the trimmed version
ds_train = ds_train.remove_columns(col2remove)
ds_dev = ds_dev.remove_columns(col2remove)
ds_test = ds_test.remove_columns(col2remove)

In [None]:
# Expose the relabeled target under the column name "label" for the train and dev splits
# (ds_test is not renamed here)
ds_train = ds_train.rename_column("target_relabeled", "label")
ds_dev = ds_dev.rename_column("target_relabeled", "label")

In [None]:
# Class id -> human-readable name, and the inverse lookup derived from it
id2label = dict(enumerate(("NOT", "YES")))
label2id = dict(zip(id2label.values(), id2label.keys()))

#### Training

- 載入模型
- 設定超參數
- 定義指標

In [None]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

# Load `checkpoint` as a sequence-classification model with two labels and
# pass the id <-> name mappings along so they are stored with the model config
model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=2,
    id2label=id2label,
    label2id=label2id
)

loading configuration file ./model/checkpoint-1800/config.json
Model config DistilBertConfig {
  "_name_or_path": "./model/checkpoint-1800",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "NOT",
    "1": "YES"
  },
  "initializer_range": 0.02,
  "label2id": {
    "NOT": 0,
    "YES": 1
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "problem_type": "single_label_classification",
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "torch_dtype": "float32",
  "transformers_version": "4.17.0",
  "vocab_size": 30522
}

loading weights file ./model/checkpoint-1800/pytorch_model.bin
All model checkpoint weights were used when initializing DistilBertForSequenceClassification.

All the weights of DistilBertForS

print model 可以看到模型架構，可知
- 此模型的最大輸入長度為 512 個 token（position_embeddings 為 `Embedding(512, 768)`），每個 token 透過 Embedding block 轉成 768 維向量
- 6層 transformer encode 層
- 最後接兩層 FFN
- 分類任務，所以最後一層換成分類頭

In [None]:
# Print the module tree of the loaded classifier
print(model)

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
 

In [None]:
# Hyperparameters and checkpointing policy for the Trainer
training_args = TrainingArguments(
    # checkpoints are written below this directory (checkpoint-<step>)
    output_dir = './model/',
    learning_rate=2e-5,
    seed=11,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=4,
    weight_decay=0.01,
    # evaluate and save on the same 600-step cadence
    eval_steps=600,
    save_steps=600,
    evaluation_strategy="steps",
    save_strategy="steps",
    load_best_model_at_end=True,
    # Uploading to the Hugging Face Hub is currently disabled:
    # push_to_hub=True,
    # hub_model_id='BOCHENG/tweet'
)

In [None]:
def compute_metrics(eval_pred):
    """Compute accuracy and binary F1 from a ``(logits, labels)`` pair.

    The metrics are computed directly with NumPy, so nothing has to be loaded
    or downloaded on each evaluation step and the function does not depend on
    the deprecated ``datasets.load_metric`` helper.

    Args:
        eval_pred: Two-item iterable ``(logits, labels)``. ``logits`` holds one
            score per class on its last axis; ``labels`` holds the integer
            class ids. Class ``1`` is treated as the positive class for F1.

    Returns:
        dict: ``{'acc': float, 'f1': float}``. ``f1`` is ``0.0`` when there are
        no true positives, false positives or false negatives; ``acc`` is
        ``0.0`` for an empty input.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    labels = np.asarray(labels)

    # Guard the empty case so np.mean does not return NaN
    acc = float(np.mean(predictions == labels)) if labels.size else 0.0

    pred_pos = predictions == 1
    true_pos_mask = labels == 1
    true_pos = int(np.sum(pred_pos & true_pos_mask))
    false_pos = int(np.sum(pred_pos & ~true_pos_mask))
    false_neg = int(np.sum(~pred_pos & true_pos_mask))

    # F1 = 2TP / (2TP + FP + FN); defined as 0.0 when the denominator is zero
    denom = 2 * true_pos + false_pos + false_neg
    f1 = 2 * true_pos / denom if denom else 0.0
    return {'acc': acc, 'f1': f1}

In [None]:
# Wire the model, hyperparameters, tokenized splits, padding collator and metric function into one Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=ds_train,
    eval_dataset=ds_dev,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

In [None]:
# Report, for every named parameter, whether it has requires_grad set
for param_name, tensor in model.named_parameters():
    state = 'Training' if tensor.requires_grad else 'Frozen'
    print(f"{param_name} - {state}")

distilbert.embeddings.word_embeddings.weight - Training
distilbert.embeddings.position_embeddings.weight - Training
distilbert.embeddings.LayerNorm.weight - Training
distilbert.embeddings.LayerNorm.bias - Training
distilbert.transformer.layer.0.attention.q_lin.weight - Training
distilbert.transformer.layer.0.attention.q_lin.bias - Training
distilbert.transformer.layer.0.attention.k_lin.weight - Training
distilbert.transformer.layer.0.attention.k_lin.bias - Training
distilbert.transformer.layer.0.attention.v_lin.weight - Training
distilbert.transformer.layer.0.attention.v_lin.bias - Training
distilbert.transformer.layer.0.attention.out_lin.weight - Training
distilbert.transformer.layer.0.attention.out_lin.bias - Training
distilbert.transformer.layer.0.sa_layer_norm.weight - Training
distilbert.transformer.layer.0.sa_layer_norm.bias - Training
distilbert.transformer.layer.0.ffn.lin1.weight - Training
distilbert.transformer.layer.0.ffn.lin1.bias - Training
distilbert.transformer.layer.0.f

In [None]:
# Run fine-tuning with the settings in training_args
trainer.train()

For more details, please read https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Cloning https://huggingface.co/BOCHENG/tweet into local empty directory.


Download file model.safetensors:   0%|          | 16.6k/255M [00:00<?, ?B/s]

Clean file model.safetensors:   0%|          | 1.00k/255M [00:00<?, ?B/s]

The following columns in the training set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text_cleaned, target. If text_cleaned, target are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 7613
  Num Epochs = 4
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 1904


Step,Training Loss,Validation Loss


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text_cleaned, __index_level_0__, target. If text_cleaned, __index_level_0__, target are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1523
  Batch size = 16
  load_acc = load_metric('accuracy')
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


Downloading builder script:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


Downloading builder script:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

Step,Training Loss,Validation Loss,Acc,F1
600,0.4204,0.281386,0.90151,0.878049
1200,0.3101,0.202737,0.933684,0.919135
1800,0.2377,0.163754,0.946159,0.933764


Saving model checkpoint to ./model/checkpoint-600
Configuration saved in ./model/checkpoint-600/config.json
Model weights saved in ./model/checkpoint-600/pytorch_model.bin
tokenizer config file saved in ./model/checkpoint-600/tokenizer_config.json
Special tokens file saved in ./model/checkpoint-600/special_tokens_map.json
tokenizer config file saved in ./model/tokenizer_config.json
Special tokens file saved in ./model/special_tokens_map.json
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text_cleaned, __index_level_0__, target. If text_cleaned, __index_level_0__, target are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1523
  Batch size = 16
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to l

TrainOutput(global_step=1904, training_loss=0.2952884305425051, metrics={'train_runtime': 134.1058, 'train_samples_per_second': 227.074, 'train_steps_per_second': 14.198, 'total_flos': 311623345928928.0, 'train_loss': 0.2952884305425051, 'epoch': 4.0})

In [None]:
# NOTE(review): second call to train() on the same Trainer object — it trains the already fine-tuned model again; confirm this repeat is intentional
trainer.train()

The following columns in the training set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text_cleaned, target. If text_cleaned, target are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 7613
  Num Epochs = 4
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 1904


Step,Training Loss,Validation Loss,Acc,F1
600,0.1846,0.163754,0.946159,0.933764
1200,0.1719,0.163754,0.946159,0.933764
1800,0.1822,0.163754,0.946159,0.933764


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text_cleaned, __index_level_0__, target. If text_cleaned, __index_level_0__, target are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1523
  Batch size = 16
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.
Saving model checkpoint to ./model/checkpoint-600
Configuration saved in ./model/checkpoint-600/config.json
Model weights saved in ./model/checkpoint-600/pytorch_model.bin
tokenizer conf

TrainOutput(global_step=1904, training_loss=0.17948506860172048, metrics={'train_runtime': 134.5529, 'train_samples_per_second': 226.32, 'train_steps_per_second': 14.151, 'total_flos': 310704094625364.0, 'train_loss': 0.17948506860172048, 'epoch': 4.0})

#### Bert with Additional Feature

In [None]:
import torch
from torch import nn
from transformers import BertForSequenceClassification, BertModel

class BertForSequenceClassificationWithFeatures(BertForSequenceClassification):
    """BERT sequence classifier that also consumes a dense feature vector.

    The pooled BERT output is concatenated with a linearly projected copy of
    ``feature`` and the result is fed to a single linear classification layer.

    Args:
        config: BERT configuration; ``hidden_size``, ``hidden_dropout_prob``
            and ``num_labels`` are read from it.
        feature_dim: Width of the additional feature vector given per example.
    """

    def __init__(self, config, feature_dim):
        # The parent constructor already builds `self.bert`; constructing a second
        # BertModel here would only allocate and initialise the encoder twice.
        super().__init__(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        # Linear layer that processes the additional features
        self.feature_layer = nn.Linear(feature_dim, feature_dim)
        # Classifier widened to take the text representation plus the features
        self.classifier = nn.Linear(config.hidden_size + feature_dim, config.num_labels)
        # The parent ran its weight initialisation before the two layers above
        # existed; run it again so they are initialised the same way.
        self.post_init()

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None, inputs_embeds=None, labels=None, feature=None):
        """Classify a batch from its token ids and additional features.

        Args:
            input_ids, attention_mask, token_type_ids, position_ids, head_mask,
            inputs_embeds: Forwarded unchanged to the BERT encoder.
            labels: Optional class ids; when given, a cross-entropy loss is
                computed and returned first.
            feature: Additional features, one row per example with
                ``feature_dim`` values. Required despite the ``None`` default.

        Returns:
            tuple: ``(loss, logits, *extras)`` when ``labels`` is given,
            otherwise ``(logits, *extras)``, where ``extras`` is whatever the
            encoder returned after its first two outputs.

        Raises:
            TypeError: If ``feature`` is not provided.
        """
        if feature is None:
            raise TypeError("forward() requires the `feature` tensor; it cannot be None")

        # Encode the text input
        encoder_outputs = self.bert(input_ids,
                                    attention_mask=attention_mask,
                                    token_type_ids=token_type_ids,
                                    position_ids=position_ids,
                                    head_mask=head_mask,
                                    inputs_embeds=inputs_embeds)

        # Index 1 of the encoder output is the pooled representation
        pooled_output = self.dropout(encoder_outputs[1])

        # Align dtype/device with the text representation so that integer or
        # float64 feature columns do not break the linear layer or torch.cat
        feature = feature.to(device=pooled_output.device, dtype=pooled_output.dtype)
        feature_output = self.dropout(self.feature_layer(feature))

        # Join the text representation and the additional features per example
        concat_output = torch.cat((pooled_output, feature_output), dim=1)

        # Apply the classification layer
        logits = self.classifier(concat_output)

        # Keep any further encoder outputs (e.g. hidden states / attentions) after the logits
        outputs = (logits,) + tuple(encoder_outputs[2:])
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            outputs = (loss,) + outputs

        return outputs

In [None]:
from transformers import BertConfig

# Width of the additional feature vector passed to the model per example.
# NOTE(review): placeholder value so that the cell runs — set it to the real
# number of additional features before training.
your_feature_dim = 1

# Only the configuration is fetched here; the model below is built from that
# configuration and therefore does not load pretrained weights.
config = BertConfig.from_pretrained('bert-base-uncased')
# NOTE(review): this rebinds the global `model`; the Trainer created earlier keeps
# its own reference to the DistilBERT classifier.
model = BertForSequenceClassificationWithFeatures(config, feature_dim=your_feature_dim)

#### evaluation

dev

In [None]:
# Run inference on the dev split
pred = trainer.predict(ds_dev)
# Rebind `pred` to the argmax over the last axis of the raw predictions, i.e. the predicted class id per row
pred = np.argmax(pred.predictions, axis=-1)

The following columns in the test set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text_cleaned, __index_level_0__, target. If text_cleaned, __index_level_0__, target are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 1523
  Batch size = 16


  load_acc = load_metric('accuracy')
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


Downloading builder script:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


Downloading builder script:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

In [None]:
# Load the accuracy and F1 metric objects
load_acc = load_metric('accuracy')
load_f1 = load_metric('f1')

# Score the dev predictions against the dev labels
acc = load_acc.compute(predictions = pred,references = ds_dev['label'])['accuracy']
f1 = load_f1.compute(predictions = pred, references = ds_dev['label'])['f1']

In [None]:
# Show the dev-set scores computed in the previous cell
print(f'acc: {acc}')
print(f'f1: {f1}')

acc: 0.9461588969139856
f1: 0.9337641357027464


In [None]:
# Inspect the columns that the dev Dataset ended up with
ds_dev.features

{'target': Value(dtype='int64', id=None),
 'text_cleaned': Value(dtype='string', id=None),
 'label': Value(dtype='int64', id=None),
 '__index_level_0__': Value(dtype='int64', id=None),
 'input_ids': Sequence(feature=Value(dtype='int32', id=None), length=-1, id=None),
 'attention_mask': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None)}

error analysis

In [None]:
# Attach the dev predictions as a new column. `assign` returns a new frame, so
# nothing is written into the frame handed back by train_test_split (which can
# trigger pandas' SettingWithCopyWarning), and re-running the cell is harmless.
df_dev = df_dev.assign(pred=pred)

In [None]:
# Boolean mask of dev rows whose prediction disagrees with the relabeled target,
# then the subset of rows selected by that mask
idx = df_dev['pred'].ne(df_dev['target_relabeled'])
error_pred = df_dev.loc[idx]

In [None]:
# Count the misclassified rows per true class
error_pred.target_relabeled.value_counts()

target_relabeled
1    71
0    11
Name: count, dtype: int64

In [None]:
# Display the misclassified rows for manual inspection
error_pred

Unnamed: 0,id,keyword,location,text,target,text_cleaned,target_relabeled,pred
2644,3796,destruction,,So you have a new weapon that can cause un-ima...,1,So you have a new weapon that can cause un - i...,1,0
1765,2538,collision,,my favorite lady came to our volunteer meeting...,1,my favorite lady came to our volunteer meeting...,1,0
5324,7602,pandemonium,Dallas Fort-Worth,Pandemonium In Aba As Woman Delivers Baby With...,1,Pandemonium In Aba As Woman Delivers Baby With...,1,0
6292,8989,storm,,TodayÛªs storm will pass; let tomorrowÛªs li...,1,Todays storm will pass ; let tomorrows light ...,1,0
251,356,annihilation,CA physically- Boston Strong?,U.S National Park Services Tonto National Fore...,1,U . S National Park Services Tonto National Fo...,1,0
...,...,...,...,...,...,...,...,...
3208,4602,emergency%20services,"USA, Alabama",Sustainability Task Force Presents Levy to Fun...,1,Sustainability Task Force Presents Levy to Fun...,1,0
2692,3861,detonation,,Ignition Knock (Detonation) Sensor-Senso Stand...,1,Ignition Knock ( Detonation ) Sensor - Senso...,1,0
6825,9775,trapped,????s ?? ????Ìø????Ì¡a,(?EudryLantiqua?) Hollywood Movie About Trappe...,1,( ? Eudry Lantiqua ? ) Hollywood Movie Abo...,1,0
6221,8880,smoke,Ktx,I get to smoke my shit in peace,1,I get to smoke my shit in peace,1,0


submission

In [None]:
# Reload the test table from disk (the same file that was read in the data-loading section)
df_test = pd.read_excel('./data/df_text.xlsx')

In [None]:
# Rebuild the test Dataset from the reloaded table and tokenize it in batches
ds_test = Dataset.from_pandas(df_test)
ds_test = ds_test.map(do_tokenizer, batched=True)

Map:   0%|          | 0/3263 [00:00<?, ? examples/s]

In [None]:
# Run inference on the test split; `pred` now holds the prediction output for the test set, replacing the dev predictions
pred = trainer.predict(ds_test)

The following columns in the test set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text_cleaned, id, location, text, keyword. If text_cleaned, id, location, text, keyword are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 3263
  Batch size = 16


In [None]:
# Display the raw prediction output
pred

PredictionOutput(predictions=array([[-2.0890646,  1.6790618],
       [-2.9456859,  2.4122443],
       [-3.0292892,  2.5756662],
       ...,
       [-3.1238441,  2.606762 ],
       [-2.2980227,  1.9896573],
       [-2.9722788,  2.4812737]], dtype=float32), label_ids=None, metrics={'test_runtime': 6.8471, 'test_samples_per_second': 476.552, 'test_steps_per_second': 29.794})

In [None]:
# Predicted class id per test row: argmax over the last axis of the raw predictions
preds = np.argmax(pred.predictions, axis=-1)

In [None]:
# Check that there is one prediction per test row
preds.shape

(3263,)

In [None]:
# Two-column submission table: the test ids and the predicted class for each row
submission = pd.DataFrame(
    {'id': df_test['id'],
     'target': preds}
)

In [None]:
# Write the submission file without the DataFrame index column
submission.to_csv('./submission_0414.csv', index=False)