In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from transformers import *
from transformers.optimization import AdamW
import os
import time
from sklearn.metrics import roc_auc_score
from tqdm import tqdm
from sklearn.model_selection import GroupKFold
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"


In [2]:
!export CUDA_VISIBLE_DEVICES=0,1

In [3]:
bertwwm_tokenizer =BertTokenizer.from_pretrained('./preTrainModel/chinese-roberta-wwm-ext-large/')
# device=torch.device("cuda")
target_dir='./models/'
if not os.path.exists(target_dir):
    os.makedirs(target_dir)

In [4]:
train_left = pd.read_csv('./train/train.query.tsv',sep='\t',header=None)
train_left.columns=['id','query']
train_right = pd.read_csv('./train/train.reply.tsv',sep='\t',header=None)
train_right.columns=['id','id_sub','reply','label']
train_data = train_left.merge(train_right, how='left')
train_data['reply'] = train_data['reply'].fillna('好的')

In [5]:
test_left = pd.read_csv('./test/test.query.tsv',sep='\t',header=None, encoding='gbk')
test_left.columns = ['id','query']
test_right =  pd.read_csv('./test/test.reply.tsv',sep='\t',header=None, encoding='gbk')
test_right.columns=['id','id_sub','reply']
df_test = test_left.merge(test_right, how='left')
df_test['label']=666

In [6]:
train_data 

Unnamed: 0,id,query,id_sub,reply,label
0,0,采荷一小是分校吧,0,杭州市采荷第一小学钱江苑校区，杭州市钱江新城实验学校。,1
1,0,采荷一小是分校吧,1,是的,0
2,0,采荷一小是分校吧,2,这是5楼,0
3,1,毛坯吗？,0,因为公积金贷款贷的少,0
4,1,毛坯吗？,1,是呢,0
...,...,...,...,...,...
21580,5998,您好，我正在看尚林家园的房子,1,有啊,0
21581,5998,您好，我正在看尚林家园的房子,2,我带你看看,0
21582,5999,今天可以安排看房子吗？,0,我约下房东，稍后回你,1
21583,5999,今天可以安排看房子吗？,1,可以看，你几点有时间过来呢？,1


In [7]:
df_test

Unnamed: 0,id,query,id_sub,reply,label
0,0,东区西区？什么时候下证？,0,我在给你发套,666
1,0,东区西区？什么时候下证？,1,您看下我发的这几套,666
2,0,东区西区？什么时候下证？,2,这两套也是金源花园的,666
3,0,东区西区？什么时候下证？,3,价钱低,666
4,0,东区西区？什么时候下证？,4,便宜的房子，一般都是顶楼,666
...,...,...,...,...,...
53752,13998,这套房子有啥问题吗 我看价格不高,3,租约还有两年,666
53753,13998,这套房子有啥问题吗 我看价格不高,4,都有学位的,666
53754,13999,我看看时间吧,0,没有呢,666
53755,13999,我看看时间吧,1,今天新上的,666


In [8]:
class DataPrecessForSentence(Dataset):
    """
    对文本进行处理
    """
    def __init__(self, bert_tokenizer, df, input_categories,max_char_len = 103):
        """
        bert_tokenizer :分词器
        file     :语料文件
        """
        self.bert_tokenizer = bert_tokenizer
        self.max_seq_len = max_char_len
        self.seqs, self.seq_masks, self.seq_segments, self.labels = self.get_input(df,input_categories, self.bert_tokenizer, self.max_seq_len)
        
    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.seqs[idx], self.seq_masks[idx], self.seq_segments[idx], self.labels[idx]
    
    def _convert_to_transformer_inputs(self,question, answer, tokenizer, max_sequence_length):
        def return_id(str1, str2, truncation_strategy, length):

            inputs = tokenizer.encode_plus(str1, str2,
                add_special_tokens=True,
                max_length=length,
                truncation_strategy=truncation_strategy,
                #truncation=True
                )

            input_ids =  inputs["input_ids"]
            input_masks = [1] * len(input_ids)
            input_segments = inputs["token_type_ids"]
            padding_length = length - len(input_ids)
            padding_id = tokenizer.pad_token_id
            input_ids = input_ids + ([padding_id] * padding_length)
            input_masks = input_masks + ([0] * padding_length)
            input_segments = input_segments + ([0] * padding_length)

            return [input_ids, input_masks, input_segments]
    
        input_ids_q, input_masks_q, input_segments_q = return_id(
            question, answer, 'longest_first', max_sequence_length)

        return [input_ids_q, input_masks_q, input_segments_q]
        
    # 获取文本与标签
    def get_input(self, df,columns, tokenizer, max_sequence_length,test=False):

        input_ids_q, input_masks_q, input_segments_q = [], [], []
        input_ids_a, input_masks_a, input_segments_a = [], [], []
        for _, instance in tqdm(df[columns].iterrows()):
            query,reply = instance.query, instance.reply

            ids_q, masks_q, segments_q= \
            self._convert_to_transformer_inputs(query, reply, tokenizer, max_sequence_length)

            input_ids_q.append(ids_q)
            input_masks_q.append(masks_q)
            input_segments_q.append(segments_q)
            
        labels = df['label'].values
        return torch.Tensor(input_ids_q).type(torch.long),torch.Tensor(input_masks_q).type(torch.long),torch.Tensor(input_segments_q).type(torch.long),torch.Tensor(labels).type(torch.long)


In [9]:
class BertwwmModel(nn.Module):
    def __init__(self,dropout=0.5,num_classes=2):
        super(BertwwmModel,self).__init__()
        config = BertConfig.from_pretrained('./preTrainModel/chinese-roberta-wwm-ext-large/') 
        config.output_hidden_states = False 
        self.bertwwm = BertModel.from_pretrained('./preTrainModel/chinese-roberta-wwm-ext-large/', 
                                             config=config)
        self.dropout=dropout
#         self.device=torch.device("cuda")
        self.num_classes = num_classes
        self.linear = nn.Linear(1024*4, num_classes)
        for param in self.bertwwm.parameters():
            param.requires_grad=True
    
    def forward(self, q_id, q_mask, q_atn):
        q_embedding = self.bertwwm(q_id, attention_mask=q_mask, token_type_ids=q_atn)[0]
        q = nn.AdaptiveAvgPool2d((1,q_embedding.shape[-1]))(q_embedding).squeeze(1)
        a = nn.AdaptiveMaxPool2d((1,q_embedding.shape[-1]))(q_embedding).squeeze(1)
        t = q_embedding[:,-1]
        e = q_embedding[:, 0]
        merged = torch.cat([q, a, t, e], dim=1)
        x = nn.Dropout(self.dropout)(merged)
        logits=self.linear(x)
        probabilities =F.softmax(logits, dim=-1)
        return logits,probabilities
    

In [10]:
#gamma 2,alpha 0.25

class FocalLoss(nn.Module):
    def __init__(self, gamma=0, alpha=None, size_average=True):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        if isinstance(alpha,(float,int)): self.alpha = torch.Tensor([alpha,1-alpha])
        if isinstance(alpha,list): self.alpha = torch.Tensor(alpha)
        self.size_average = size_average

    def forward(self, input, target):
        if input.dim()>2:
            input = input.view(input.size(0),input.size(1),-1)  # N,C,H,W => N,C,H*W
            input = input.transpose(1,2)    # N,C,H*W => N,H*W,C
            input = input.contiguous().view(-1,input.size(2))   # N,H*W,C => N*H*W,C
        target = target.view(-1,1)

        logpt = F.log_softmax(input)
        logpt = logpt.gather(1,target)
        logpt = logpt.view(-1)
        pt = Variable(logpt.data.exp())

        if self.alpha is not None:
            if self.alpha.type()!=input.data.type():
                self.alpha = self.alpha.type_as(input.data)
            at = self.alpha.gather(0,target.data.view(-1))
            logpt = logpt * Variable(at)

        loss = -1 * (1-pt)**self.gamma * logpt
        if self.size_average: return loss.mean()
        else: return loss.sum()

In [11]:
def correct_predictions(output_probabilities, targets):

    _, out_classes = output_probabilities.max(dim=1)
    correct = (out_classes == targets).sum()
    return correct.item()


def train(model, dataloader,optimizer, criterion,epoch_number, max_gradient_norm):

    # Switch the model to train mode.
    model.train()
#     device = model.device
    epoch_start = time.time()
    batch_time_avg = 0.0
    running_loss = 0.0
    correct_preds = 0
    tqdm_batch_iterator = tqdm(dataloader)
    for batch_index, (batch_seqs, batch_seq_masks, batch_seq_segments, batch_labels) in enumerate(tqdm_batch_iterator):
        batch_start = time.time()
        # Move input and output data to the GPU if it is used.
        seqs, masks, segments, labels = batch_seqs.cuda(), batch_seq_masks.cuda(), batch_seq_segments.cuda(), batch_labels.cuda()
        optimizer.zero_grad()
        logits, probs  = model(seqs, masks, segments)
        loss = criterion(logits, labels)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_gradient_norm)
        optimizer.step()
        batch_time_avg += time.time() - batch_start
        running_loss += loss.item()
        correct_preds += correct_predictions(probs, labels)
        description = "Avg. batch proc. time: {:.4f}s, loss: {:.4f}"\
                      .format(batch_time_avg/(batch_index+1), running_loss/(batch_index+1))
        tqdm_batch_iterator.set_description(description)
    epoch_time = time.time() - epoch_start
    epoch_loss = running_loss / len(dataloader)
    epoch_accuracy = correct_preds / len(dataloader.dataset)
    return epoch_time, epoch_loss, epoch_accuracy


def validate(model, dataloader, criterion):

    # Switch to evaluate mode.
    model.eval()
#     device = model.device
    epoch_start = time.time()
    running_loss = 0.0
    running_accuracy = 0.0
    all_prob = []
    all_labels = []
    # Deactivate autograd for evaluation.
    with torch.no_grad():
        for (batch_seqs, batch_seq_masks, batch_seq_segments, batch_labels) in dataloader:
            # Move input and output data to the GPU if one is used.
            seqs = batch_seqs.cuda()
            masks = batch_seq_masks.cuda()
            segments = batch_seq_segments.cuda()
            labels = batch_labels.cuda()
            logits, probs = model(seqs, masks, segments)
            loss = criterion(logits, labels)
            running_loss += loss.item()
            running_accuracy += correct_predictions(probs, labels)
            all_prob.extend(probs[:,1].cpu().numpy())
            all_labels.extend(batch_labels)
    epoch_time = time.time() - epoch_start
    epoch_loss = running_loss / len(dataloader)
    epoch_accuracy = running_accuracy / (len(dataloader.dataset))
    return epoch_time, epoch_loss, epoch_accuracy, roc_auc_score(all_labels, all_prob)



def test(model, dataloader):
    # Switch the model to eval mode.
    label_res=[]
    model.eval()
#     device = model.device
    time_start = time.time()
    batch_time = 0.0
    
    # Deactivate autograd for evaluation.
    with torch.no_grad():
        for (batch_seqs, batch_seq_masks, batch_seq_segments, batch_labels) in dataloader:
            batch_start = time.time()
            # Move input and output data to the GPU if one is used.
            seqs, masks, segments, labels = batch_seqs.cuda(), batch_seq_masks.cuda(), batch_seq_segments.cuda(), batch_labels.cuda()
            _, probabilities = model(seqs, masks, segments)
            _, out_classes = probabilities.max(dim=1)
#             print(out_classes)
            label_res.extend(out_classes.cpu().numpy())
            batch_time += time.time() - batch_start

    batch_time /= len(dataloader)
    total_time = time.time() - time_start
#     accuracy /= (len(dataloader.dataset))
    return batch_time, total_time,label_res

In [12]:
input_categories = ['query','reply']
output_categories = 'label'
MAX_SEQUENCE_LENGTH = 100
batch_size=32
print("\t* Loading test data...")
test_data = DataPrecessForSentence(bertwwm_tokenizer,df_test,input_categories,MAX_SEQUENCE_LENGTH)
test_loader = DataLoader(test_data, batch_size=batch_size)

171it [00:00, 1704.49it/s]

	* Loading test data...


53757it [00:31, 1691.55it/s]


In [13]:
#N折交叉验证

gkf = GroupKFold(n_splits=5).split(X=train_data.reply, groups=train_data.id)

valid_preds = [0,0,0,0,0]
test_preds = [0,0,0,0,0]

batch_size=32
epochs=3
lr=2e-05
patience=3
max_grad_norm=10.0

# criterion = nn.CrossEntropyLoss()

criterion = FocalLoss(gamma=0)
oof = np.zeros((len(train_data),1))
for fold, (train_idx, valid_idx) in enumerate(gkf):
    dev_res=[]
    test_res=[]
    best_score = 0.0
    start_epoch = 1
    # Data for loss curves plot
    epochs_count = []
    train_losses = []
    valid_losses = []
    #训练集
    t_data = DataPrecessForSentence(bertwwm_tokenizer, train_data.iloc[train_idx],input_categories,MAX_SEQUENCE_LENGTH)
    train_loader = DataLoader(t_data, shuffle=True, batch_size=batch_size)
    #验证集
    d_data = DataPrecessForSentence(bertwwm_tokenizer, train_data.iloc[valid_idx],input_categories,MAX_SEQUENCE_LENGTH)
    dev_loader = DataLoader(d_data, shuffle=True, batch_size=batch_size)
    #开始训练
    # -------------------- Model definition ------------------- #
    print("\t* Building model:{}...".format(fold))
    model = nn.DataParallel(BertwwmModel(), device_ids=[0, 1])
    model = model.cuda()
#     model = BertwwmModel().to(device)
    # 待优化的参数
    param_optimizer = list(model.named_parameters())
#     print(param_optimizer)
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
            {
                    'params':[p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
                    'weight_decay':0.01
            },
            {
                    'params':[p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
                    'weight_decay':0.0
            }
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=lr)
    
#     optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
    
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", 
                                                               factor=0.85, patience=0)
    
#     print("\n", 20 * "=", "Training Albert model on device: {},fold:{}".format(device,fold), 20 * "=")
    patience_counter = 0
    for epoch in range(start_epoch, epochs + 1):
        epochs_count.append(epoch)
        print("* Training epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy = train(model, train_loader, optimizer, criterion, epoch, max_grad_norm)
        train_losses.append(epoch_loss)
        print("-> Training time: {:.4f}s, loss = {:.4f}, accuracy: {:.4f}%"
              .format(epoch_time, epoch_loss, (epoch_accuracy*100)))
        print("* Validation for epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy , epoch_auc= validate(model, dev_loader,criterion)
        valid_losses.append(epoch_loss)
        print("-> Valid. time: {:.4f}s, loss: {:.4f}, accuracy: {:.4f}%, auc: {:.4f}\n"
              .format(epoch_time, epoch_loss, (epoch_accuracy*100), epoch_auc))
        # Update the optimizer's learning rate with the scheduler.
        scheduler.step(epoch_accuracy)
        # Early stopping on validation accuracy.
        if epoch_accuracy < best_score:
            patience_counter += 1 
        else:
            best_score = epoch_accuracy
            patience_counter = 0
            
            batch_time, total_time, dev_res= test(model, dev_loader)
            oof[valid_idx] =[[i] for i in dev_res]
            valid_preds[fold]=dev_res
            batch_time, total_time, test_res=test(model, test_loader)
            test_preds[fold]=test_res
#             f1,t = search_f1(valid_outputs, valid_preds[-1])
#             print('validation score = ', f1)
        if patience_counter >= patience:
            print("-> Early stopping: patience limit reached, stopping...")
            break


17268it [00:10, 1644.86it/s]
4317it [00:02, 1726.32it/s]


	* Building model:0...


  0%|          | 0/540 [00:00<?, ?it/s]

* Training epoch 1:


Avg. batch proc. time: 1.4899s, loss: 0.3821: 100%|██████████| 540/540 [13:27<00:00,  1.50s/it]


-> Training time: 807.6578s, loss = 0.3821, accuracy: 84.2020%
* Validation for epoch 1:
-> Valid. time: 72.1575s, loss: 0.3081, accuracy: 87.5840%, auc: 0.9220



  0%|          | 0/540 [00:00<?, ?it/s]

* Training epoch 2:


Avg. batch proc. time: 1.7368s, loss: 0.2380: 100%|██████████| 540/540 [15:42<00:00,  1.75s/it]


-> Training time: 942.8159s, loss = 0.2380, accuracy: 90.8154%
* Validation for epoch 2:
-> Valid. time: 74.1526s, loss: 0.2895, accuracy: 88.9970%, auc: 0.9355



  0%|          | 0/540 [00:00<?, ?it/s]

* Training epoch 3:


Avg. batch proc. time: 1.8908s, loss: 0.1553: 100%|██████████| 540/540 [17:07<00:00,  1.90s/it]


-> Training time: 1027.9526s, loss = 0.1553, accuracy: 94.0700%
* Validation for epoch 3:


15it [00:00, 147.22it/s]

-> Valid. time: 92.1297s, loss: 0.3121, accuracy: 88.4410%, auc: 0.9382



17268it [00:44, 385.12it/s] 
4317it [00:10, 417.19it/s] 


	* Building model:1...


  0%|          | 0/540 [00:00<?, ?it/s]

* Training epoch 1:


Avg. batch proc. time: 1.9737s, loss: 0.3740: 100%|██████████| 540/540 [17:54<00:00,  1.99s/it]


-> Training time: 1074.0781s, loss = 0.3740, accuracy: 84.4047%
* Validation for epoch 1:
-> Valid. time: 80.5689s, loss: 0.2999, accuracy: 87.6766%, auc: 0.9333



  0%|          | 0/540 [00:00<?, ?it/s]

* Training epoch 2:


Avg. batch proc. time: 2.0588s, loss: 0.2259: 100%|██████████| 540/540 [18:40<00:00,  2.08s/it]


-> Training time: 1120.5894s, loss = 0.2259, accuracy: 91.0239%
* Validation for epoch 2:
-> Valid. time: 94.1385s, loss: 0.3289, accuracy: 88.0241%, auc: 0.9293



  0%|          | 0/540 [00:00<?, ?it/s]

* Training epoch 3:


Avg. batch proc. time: 2.2317s, loss: 0.1417: 100%|██████████| 540/540 [20:15<00:00,  2.25s/it]


-> Training time: 1215.9491s, loss = 0.1417, accuracy: 94.6664%
* Validation for epoch 3:


12it [00:00, 117.34it/s]

-> Valid. time: 104.0590s, loss: 0.4135, accuracy: 86.9122%, auc: 0.9352



17268it [00:37, 457.38it/s] 
4317it [00:06, 715.97it/s] 


	* Building model:2...


  0%|          | 0/540 [00:00<?, ?it/s]

* Training epoch 1:


Avg. batch proc. time: 2.1340s, loss: 0.4472: 100%|██████████| 540/540 [19:22<00:00,  2.15s/it]


-> Training time: 1162.1346s, loss = 0.4472, accuracy: 81.1269%
* Validation for epoch 1:
-> Valid. time: 91.4487s, loss: 0.3121, accuracy: 86.9585%, auc: 0.9203



  0%|          | 0/540 [00:00<?, ?it/s]

* Training epoch 2:


Avg. batch proc. time: 2.3829s, loss: 0.2870: 100%|██████████| 540/540 [21:39<00:00,  2.41s/it]


-> Training time: 1299.8991s, loss = 0.2870, accuracy: 88.4468%
* Validation for epoch 2:
-> Valid. time: 126.6862s, loss: 0.3035, accuracy: 87.9083%, auc: 0.9262



  0%|          | 0/540 [00:00<?, ?it/s]

* Training epoch 3:


Avg. batch proc. time: 1.9122s, loss: 0.1860: 100%|██████████| 540/540 [17:21<00:00,  1.93s/it]


-> Training time: 1042.0080s, loss = 0.1860, accuracy: 92.6511%
* Validation for epoch 3:


158it [00:00, 1574.10it/s]

-> Valid. time: 94.4370s, loss: 0.3701, accuracy: 86.7732%, auc: 0.9361



17268it [00:38, 445.71it/s] 
4317it [00:10, 393.69it/s] 


	* Building model:3...


  0%|          | 0/540 [00:00<?, ?it/s]

* Training epoch 1:


Avg. batch proc. time: 2.8803s, loss: 0.3604: 100%|██████████| 540/540 [26:11<00:00,  2.91s/it]


-> Training time: 1571.3426s, loss = 0.3604, accuracy: 84.7000%
* Validation for epoch 1:
-> Valid. time: 107.3330s, loss: 0.2811, accuracy: 88.6264%, auc: 0.9326



  0%|          | 0/540 [00:00<?, ?it/s]

* Training epoch 2:


Avg. batch proc. time: 2.0232s, loss: 0.2242: 100%|██████████| 540/540 [18:23<00:00,  2.04s/it]


-> Training time: 1103.2572s, loss = 0.2242, accuracy: 90.9602%
* Validation for epoch 2:
-> Valid. time: 86.4420s, loss: 0.2816, accuracy: 89.4603%, auc: 0.9391



  0%|          | 0/540 [00:00<?, ?it/s]

* Training epoch 3:


Avg. batch proc. time: 1.9157s, loss: 0.1348: 100%|██████████| 540/540 [17:24<00:00,  1.93s/it]


-> Training time: 1044.5034s, loss = 0.1348, accuracy: 94.9560%
* Validation for epoch 3:
-> Valid. time: 87.4617s, loss: 0.3570, accuracy: 89.5298%, auc: 0.9334



17268it [00:20, 823.39it/s] 
4317it [00:05, 817.14it/s] 


	* Building model:4...


  0%|          | 0/540 [00:00<?, ?it/s]

* Training epoch 1:


Avg. batch proc. time: 1.9393s, loss: 0.3886: 100%|██████████| 540/540 [17:36<00:00,  1.96s/it]


-> Training time: 1056.6350s, loss = 0.3886, accuracy: 84.1035%
* Validation for epoch 1:
-> Valid. time: 82.2755s, loss: 0.3132, accuracy: 87.7924%, auc: 0.9241



  0%|          | 0/540 [00:00<?, ?it/s]

* Training epoch 2:


Avg. batch proc. time: 1.9399s, loss: 0.2450: 100%|██████████| 540/540 [17:37<00:00,  1.96s/it]


-> Training time: 1057.7125s, loss = 0.2450, accuracy: 90.6243%
* Validation for epoch 2:
-> Valid. time: 107.8579s, loss: 0.3053, accuracy: 88.6032%, auc: 0.9329



  0%|          | 0/540 [00:00<?, ?it/s]

* Training epoch 3:


Avg. batch proc. time: 1.6354s, loss: 0.1622: 100%|██████████| 540/540 [14:50<00:00,  1.65s/it]


-> Training time: 890.8019s, loss = 0.1622, accuracy: 94.0352%
* Validation for epoch 3:
-> Valid. time: 69.3502s, loss: 0.3362, accuracy: 88.0241%, auc: 0.9253



In [14]:
from sklearn.metrics import f1_score
def search_f1(y_true, y_pred):
    best = 0
    best_t = 0
    for i in range(30,60):
        tres = i / 100
        y_pred_bin =  (y_pred > tres).astype(int)
        score = f1_score(y_true, y_pred_bin)
        if score > best:
            best = score
            best_t = tres
    print('best', best)
    print('thres', best_t)
    return best, best_t

def compute_output_arrays(df, columns):
    return np.asarray(df[columns])

In [15]:
outputs = compute_output_arrays(train_data, output_categories)
best_score, best_t = search_f1(outputs,oof)
sub = np.average(test_preds, axis=0)
sub = sub > best_t

best 0.24595469255663432
thres 0.3


In [16]:
df_test['label'] = sub.astype(int)
df_test[['id','id_sub','label']].to_csv('./submission_file/submission_roberta_wwm_large_focalloss.csv',index=False, header=None,sep='\t')

In [18]:
a = np.average(test_preds, axis=0)
a = a>0.5

In [19]:
df_test['label'] = a.astype(int)
df_test[['id','id_sub','label']].to_csv('./submission_file/submission_roberta_wwm_large_focalloss_0.5.csv',index=False, header=None,sep='\t')