In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from transformers import DebertaTokenizer, DebertaModel, DebertaForSequenceClassification
from torch.utils.data import Dataset, DataLoader

# Early load of the competition test files.
# NOTE(review): in the visible notebook `prompt_test` is never referenced again, and the
# submission section re-reads both files (as `prompts_test` / `summaries_test`), so this
# load looks redundant — confirm before removing.
summaries_test = pd.read_csv('/kaggle/input/commonlit-evaluate-student-summaries/summaries_test.csv')
prompt_test = pd.read_csv('/kaggle/input/commonlit-evaluate-student-summaries/prompts_test.csv')



# Load pretrained DeBERTa model

In [2]:
class CustomDataset(Dataset):
    """Dataset that tokenizes the ``text`` column of a DataFrame one row at a time.

    Args:
        data: DataFrame with a ``text`` column; in ``"train"`` mode it must also
            provide the ``content`` and ``wording`` columns.
        tokenizer: object exposing ``encode_plus`` (a Hugging Face tokenizer here).
        max_length: forwarded to the tokenizer as ``max_length``; sequences are
            padded up to this length.
        mode: ``"train"`` to also return the two regression labels, ``"test"`` to
            return only the model inputs.

    Raises:
        ValueError: if ``mode`` is neither ``"train"`` nor ``"test"``. Previously an
            unknown mode made ``__getitem__`` silently return ``None``.
    """

    _VALID_MODES = ("train", "test")

    def __init__(self, data, tokenizer, max_length,mode="train"):
        if mode not in self._VALID_MODES:
            raise ValueError(f"mode must be one of {self._VALID_MODES}, got {mode!r}")
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.mode = mode

    def __len__(self):
        """Number of rows in the wrapped DataFrame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Tokenize row ``idx`` (positional) and return a dict of 1-D tensors."""
        item = self.data.iloc[idx]
        # NOTE(review): no `truncation` argument is passed, so whether texts longer than
        # `max_length` get cut is left to the tokenizer's defaults — confirm this is intended.
        inputs = self.tokenizer.encode_plus(
            item['text'],
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            return_tensors='pt'
        )
        # return_tensors='pt' yields a leading batch dimension; flatten() drops it.
        sample = {
            'input_ids': inputs['input_ids'].flatten(),
            'attention_mask': inputs['attention_mask'].flatten(),
        }
        if self.mode == "train":
            sample['content_label'] = torch.tensor(item['content'], dtype=torch.float32)
            sample['wording_label'] = torch.tensor(item['wording'], dtype=torch.float32)
        elif self.mode != "test":
            # Guards against `mode` being reassigned after construction.
            raise ValueError(f"mode must be one of {self._VALID_MODES}, got {self.mode!r}")
        return sample
      


# Tokenizer loaded from a local (offline) copy of the deberta-base files.
tokenizer = DebertaTokenizer.from_pretrained('/kaggle/input/hugging-face-models-safe-tensors/deberta-base')

# Passed to CustomDataset, which forwards it to the tokenizer as the padding length.
max_length = 256
# train_dataset = CustomDataset(train_data, tokenizer, max_length,mode="train")
# val_dataset = CustomDataset(val_data, tokenizer, max_length, mode="train")

def collate_fn(batch):
    """Right-pad a list of dataset items to a common length and stack them.

    Args:
        batch: non-empty list of dicts with 1-D ``input_ids`` and ``attention_mask``
            tensors and, optionally, scalar ``content_label`` / ``wording_label``
            tensors (present for ``CustomDataset`` items in ``"train"`` mode).

    Returns:
        Dict with ``input_ids`` and ``attention_mask`` as ``(len(batch), longest)``
        long tensors padded with zeros, plus the stacked label tensors when the
        items carry them. Items without labels (``"test"`` mode) used to raise
        ``KeyError``; they are now collated without the label entries.
    """
    input_ids = [item['input_ids'] for item in batch]
    attention_mask = [item['attention_mask'] for item in batch]

    longest = max(len(ids) for ids in input_ids)
    padded_input_ids = torch.zeros((len(batch), longest), dtype=torch.long)
    padded_attention_mask = torch.zeros((len(batch), longest), dtype=torch.long)

    # Copy each sequence into the left part of its row; the remainder stays zero.
    for row, (ids, mask) in enumerate(zip(input_ids, attention_mask)):
        padded_input_ids[row, :len(ids)] = ids
        padded_attention_mask[row, :len(mask)] = mask

    collated = {
        'input_ids': padded_input_ids,
        'attention_mask': padded_attention_mask,
    }
    # Labels are decided by the first item; a batch mixing labelled and unlabelled
    # items still fails with KeyError, as before.
    for label_key in ('content_label', 'wording_label'):
        if label_key in batch[0]:
            collated[label_key] = torch.stack([item[label_key] for item in batch])
    return collated

# train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn = collate_fn)
# val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)

In [3]:
# DeBERTa
class DeBERTaForRegression(nn.Module):
    """Wrap a DeBERTa sequence-classification model and expose its two raw logits.

    The logits are returned as-is and unpacked by the inference cells as the
    (content, wording) predictions.
    """

    def __init__(self):
        super().__init__()
        # Two output units: one per regression target.
        self.deberta = DebertaForSequenceClassification.from_pretrained(
            '/kaggle/input/hugging-face-models-safe-tensors/deberta-base',
            num_labels=2,
        )

    def forward(self, input_ids, attention_mask):
        """Return the classification head's logits for the given inputs."""
        return self.deberta(input_ids=input_ids, attention_mask=attention_mask).logits

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# model = DeBERTaForRegression().to(device)
# optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
# criterion = nn.MSELoss()

In [4]:
# SECURITY NOTE: torch.load unpickles the file, which can execute arbitrary code —
# only load checkpoints from a trusted source.
# NOTE(review): the result is later used as a module (`.eval()` and
# `deberta_model(input_ids=..., attention_mask=...)`), so the file presumably stores a
# whole pickled model rather than a state_dict — confirm.
deberta_model = torch.load('/kaggle/input/deberta-commonlit-pretrained-model/deberta_trained1',map_location=device)

# Preprocessing

In [5]:
!pip install "/kaggle/input/pyspellchecker/pyspellchecker-0.7.2-py3-none-any.whl"

Processing /kaggle/input/pyspellchecker/pyspellchecker-0.7.2-py3-none-any.whl
Installing collected packages: pyspellchecker
Successfully installed pyspellchecker-0.7.2


In [6]:
import nltk
from nltk import ngrams
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from collections import Counter
from spellchecker import SpellChecker
import re
import string
from tqdm import tqdm
import spacy
from sklearn.preprocessing import StandardScaler
from textblob import TextBlob



# Register tqdm with pandas so that `progress_apply` (used by the Preprocess class) is available.
tqdm.pandas()

# nltk.download('stopwords')
# nltk.download('punkt')

In [7]:
# Main idea of the preprocessor is to capture the relation between prompt and summary as much as possible,
# since the transformer model in this notebook (DeBERTa) is fed only the summary text and never sees the prompt.
class Preprocess:
    """Build hand-crafted features describing a summary and its relation to the prompt.

    The feature methods read the ``text`` (student summary) and ``prompt_text``
    columns of the wrapped DataFrame. Call :meth:`run` to compute every enabled
    feature. Row-level methods take one row (anything indexable by column name)
    and return the feature value for that row.
    """

    def __init__(self, df):
        """Keep a reference to ``df`` and create the spell checker.

        ``df`` is not copied: :meth:`run` adds feature columns to it in place.
        """
        self.spellchecker = SpellChecker()
#         self.speller = Speller(lang='en')
        self.dataframe = df
        self.index = df.index.values

# ================================================================
# SUPPORT FUNCTIONS

    def remove_punct(self,text):
        """Strip every character that is neither a word character nor whitespace."""
        return re.sub(r'[^\w\s]', '', text)

    def _english_stop_words(self):
        """Return the NLTK English stop-word set, built once per instance.

        The set is cached lazily so that it is not rebuilt for every row.
        """
        cached = getattr(self, '_stop_words', None)
        if cached is None:
            cached = set(stopwords.words('english'))
            self._stop_words = cached
        return cached

    def remove_stopwords(self,text):
        """Tokenize ``text``, drop English stop words (case-insensitive) and re-join with spaces."""
        stop_words = self._english_stop_words()
        return ' '.join(word for word in word_tokenize(text) if word.lower() not in stop_words)

    @staticmethod
    def _overlap_score(prompt_items, summary_items):
        """Frequency-weighted overlap between two item lists.

        For every item present in both lists, adds
        ``prompt_count * summary_count / total_prompt_items``. Returns 0 when the
        lists share nothing (which includes either list being empty).
        """
        prompt_freq = Counter(prompt_items)
        summary_freq = Counter(summary_items)
        # Loop-invariant denominator; only used when there is at least one common item,
        # in which case it is non-zero.
        prompt_total = sum(prompt_freq.values())

        score = 0
        for item in set(prompt_items).intersection(set(summary_items)):
            score += prompt_freq[item] * summary_freq[item] / prompt_total
        return score

# ================================================================


# ================================================================
# TRANSFORMATION FUNCTIONS

    # count of words in summary
    def summary_text_count(self,row):
        """Number of NLTK tokens in the summary."""
        return len(word_tokenize(row['text']))

    # word length of summary/word length of prompt
    def summary_prompt_length_ratio(self,row):
        """Summary token count divided by prompt token count (0 for an empty prompt).

        Reads ``row['summary_text_count']``, so :meth:`summary_text_count` must have
        been applied first.
        """
        prompt_length = len(word_tokenize(row['prompt_text']))
        if prompt_length == 0:
            return 0
        return row['summary_text_count'] / prompt_length

    def autocorrection(self,row):
        """Placeholder: not implemented, always returns ``None``."""
        return None

    # unigram overlap (1 gram) between summary and prompt
    def unigram_overlap(self,row):
        """Overlap score of single tokens after removing punctuation and stop words.

        If stop-word removal fails, falls back to punctuation removal only and
        reports the problem.
        """
        try:
            prompt_tokens = word_tokenize(self.remove_stopwords(self.remove_punct(row['prompt_text'])))
            summary_tokens = word_tokenize(self.remove_stopwords(self.remove_punct(row['text'])))
        except Exception as exc:
            prompt_tokens = word_tokenize(self.remove_punct(row['prompt_text']))
            summary_tokens = word_tokenize(self.remove_punct(row['text']))
            print(f'Some problem with removing stopwords: {exc!r}')

        return self._overlap_score(prompt_tokens, summary_tokens)

    # bigram overlap (2 gram) between summary and prompt
    def bigram_overlap(self,row):
        """Overlap score of token bigrams (raw tokens, punctuation and stop words kept)."""
        prompt_ngrams = list(ngrams(word_tokenize(row['prompt_text']), 2))
        summary_ngrams = list(ngrams(word_tokenize(row['text']), 2))
        return self._overlap_score(prompt_ngrams, summary_ngrams)

    # trigram overlap (3 gram) between summary and prompt
    def trigram_overlap(self,row):
        """Overlap score of token trigrams (raw tokens, punctuation and stop words kept)."""
        prompt_ngrams = list(ngrams(word_tokenize(row['prompt_text']), 3))
        summary_ngrams = list(ngrams(word_tokenize(row['text']), 3))
        return self._overlap_score(prompt_ngrams, summary_ngrams)


    # how many unique vocab student used relative to total vocab
    def vocab_uniqueness(self, row):
        """Distinct tokens divided by total tokens of the punctuation-free summary.

        Stop words are not removed. Returns 0.0 for a summary without tokens
        (this used to raise ``ZeroDivisionError``).
        """
        summary_tokens = word_tokenize(self.remove_punct(row['text']))
        if not summary_tokens:
            return 0.0
        return len(set(summary_tokens)) / len(summary_tokens)

    # return score of NER co occurence between summary and prompt
    def NER_co_occurrence(self, row):
        """Overlap score of named-entity strings found in summary and prompt.

        NOTE(review): relies on a module-level ``nlp`` pipeline that is not defined in
        the visible notebook; the corresponding step in :meth:`run` is disabled.
        """
        # Apply NER to the texts
        summary = nlp(row['text'])
        prompt = nlp(row['prompt_text'])

        # Extract the named entities from each text
        summary_entities = [ent.text for ent in summary.ents]
        prompt_entities = [ent.text for ent in prompt.ents]

        return self._overlap_score(prompt_entities, summary_entities)


    # count the number of misspelled word
    def spelling_error(self, row):
        """Number of entries ``SpellChecker.unknown`` returns for the space-split summary.

        The text is split on single spaces only, so punctuation stays attached to
        the words that are checked.
        """
        wordlist = row['text'].split(' ')
        return len(list(self.spellchecker.unknown(wordlist)))

    # number of quotes in summary
    def quote_count(self, row):
        """Number of double-quoted spans in the summary that occur verbatim in the prompt."""
        quotes_from_summary = re.findall(r'"([^"]*)"', row['text'])
        prompt_text = row['prompt_text']
        return sum(1 for quote in quotes_from_summary if quote in prompt_text)

    def polarity_subjectivity(self, row):
        """Return the two TextBlob sentiment values of the summary as a two-element Series."""
        textblob_result = TextBlob(row['text'])
        return pd.Series([textblob_result.sentiment[0], textblob_result.sentiment[1]])

    # create a part of speech counting dataframe
    def pos_tagging(self, df):
        """Return ``df`` with one count column per selected part-of-speech tag appended."""
        def count_pos_tags(text):
            tokens = word_tokenize(text)
            tokens_without_punctuations = [word for word in tokens if word not in string.punctuation]
            pos_tags = pos_tag(tokens_without_punctuations)
            pos_counts = nltk.FreqDist(tag for word, tag in pos_tags)
            return pos_counts

        pos_counts_df = df['text'].progress_apply(count_pos_tags).apply(pd.Series)
        pos_counts_df = pos_counts_df.fillna(0)

        # include only these tags
        pos_included_tags = ['DT', 'JJ', 'NN', 'VBD', 'VB', 'WRB', 'NNS', 'TO', 'CD', 'PRP', 'IN',
               'VBP', 'WDT', 'VBZ', 'CC', 'VBG', 'JJR', 'RB', 'NNP', 'JJS', 'VBN',
               'EX', 'MD', 'PRP$', 'POS', 'WP', 'RBR', ':']

        # limit columns to the ones in the list; tags from the list that never occurred
        # in the data get a column filled with zeros
        pos_counts_df = pos_counts_df.filter(items=pos_included_tags).reindex(columns=pos_included_tags, fill_value=0)

        # Concatenate the new DataFrame with the original one
        result_df = pd.concat([df, pos_counts_df], axis=1)

        return result_df
# ================================================================


# ================================================================
# Run all the appropriate functions

    def run(self):
        """Compute every enabled feature and return the resulting DataFrame.

        Each step is best-effort: if it raises, the error is printed and the step is
        skipped, so the corresponding column(s) will be missing from the result.
        Only ``Exception`` subclasses are caught, so a kernel interrupt is no longer
        swallowed.
        """
#         step 1: summary_text_count
        try:
            self.dataframe['summary_text_count'] = self.dataframe.progress_apply(self.summary_text_count, axis=1)
        except Exception as exc:
            print(f'Error occur when doing summary text count: {exc!r}')

        # step 2: summary_prompt_length_ratio
        try:
            self.dataframe['summary_prompt_length_ratio'] = self.dataframe.progress_apply(self.summary_prompt_length_ratio, axis=1)
        except Exception as exc:
            print(f'Error occur when doing summary_prompt_length_ratio: {exc!r}')

        try:
            # step 3: ngram_overlap (a failure skips the remaining n-gram features too)
            self.dataframe['unigram_overlap'] = self.dataframe.progress_apply(self.unigram_overlap, axis=1)
            self.dataframe['bigram_overlap'] = self.dataframe.progress_apply(self.bigram_overlap, axis=1)
            self.dataframe['trigram_overlap'] = self.dataframe.progress_apply(self.trigram_overlap, axis=1)
        except Exception as exc:
            print(f'Error occur when doing ngram: {exc!r}')

        # step 4: spelling
        try:
            self.dataframe['spelling_error'] = self.dataframe.progress_apply(self.spelling_error, axis=1)
        except Exception as exc:
            print(f'Error occur when doing spelling error: {exc!r}')
#          # step 5: correct mispelled word
#         self.dataframe["fixed_summary_text"] = self.dataframe["text"].progress_apply(
#             lambda x: self.speller(x)
#         )

        # step 6: quote count
        try:
            self.dataframe['quote_count'] = self.dataframe.progress_apply(self.quote_count, axis=1)
        except Exception as exc:
            print(f'Error occur when doing quote count: {exc!r}')

        # step 7: vocab_uniqueness
        try:
            self.dataframe['vocab_uniqueness'] = self.dataframe.progress_apply(self.vocab_uniqueness, axis=1)
        except Exception as exc:
            print(f'Error occur when doing vocab uniqueness: {exc!r}')
#         # step 7: NER_co_occurrence
#         self.dataframe['NER_co_occurrence'] = self.dataframe.progress_apply(self.NER_co_occurrence, axis=1)

        # step 8: polarity and subjectivity
        try:
            self.dataframe[['polarity', 'subjectivity']] = self.dataframe.progress_apply(self.polarity_subjectivity, axis=1)
        except Exception as exc:
            print(f'Error occur when doing polarity: {exc!r}')

        # step 9: pos_tagging concat (replaces self.dataframe with a new, wider frame)
        try:
            self.dataframe = self.pos_tagging(self.dataframe)
        except Exception as exc:
            print(f'Error occur when doing pos tagging: {exc!r}')

#         # step 10: extract/drop necessary features
#         self.dataframe.drop(['student_id','prompt_id', 'text', 'prompt_question', 'prompt_title', 'prompt_text'], axis=1, inplace=True)

#         # step 11: standard scaler
#         scaler = StandardScaler()
#         scaler.fit(self.dataframe)
#         scaled_data = scaler.transform(self.dataframe)
#         scaled_df = pd.DataFrame(scaled_data, columns=self.dataframe.columns, index=self.index)

        return self.dataframe


In [8]:
# Pre-computed training features.
# NOTE(review): presumably produced offline by the same Preprocess pipeline so that its
# columns match the features computed for the test set — confirm.
train = pd.read_csv('/kaggle/input/commonlit-datasets/train_preprocessed.csv')

In [9]:
# Drop the saved index column and the spell-corrected text, which are not used as features.
# Rebinding with errors="ignore" keeps this cell idempotent: re-running it no longer
# raises KeyError because the columns are already gone.
train = train.drop(columns=["Unnamed: 0", "fixed_summary_text"], errors="ignore")

In [10]:
from sklearn.model_selection import KFold, GroupKFold

# Group folds by prompt so that all summaries of one prompt land in the same fold.
gkf = GroupKFold(n_splits=4)

# GroupKFold.split yields *positional* row indices. Collect them in a positional array
# instead of writing through label-based `.loc`, which is only equivalent while `train`
# has a default 0..n-1 index. The array is float so the column dtype matches what the
# `.loc` assignment produced.
fold_ids = np.full(len(train), np.nan)
for i, (_, val_index) in enumerate(gkf.split(train, groups=train["prompt_id"])):
    fold_ids[val_index] = i
train["fold"] = fold_ids

train.head(2)

Unnamed: 0,prompt_id,prompt_question,prompt_title,prompt_text,student_id,text,content,wording,summary_text_count,summary_prompt_length_ratio,...,JJS,VBN,EX,MD,PRP$,POS,WP,RBR,:,fold
0,39c16e,Summarize at least 3 elements of an ideal trag...,On Tragedy,Chapter 13 \r\nAs the sequel to what has alrea...,00791789cc1f,1 element of an ideal tragedy is that it shoul...,-0.210614,-0.471415,59,0.084406,...,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0
1,39c16e,Summarize at least 3 elements of an ideal trag...,On Tragedy,Chapter 13 \r\nAs the sequel to what has alrea...,0086ef22de8f,The three elements of an ideal tragedy are: H...,-0.970237,-0.417058,30,0.042918,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# Score every training summary with the loaded DeBERTa model. The dataset is built in
# "test" mode because only the model inputs are needed here, not the labels.
train_dataset = CustomDataset(train, tokenizer, max_length, mode="test")
train_loader = DataLoader(train_dataset, shuffle=False, batch_size=1)

deberta_model.eval()
train_predictions = []
with torch.no_grad():
    # batch_size=1 and shuffle=False: the k-th batch belongs to the k-th row of `train`.
    for student_id, batch in zip(train['student_id'], train_loader):
        outputs = deberta_model(
            input_ids=batch['input_ids'].to(device),
            attention_mask=batch['attention_mask'].to(device),
        )
        # outputs[0] holds the two values predicted for the single item in the batch.
        content_pred, wording_pred = outputs[0].tolist()
        train_predictions.append([student_id, content_pred, wording_pred])

In [12]:
# One row per training summary, in the same order as `train` (the loader was not shuffled).
train_pred_df = pd.DataFrame(train_predictions, columns=['student_id','pred_content','pred_wording'])

In [13]:
# Attach the DeBERTa outputs to `train` as extra feature columns.
# Column assignment aligns on the index of the two frames.
for pred_column in ('pred_content', 'pred_wording'):
    train[pred_column] = train_pred_df[pred_column]

In [14]:
# The two scores to predict.
targets = ["content", "wording"]

# Columns removed from `train` to obtain the feature matrix: the fold id, identifiers,
# raw text fields and the targets themselves.
drop_columns = [
    "fold",
    "student_id",
    "prompt_id",
    "text",
    "prompt_question",
    "prompt_title",
    "prompt_text",
    *targets,
]

In [15]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error

# Train one XGBoost regressor per (target, fold): fit on the other folds, and use the
# held-out fold both for monitoring and for early stopping.
model_dict = {}

for target in targets:
    models = []
    for fold in range(4):
        in_eval_fold = train["fold"] == fold
        train_part = train[~in_eval_fold]
        eval_part = train[in_eval_fold]

        X_train_cv = train_part.drop(columns=drop_columns)
        y_train_cv = train_part[target]

        X_eval_cv = eval_part.drop(columns=drop_columns)
        y_eval_cv = eval_part[target]

        #Build the model
        xgb_params = dict(
            base_score=0.5,
            booster='gbtree',
            n_estimators=1000,
            early_stopping_rounds=100,
            objective='reg:squarederror',
            max_depth=4,
            learning_rate=0.01,
            alpha=0.5,
            reg_lambda=1.0,
        )
        model = xgb.XGBRegressor(**xgb_params)
        model.fit(
            X_train_cv,
            y_train_cv,
            eval_set=[(X_train_cv, y_train_cv), (X_eval_cv, y_eval_cv)],
            verbose=100,
        )

        models.append(model)

    model_dict[target] = models
# After hyperparameter tuning
# {'alpha': 0.5,
#  'gamma': 0,
#  'lambda': 1.0,
#  'learning_rate': 0.01,
#  'max_depth': 4,
#  'n_estimators': 1000}

[0]	validation_0-rmse:1.16257	validation_1-rmse:1.12700
[100]	validation_0-rmse:0.47131	validation_1-rmse:0.45867
[200]	validation_0-rmse:0.25553	validation_1-rmse:0.24181
[300]	validation_0-rmse:0.20294	validation_1-rmse:0.18824
[400]	validation_0-rmse:0.19000	validation_1-rmse:0.17700
[500]	validation_0-rmse:0.18422	validation_1-rmse:0.17523
[600]	validation_0-rmse:0.18091	validation_1-rmse:0.17491
[700]	validation_0-rmse:0.17858	validation_1-rmse:0.17477
[783]	validation_0-rmse:0.17673	validation_1-rmse:0.17483
[0]	validation_0-rmse:1.14043	validation_1-rmse:1.18256
[100]	validation_0-rmse:0.45835	validation_1-rmse:0.47873
[200]	validation_0-rmse:0.24157	validation_1-rmse:0.26625
[300]	validation_0-rmse:0.18830	validation_1-rmse:0.22355
[400]	validation_0-rmse:0.17558	validation_1-rmse:0.21714
[500]	validation_0-rmse:0.17043	validation_1-rmse:0.21639
[565]	validation_0-rmse:0.16848	validation_1-rmse:0.21650
[0]	validation_0-rmse:1.15704	validation_1-rmse:1.14026
[100]	validation_0-r

In [16]:
# cv: out-of-fold RMSE per target, then their mean (MCRMSE)
rmses = []

for target in targets:
    preds, trues = [], []

    # Model number `fold` was fitted without fold `fold`, so score it on exactly those rows.
    for fold, model in enumerate(model_dict[target]):
        eval_rows = train[train["fold"] == fold]
        preds.extend(model.predict(eval_rows.drop(columns=drop_columns)))
        trues.extend(eval_rows[target])

    rmse = np.sqrt(mean_squared_error(trues, preds))
    print(f"{target}_rmse : {rmse}")
    rmses.append(rmse)

print(f"mcrmse : {sum(rmses) / len(rmses)}")

content_rmse : 0.19961580966393763
wording_rmse : 0.267893433532158
mcrmse : 0.23375462159804783


# Making submission

In [17]:
# Raw competition files: prompts and student summaries for both splits.
prompts_train = pd.read_csv("/kaggle/input/commonlit-evaluate-student-summaries/prompts_train.csv")
summaries_train = pd.read_csv("/kaggle/input/commonlit-evaluate-student-summaries/summaries_train.csv")

prompts_test = pd.read_csv("/kaggle/input/commonlit-evaluate-student-summaries/prompts_test.csv")
summaries_test = pd.read_csv("/kaggle/input/commonlit-evaluate-student-summaries/summaries_test.csv")

# When the test prompt file has exactly two rows, run the pipeline on the (label-free)
# training data instead.
# NOTE(review): presumably the two-row file is the public placeholder test set — confirm.
if len(prompts_test) == 2:
    prompts_test = prompts_train.copy()
    summaries_test = summaries_train.drop(columns=['content', 'wording'])

# One row per summary, with the prompt fields joined on.
test_pretrain = prompts_test.merge(summaries_test, on='prompt_id')
test_pretrain.head(2)

Unnamed: 0,prompt_id,prompt_question,prompt_title,prompt_text,student_id,text
0,39c16e,Summarize at least 3 elements of an ideal trag...,On Tragedy,Chapter 13 \r\nAs the sequel to what has alrea...,00791789cc1f,1 element of an ideal tragedy is that it shoul...
1,39c16e,Summarize at least 3 elements of an ideal trag...,On Tragedy,Chapter 13 \r\nAs the sequel to what has alrea...,0086ef22de8f,The three elements of an ideal tragedy are: H...


In [18]:
# Compute the hand-crafted features on a copy, because Preprocess.run() adds columns to
# the frame it is given; `test_pretrain` itself stays untouched.
test = Preprocess(test_pretrain.copy()).run()

100%|██████████| 7165/7165 [00:07<00:00, 923.33it/s]
100%|██████████| 7165/7165 [01:06<00:00, 107.47it/s]
100%|██████████| 7165/7165 [01:24<00:00, 84.76it/s]
100%|██████████| 7165/7165 [01:20<00:00, 89.36it/s]
100%|██████████| 7165/7165 [01:19<00:00, 89.88it/s]
100%|██████████| 7165/7165 [00:01<00:00, 4211.84it/s]
100%|██████████| 7165/7165 [00:00<00:00, 44665.95it/s]
100%|██████████| 7165/7165 [00:05<00:00, 1319.86it/s]
100%|██████████| 7165/7165 [00:08<00:00, 825.59it/s]
100%|██████████| 7165/7165 [00:40<00:00, 178.31it/s]


In [19]:
# Score every test summary with the loaded DeBERTa model (inputs only, no labels).
test_dataset = CustomDataset(test, tokenizer, max_length, mode="test")
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=1)

deberta_model.eval()
test_predictions = []
with torch.no_grad():
    # batch_size=1 and shuffle=False: the k-th batch belongs to the k-th row of `test`.
    for student_id, batch in zip(test['student_id'], test_loader):
        outputs = deberta_model(
            input_ids=batch['input_ids'].to(device),
            attention_mask=batch['attention_mask'].to(device),
        )
        # outputs[0] holds the two values predicted for the single item in the batch.
        content_pred, wording_pred = outputs[0].tolist()
        test_predictions.append([student_id, content_pred, wording_pred])

In [20]:
# One row per test summary, in the same order as `test` (the loader was not shuffled).
test_pred_df = pd.DataFrame(test_predictions, columns=['student_id','pred_content','pred_wording'])

In [21]:
# Attach the DeBERTa outputs to `test` as extra feature columns.
# Column assignment aligns on the index of the two frames.
for pred_column in ('pred_content', 'pred_wording'):
    test[pred_column] = test_pred_df[pred_column]

In [22]:
# Preview: feature columns plus the two DeBERTa prediction columns.
test.head(2)

Unnamed: 0,prompt_id,prompt_question,prompt_title,prompt_text,student_id,text,summary_text_count,summary_prompt_length_ratio,unigram_overlap,bigram_overlap,...,VBN,EX,MD,PRP$,POS,WP,RBR,:,pred_content,pred_wording
0,39c16e,Summarize at least 3 elements of an ideal trag...,On Tragedy,Chapter 13 \r\nAs the sequel to what has alrea...,00791789cc1f,1 element of an ideal tragedy is that it shoul...,59,0.084406,0.111913,0.030086,...,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.257776,-0.04197
1,39c16e,Summarize at least 3 elements of an ideal trag...,On Tragedy,Chapter 13 \r\nAs the sequel to what has alrea...,0086ef22de8f,The three elements of an ideal tragedy are: H...,30,0.042918,0.046931,0.007163,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.10806,-0.276407


In [23]:
# Non-feature columns of `test`: identifiers and raw text fields. Unlike the training
# list there is no fold id and no target column to remove here.
drop_columns = [
    "student_id",
    "prompt_id",
    "text",
    "prompt_question",
    "prompt_title",
    "prompt_text",
]

In [24]:
# The feature matrix is the same for every model, so build it once.
X_eval_cv = test.drop(columns=drop_columns)

# For each target: one prediction array per fold model, in fold order.
pred_dict = {
    target: [model.predict(X_eval_cv) for model in model_dict[target]]
    for target in targets
}

In [25]:
# Store each fold model's prediction and average them into the final target column.
# The mean runs over the columns actually written here, so it follows the number of
# fold models instead of a hard-coded 4.
for target in targets:
    fold_columns = []
    for i, pred in enumerate(pred_dict[target]):
        fold_column = f"{target}_pred_{i}"
        test[fold_column] = pred
        fold_columns.append(fold_column)

    test[target] = test[fold_columns].mean(axis=1)

In [26]:
# Preview: per-fold prediction columns and their mean for each target.
test.head(2)

Unnamed: 0,prompt_id,prompt_question,prompt_title,prompt_text,student_id,text,summary_text_count,summary_prompt_length_ratio,unigram_overlap,bigram_overlap,...,content_pred_0,content_pred_1,content_pred_2,content_pred_3,content,wording_pred_0,wording_pred_1,wording_pred_2,wording_pred_3,wording
0,39c16e,Summarize at least 3 elements of an ideal trag...,On Tragedy,Chapter 13 \r\nAs the sequel to what has alrea...,00791789cc1f,1 element of an ideal tragedy is that it shoul...,59,0.084406,0.111913,0.030086,...,0.171836,0.192371,0.18753,0.186331,0.184517,-0.063305,-0.114135,-0.112279,-0.051406,-0.085281
1,39c16e,Summarize at least 3 elements of an ideal trag...,On Tragedy,Chapter 13 \r\nAs the sequel to what has alrea...,0086ef22de8f,The three elements of an ideal tragedy are: H...,30,0.042918,0.046931,0.007163,...,-1.098772,-1.04459,-1.08325,-1.104704,-1.082829,-0.322389,-0.323508,-0.280367,-0.35768,-0.320986


In [27]:
# Write the submission file: one row per summary with the fold-averaged predictions.
test[["student_id", "content", "wording"]].to_csv("submission.csv", index=False)