In [1]:
!pip install transformers datasets



In [2]:
import math
import time
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from tensorflow import keras
from torch.utils.data import DataLoader, Dataset
from transformers import logging
from transformers import TrainingArguments, Trainer
from transformers import BertTokenizer, DistilBertTokenizer, DistilBertModel, AutoTokenizer
from transformers import DefaultDataCollator, TFDistilBertForSequenceClassification
from transformers import DistilBertTokenizerFast, DataCollatorWithPadding
from datasets import load_dataset
import warnings
warnings.filterwarnings('ignore')

# Controlling the pseudo-randomness.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

In [3]:
df = pd.read_csv('train_16project.csv')
df.head()

Unnamed: 0,text_id,full_text,cohesion,syntax,vocabulary,phraseology,grammar,conventions
0,0016926B079C,I think that students would benefit from learn...,3.5,3.5,3.0,3.0,4.0,3.0
1,0022683E9EA5,When a problem is a change you have to let it ...,2.5,2.5,3.0,2.0,2.0,2.5
2,00299B378633,"Dear, Principal\n\nIf u change the school poli...",3.0,3.5,3.0,3.0,3.0,2.5
3,003885A45F42,The best time in life is when you become yours...,4.5,4.5,4.5,4.5,4.0,5.0
4,0049B1DF5CCC,Small act of kindness can impact in other peop...,2.5,3.0,3.0,3.0,2.5,2.5


In [4]:
from sklearn.model_selection import train_test_split

# First split: Divide into training (80%) and testing (20%)
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Second split: Divide training data into training (80%) and validation (20%)
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)


In [5]:
# Initialize the tokenizer
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# Function to tokenize the data
def tokenize_data(df, tokenizer, max_length=512):
    return tokenizer(list(df['full_text']), max_length=max_length, truncation=True, padding="max_length", return_tensors="pt")

# Tokenizing the training and validation data
train_encodings = tokenize_data(train_df, tokenizer)
val_encodings = tokenize_data(val_df, tokenizer)
test_encodings = tokenize_data(test_df, tokenizer)

In [6]:
from transformers import DistilBertForSequenceClassification, DistilBertConfig

# Configuring the model to output 6 continuous values
config = DistilBertConfig.from_pretrained('distilbert-base-uncased')
config.num_labels = 6  # We need to output 6 scores per text
model = DistilBertForSequenceClassification(config)

# Modifying the classifier to a linear regression output
model.classifier = torch.nn.Linear(config.dim, 6)  # `config.dim` is the size of the hidden states


In [7]:
from torch.utils.data import DataLoader, TensorDataset
from transformers import AdamW, get_scheduler
from tqdm import tqdm

# Prepare the dataset for PyTorch
train_labels = torch.tensor(train_df[['cohesion', 'syntax', 'vocabulary', 'phraseology', 'grammar', 'conventions']].values, dtype=torch.float)
val_labels = torch.tensor(val_df[['cohesion', 'syntax', 'vocabulary', 'phraseology', 'grammar', 'conventions']].values, dtype=torch.float)
test_labels = torch.tensor(test_df[['cohesion', 'syntax', 'vocabulary', 'phraseology', 'grammar', 'conventions']].values, dtype=torch.float)

train_dataset = TensorDataset(train_encodings['input_ids'], train_encodings['attention_mask'], train_labels)
val_dataset = TensorDataset(val_encodings['input_ids'], val_encodings['attention_mask'], val_labels)
test_dataset = TensorDataset(test_encodings['input_ids'], test_encodings['attention_mask'], test_labels)

# Preparing data loaders, model, and optimizer as previously described
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)
test_loader = DataLoader(test_dataset, batch_size=16)

In [8]:
num_epochs = 10
# Custom accuracy function
def custom_accuracy(y_true, y_pred):
    return torch.mean(((y_true - y_pred).abs() <= 0.25).float()).item()

# Setup model, optimizer, and scheduler

optimizer = AdamW(model.parameters(), lr= 5e-5)
scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=len(train_loader) * num_epochs
)

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training, Validation, and Testing loop
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    train_accuracy = []
    for batch in tqdm(train_loader, desc=f"Training Epoch {epoch+1}"):
        b_input_ids, b_input_mask, b_labels = [t.to(device) for t in batch]
        outputs = model(b_input_ids, attention_mask=b_input_mask)
        loss = torch.nn.MSELoss()(outputs.logits, b_labels)
        acc = custom_accuracy(b_labels, outputs.logits)
        loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
        train_loss += loss.item()
        train_accuracy.append(acc)

    # Validation step
    model.eval()
    val_accuracy = []
    val_loss = 0
    with torch.no_grad():
        for batch in tqdm(val_loader, desc="Validating"):
            b_input_ids, b_input_mask, b_labels = [t.to(device) for t in batch]
            outputs = model(b_input_ids, attention_mask=b_input_mask)
            val_loss = torch.nn.MSELoss()(outputs.logits, b_labels)
            acc = custom_accuracy(b_labels, outputs.logits)
            val_accuracy.append(acc)
            val_loss += val_loss.item()

    print(f"Epoch {epoch+1}: Train Loss = {train_loss / len(train_loader)}; Train Accuracy = {sum(train_accuracy)/len(train_accuracy)}; Val Accuracy = {sum(val_accuracy)/len(val_accuracy)}; Validation Loss = {val_loss / len(val_loader)}")

# Testing loop
test_predictions = []
test_actuals = []
test_loss = 0
test_accuracy = []
with torch.no_grad():
    for batch in tqdm(test_loader, desc="Testing"):
        b_input_ids, b_input_mask, b_labels = [t.to(device) for t in batch]
        outputs = model(b_input_ids, attention_mask=b_input_mask)
        test_loss = torch.nn.MSELoss()(outputs.logits, b_labels)
        acc = custom_accuracy(b_labels, outputs.logits)
        test_predictions.extend(outputs.logits.cpu().numpy())
        test_actuals.extend(b_labels.cpu().numpy())
        test_accuracy.append(acc)
        test_loss += test_loss.item()


# Create DataFrame to compare predictions with actuals
predictions_df = pd.DataFrame(test_predictions, columns=['pred_cohesion', 'pred_syntax', 'pred_vocabulary', 'pred_phraseology', 'pred_grammar', 'pred_conventions'])
actuals_df = pd.DataFrame(test_actuals, columns=['true_cohesion', 'true_syntax', 'true_vocabulary', 'true_phraseology', 'true_grammar', 'true_conventions'])
results_df = pd.concat([actuals_df, predictions_df], axis=1)
print(results_df.head())

print(f"Test Loss = {test_loss / len(test_loader)}; Test Accuracy = {sum(test_accuracy)/len(test_accuracy)}")


Training Epoch 1: 100%|██████████| 157/157 [00:29<00:00,  5.41it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.56it/s]


Epoch 1: Train Loss = 0.7176156873535958; Train Accuracy = 0.2597753090522946; Val Accuracy = 0.2614583410322666; Validation Loss = 0.02430631034076214


Training Epoch 2: 100%|██████████| 157/157 [00:28<00:00,  5.54it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.67it/s]


Epoch 2: Train Loss = 0.43954026006209623; Train Accuracy = 0.29129070254742717; Val Accuracy = 0.332812511920929; Validation Loss = 0.02292693965137005


Training Epoch 3: 100%|██████████| 157/157 [00:28<00:00,  5.54it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.66it/s]


Epoch 3: Train Loss = 0.36073068315815776; Train Accuracy = 0.32455326607272883; Val Accuracy = 0.33385417722165583; Validation Loss = 0.013147684745490551


Training Epoch 4: 100%|██████████| 157/157 [00:28<00:00,  5.53it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.56it/s]


Epoch 4: Train Loss = 0.31040630276036113; Train Accuracy = 0.3481511066483844; Val Accuracy = 0.3489583443850279; Validation Loss = 0.011730359867215157


Training Epoch 5: 100%|██████████| 157/157 [00:28<00:00,  5.53it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.52it/s]


Epoch 5: Train Loss = 0.2892473366609804; Train Accuracy = 0.35858989027655047; Val Accuracy = 0.34895834475755694; Validation Loss = 0.009571696631610394


Training Epoch 6: 100%|██████████| 157/157 [00:28<00:00,  5.52it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.57it/s]


Epoch 6: Train Loss = 0.25687816786538265; Train Accuracy = 0.3750663578130637; Val Accuracy = 0.3601562589406967; Validation Loss = 0.008108175359666348


Training Epoch 7: 100%|██████████| 157/157 [00:28<00:00,  5.52it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.53it/s]


Epoch 7: Train Loss = 0.2430078374921896; Train Accuracy = 0.3885571586478288; Val Accuracy = 0.3559895925223827; Validation Loss = 0.007200957741588354


Training Epoch 8: 100%|██████████| 157/157 [00:28<00:00,  5.53it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.61it/s]


Epoch 8: Train Loss = 0.22558745893703144; Train Accuracy = 0.3953689067227066; Val Accuracy = 0.3518229257315397; Validation Loss = 0.010434132069349289


Training Epoch 9: 100%|██████████| 157/157 [00:28<00:00,  5.53it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.59it/s]


Epoch 9: Train Loss = 0.21378897586066253; Train Accuracy = 0.405099974696044; Val Accuracy = 0.3578125096857548; Validation Loss = 0.007621882949024439


Training Epoch 10: 100%|██████████| 157/157 [00:28<00:00,  5.51it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.46it/s]


Epoch 10: Train Loss = 0.19162547303612826; Train Accuracy = 0.4232572643620193; Val Accuracy = 0.3442708425223827; Validation Loss = 0.0080813467502594


Training Epoch 11: 100%|██████████| 157/157 [00:28<00:00,  5.52it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.58it/s]


Epoch 11: Train Loss = 0.18295323170104605; Train Accuracy = 0.43652690349111134; Val Accuracy = 0.3466145932674408; Validation Loss = 0.005754619371145964


Training Epoch 12: 100%|██████████| 157/157 [00:28<00:00,  5.53it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.61it/s]


Epoch 12: Train Loss = 0.17153596488913153; Train Accuracy = 0.4497965439489693; Val Accuracy = 0.3440104268491268; Validation Loss = 0.006469431798905134


Training Epoch 13: 100%|██████████| 157/157 [00:28<00:00,  5.52it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.55it/s]


Epoch 13: Train Loss = 0.16475045994208876; Train Accuracy = 0.46001416691549263; Val Accuracy = 0.3375000096857548; Validation Loss = 0.009290851652622223


Training Epoch 14: 100%|██████████| 157/157 [00:28<00:00,  5.53it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.60it/s]


Epoch 14: Train Loss = 0.1555645954646882; Train Accuracy = 0.4700990917196699; Val Accuracy = 0.33359376043081285; Validation Loss = 0.00787627324461937


Training Epoch 15: 100%|██████████| 157/157 [00:28<00:00,  5.52it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.62it/s]


Epoch 15: Train Loss = 0.1449854395287052; Train Accuracy = 0.4816436782764022; Val Accuracy = 0.3359375085681677; Validation Loss = 0.009493927471339703


Training Epoch 16: 100%|██████████| 157/157 [00:28<00:00,  5.53it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.55it/s]


Epoch 16: Train Loss = 0.14071224629878998; Train Accuracy = 0.48847754366078955; Val Accuracy = 0.3341145932674408; Validation Loss = 0.009066085331141949


Training Epoch 17: 100%|██████████| 157/157 [00:28<00:00,  5.53it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.56it/s]


Epoch 17: Train Loss = 0.1337209478685051; Train Accuracy = 0.5018798801929328; Val Accuracy = 0.3276041753590107; Validation Loss = 0.007139548659324646


Training Epoch 18: 100%|██████████| 157/157 [00:28<00:00,  5.53it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.61it/s]


Epoch 18: Train Loss = 0.13031743447871724; Train Accuracy = 0.5084704677010797; Val Accuracy = 0.3286458421498537; Validation Loss = 0.00781539548188448


Training Epoch 19: 100%|██████████| 157/157 [00:28<00:00,  5.53it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.62it/s]


Epoch 19: Train Loss = 0.12183431422065018; Train Accuracy = 0.5271806601126483; Val Accuracy = 0.32265626154839994; Validation Loss = 0.00824339035898447


Training Epoch 20: 100%|██████████| 157/157 [00:28<00:00,  5.53it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.47it/s]


Epoch 20: Train Loss = 0.11472433833939255; Train Accuracy = 0.5388137130600632; Val Accuracy = 0.3138020928949118; Validation Loss = 0.00998920388519764


Training Epoch 21: 100%|██████████| 157/157 [00:28<00:00,  5.53it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.60it/s]


Epoch 21: Train Loss = 0.11015945459437218; Train Accuracy = 0.5478149506316823; Val Accuracy = 0.30963542610406875; Validation Loss = 0.008740204386413097


Training Epoch 22: 100%|██████████| 157/157 [00:28<00:00,  5.52it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.56it/s]


Epoch 22: Train Loss = 0.10349289546157145; Train Accuracy = 0.5612836345365853; Val Accuracy = 0.3174479268491268; Validation Loss = 0.006800277624279261


Training Epoch 23: 100%|██████████| 157/157 [00:28<00:00,  5.53it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.53it/s]


Epoch 23: Train Loss = 0.09804348272692626; Train Accuracy = 0.5730051510273271; Val Accuracy = 0.3177083421498537; Validation Loss = 0.007335992064327002


Training Epoch 24: 100%|██████████| 157/157 [00:28<00:00,  5.52it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.56it/s]


Epoch 24: Train Loss = 0.09308763806986961; Train Accuracy = 0.5905874231059081; Val Accuracy = 0.31145834252238275; Validation Loss = 0.007915565744042397


Training Epoch 25: 100%|██████████| 157/157 [00:28<00:00,  5.52it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.55it/s]


Epoch 25: Train Loss = 0.08608715532786528; Train Accuracy = 0.607461980953338; Val Accuracy = 0.31536459289491175; Validation Loss = 0.00641854340210557


Training Epoch 26: 100%|██████████| 157/157 [00:28<00:00,  5.52it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.59it/s]


Epoch 26: Train Loss = 0.08224100629045705; Train Accuracy = 0.6215941461787862; Val Accuracy = 0.30885417722165587; Validation Loss = 0.007298699114471674


Training Epoch 27: 100%|██████████| 157/157 [00:28<00:00,  5.52it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.57it/s]


Epoch 27: Train Loss = 0.07797990006151473; Train Accuracy = 0.6334925882375924; Val Accuracy = 0.3101562596857548; Validation Loss = 0.006855201907455921


Training Epoch 28: 100%|██████████| 157/157 [00:28<00:00,  5.52it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.60it/s]


Epoch 28: Train Loss = 0.07380304404884387; Train Accuracy = 0.6482661180435472; Val Accuracy = 0.3091145928949118; Validation Loss = 0.006631528027355671


Training Epoch 29: 100%|██████████| 157/157 [00:28<00:00,  5.52it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.64it/s]


Epoch 29: Train Loss = 0.07054018691941431; Train Accuracy = 0.6597664735879109; Val Accuracy = 0.3153645921498537; Validation Loss = 0.006642424967139959


Training Epoch 30: 100%|██████████| 157/157 [00:28<00:00,  5.53it/s]
Validating: 100%|██████████| 40/40 [00:02<00:00, 17.60it/s]


Epoch 30: Train Loss = 0.06872155208876178; Train Accuracy = 0.6594789635603595; Val Accuracy = 0.3127604264765978; Validation Loss = 0.006515701301395893


Testing: 100%|██████████| 49/49 [00:02<00:00, 17.23it/s]

   true_cohesion  true_syntax  true_vocabulary  true_phraseology  \
0            3.0          2.5              2.5               2.0   
1            3.0          2.0              3.0               3.5   
2            4.0          4.0              3.0               4.0   
3            3.0          3.0              3.5               3.0   
4            3.5          3.5              3.5               3.5   

   true_grammar  true_conventions  pred_cohesion  pred_syntax  \
0           2.0               2.0       2.583694     3.179252   
1           3.0               3.0       2.578758     2.604294   
2           4.0               4.0       3.813802     2.885073   
3           3.5               3.5       2.660313     2.904942   
4           3.0               3.5       3.603974     3.403027   

   pred_vocabulary  pred_phraseology  pred_grammar  pred_conventions  
0         3.240652          3.256781      3.038039          2.968935  
1         2.637927          2.458733      2.888671        




In [9]:
test_ids = test_df['text_id'].values  # Extract the IDs for later use

# Convert the test data into the format expected by the model
test_encodings = tokenize_data(test_df, tokenizer)
test_input_ids = test_encodings['input_ids']
test_attention_mask = test_encodings['attention_mask']
test_labels = torch.tensor(test_df[['cohesion', 'syntax', 'vocabulary', 'phraseology', 'grammar', 'conventions']].values, dtype=torch.float)

# Model evaluation setup
model.eval()
test_predictions = []
test_losses = []
test_accuracies = []
with torch.no_grad():
    for idx in range(len(test_df)):
        # Extract inputs for the current test instance
        input_ids = test_input_ids[idx].unsqueeze(0).to(device)  # Add batch dimension
        attention_mask = test_attention_mask[idx].unsqueeze(0).to(device)  # Add batch dimension
        labels = test_labels[idx].unsqueeze(0).to(device)  # Add batch dimension for labels
        outputs = model(input_ids, attention_mask=attention_mask)

        # Calculate loss and accuracy
        loss = torch.nn.MSELoss()(outputs.logits, labels)
        acc = custom_accuracy(labels, outputs.logits)

        # Store predictions, loss, and accuracy
        test_predictions.append(outputs.logits.squeeze(0).cpu().numpy())  # Remove batch dimension and move to CPU
        test_losses.append(loss.item())
        test_accuracies.append(acc)

# Calculate average loss and accuracy
average_test_loss = sum(test_losses) / len(test_losses)
average_test_accuracy = sum(test_accuracies) / len(test_accuracies)

# Create DataFrame to compare predictions with IDs
predictions_df = pd.DataFrame(test_predictions, columns=['pred_cohesion', 'pred_syntax', 'pred_vocabulary', 'pred_phraseology', 'pred_grammar', 'pred_conventions'])
ids_df = pd.DataFrame(test_ids, columns=['text_id'])
results_df = pd.concat([ids_df, predictions_df], axis=1)
print(results_df.head())
print(f"Average Test Loss: {average_test_loss}")
print(f"Average Test Accuracy: {average_test_accuracy}")

        text_id  pred_cohesion  pred_syntax  pred_vocabulary  \
0  772D27D400BB       2.583693     3.179252         3.240652   
1  9E8F3C6405CA       2.578758     2.604295         2.637927   
2  948771F795EB       3.813803     2.885074         3.293267   
3  FE14D7378CFB       2.660312     2.904941         3.240855   
4  7AAE019F70D6       3.603974     3.403028         3.447347   

   pred_phraseology  pred_grammar  pred_conventions  
0          3.256783      3.038040          2.968935  
1          2.458734      2.888670          3.390501  
2          3.299035      2.712234          2.830544  
3          2.910870      2.729234          2.319380  
4          3.079239      3.113676          3.108289  
Average Test Loss: 0.38734501256877474
Average Test Accuracy: 0.31587910366697786


In [10]:
merged_df = pd.merge(test_df, results_df, on='text_id', how='left')
merged_df.to_csv('distilBERT_test_predictions.csv', index=False)

In [11]:
merged_df.head()

Unnamed: 0,text_id,full_text,cohesion,syntax,vocabulary,phraseology,grammar,conventions,pred_cohesion,pred_syntax,pred_vocabulary,pred_phraseology,pred_grammar,pred_conventions
0,772D27D400BB,It god to have a possitive attitude when you d...,3.0,2.5,2.5,2.0,2.0,2.0,2.583693,3.179252,3.240652,3.256783,3.03804,2.968935
1,9E8F3C6405CA,Why do people ask more then one person for adv...,3.0,2.0,3.0,3.5,3.0,3.0,2.578758,2.604295,2.637927,2.458734,2.88867,3.390501
2,948771F795EB,"We accomplish more when we are active, and are...",4.0,4.0,3.0,4.0,4.0,4.0,3.813803,2.885074,3.293267,3.299035,2.712234,2.830544
3,FE14D7378CFB,Do you agree or disagree about imagination bei...,3.0,3.0,3.5,3.0,3.5,3.5,2.660312,2.904941,3.240855,2.91087,2.729234,2.31938
4,7AAE019F70D6,I disagree with the principal saying that all ...,3.5,3.5,3.5,3.5,3.0,3.5,3.603974,3.403028,3.447347,3.079239,3.113676,3.108289


In [12]:
results_df.describe()

Unnamed: 0,pred_cohesion,pred_syntax,pred_vocabulary,pred_phraseology,pred_grammar,pred_conventions
count,783.0,783.0,783.0,783.0,783.0,783.0
mean,3.159366,3.078925,3.282201,3.11162,3.027177,3.077715
std,0.568206,0.518351,0.492014,0.530133,0.591721,0.571564
min,1.371086,1.692512,1.906839,1.56969,1.5257,1.369442
25%,2.776169,2.711581,2.916236,2.731097,2.569701,2.697888
50%,3.163273,3.096978,3.296294,3.121753,3.009525,3.091959
75%,3.553957,3.442828,3.616905,3.491226,3.470718,3.466598
max,4.670754,4.485359,5.219347,4.58018,4.569763,4.696817


In [13]:
test_df.describe()

Unnamed: 0,cohesion,syntax,vocabulary,phraseology,grammar,conventions
count,783.0,783.0,783.0,783.0,783.0,783.0
mean,3.126437,3.015964,3.212644,3.109195,3.038953,3.077267
std,0.651069,0.651626,0.574217,0.658812,0.696687,0.68045
min,1.0,1.0,1.0,1.0,1.0,1.0
25%,2.5,2.5,3.0,2.5,2.5,2.5
50%,3.0,3.0,3.0,3.0,3.0,3.0
75%,3.5,3.5,3.5,3.5,3.5,3.5
max,5.0,4.5,5.0,5.0,5.0,5.0
