In [1]:
from os.path import join

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

from nltk.tokenize import word_tokenize
from fasttext import load_model 
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, ConcatDataset

device = 'cuda' if torch.cuda.is_available() else 'cpu'

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
np.random.seed(42)

# Prepare data

In [3]:
DATA_DIR = join('..', 'data', 'raw')
MODEL_DIR = join('..', 'models')

In [4]:
fasttext_model = load_model(join(MODEL_DIR, 'pretrained', 'cc.en.300.bin'))

In [5]:
train_full_df = pd.read_csv(join(DATA_DIR, 'train.tsv'), sep='\t')
test_df = pd.read_csv(join(DATA_DIR, 'dev.tsv'), sep='\t')
train_df, dev_df = train_test_split(train_full_df, test_size=0.1, random_state=42)

In [6]:
fasttext_model.get_word_vector('hi').shape

(300,)

In [7]:
fasttext_model.get_sentence_vector("What's your name?").shape

(300,)

In [8]:
train_df = train_df.fillna("")
dev_df = dev_df.fillna("")
test_df = test_df.fillna("")

In [9]:
def get_X_y(df):
    X = df[['question1', 'question2']]
    X['question1'] = X['question1'].str.replace('\n', '', regex=True)
    X['question2'] = X['question2'].str.replace('\n', '', regex=True)

    X['q1_embeds'] = X['question1'].apply(lambda x: fasttext_model.get_sentence_vector(x.lower().strip()))
    X['q2_embeds'] = X['question2'].apply(lambda x: fasttext_model.get_sentence_vector(x.lower().strip()))
    X = X[['q1_embeds', 'q2_embeds']]

    y = df['is_duplicate']
    y = torch.tensor(y.values, dtype=torch.float32)
    
    return X, y

In [10]:
X_train, y_train = get_X_y(train_df)
X_val, y_val = get_X_y(dev_df)
X_test, y_test = get_X_y(test_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_

# FCN with FastText sentence embedding

In [11]:
class CustomDataset(Dataset):

    def __init__(self, question_embeddings, labels):
        self.question_embeddings = question_embeddings
        self.labels = labels

    def __len__(self):
        return len(self.question_embeddings)

    def __getitem__(self, idx):
        q1_embed, q2_embed = self.question_embeddings.iloc[idx]
        x = torch.cat((torch.from_numpy(q1_embed), torch.from_numpy(q2_embed)), 0)
        return x.to(device), self.labels[idx].to(device)


class FCN(nn.Module):

    def __init__(self, input_dim, num_layers, hidden_dim, dropout=0):
        super(FCN, self).__init__()
        
        self.fcs = nn.ModuleList(
            [nn.Linear(input_dim, hidden_dim)]
            + [nn.Sequential(
                nn.Linear(hidden_dim, hidden_dim),
                nn.ReLU(),
                nn.Dropout(p=dropout)
              ) 
              for _ in range(num_layers - 2)]
            + [nn.Linear(hidden_dim, 1)]
        )
        
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        for fc in self.fcs[:-1]:
            x = fc(x)
        x = self.fcs[-1](x)
        return self.sigmoid(x)

In [66]:
def train(
    model, criterion, optimizer, train_loader, val_loader,
    num_epochs=10, early_stopping_patience=None, device='cuda'
):
    
    def validate(model):
        model.eval()
        total_loss, sample_count = 0, 0
        with torch.no_grad():
            for (questions, labels) in val_loader:
                outputs = model(questions)
                loss = criterion(outputs.view(-1), labels)
                total_loss += loss.item()
                sample_count += labels.size(0)
        average_loss = total_loss / sample_count
        return average_loss
    
    best_loss = np.inf
    consecutive_no_improvement = 0
    
    model.to(device)
    
    for epoch in range(num_epochs):
        model.train()  
        total_loss, sample_counts = 0, 0
        for (questions, labels) in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            questions, labels = questions.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(questions)
            loss = criterion(outputs.view(-1), labels)
            total_loss += loss.item()
            sample_counts += labels.size(0)
            loss.backward()
            optimizer.step()
            
        val_loss = validate(model)
        print(f"Epoch {epoch+1}: train loss = {(total_loss / sample_counts):.4f}; val loss = {val_loss:.4f}")
        
        if val_loss < best_loss:
            best_loss = val_loss
            consecutive_no_improvement = 0
        else:
            consecutive_no_improvement += 1
            
        if early_stopping_patience and consecutive_no_improvement >= early_stopping_patience:
            print(f"Early stopping after {epoch + 1} epochs with no improvement.")
            break
            
    return model


def evaluate(model, train_loader, val_loader, print_report=False, device='cuda'):
    model.eval()
    model.to(device)
    def get_predictions_and_labels(loader):
        predictions = []
        true_labels = []
        for questions, labels in loader:
            questions, labels = questions.to(device), labels.to(device)
            outputs = model(questions)
            predicted = (outputs > 0.5).float().view(-1)
            predictions.extend(predicted.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
        return predictions, true_labels

    if print_report:
        train_predictions, train_labels = get_predictions_and_labels(train_loader)
        print("Train Classification Report:")
        print(classification_report(train_labels, train_predictions, zero_division=1))

        val_predictions, val_labels = get_predictions_and_labels(val_loader)
        print("Validation Classification Report:")
        print(classification_report(val_labels, val_predictions, zero_division=1))
    else: 
        val_predictions, val_labels = get_predictions_and_labels(val_loader)
    return accuracy_score(val_labels, val_predictions)


def train_full(model, criterion, optimizer, full_train_dataloader, num_epochs=10, device='cuda'):
    model.to(device)
    for epoch in range(num_epochs):
        model.train()  
        total_loss, sample_counts = 0, 0
        for (questions, labels) in tqdm(full_train_dataloader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            questions, labels = questions.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(questions)
            loss = criterion(outputs.view(-1), labels)
            total_loss += loss.item()
            sample_counts += labels.size(0)
            loss.backward()
            optimizer.step()
        print(f"Epoch {epoch+1}: train loss = {(total_loss / sample_counts):.4f}")
    return model


def test_eval(model, full_train_dataloader, test_loader, device='cuda'):
    model.eval()
    model.to(device)
    def get_predictions_and_labels(loader):
        predictions = []
        true_labels = []
        for questions, labels in loader:
            questions, labels = questions.to(device), labels.to(device)
            outputs = model(questions)
            predicted = (outputs > 0.5).float().view(-1)
            predictions.extend(predicted.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())
        return predictions, true_labels

    full_train_predictions, full_train_labels = get_predictions_and_labels(full_train_dataloader)
    print("Train Classification Report:")
    print(classification_report(full_train_labels, full_train_predictions, zero_division=1))

    test_predictions, test_labels = get_predictions_and_labels(test_loader)
    print("Test Classification Report:")
    print(classification_report(test_labels, test_predictions, zero_division=1))
    print(f"Train accuracy = {accuracy_score(full_train_labels, full_train_predictions)}")
    print(f"Test accuracy = {accuracy_score(test_labels, test_predictions)}")

In [23]:
batch_size = 512

train_dataset = CustomDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

val_dataset = CustomDataset(X_val, y_val)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

test_dataset = CustomDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

full_train_dataset = ConcatDataset([train_dataset, val_dataset])
full_train_dataloader = DataLoader(full_train_dataset, batch_size=batch_size, shuffle=True)

In [16]:
model = FCN(input_dim=600, num_layers=4, hidden_dim=300)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

model = train(
    model, criterion, optimizer, train_loader, val_loader, 
    num_epochs=10, early_stopping_patience=2)

Epoch 1/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 1: train loss = 0.0011; val loss = 0.0010


Epoch 2/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 2: train loss = 0.0010; val loss = 0.0010


Epoch 3/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 3: train loss = 0.0010; val loss = 0.0010


Epoch 4/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 4: train loss = 0.0009; val loss = 0.0009


Epoch 5/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 5: train loss = 0.0009; val loss = 0.0009


Epoch 6/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 6: train loss = 0.0009; val loss = 0.0009


Epoch 7/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 7: train loss = 0.0008; val loss = 0.0009


Epoch 8/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 8: train loss = 0.0008; val loss = 0.0009


Epoch 9/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 9: train loss = 0.0008; val loss = 0.0009


Epoch 10/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 10: train loss = 0.0008; val loss = 0.0009


In [17]:
evaluate(model, train_loader, val_loader, print_report=True)

Train Classification Report:
              precision    recall  f1-score   support

         0.0       0.83      0.89      0.86    206401
         1.0       0.78      0.70      0.74    121060

    accuracy                           0.82    327461
   macro avg       0.81      0.79      0.80    327461
weighted avg       0.81      0.82      0.81    327461

Validation Classification Report:
              precision    recall  f1-score   support

         0.0       0.81      0.87      0.84     23067
         1.0       0.74      0.66      0.69     13318

    accuracy                           0.79     36385
   macro avg       0.78      0.76      0.77     36385
weighted avg       0.79      0.79      0.79     36385



0.7892538133846365

In [18]:
# hyper-param tuning

hyperparams_set = [
    {'num_layers':2, 'hidden_dim':100, 'dropout':0, 'lr':0.001},
    {'num_layers':2, 'hidden_dim':100, 'dropout':0.5, 'lr':0.001},
    {'num_layers':4, 'hidden_dim':300, 'dropout':0.5, 'lr':0.001},
]

best_val_acc, best_config = 0, None

for config in hyperparams_set:
    print(config)
    
    model = FCN(
        input_dim=600, num_layers=config['num_layers'], 
        hidden_dim=config['hidden_dim'], dropout=config['dropout']
    )
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
    
    model = train(model, criterion, optimizer, train_loader, val_loader, 
                  num_epochs=10, early_stopping_patience=2)    
    
    val_acc = evaluate(model, print_report=False)
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_config = config

{'num_layers': 2, 'hidden_dim': 100, 'dropout': 0, 'lr': 0.001}


Epoch 1/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 1: train loss = 0.0012; val loss = 0.0011


Epoch 2/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 2: train loss = 0.0011; val loss = 0.0011


Epoch 3/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 3: train loss = 0.0011; val loss = 0.0011


Epoch 4/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 4: train loss = 0.0011; val loss = 0.0011


Epoch 5/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 5: train loss = 0.0011; val loss = 0.0011


Epoch 6/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 6: train loss = 0.0011; val loss = 0.0011


Epoch 7/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 7: train loss = 0.0011; val loss = 0.0012


Epoch 8/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 8: train loss = 0.0011; val loss = 0.0011
Early stopping after 8 epochs with no improvement.
{'num_layers': 2, 'hidden_dim': 100, 'dropout': 0.5, 'lr': 0.001}


Epoch 1/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 1: train loss = 0.0012; val loss = 0.0012


Epoch 2/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 2: train loss = 0.0011; val loss = 0.0011


Epoch 3/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 3: train loss = 0.0011; val loss = 0.0011


Epoch 4/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 4: train loss = 0.0011; val loss = 0.0011


Epoch 5/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 5: train loss = 0.0011; val loss = 0.0011
Early stopping after 5 epochs with no improvement.
{'num_layers': 4, 'hidden_dim': 300, 'dropout': 0.5, 'lr': 0.001}


Epoch 1/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 1: train loss = 0.0011; val loss = 0.0011


Epoch 2/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 2: train loss = 0.0010; val loss = 0.0010


Epoch 3/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 3: train loss = 0.0010; val loss = 0.0010


Epoch 4/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 4: train loss = 0.0010; val loss = 0.0010


Epoch 5/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 5: train loss = 0.0009; val loss = 0.0009


Epoch 6/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 6: train loss = 0.0009; val loss = 0.0009


Epoch 7/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 7: train loss = 0.0009; val loss = 0.0009


Epoch 8/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 8: train loss = 0.0009; val loss = 0.0009


Epoch 9/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 9: train loss = 0.0009; val loss = 0.0009


Epoch 10/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 10: train loss = 0.0009; val loss = 0.0009


In [19]:
print(best_config, best_val_acc)

{'num_layers': 4, 'hidden_dim': 300, 'dropout': 0.5, 'lr': 0.001} 0.7849938161330219


In [16]:
hyperparams_set = [
    {'num_layers':4, 'hidden_dim':300, 'dropout':0.5, 'lr':0.1},
    {'num_layers':4, 'hidden_dim':300, 'dropout':0.5, 'lr':0.01},
    {'num_layers':4, 'hidden_dim':300, 'dropout':0.5, 'lr':0.0001},
]

best_val_acc, best_config = 0, None
for config in hyperparams_set:
    print(config)
    
    model = FCN(
        input_dim=600, num_layers=config['num_layers'], 
        hidden_dim=config['hidden_dim'], dropout=config['dropout']
    )
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
    
    model = train(model, criterion, optimizer, train_loader, val_loader,
                  num_epochs=10, early_stopping_patience=2)    
    
    val_acc = evaluate(model, train_loader, val_loader, print_report=False)
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_config = config

{'num_layers': 4, 'hidden_dim': 300, 'dropout': 0.5, 'lr': 0.1}


Epoch 1/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 1: train loss = 0.0721; val loss = 0.0726


Epoch 2/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 2: train loss = 0.0723; val loss = 0.0723


Epoch 3/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 3: train loss = 0.0722; val loss = 0.0722


Epoch 4/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 4: train loss = 0.0723; val loss = 0.0724


Epoch 5/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 5: train loss = 0.0722; val loss = 0.0720


Epoch 6/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 6: train loss = 0.0722; val loss = 0.0724


Epoch 7/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 7: train loss = 0.0723; val loss = 0.0722
Early stopping after 7 epochs with no improvement.
{'num_layers': 4, 'hidden_dim': 300, 'dropout': 0.5, 'lr': 0.01}


Epoch 1/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 1: train loss = 0.0011; val loss = 0.0011


Epoch 2/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 2: train loss = 0.0011; val loss = 0.0011


Epoch 3/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 3: train loss = 0.0011; val loss = 0.0011


Epoch 4/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 4: train loss = 0.0011; val loss = 0.0011


Epoch 5/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 5: train loss = 0.0011; val loss = 0.0011


Epoch 6/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 6: train loss = 0.0011; val loss = 0.0011


Epoch 7/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 7: train loss = 0.0011; val loss = 0.0011
Early stopping after 7 epochs with no improvement.
{'num_layers': 4, 'hidden_dim': 300, 'dropout': 0.5, 'lr': 0.0001}


Epoch 1/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 1: train loss = 0.0012; val loss = 0.0011


Epoch 2/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 2: train loss = 0.0011; val loss = 0.0011


Epoch 3/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 3: train loss = 0.0011; val loss = 0.0011


Epoch 4/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 4: train loss = 0.0011; val loss = 0.0011


Epoch 5/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 5: train loss = 0.0011; val loss = 0.0011


Epoch 6/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 6: train loss = 0.0011; val loss = 0.0010


Epoch 7/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 7: train loss = 0.0010; val loss = 0.0010


Epoch 8/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 8: train loss = 0.0010; val loss = 0.0010


Epoch 9/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 9: train loss = 0.0010; val loss = 0.0010


Epoch 10/10:   0%|          | 0/640 [00:00<?, ?it/s]

Epoch 10: train loss = 0.0010; val loss = 0.0010


In [17]:
best_config, best_val_acc

({'num_layers': 4, 'hidden_dim': 300, 'dropout': 0.5, 'lr': 0.0001},
 0.74767074343823)

In [20]:
# retrain and full train + val set, evaluate on test set

model = FCN(input_dim=600, num_layers=4, hidden_dim=300, dropout=0.5)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

model = train_full(model, criterion, optimizer, full_train_dataloader, num_epochs=10)    

Epoch 1/10:   0%|          | 0/711 [00:00<?, ?it/s]

Epoch 1: train loss = 0.0011


Epoch 2/10:   0%|          | 0/711 [00:00<?, ?it/s]

Epoch 2: train loss = 0.0010


Epoch 3/10:   0%|          | 0/711 [00:00<?, ?it/s]

Epoch 3: train loss = 0.0010


Epoch 4/10:   0%|          | 0/711 [00:00<?, ?it/s]

Epoch 4: train loss = 0.0010


Epoch 5/10:   0%|          | 0/711 [00:00<?, ?it/s]

Epoch 5: train loss = 0.0009


Epoch 6/10:   0%|          | 0/711 [00:00<?, ?it/s]

Epoch 6: train loss = 0.0009


Epoch 7/10:   0%|          | 0/711 [00:00<?, ?it/s]

Epoch 7: train loss = 0.0009


Epoch 8/10:   0%|          | 0/711 [00:00<?, ?it/s]

Epoch 8: train loss = 0.0009


Epoch 9/10:   0%|          | 0/711 [00:00<?, ?it/s]

Epoch 9: train loss = 0.0009


Epoch 10/10:   0%|          | 0/711 [00:00<?, ?it/s]

Epoch 10: train loss = 0.0009


In [24]:
test_eval(model, full_train_dataloader, test_loader)

Train Classification Report:
              precision    recall  f1-score   support

         0.0       0.82      0.87      0.84    229468
         1.0       0.75      0.68      0.71    134378

    accuracy                           0.80    363846
   macro avg       0.79      0.77      0.78    363846
weighted avg       0.80      0.80      0.80    363846

Test Classification Report:
              precision    recall  f1-score   support

         0.0       0.81      0.85      0.83     25545
         1.0       0.72      0.66      0.69     14885

    accuracy                           0.78     40430
   macro avg       0.76      0.75      0.76     40430
weighted avg       0.78      0.78      0.78     40430

Train accuracy = 0.797708371124047
Test accuracy = 0.7794212218649518


In [25]:
evaluate(model, train_loader, val_loader, print_report=True)

Train Classification Report:
              precision    recall  f1-score   support

         0.0       0.82      0.86      0.84    206401
         1.0       0.75      0.68      0.71    121060

    accuracy                           0.80    327461
   macro avg       0.79      0.77      0.78    327461
weighted avg       0.79      0.80      0.80    327461

Validation Classification Report:
              precision    recall  f1-score   support

         0.0       0.83      0.87      0.85     23067
         1.0       0.75      0.69      0.72     13318

    accuracy                           0.80     36385
   macro avg       0.79      0.78      0.78     36385
weighted avg       0.80      0.80      0.80     36385



0.8014291603682836