# Classification Objective
***
Our objective in this project is to predict the nationality associated with a given last name. 

The original dataset contains 10000 surnames from 18 different nationalities. This dataset is imbalanced, as there is larger representation from certain nationalities than that of others. 

Here are some imports needed:

In [1]:
from argparse import Namespace
from collections import Counter
import json
import os
import string

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import logging
logging.basicConfig(format='%(levelname)s : %(asctime)s : %(message)s', level=logging.INFO)

# MLP Model
***

In [2]:
class Two_Layer_MLP(nn.Module):
    def __init__(self, num_features, hidden_dim, output_dim, dropout_prob=0.5):
        """
        Args:
            num_features (int) - size of input vector
            hidden_dim (int) - the size after first Linear Layer
            output_dim (int) - size after second Linear Layer
        """
        super(Two_Layer_MLP, self).__init__()
        self.dropout_prob = dropout_prob
        self.fc1 = nn.Linear(num_features, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        print("Initialized a two-layer MLP")
        print("Number of features: {}".format(num_features))
        print("Number of hidden units: {}".format(hidden_dim))
        print("Number of output classes: {}".format(output_dim))
        print("Dropout Probability: {}".format(self.dropout_prob))
    
    def forward(self, x_in, apply_softmax=False):
        """
        compute forward pass
        
        Args:
            x_in (torch.Tensor) - input data tensor. x_in.shape is (batch, num_features)
            apply_softmax (bool) - a flag for the softmax activation. 
                                    should be False if used with cross-entropy loss
        Returns:
            resulting tensor. tensor.shape is (batch, output_dim)
        """
        intermediate = F.relu(self.fc1(x_in)) # output of first hidden layer
        output = self.fc2(F.dropout(intermediate,p=self.dropout_prob))       # final output vector
        
        if apply_softmax:
            output = F.softmax(output, dim=1)
        return output

Here is an example of how to instantiate an MLP

In [3]:
batch_size = 2   # 2 rows at once
num_features  = 3   # 3 original features
hidden_dim = 100 # 100 nodes in first hidden layer
output_dim = 4   # output 4-d vector

In [4]:
#initialize the Model
mlp = Two_Layer_MLP(num_features, hidden_dim, output_dim)
print(mlp)

Initialized a two-layer MLP
Number of features: 3
Number of hidden units: 100
Number of output classes: 4
Dropout Probability: 0.5
Two_Layer_MLP(
  (fc1): Linear(in_features=3, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=4, bias=True)
)


Now we can test the integrity to make sure we got the dimensions correctly by passing some random inputs. 

In [5]:
def describe(tensor):
    """
    Given a PyTorch tensor, print some of its properties

    Args:
        x (torch.Tensor): input tensor to describe
    """
    print("Type: {}".format(tensor.type()))
    print("Shape/size: {}".format(tensor.shape))
    print("Values: \n{}".format(tensor))

x_input = torch.rand(size=(batch_size, num_features))
print("Input data:")
describe(x_input)
print()
y_output = mlp.forward(x_input, apply_softmax=False) # you may omit the 'forward' here
print("Output vectors:")
describe(y_output)

Input data:
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values: 
tensor([[0.4050, 0.9059, 0.8895],
        [0.9912, 0.9022, 0.4816]])

Output vectors:
Type: torch.FloatTensor
Shape/size: torch.Size([2, 4])
Values: 
tensor([[-0.1352, -0.0792,  0.0693,  0.1378],
        [ 0.2605,  0.1162,  0.1707,  0.2010]], grad_fn=<AddmmBackward>)


If we want, we can convert each of the output vectors (each row) into a vector of probabilities by enabling the softmax activation function:

In [6]:
y_output_softmax = mlp(x_input, apply_softmax=True)
print("Output vectors after softmax:")
describe(y_output_softmax)

Output vectors after softmax:
Type: torch.FloatTensor
Shape/size: torch.Size([2, 4])
Values: 
tensor([[0.2457, 0.2170, 0.3195, 0.2177],
        [0.2296, 0.2562, 0.3001, 0.2141]], grad_fn=<SoftmaxBackward>)


We can check to make sure that each of these 2 rows sums to one:

In [7]:
y_output_softmax.detach().numpy().sum(axis=1)

array([1.0000001, 1.0000001], dtype=float32)

# Data Vectorization Classes
***

In [8]:
class Vocabulary(object):
    """
    Vocabulary object manages the dictionary of tokens to indexes and the dictionary of indexes to tokens
    """
    def __init__(self, token_to_idx=None, add_unk=True, unk_token="<UNK>"):
        """
        Args:
            token_to_idx (dict): a pre-existing map of tokens to indices
            add_unk (bool): a flag indicating whether to add UNK token
            unk_token (str): the UNK tokne to add into the Vocabulary
        """
        # if the token->idx dictionary does not exist
        if token_to_idx is None:
            # create it
            token_to_idx = {}
        
        self._token_to_idx = token_to_idx
        self._idx_to_token = {idx: token for token, idx in self._token_to_idx.items()}
        # whether to add a token for unknown words
        self._add_unk = add_unk
        # the token to use for unknown words
        self._unk_token = unk_token
        
        # if adding token for unknown words
        if add_unk:
            # index to use for unknown words
            self.unk_index = self.add_token(unk_token)
        else:
            self.unk_index = -1
    def to_serializable(self):
        """
        this method returns a dictionary containing the token to index dictionary, 
        whether or not we added an UNK token, and the token used
        """
        return {"token_to_idx": self._token_to_idx,
               "add_unk": self._add_unk,
               "unk_token": self._unk_token}
    @classmethod
    def from_serializable(cls, contents):
        """
        instantiate a Vocabulary instance from a dictionary
        """
        return cls(**contents)
    def add_token(self, token):
        """
        Update the two dictionaries, adding the newest token
        
        Args:
            token (str): token to add to dictionary
        Returns:
            index (int): integer corresponding to token
        """
        try:
            index = self._token_to_idx[token]
        except KeyError:
            # get length of current list to be the index of new one 
            index = len(self._token_to_idx)
            self._token_to_idx[token] = index
            self._idx_to_token[index] = token
        return index
    def add_many(self, tokens):
        """
        Args:
            tokens (list): a list of string tokens
        Returns:
            indices (list): a list of indices corresponding to the tokens
        """
        return [self.add_token(token) for token in tokens]
    def get_index(self, token):
        """
        Get index from token.
        If unk_index >= 0 then it has been added into vocabulary to enabled the UNK functionality
        
        Args:
            token (str): the token whose index to look up
        Returns:
            index (int): the index corresponding to token
        """
        if self.unk_index >= 0:
            # None safe getitem; return self.unk_index if not found
            return self._token_to_idx.get(token, self.unk_index)
        else:
            return self._token_to_idx[token]
    def get_token(self, index):
        """
        Get token associated to index.
        
        Args:
            index (int): index to look up
        Returns:
            token (str): the token corresponding to index
        """
        if index not in self._idx_to_token:
            raise KeyError("Index {0:d} not in Vocabulary".format(index))
        return self._idx_to_token[index]
    def __str__(self):
        return "<Vocabulary(size={0:d})>".format(len(self))
    def __len__(self):
        return len(self._token_to_idx)

In [9]:
class SurnameVectorizer(object):
    def __init__(self, surname_vocab, nationality_vocab):
        self.surname_vocab = surname_vocab
        self.nationality_vocab = nationality_vocab
    def vectorize(self, surname):
        """
        Args:
            surname (str): the surname to vectorize
        Returns:
            one_hot (np.ndarray): a collapsed one-hot encoding
        """
        # initialize empty one-hot representation
        one_hot_vec = np.zeros(shape=(len(self.surname_vocab),), dtype=np.float32)
        for token in surname:
            one_hot_vec[self.surname_vocab.get_index(token)] = 1
        return one_hot_vec
    @classmethod
    def from_dataframe(cls, surname_df):
        """
        create an instance of vectorizer from a dataframe of surnames.
        create the constituent vocabularies and fill them from scratch
        """
        # initialize empty vocab using @ as unk token
        surname_vocab = Vocabulary(unk_token="@")
        nationality_vocab = Vocabulary(add_unk=False)
        for index, row in surname_df.iterrows():
            for letter in row.surname:
                surname_vocab.add_token(letter)
            nationality_vocab.add_token(row.nationality)
        return cls(surname_vocab, nationality_vocab)
    @classmethod
    def from_serializable(cls, contents):
        """
        Initialize a surname vectorizer from a dictionary containing vocabularies
        """
        surname_vocab = Vocabulary.from_serializable(contents["surname_vocab"])
        nationality_vocab = Vocabulary.from_serializable(contents["nationality_vocab"])
        return cls(surname_vocab=surname_vocab, nationality_vocab=nationality_vocab)
    def to_serializable(self):
        """
        create a dictionary containing the surname and nationality dictionaries
        """
        return {"surname_vocab": self.surname_vocab.to_serializable(),
               "nationality_vocab": self.nationality_vocab.to_serializable()}

In [10]:
class SurnameDataset(Dataset):
    def __init__(self, surname_df, vectorizer):
        self.surname_df = surname_df
        self._vectorizer = vectorizer
        
        self.train_df = self.surname_df[self.surname_df["split"] == "train"]
        self.train_size= len(self.train_df)
        
        self.val_df = self.surname_df[self.surname_df["split"] == "val"]
        self.validation_size = len(self.val_df)
        
        self.test_df = self.surname_df[self.surname_df["split"] == "test"]
        self.test_size = len(self.test_df)
        
        self._data_dictionary = {"train": (self.train_df, self.train_size),
                                "val": (self.val_df, self.validation_size),
                                "test": (self.test_df, self.test_size)}
        self.set_split("train")
        print("Initialized dataset split to train")
        
        class_counts = surname_df["nationality"].value_counts().to_dict()
        # sort the counts by its index in nationality dictionary
        sorted_counts = sorted(class_counts.items(), 
            key=lambda item: self._vectorizer.nationality_vocab.get_index(item[0]))
        frequencies = [count for _, count in sorted_counts]
        self.class_weights = 1.0 / torch.tensor(frequencies, dtype=torch.float32)
    @classmethod
    def load_dataset_and_make_vectorizer(cls, surname_csv):
        """
        Load a dataset from file and create a vectorizer from the training portion of it 
        
        Args:
            surname_csv (str): location of dataset
        Returns:
            an instance of SurnameDataset
        """
        surname_df = pd.read_csv(surname_csv)
        train_surname_df = surname_df[surname_df["split"] == "train"]
        return cls(surname_df, SurnameVectorizer.from_dataframe(train_surname_df))
    @classmethod
    def load_dataset_and_load_vectorizer(cls, surname_csv, vectorizer_filepath):
        surname_df = pd.read_csv(surname_csv)
        vectorizer = cls.load_vectorizer_only(vectorizer_filepath)
        return cls(surname_df, vectorizer)
    @staticmethod
    def load_vectorizer_only(vectorizer_filepath):
        """
        load only vectorizer from file
        
        Args:
            vectorizer_filepath (str): location of serialized vectorizer
        Returns:
            instance of SurnameVectorizer
        """
        with open(vectorizer_filepath) as fileHandle:
            return SurnameVectorizer.from_serializable( json.load(fileHandle) )
    def save_vectorizer(self, vectorizer_filepath):
        with open(vectorizer_filepath, "w") as outFile:
            json.dump(self._vectorizer.to_serializable(), outFile)
    def get_vectorizer(self):
        return self._vectorizer
    def set_split(self, split="train"):
        self._current_split = split
        self._current_df, self._current_size = self._data_dictionary[split]
    def __len__(self):
        return self._current_size
    def __getitem__(self, index):
        """
        entryoint for PyTorch Dataset
        """
        row = self._current_df.iloc[index]
        surname_vector = self._vectorizer.vectorize(row.surname)
        nationality_index = self._vectorizer.nationality_vocab.get_index(row.nationality)
        return {"x_surname": surname_vector, 
               "y_nationality": nationality_index}
    def get_num_batches(self, batch_size):
        """
        using currently active split, compute how many batches are in this set
        """
        return len(self) // batch_size

In [11]:
def generate_batches( dataset, batch_size, shuffle=True, drop_last=True, device="cpu" ):
    """
    a wrapper for PyTorch DataLoader that exposes a generator for lazy iteration.
    before consuming, ensures tensor is located on correct device
    """
    dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
    for data_dict in dataloader:
        out_data_dict = {}
        for name, tensor in data_dict.items():
            out_data_dict[name] = data_dict[name].to(device)
        yield out_data_dict

# Setting Up Parameters
***
Here are some helper functions for training.

In [12]:
def make_train_state(args):
    """
    yields a dictionary created from values of args
    """
    return {'stop_early': False,
            'early_stopping_step': 0,
            'early_stopping_best_val': 1e8,
            'learning_rate': args.learning_rate,
            'epoch_index': 0,
            'train_loss': [],
            'train_acc': [],
            'val_loss': [],
            'val_acc': [],
            'test_loss': -1,
            'test_acc': -1,
            'model_filename': args.model_state_file}
def update_train_state(args, model, train_state):
    """
    Update the training state.
    Implements early stopping to prevent overfitting
    Implements Model Checkpoitns to only save model if it is better
    """
    # Save one model at least
    if train_state['epoch_index'] == 0:
        torch.save(model.state_dict(), train_state['model_filename'])
        train_state['stop_early'] = False

    # Save model if performance improved
    elif train_state['epoch_index'] >= 1:
        loss_tm1, loss_t = train_state['val_loss'][-2:]

        # If loss worsened
        if loss_t >= train_state['early_stopping_best_val']:
            # Update step
            train_state['early_stopping_step'] += 1
        # Loss decreased
        else:
            # Save the best model
            if loss_t < train_state['early_stopping_best_val']:
                torch.save(model.state_dict(), train_state['model_filename'])

            # Reset early stopping step
            train_state['early_stopping_step'] = 0

        # Stop early ?
        train_state['stop_early'] = train_state['early_stopping_step'] >= args.early_stopping_criteria

    return train_state
def compute_accuracy(predictions, true_values):
    # on output vectors, pick column with highest prob
    _, prediction_indices = predictions.max(dim=1)
    n_correct = torch.eq(prediction_indices, true_values).sum().item()
    return n_correct / len(prediction_indices) * 100
def set_seed_everywhere( seed, cuda ):
    np.random.seed(seed)
    torch.manual_seed(seed)
    if cuda:
        torch.cuda.manual_seed_all(seed)
def handle_dirs(dirpath):
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)

We define some args that we want to use 

In [13]:
args = Namespace(
    data_file = "data/surnames_with_splits.csv",
    vectorizer_file = "surname_vectorizer.json",
    model_state_file = "model.pth",
    save_dir = "data",
    hidden_dim=300,
    seed=2019,
    num_epochs=100,
    early_stopping_criteria=5,
    learning_rate=0.001,
    batch_size=64,
    cuda=False,
    reload_from_files=False
)

In [14]:
# prepend save directory to filenames
args.vectorizer_file = os.path.join(args.save_dir, args.vectorizer_file)
args.model_state_file = os.path.join(args.save_dir, args.model_state_file)
print("Vectorizer file full path:\n{}".format(args.vectorizer_file))
print("Model state file full path:\n{}".format(args.model_state_file))

# check to see if we can use CUDA
if not torch.cuda.is_available():
    args.cuda = False

args.device = torch.device("cuda" if args.cuda else "cpu")
print("Using CUDA: {}".format(args.cuda))
# set seed everywhere for reproducibility
set_seed_everywhere(args.seed, args.cuda)

Vectorizer file full path:
data/surname_vectorizer.json
Model state file full path:
data/model.pth
Using CUDA: False


In [15]:
if args.reload_from_files:
    print("Reloading surname dataset and vectorizer from file")
    dataset = SurnameDataset.load_dataset_and_load_vectorizer(args.data_file, args.vectorizer_file)
else:
    print("Loading dataset and creating a new vectorizer")
    dataset = SurnameDataset.load_dataset_and_make_vectorizer(args.data_file)
    dataset.save_vectorizer(args.vectorizer_file)

vectorizer = dataset.get_vectorizer()
classifier = Two_Layer_MLP(num_features=len(vectorizer.surname_vocab),
                          hidden_dim=args.hidden_dim,
                          output_dim=len(vectorizer.nationality_vocab))

Loading dataset and creating a new vectorizer
Initialized dataset split to train
Initialized a two-layer MLP
Number of features: 77
Number of hidden units: 300
Number of output classes: 18
Dropout Probability: 0.5


# Training Loop
***

In [16]:
print("Moving classifier to {}".format(args.device))
classifier = classifier.to(args.device)
print("Moving class weights of dataset object to {}".format(args.device))
dataset.class_weights = dataset.class_weights.to(args.device)

Moving classifier to cpu
Moving class weights of dataset object to cpu


In [17]:
%%time

loss_func = nn.CrossEntropyLoss(dataset.class_weights)
optimizer = optim.Adam(classifier.parameters(), lr=args.learning_rate)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, mode='min', factor=0.5, patience=1)

train_state = make_train_state(args)

try:
    for epoch_index in range(args.num_epochs):
        train_state["epoch_index"] = epoch_index
        dataset.set_split("train")
        batch_generator = generate_batches(dataset, batch_size=args.batch_size, device=args.device)
        running_loss = 0.0
        running_acc = 0.0
        classifier.train()
        for batch_index, batch_dict in enumerate(batch_generator):
            # step 1. zero the gradients
            optimizer.zero_grad()
            # step 2. compute output
            predictions = classifier.forward(batch_dict["x_surname"])
            # step 3. compute loss
            loss = loss_func(predictions, batch_dict["y_nationality"])
            loss_t = loss.item()
            running_loss += (loss_t - running_loss) / (batch_index + 1)
            # step 4. compute gradients given loss
            loss.backward()
            # step 5. use optimizer to take gradient step
            optimizer.step()
            
            acc_t = compute_accuracy(predictions, batch_dict["y_nationality"])
            running_acc += (acc_t - running_acc) / (batch_index + 1)
            
        train_state["train_loss"].append(running_loss)
        train_state["train_acc"].append(running_acc)
        
        # now compute on validation set
        dataset.set_split("val")
        batch_generator = generate_batches(dataset, batch_size=args.batch_size, device=args.device)
        running_loss = 0.0
        running_acc = 0.0
        classifier.eval()
        
        for batch_index, batch_dict in enumerate(batch_generator):
            predictions = classifier.forward(batch_dict["x_surname"])
            loss = loss_func(predictions, batch_dict["y_nationality"])
            loss_t = loss.to("cpu").item()
            running_loss += (loss_t - running_loss) / (batch_index + 1)
            
            acc_t = compute_accuracy(predictions, batch_dict["y_nationality"])
            running_acc += (acc_t - running_acc) / (batch_index + 1)
        
        train_state["val_loss"].append(running_loss)
        train_state["val_acc"].append(running_acc)
        
        train_state = update_train_state(args=args, model=classifier, train_state=train_state)
        scheduler.step(train_state["val_loss"][-1])
        
        logging.info("Epoch {0:d} - Validation Accuracy {1:.2f}"\
                     .format(epoch_index+1, train_state['val_acc'][-1]))
        if train_state["stop_early"]:
            break
except KeyboardInterrupt:
    print("Exiting Loop")

INFO : 2019-08-22 13:31:47,854 : Epoch 1 - Validation Accuracy 38.88
INFO : 2019-08-22 13:31:51,042 : Epoch 2 - Validation Accuracy 38.56
INFO : 2019-08-22 13:31:53,791 : Epoch 3 - Validation Accuracy 39.06
INFO : 2019-08-22 13:31:56,524 : Epoch 4 - Validation Accuracy 38.44
INFO : 2019-08-22 13:31:59,266 : Epoch 5 - Validation Accuracy 40.62
INFO : 2019-08-22 13:32:01,988 : Epoch 6 - Validation Accuracy 39.00
INFO : 2019-08-22 13:32:04,588 : Epoch 7 - Validation Accuracy 39.56
INFO : 2019-08-22 13:32:07,262 : Epoch 8 - Validation Accuracy 35.94
INFO : 2019-08-22 13:32:10,011 : Epoch 9 - Validation Accuracy 38.94
INFO : 2019-08-22 13:32:12,726 : Epoch 10 - Validation Accuracy 39.69
INFO : 2019-08-22 13:32:15,442 : Epoch 11 - Validation Accuracy 38.69
INFO : 2019-08-22 13:32:18,092 : Epoch 12 - Validation Accuracy 38.62
INFO : 2019-08-22 13:32:20,756 : Epoch 13 - Validation Accuracy 40.81
INFO : 2019-08-22 13:32:23,487 : Epoch 14 - Validation Accuracy 40.94
INFO : 2019-08-22 13:32:26,23

CPU times: user 26min 42s, sys: 978 ms, total: 26min 43s
Wall time: 4min 33s


Now compute accuracy on test set

In [18]:
classifier.load_state_dict(torch.load(train_state["model_filename"]))

classifier = classifier.to(args.device)
dataset.class_weights = dataset.class_weights.to(args.device)
loss_func = nn.CrossEntropyLoss(dataset.class_weights)

dataset.set_split("test")
batch_generator = generate_batches(dataset, batch_size=args.batch_size, device=args.device)

running_loss = 0.0
running_acc = 0.0
classifier.eval()

for batch_index, batch_dict in enumerate(batch_generator):
    predictions = classifier.forward(batch_dict["x_surname"])
    loss = loss_func(predictions, batch_dict["y_nationality"])
    loss_t = loss.item()
    running_loss += (loss_t - running_loss) / (batch_index + 1)
    acc_t = compute_accuracy(predictions, batch_dict["y_nationality"])
    running_acc += (acc_t - running_acc) / (batch_index + 1)
    
train_state["test_loss"] = running_loss
train_state["test_acc"] = running_acc

In [19]:
print("Test loss: {}".format(train_state["test_loss"]))
print("Test Accuracy: {}".format(train_state["test_acc"]))

Test loss: 1.933490538597107
Test Accuracy: 39.18750000000001


# Performing Inference on New Examples
***

In [20]:
def predict_nationality(surname, classifier, vectorizer):
    """
    Predict the nationality from a new surname
    
    Args:
        surname (str): the surname to classifier
        classifier (Two_Layer_MLP_Layer_MLP): an instance of classifier
        vectorizer (SurnameVectorizer): an instance of vectorizer
    Returns:
        dictionary with the most likely nationality and its corresponding probability
    """
    vectorized_surname = vectorizer.vectorize(surname)
    vectorized_surname = torch.tensor(vectorized_surname).view(1,-1)
    result = classifier.forward(vectorized_surname, apply_softmax=True)
    
    probability_vals, indices = result.max(dim=1)
    index = indices.item()
    predicted_nationality = vectorizer.nationality_vocab.get_token(index)
    probability_value = probability_vals.item()
    
    return {"nationality": predicted_nationality, "probability": probability_value}

In [21]:
new_surname = input("Enter a surname: ")
classifier = classifier.to("cpu")
prediction = predict_nationality(new_surname, classifier, vectorizer)
print("{} -> {} (p={:.2f})".format(new_surname, prediction["nationality"],prediction["probability"] ))

Enter a surname: Corrado
Corrado -> Portuguese (p=0.49)


In [22]:
def predict_topn_nationalities(new_surname, classifier, vectorizer, topn=5):
    vectorized_surname = vectorizer.vectorize(new_surname)
    vectorized_surname = torch.tensor(vectorized_surname).view(1,-1)
    prediction_vector = classifier.forward(vectorized_surname, apply_softmax=True)
    probability_values, indices = torch.topk( prediction_vector, k=topn )
    
    probability_values = probability_values.detach().numpy()[0]
    indices = indices.detach().numpy()[0]
    
    results = []
    for prob_value, index in zip(probability_values, indices):
        nationality = vectorizer.nationality_vocab.get_token(index)
        results.append({"nationality": nationality, "probability": prob_value})
    return results

In [23]:
new_surname = input("Enter a surname: ")
classifier = classifier.to("cpu")
predictions = predict_topn_nationalities(new_surname, classifier, vectorizer)
print("Top predictions:")
print("================")
for prediction in predictions:
    print("{} -> {} (p={:.2f})".format(new_surname, prediction["nationality"],prediction["probability"] ))

Enter a surname: Corrado
Top predictions:
Corrado -> Portuguese (p=0.39)
Corrado -> Italian (p=0.20)
Corrado -> Irish (p=0.10)
Corrado -> English (p=0.08)
Corrado -> French (p=0.07)
