In [4]:
import pandas as pd
import numpy as np
from util.utilities import (
    temperature_scaled_mixing, 
    get_dataset,
    get_clf_data,
    one_hot_encode,
    explode_onehot_df,
    onehot_label_counts,
    split_mix_set,
    duplicates_1,
    duplicates_2,
    word_replace_dict,
    bad_labels,
    contact_labels
)
from tqdm import tqdm
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import hamming_loss, classification_report

from torch.utils.data import Dataset, DataLoader, TensorDataset
from torch import nn
from transformers import (
    DistilBertModel, 
    PreTrainedTokenizer, 
    DistilBertTokenizerFast
)
import torch
import pprint

from torch import cuda
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if cuda.is_available() else 'cpu'

# Widen the pandas display so long note text and label names are not truncated.
pd.set_option('display.max_columns', 20)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', None)

#### Focusing solely on Team Check-In and ID Documents Data
Removing all intervention data with preprocessing done in cb_exploration

In [5]:
# Load the checkbox-classification data. Per the loader's own log output, duplicate
# labels are consolidated with the duplicates_1 mapping and entries whose label is
# in bad_labels are dropped.
df1 = get_clf_data(duplicate_labels_dict=duplicates_1, labels_to_ignore=bad_labels)
# Sanity check: list the distinct labels that remain, then preview the first row.
print(df1["DESCRIPTION"].unique())
df1.head(1)

--- LOADING DATA... ---


  df = pd.read_csv(strs[name], **args)


Total initial entries: 108804
Total initial notes: 64385
- Removed irrelevant columns for checkbox classification, kept: Index(['NOTEID', 'NOTE', 'DESCRIPTION', 'GROUPNAME'], dtype='object')
- Dropped empty notes
- Dropped interventions checkboxes
- Consolidated duplicate labels using the provided mappings.
{'Direct Contact': ['Client contact in office', 'Direct Contact through Outreach', 'Client contact', 'Client Contact out of office'], 'Attempted client contact': ['Attempted client contact through Outreach', 'Attempt to locate client'], 'Client Assessment Conducted': ['New Client assessment completed'], 'Coordinated Assessment Completed at DACC': ['Coordinated Assessment Completed through HOST contact']}
- Dropped entries containing the provided labels
Total labels: 17
Total entries after preprocessing: 77274
Total notes after preprocessing: 64062
--- FINISHED LOADING DATA. ---
['No Show' 'Collateral Contact' 'Direct Contact' 'Client Texas State ID'
 'Attempted client contact' 'Trea

Unnamed: 0,NOTEID,NOTE,DESCRIPTION
0,1485559,CL did not show up for appointment to complete CA.,No Show


#### Upsampling Heuristic 
- There is a problem I have to deal with here... How to properly handle upsampling with a multi-label dataset? 
- My solution: For upsampling purposes, weight each data point as if its only label were the one with the lowest number of occurrences in the whole dataset
  - Ex. if a note had labels ["Direct Contact", "HMIS Profile Created/Updated"], then it is weighted based on "HMIS..." 

In [6]:
def min_descriptions(df: pd.DataFrame, label_col, min_label_col, duplicate_id_col) -> pd.DataFrame:
    """Annotate every row with the rarest label of the note it belongs to.

    The input has one row per (note, label) pair. Two columns are added to the result:

    * ``"FREQ"``: how many rows in the whole frame carry this row's label.
    * ``min_label_col``: among all labels attached to the same note
      (rows sharing ``duplicate_id_col``), the label with the smallest ``FREQ``.

    Args:
        df: Frame with one row per (note, label) pair. It is NOT modified.
        label_col: Name of the column holding the label of each row.
        min_label_col: Name of the new column that receives the note's rarest label.
        duplicate_id_col: Name of the column identifying the note (repeated once
            per label of that note).

    Returns:
        A new DataFrame with ``duplicate_id_col`` as a regular column again plus
        the ``"FREQ"`` and ``min_label_col`` columns.
    """
    # Work on a copy so the caller's frame does not silently gain a "FREQ" column.
    df = df.copy()

    # For each row, the number of times the checkbox for that row was checked.
    # Ex. Note: ..., Description: No Show, FREQ: 1893
    df["FREQ"] = df.groupby(label_col)[label_col].transform('count')

    # Rarest label per note: sort by frequency and keep the first row of each note.
    # A stable sort makes the choice deterministic when two labels of the same note
    # are equally frequent (the one appearing first in the input wins).
    min_label = (
        df.sort_values("FREQ", kind="stable")
        .drop_duplicates(duplicate_id_col)
        .set_index(duplicate_id_col)[label_col]
    )

    # Merge back to the entire dataset so every row of a note carries that note's rarest label.
    df = df.set_index(duplicate_id_col).merge(
        min_label.rename(min_label_col), left_index=True, right_index=True
    )

    return df.reset_index(drop=False)

In [7]:
# Attach each note's rarest label (MIN_DESC); it is the key used later for upsampling.
df1 = min_descriptions(df1, "DESCRIPTION", "MIN_DESC", "NOTEID")
# Build the per-note 0/1 label vectors (LABELS column) and the list of class names.
oh_df1, classes = one_hot_encode(df=df1, label_col="DESCRIPTION", new_col="LABELS", duplicate_id_col="NOTEID")

# Spot check: notes whose rarest label is the customer-survey checkbox.
oh_df1[oh_df1["MIN_DESC"] == "Asked client to complete a DACC Customer survey"]

Unnamed: 0,NOTEID,NOTE,DESCRIPTION,FREQ,MIN_DESC,LABELS


In [8]:
# onehot_df_freq["MIN_DESC"].value_counts()

# A rare label may never be the *rarest* label of any note: if it always co-occurs
# with a checkbox that is even rarer across the dataset, it never appears in MIN_DESC.
# Comparing the two counts below shows whether any class is missing from MIN_DESC.

print(len(classes))
print(len(oh_df1["MIN_DESC"].unique()))

17
17


#### The DistilBert Model
The huggingface page for DistilBert links to the following tutorial on multi-label classification which I use as a guide to build my classifier. I am using DistilBert because it preserves over 95% of BERT's performance and runs 60% faster. Due to my hardware limitations, a Distilled model was the best choice. 

I came back to this model after working on entailment because I had a better understanding of the pipeline. So I rewrote the data loader using the documentation, the following tutorial, as well as the DocNLI git repo as a loose guide.

https://colab.research.google.com/github/DhavalTaunk08/Transformers_scripts/blob/master/Transformers_multilabel_distilbert.ipynb

In [9]:
class MultiLabelDataset(Dataset):
    """Tokenizes a DataFrame of texts once, up front, and serves them as tensors.

    Each example is a triple ``(input_ids, attention_mask, labels)``. The tokenized
    data is kept in ``self.train_data`` (a ``TensorDataset``), regardless of whether
    the frame is used for training or evaluation.
    """

    def __init__(self, tokenizer: PreTrainedTokenizer, df, text_col, label_col, max_len=None):
        """
        if max_len is undefined or None, tokenizer uses predefined model max length

        Args:
            tokenizer: Tokenizer called on every text.
            df: DataFrame holding the texts and their label vectors.
            text_col: Name of the column with the raw text.
            label_col: Name of the column with one label vector per row.
            max_len: Length every sequence is padded / truncated to.
        """

        self.tokenizer = tokenizer
        self.df = df
        self.max_len = max_len

        self.train_data = self.load_data(df[text_col], df[label_col])
    
    # Num notes
    def __len__(self):
        return len(self.df.index)

    def __getitem__(self, index):
        """Return the ``(input_ids, attention_mask, labels)`` triple at ``index``.

        Without this method the class inherits the base ``Dataset`` behaviour of
        raising ``NotImplementedError`` on indexing, so the object could not be
        used as a dataset directly.
        """
        return self.train_data[index]

    def load_data(self, text_list, labels_list):
        """Tokenize every text and pack ids, masks and labels into a ``TensorDataset``.

        Args:
            text_list: Iterable of texts.
            labels_list: Iterable of label vectors, aligned with ``text_list``.

        Returns:
            ``TensorDataset(token_ids, mask_ids, labels)`` with labels as floats.
        """
        token_ids = []
        mask_ids = []
        labels = []

        # Give the progress bar a total when the input has a known length.
        total = len(text_list) if hasattr(text_list, "__len__") else None

        # DistilBert doesn't take token_type_ids as input. 
        # Sequences should just be separated using the special tokens

        # longest_first truncates token by token, from longest sequence in pair
        # or only_first, truncates only from first seq. in pair
        # Doesn't matter here, but matters later for entailment
        for text, label in tqdm(zip(text_list, labels_list), total=total):
            inputs = self.tokenizer(
                text=text,
                add_special_tokens=True,
                padding="max_length",
                truncation="longest_first",
                max_length=self.max_len,
                return_token_type_ids=False, 
                return_attention_mask=True,
                return_tensors="pt"
            )

            ids = inputs["input_ids"]
            mask = inputs["attention_mask"]

            token_ids.append(ids)
            mask_ids.append(mask)
            labels.append(label)
        
        # Each tokenizer call returned a (1, seq_len) tensor; stack them along dim 0.
        token_ids = torch.cat(token_ids)
        mask_ids = torch.cat(mask_ids)
        labels = torch.FloatTensor(np.array(labels))
        
        dataset = TensorDataset(token_ids, mask_ids, labels)

        # print("Dataset Length: ", len(dataset))
        return dataset

    def get_data_loader(self, batch_size=4, shuffle=True):
        """Wrap the tokenized data in a ``DataLoader``.

        Pass ``shuffle=False`` whenever the order of the produced batches must
        match the row order of ``self.df`` (e.g. to line predictions up with rows).
        """
        data_loader = DataLoader(
            self.train_data,
            shuffle=shuffle,
            batch_size=batch_size
        )

        return data_loader
            

#### DistilBert For Classification
DistilBert with a classification head. This can be used for my entailment problem as well, just with a different dataloader

In [10]:
default_distilbert_dim = 768
distilbert_dropout = 0.1

class DistilBertForClassification(nn.Module):
    """DistilBert encoder followed by a small classification head.

    The head is: linear (768 -> 768), tanh, dropout, linear (768 -> num_classes).
    ``forward`` returns raw logits; no sigmoid/softmax is applied here.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.db = DistilBertModel.from_pretrained('distilbert-base-uncased')

        width = default_distilbert_dim
        self.dense_layer = nn.Linear(width, width)
        self.dropout = nn.Dropout(distilbert_dropout)
        self.non_lin = nn.Tanh()
        self.classifier = nn.Linear(width, num_classes)
    
    def forward(self, input_ids, input_mask):
        """Return one logit per class for every sequence in the batch."""
        encoder_out = self.db(input_ids=input_ids, attention_mask=input_mask)

        # Element 0 of the encoder output holds the last layer's hidden states,
        # shaped (batch_size, seq_len, hidden_size). Every sequence starts with
        # [CLS], so position 0 along seq_len is used as the pooled representation.
        cls_embedding = encoder_out[0][:, 0]

        pooled = self.dropout(self.non_lin(self.dense_layer(cls_embedding)))
        return self.classifier(pooled)


#### Classification Class which handles data loading and training

In [11]:
class DistilBertMLClassifier():
    """Owns the data loaders, model and optimizer for multi-label fine-tuning.

    Construction tokenizes both frames, builds a ``DistilBertForClassification``
    with one output per entry of ``label_names`` and moves it to ``device``.
    ``train()`` fine-tunes the model; ``validate()`` prints the Hamming loss and a
    per-label classification report on the test frame.
    """

    def __init__(self, text_col, label_col, label_names, train_data, test_data,
                 max_len=128, train_batch_size=4, valid_batch_size=4, 
                 epochs=1, learning_rate=1e-05, tokenizer=None):
        """
        Args:
            text_col: Column of ``train_data`` / ``test_data`` holding the text.
            label_col: Column holding one label vector per row.
            label_names: Class names, in the same order as the label vectors.
            train_data: DataFrame used for training.
            test_data: DataFrame used for validation.
            max_len: Token length each example is padded / truncated to.
            train_batch_size: Batch size of the training loader.
            valid_batch_size: Batch size of the validation loader.
            epochs: Number of passes over the training data.
            learning_rate: Adam learning rate.
            tokenizer: Optional tokenizer; the pretrained
                'distilbert-base-uncased' fast tokenizer is loaded when None.
        """

        self.max_len = max_len
        self.train_batch_size = train_batch_size
        self.valid_batch_size = valid_batch_size
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.tokenizer = tokenizer
        
        # Explicit None check: tokenizers define __len__, so a truthiness test
        # would depend on the vocabulary size rather than on "was one supplied".
        if tokenizer is None:
            self.tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased', truncation=True, do_lower_case=True)
                    
        # Creating the dataset and dataloader
        # The data loader chunks the data into smaller batches for the NN since you can't
        # feed all the data at the same time into the network

        print("TRAIN Dataset: {}".format(train_data.shape))
        print("TEST Dataset: {}".format(test_data.shape))

        self.training_set = MultiLabelDataset(tokenizer=self.tokenizer, df=train_data, 
                                         text_col=text_col, label_col=label_col, 
                                         max_len=self.max_len)
        self.testing_set = MultiLabelDataset(tokenizer=self.tokenizer, df=test_data, 
                                        text_col=text_col, label_col=label_col, 
                                        max_len=self.max_len)
        
        self.training_loader = self.training_set.get_data_loader(batch_size=train_batch_size)
        # Evaluation does not benefit from shuffling; keeping the original row order
        # makes the returned predictions line up with the rows of test_data.
        self.testing_loader = self.testing_set.get_data_loader(batch_size=valid_batch_size, shuffle=False)

        self.label_names = label_names

        self.model = DistilBertForClassification(len(label_names))
        self.model.to(device)

        # The reason I'm using BCE Loss is because we are doing Multi-Label CLF
        # in which one example can have multiple gold labels. Thus, we want to 
        # assign a sigmoid that maps the encodings to a probability for EACH class,
        # rather than a softmax which produces a probability distribution over the labels.

        self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=learning_rate)
    
    def loss_function(self, outputs, targets):
        """Binary cross-entropy on raw logits (the sigmoid is applied inside the loss)."""
        return nn.BCEWithLogitsLoss()(outputs, targets)
    
    def train(self):
        """Fine-tune the model for ``self.epochs`` passes over the training loader."""
        model = self.model
        training_loader = self.training_loader
        optimizer = self.optimizer

        model.train()
        for epoch in range(self.epochs):
            
            batches = tqdm(enumerate(training_loader), total=len(training_loader))
            for batch_idx, (input_ids, input_mask, labels) in batches:
                
                optimizer.zero_grad()

                input_ids = input_ids.to(device)
                input_mask = input_mask.to(device)
                labels = labels.to(device)

                outputs = model(input_ids, input_mask)
                loss = self.loss_function(outputs, labels.float())

                # Print the current batch loss every 5000 batches
                if batch_idx % 5000 == 0:
                    print("Epoch: %d, Loss: %f" % (epoch, loss.item()))
                
                loss.backward()
                optimizer.step()
    
    def test_model(self, test_set, return_predictions=False):
        """Run the model over a data loader without updating it.

        Args:
            test_set: Iterable of ``(input_ids, input_mask, labels)`` batches.
            return_predictions: When True, probabilities are thresholded at 0.5
                and a boolean numpy array is returned instead of a list of
                per-class probabilities.

        Returns:
            ``(outputs, targets)``: model outputs and gold label vectors, in the
            order the loader produced them.
        """
        model = self.model
        testing_loader = test_set

        final_targets = []
        final_outputs = []

        total = len(testing_loader) if hasattr(testing_loader, "__len__") else None

        model.eval()
        with torch.no_grad():
            for batch_idx, (input_ids, input_mask, labels) in tqdm(enumerate(testing_loader), total=total):
                
                input_ids = input_ids.to(device)
                input_mask = input_mask.to(device)

                outputs = model(input_ids, input_mask)

                # Labels never left the CPU; outputs are moved back before conversion.
                curr_targets = labels.numpy().tolist()
                curr_outputs = torch.sigmoid(outputs).cpu().detach().numpy().tolist()
        
                final_targets.extend(curr_targets)
                final_outputs.extend(curr_outputs)
        
        if return_predictions:
            # A probability above .5 signifies that the label belongs to the note
            final_outputs = np.array(final_outputs) >= 0.5

        return final_outputs, final_targets

    def validation_report(self, outputs, targets):
        """Print the Hamming loss and display the per-label classification report."""
        hamm_loss = hamming_loss(targets, outputs)

        print("Hamming Loss: %f" % hamm_loss)

        clf_report = classification_report(targets, outputs, target_names=self.label_names, zero_division=0, output_dict=True)
        clf_report = pd.DataFrame(clf_report).transpose()

        display(clf_report)

    def validate(self):
        """Evaluate on the test loader and report the metrics."""
        final_outputs, final_targets = self.test_model(self.testing_loader, return_predictions=True)
        self.validation_report(final_outputs, final_targets)

    def __str__(self):
        return "Hyperparams:\n max_tokens_per_example: %d\n train_batch_size: %d\n valid_batch_size: %d\n epochs: %d\n learning rate: %f\n train_size: %d\n test_size: %d\n" % (
            self.max_len,
            self.train_batch_size,
            self.valid_batch_size,
            self.epochs,
            self.learning_rate,
            len(self.training_set),
            len(self.testing_set)
        )

In [12]:
# Method for qualitative inspection of classifier results
from torch.utils.data import DataLoader

def test_classifier(test_data, label_names, clf: DistilBertMLClassifier):
    """Run ``clf`` on ``test_data`` and attach readable predicted / gold label names.

    Args:
        test_data: DataFrame with a "NOTE" text column and a "LABELS" column of
            label vectors. It is modified in place: "LABELS" is overwritten with
            the list of predicted label names and an "ACTUALS" column with the
            gold label names is added.
        label_names: Class names, indexed like the label vectors.
        clf: Trained classifier providing the tokenizer, ``max_len`` and
            ``test_model``.

    Returns:
        The same DataFrame object that was passed in.
    """
    notes = test_data
    small_test = MultiLabelDataset(clf.tokenizer, notes, "NOTE", "LABELS", max_len=clf.max_len)

    # The loader must NOT shuffle: predictions are written back to `notes` by
    # position below, so batch order has to match the row order of the frame.
    ordered_loader = small_test.get_data_loader(shuffle=False)
    outputs, targets = clf.test_model(ordered_loader, return_predictions=True)

    # Prints Validation Results
    # clf.validation_report(outputs, targets)

    # Translate each 0/1 vector into the names of the labels that are switched on.
    predicted_names = [
        [label_names[j] for j, is_on in enumerate(row) if is_on]
        for row in outputs
    ]
    actual_names = [
        [label_names[j] for j, is_on in enumerate(row) if is_on]
        for row in targets
    ]

    notes["LABELS"] = predicted_names
    notes["ACTUALS"] = actual_names
    
    return notes

#### Data Setup

In [13]:
# Number of rows per label in df1: shows how imbalanced the labels are.
df1["DESCRIPTION"].value_counts()

Direct Contact                                                                  40439
Collateral Contact                                                              24754
Attempted client contact                                                         4947
No Show                                                                          1893
Legal Issues                                                                     1834
Client Texas State ID                                                             769
Client Birth certificate                                                          687
Release of information forms signed for all appropriate vendors and agencies      384
HMIS profile created/updated                                                      363
Client Assessment Conducted                                                       303
Coordinated Assessment Completed at DACC                                          295
Coordinated Assessment Scheduled at DACC              

In [14]:
# Checking the data before proceeding
display(oh_df1.head(1))
# Per-label counts computed from the LABELS vectors, sorted from most to least frequent.
dict(sorted(onehot_label_counts(oh_df1, "LABELS", classes).items(), key=lambda x:x[1], reverse=True))

Unnamed: 0,NOTEID,NOTE,DESCRIPTION,FREQ,MIN_DESC,LABELS
0,1485559,CL did not show up for appointment to complete CA.,No Show,1893,No Show,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]"


{'Direct Contact': 40439,
 'Collateral Contact': 24754,
 'Attempted client contact': 4947,
 'No Show': 1893,
 'Legal Issues': 1834,
 'Client Texas State ID': 769,
 'Client Birth certificate': 687,
 'Release of information forms signed for all appropriate vendors and agencies': 384,
 'HMIS profile created/updated': 363,
 'Client Assessment Conducted': 303,
 'Coordinated Assessment Completed at DACC': 295,
 'Coordinated Assessment Scheduled at DACC': 269,
 'Treatment Plan completed and signed': 129,
 'HMIS Release of Information - Agreed': 101,
 'Client DPS report': 80,
 'Client assigned DACC CSR': 20,
 'Client declined to sign release of information': 7}

Of course there is a shortage of data, but it also looks like some labels have no case notes that contain any information about that label. For these kinds of labels, upsampling won't help.

In [15]:
# None of the casenotes with the label "HMIS...Declined" actually contain any information corresponding to the label
# NOTE(review): the recorded output of this cell is an empty frame, i.e. no row of df1
# carries this label at this point; presumably it was already dropped during
# preprocessing (labels_to_ignore). Confirm before relying on the statement above.
df1[df1["DESCRIPTION"].str.contains("HMIS Release of Information - Declined")]

Unnamed: 0,NOTEID,NOTE,DESCRIPTION,FREQ,MIN_DESC


**Some Reminders** 
- Precision = True Positives / (True Positives + False Positives)
  - What proportion of positive identifications was actually correct?
- Recall = True Positives / (True Positives + False Negatives)
  - What proportion of actual positives was identified correctly? 
- Accuracy = (True Positives + True Negatives) / Total
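- Micro avg. pools the true/false positives and negatives of all labels together before computing the metric, so frequent labels dominate it
- Macro avg. computes the metric for each label separately and takes the unweighted mean, so every label counts equally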
  - We want to maximize the macro avg. scores


#### Train Test Split

In [16]:
from sklearn.preprocessing import MinMaxScaler

test_size = 0.2

# train_data: original unaltered train data
# test_data: will remain the same for each model.
# Both parts get a fresh 0..n-1 index right after the (seeded) split.
train_data, test_data = (
    part.reset_index(drop=True)
    for part in train_test_split(oh_df1, test_size=test_size, random_state=76)
)

# Label counts of the training split, keyed by class name.
train_data_counts = pd.Series(onehot_label_counts(train_data, "LABELS", classes))

def get_min_max_labels(counts: pd.Series):
    """Print the most and the least frequent label with their share of all label occurrences.

    Args:
        counts: Series of label counts indexed by label name.
    """
    shares = counts / counts.sum()
    top_label, top_pct = shares.idxmax(), shares.max() * 100
    low_label, low_pct = shares.idxmin(), shares.min() * 100
    print(f"Max Label: {top_label!s}, {top_pct:f}%")
    print(f"Min Label: {low_label!s}, {low_pct:f}%")

# Show the per-label counts of the training split, then its most / least frequent label.
display(train_data_counts)
get_min_max_labels(train_data_counts)

Attempted client contact                                                         4027
Client Assessment Conducted                                                       234
Client Birth certificate                                                          552
Client DPS report                                                                  61
Client Texas State ID                                                             622
Client assigned DACC CSR                                                           13
Client declined to sign release of information                                      6
Collateral Contact                                                              19741
Coordinated Assessment Completed at DACC                                          238
Coordinated Assessment Scheduled at DACC                                          213
Direct Contact                                                                  32333
HMIS Release of Information - Agreed                  

Max Label: Direct Contact, 52.350113%
Min Label: Client declined to sign release of information, 0.009715%


#### Dataset 1, unmixed

In [17]:
# max token len is 128 by default
clf1 = DistilBertMLClassifier(text_col="NOTE", label_col="LABELS", label_names=classes, train_data=train_data, test_data=test_data)

print(str(clf1))
# NOTE(review): this banner says "TESTING" but the call that follows is train();
# consider renaming it to "TRAINING" so the log matches what is running.
print("---TESTING---")
clf1.train()
print("---VALIDATION---")
clf1.validate()

# 4 is the default batch size
# print(clf1)
# print(len(clf1.training_loader) * 4)
# print(len(clf1.testing_loader) * 4)

TRAIN Dataset: (51249, 6)
TEST Dataset: (12813, 6)


51249it [00:25, 1984.31it/s]
12813it [00:06, 1860.34it/s]
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Hyperparams:
 max_tokens_per_example: 128
 train_batch_size: 4
 valid_batch_size: 4
 epochs: 1
 learning rate: 0.000010
 train_size: 51249
 test_size: 12813

---TESTING---


3it [00:03,  1.02it/s]

Epoch: 0, Loss: 0.691327


5003it [03:56, 21.07it/s]

Epoch: 0, Loss: 0.064201


10004it [07:44, 24.75it/s]

Epoch: 0, Loss: 0.008882


12813it [09:39, 22.12it/s]


---VALIDATION---


3204it [00:31, 102.48it/s]


Hamming Loss: 0.016201


Unnamed: 0,precision,recall,f1-score,support
Attempted client contact,0.734252,0.81087,0.770661,920.0
Client Assessment Conducted,0.0,0.0,0.0,69.0
Client Birth certificate,1.0,0.051852,0.098592,135.0
Client DPS report,0.0,0.0,0.0,19.0
Client Texas State ID,0.565217,0.088435,0.152941,147.0
Client assigned DACC CSR,0.0,0.0,0.0,7.0
Client declined to sign release of information,0.0,0.0,0.0,1.0
Collateral Contact,0.915626,0.833433,0.872598,5013.0
Coordinated Assessment Completed at DACC,0.941176,0.842105,0.888889,57.0
Coordinated Assessment Scheduled at DACC,0.772727,0.607143,0.68,56.0


In [18]:
import math

# K value handed to temperature_scaled_mixing (2**21, as a float).
largeK = math.pow(2, 21)
# use the same train-test split as before

# Now we upsample the train_data based on the criteria I outlined above
mixed_data_1 = temperature_scaled_mixing(df=train_data, label_col="MIN_DESC", T=1.5, K=largeK)
mixed_data_2 = temperature_scaled_mixing(df=train_data, label_col="MIN_DESC", T=2, K=largeK)
mixed_data_3 = temperature_scaled_mixing(df=train_data, label_col="MIN_DESC", T=4, K=largeK)

# Give all three mixed sets a clean 0..n-1 index so they are treated alike
# (previously only mixed_data_1 was reset).
mixed_data_1.reset_index(drop=True, inplace=True)
mixed_data_2.reset_index(drop=True, inplace=True)
mixed_data_3.reset_index(drop=True, inplace=True)
test_data.reset_index(drop=True, inplace=True)

def compare_mixed_dataset(og_data, mixed_data, classes):
    """Display per-label counts before and after mixing, with a final 'Total' row.

    Args:
        og_data: Frame before mixing; must have a "LABELS" column.
        mixed_data: Frame after mixing; must have a "LABELS" column.
        classes: Class names passed through to ``onehot_label_counts``.
    """
    before = onehot_label_counts(og_data, "LABELS", classes)
    after = onehot_label_counts(mixed_data, "LABELS", classes)

    # Columns are paired by position, so both count dicts are expected to list
    # the labels in the same order.
    columns = {
        "labels": before.keys(),
        "original": before.values(),
        "mixed": after.values(),
    }
    stats = pd.DataFrame(data=columns)

    # Column sums of the numeric columns become the extra 'Total' row.
    totals = stats.sum(numeric_only=True)
    stats.loc['Total'] = totals
    display(stats)

One issue is that since Direct Contact co-occurs with almost every other label, increasing T will also cause its counts to rise or stay high. It would be worth running a model without Direct Contact. Moreover, upsampling labels like "HMIS...Declined" or "Client not assigned DACC CSR" is a bad idea because, on top of having so few examples, their examples don't contain any information about the label in the text.

### Dataset 1, T=1.5

In [19]:
# Label distribution before vs. after mixing with T=1.5, then the max / min label share of the mixed set.
compare_mixed_dataset(train_data, mixed_data_1, classes)
get_min_max_labels(pd.Series(onehot_label_counts(mixed_data_1, "LABELS", classes)))

# Default hyperparameters; only the training frame differs from the unmixed run.
clf2 = DistilBertMLClassifier(text_col="NOTE", label_col="LABELS", label_names=classes, train_data=mixed_data_1, test_data=test_data)

print(str(clf2))
# NOTE(review): banner says "TESTING" but train() is what runs next.
print("---TESTING---")
clf2.train()
print("---VALIDATION---")
clf2.validate()

Unnamed: 0,labels,original,mixed
0,Attempted client contact,4027.0,5738.0
1,Client Assessment Conducted,234.0,1071.0
2,Client Birth certificate,552.0,1560.0
3,Client DPS report,61.0,345.0
4,Client Texas State ID,622.0,1725.0
5,Client assigned DACC CSR,13.0,119.0
6,Client declined to sign release of information,6.0,74.0
7,Collateral Contact,19741.0,18367.0
8,Coordinated Assessment Completed at DACC,238.0,1109.0
9,Coordinated Assessment Scheduled at DACC,213.0,957.0


Max Label: Direct Contact, 43.318629%
Min Label: Client declined to sign release of information, 0.103306%
TRAIN Dataset: (51249, 6)
TEST Dataset: (12813, 6)


51249it [00:23, 2210.16it/s]
12813it [00:05, 2185.88it/s]
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Hyperparams:
 max_tokens_per_example: 128
 train_batch_size: 4
 valid_batch_size: 4
 epochs: 1
 learning rate: 0.000010
 train_size: 51249
 test_size: 12813

---TESTING---


2it [00:00, 15.33it/s]

Epoch: 0, Loss: 0.678567


5003it [03:30, 23.74it/s]

Epoch: 0, Loss: 0.073661


10004it [07:01, 23.76it/s]

Epoch: 0, Loss: 0.055251


12813it [08:59, 23.75it/s]


---VALIDATION---


3204it [00:32, 98.09it/s]


Hamming Loss: 0.016394


Unnamed: 0,precision,recall,f1-score,support
Attempted client contact,0.756098,0.775,0.765432,920.0
Client Assessment Conducted,0.695652,0.695652,0.695652,69.0
Client Birth certificate,0.673759,0.703704,0.688406,135.0
Client DPS report,0.625,0.526316,0.571429,19.0
Client Texas State ID,0.455882,0.632653,0.529915,147.0
Client assigned DACC CSR,0.0,0.0,0.0,7.0
Client declined to sign release of information,0.0,0.0,0.0,1.0
Collateral Contact,0.927289,0.80391,0.861203,5013.0
Coordinated Assessment Completed at DACC,0.892857,0.877193,0.884956,57.0
Coordinated Assessment Scheduled at DACC,0.868421,0.589286,0.702128,56.0


### Dataset 1, T=2

In [20]:
# Label distribution before vs. after mixing with T=2, then the max / min label share of the mixed set.
compare_mixed_dataset(train_data, mixed_data_2, classes)
get_min_max_labels(pd.Series(onehot_label_counts(mixed_data_2, "LABELS", classes)))

# NOTE(review): the name clf2 is reused here, so the model bound to it before this
# cell is no longer reachable afterwards; use a distinct name if both are needed.
clf2 = DistilBertMLClassifier(text_col="NOTE", label_col="LABELS", label_names=classes, train_data=mixed_data_2, test_data=test_data)

print(str(clf2))
# NOTE(review): banner says "TESTING" but train() is what runs next.
print("---TESTING---")
clf2.train()
print("---VALIDATION---")
clf2.validate()

Unnamed: 0,labels,original,mixed
0,Attempted client contact,4027.0,6344.0
1,Client Assessment Conducted,234.0,2177.0
2,Client Birth certificate,552.0,2521.0
3,Client DPS report,61.0,675.0
4,Client Texas State ID,622.0,2562.0
5,Client assigned DACC CSR,13.0,344.0
6,Client declined to sign release of information,6.0,236.0
7,Collateral Contact,19741.0,16995.0
8,Coordinated Assessment Completed at DACC,238.0,2186.0
9,Coordinated Assessment Scheduled at DACC,213.0,1752.0


Max Label: Direct Contact, 38.423760%
Min Label: Client declined to sign release of information, 0.289039%
TRAIN Dataset: (51249, 6)
TEST Dataset: (12813, 6)


51249it [00:23, 2185.43it/s]
12813it [00:05, 2163.91it/s]
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Hyperparams:
 max_tokens_per_example: 128
 train_batch_size: 4
 valid_batch_size: 4
 epochs: 1
 learning rate: 0.000010
 train_size: 51249
 test_size: 12813

---TESTING---


1it [00:00,  6.00it/s]

Epoch: 0, Loss: 0.705014


5004it [03:30, 23.73it/s]

Epoch: 0, Loss: 0.045126


10005it [07:01, 23.70it/s]

Epoch: 0, Loss: 0.051273


12813it [09:05, 23.50it/s]


---VALIDATION---


3204it [00:36, 88.99it/s]

Hamming Loss: 0.016481





Unnamed: 0,precision,recall,f1-score,support
Attempted client contact,0.740557,0.809783,0.773624,920.0
Client Assessment Conducted,0.698413,0.637681,0.666667,69.0
Client Birth certificate,0.655172,0.703704,0.678571,135.0
Client DPS report,0.666667,0.421053,0.516129,19.0
Client Texas State ID,0.452489,0.680272,0.543478,147.0
Client assigned DACC CSR,0.666667,0.285714,0.4,7.0
Client declined to sign release of information,0.0,0.0,0.0,1.0
Collateral Contact,0.919764,0.809495,0.861114,5013.0
Coordinated Assessment Completed at DACC,0.862069,0.877193,0.869565,57.0
Coordinated Assessment Scheduled at DACC,0.690909,0.678571,0.684685,56.0


### Dataset 1, T=4

In [21]:
# Label distribution before vs. after mixing with T=4, then the max / min label share of the mixed set.
compare_mixed_dataset(train_data, mixed_data_3, classes)
get_min_max_labels(pd.Series(onehot_label_counts(mixed_data_3, "LABELS", classes)))

# NOTE(review): the name clf2 is reused here, so the model bound to it before this
# cell is no longer reachable afterwards; use a distinct name if both are needed.
clf2 = DistilBertMLClassifier(text_col="NOTE", label_col="LABELS", label_names=classes, train_data=mixed_data_3, test_data=test_data)

print(str(clf2))
# NOTE(review): banner says "TESTING" but train() is what runs next.
print("---TESTING---")
clf2.train()
print("---VALIDATION---")
clf2.validate()

Unnamed: 0,labels,original,mixed
0,Attempted client contact,4027.0,5799.0
1,Client Assessment Conducted,234.0,4734.0
2,Client Birth certificate,552.0,3841.0
3,Client DPS report,61.0,1717.0
4,Client Texas State ID,622.0,4009.0
5,Client assigned DACC CSR,13.0,1174.0
6,Client declined to sign release of information,6.0,956.0
7,Collateral Contact,19741.0,13635.0
8,Coordinated Assessment Completed at DACC,238.0,4654.0
9,Coordinated Assessment Scheduled at DACC,213.0,3674.0


Max Label: Direct Contact, 32.761561%
Min Label: Client declined to sign release of information, 0.931728%
TRAIN Dataset: (51249, 6)
TEST Dataset: (12813, 6)


51249it [00:26, 1949.16it/s]
12813it [00:06, 1906.19it/s]
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Hyperparams:
 max_tokens_per_example: 128
 train_batch_size: 4
 valid_batch_size: 4
 epochs: 1
 learning rate: 0.000010
 train_size: 51249
 test_size: 12813

---TESTING---


1it [00:00,  7.58it/s]

Epoch: 0, Loss: 0.665086


5003it [04:03, 20.47it/s]

Epoch: 0, Loss: 0.066888


10003it [08:06, 19.23it/s]

Epoch: 0, Loss: 0.023495


12813it [10:14, 20.86it/s]


---VALIDATION---


3204it [00:33, 94.95it/s]

Hamming Loss: 0.018194





Unnamed: 0,precision,recall,f1-score,support
Attempted client contact,0.69158,0.794565,0.739504,920.0
Client Assessment Conducted,0.630952,0.768116,0.69281,69.0
Client Birth certificate,0.494845,0.711111,0.583587,135.0
Client DPS report,0.666667,0.526316,0.588235,19.0
Client Texas State ID,0.370107,0.707483,0.485981,147.0
Client assigned DACC CSR,0.5,0.285714,0.363636,7.0
Client declined to sign release of information,0.0,0.0,0.0,1.0
Collateral Contact,0.897717,0.831638,0.863415,5013.0
Coordinated Assessment Completed at DACC,0.796875,0.894737,0.842975,57.0
Coordinated Assessment Scheduled at DACC,0.525,0.75,0.617647,56.0


In [22]:
# Visual (qualitative) inspection of classifier output on the test set, one label at a time.
# Test recall is the main goal, so the labels with the fewest test examples
# (e.g. fewer than 10) are the ones to read most carefully in the displayed tables.
from IPython.display import display

def run_tests(clf, num_samples, data=None, label_names=None, random_state=None):
    """Display classifier output on a few sampled test notes for every label.

    For each label, the rows whose "MIN_DESC" value equals that label are
    selected, at most `num_samples` of them are sampled, and the "NOTE",
    "LABELS" and "ACTUALS" columns of the frame returned by `test_classifier`
    are displayed.

    Args:
        clf: classifier object, forwarded unchanged to `test_classifier`.
        num_samples: maximum number of notes to sample per label; labels with
            fewer matching rows show all of them.
        data: frame to sample from. Defaults to the notebook-level `test_data`
            as it is bound at call time.
        label_names: labels to iterate over. Defaults to the notebook-level
            `classes` as it is bound at call time.
        random_state: optional seed passed to `DataFrame.sample` so the
            displayed notes are reproducible; None keeps sampling random.
    """
    # Fall back to the notebook globals so existing two-argument calls behave as before.
    data = test_data if data is None else data
    label_names = classes if label_names is None else label_names

    for class_label in label_names:
        print("Label: %s" % class_label)
        qual_test = data[data["MIN_DESC"] == class_label]

        # A label with no matching rows only gets its header line printed.
        if qual_test.empty:
            continue

        sample_size = min(num_samples, len(qual_test))
        qual_test = qual_test.sample(sample_size, random_state=random_state)
        qual_test = qual_test.reset_index(drop=True)

        display(test_classifier(qual_test, label_names, clf)[["NOTE", "LABELS", "ACTUALS"]])

#### Other thoughts

Some things I could try:
- Few-shot classification.
  - Getting expert data by asking case managers to write sentences that would commonly correspond to a label, maybe 5 sentences per label (per case manager?).
  - This might work better than our case data, because so many case notes don't actually contain information about the boxes that are checked.
- Training using the label text as both the input and the output.

### Part 3: Re-run CLFs with Data Cleaning Method 2

In [23]:
# Data Cleaning Method 2: ignore the contact labels in addition to the bad labels,
# consolidate duplicates with the `duplicates_2` mapping and apply the word replacements.
all_labels_to_ignore = contact_labels + bad_labels

df2 = get_clf_data(
    labels_to_ignore=all_labels_to_ignore,
    duplicate_labels_dict=duplicates_2,
    word_replace_dict=word_replace_dict,
)

# Number of entries per remaining label.
df2["DESCRIPTION"].value_counts()

--- LOADING DATA... ---


  df = pd.read_csv(strs[name], **args)


Total initial entries: 108804
Total initial notes: 64385
- Removed irrelevant columns for checkbox classification, kept: Index(['NOTEID', 'NOTE', 'DESCRIPTION', 'GROUPNAME'], dtype='object')
- Dropped empty notes
- Dropped interventions checkboxes
- Consolidated duplicate labels using the provided mappings.
{'Direct Contact': ['Client contact in office', 'Direct Contact through Outreach', 'Client contact', 'Client Contact out of office'], 'Attempted client contact': ['Attempted client contact through Outreach', 'Attempt to locate client'], 'Client Assessment Conducted': ['New Client assessment completed'], 'Coordinated Assessment Completed at DACC': ['Coordinated Assessment Completed through HOST contact', 'Coordinated Assessment Scheduled at DACC'], 'Release of information forms signed for all appropriate vendors and agencies': ['Client declined to sign release of information']}
- Dropped entries containing the provided labels
- Substituted occurences of words in text with provided wo

Legal Issues                                                                    1834
Client Texas State ID                                                            769
Client Birth certificate                                                         687
Coordinated Assessment Completed at DACC                                         400
Release of information forms signed for all appropriate vendors and agencies     390
HMIS profile created/updated                                                     363
Client Assessment Conducted                                                      303
Treatment Plan completed and signed                                              129
HMIS Release of Information - Agreed                                             101
Client DPS report                                                                 80
Client assigned DACC CSR                                                          20
Name: DESCRIPTION, dtype: int64

### No Upsampling

In [24]:
# `min_descriptions` is expected to add the "MIN_DESC" column that the mixing and
# qualitative-test cells read later; `one_hot_encode` builds the "LABELS" column and
# returns the ordered label names (presumably merging rows that share a NOTEID — TODO confirm).
df2 = min_descriptions(df2, "DESCRIPTION", "MIN_DESC", "NOTEID")
oh_df2, classes = one_hot_encode(
    df=df2,
    label_col="DESCRIPTION",
    new_col="LABELS",
    duplicate_id_col="NOTEID",
)

# Hold out 20% for testing with a fixed seed, and give both parts a fresh 0..n-1 index.
test_size = 0.2
train_data, test_data = (
    part.reset_index(drop=True)
    for part in train_test_split(oh_df2, test_size=test_size, random_state=76)
)

# Per-label counts in the training split, followed by the most and least frequent label.
train_data_counts = pd.Series(onehot_label_counts(train_data, "LABELS", classes))

display(train_data_counts)
get_min_max_labels(train_data_counts)

Client Assessment Conducted                                                      230
Client Birth certificate                                                         539
Client DPS report                                                                 67
Client Texas State ID                                                            607
Client assigned DACC CSR                                                          14
Coordinated Assessment Completed at DACC                                         327
HMIS Release of Information - Agreed                                              79
HMIS profile created/updated                                                     291
Legal Issues                                                                    1479
Release of information forms signed for all appropriate vendors and agencies     314
Treatment Plan completed and signed                                               95
dtype: int32

Max Label: Legal Issues, 36.590797%
Min Label: Client assigned DACC CSR, 0.346363%


In [25]:
# Build three re-mixed versions of the training split with temperature-scaled mixing
# on the "MIN_DESC" label, using temperatures 1.5, 2 and 4 and the shared `largeK`.
mixed_data_1 = temperature_scaled_mixing(df=train_data, label_col="MIN_DESC", T=1.5, K=largeK)
mixed_data_2 = temperature_scaled_mixing(df=train_data, label_col="MIN_DESC", T=2, K=largeK)
mixed_data_3 = temperature_scaled_mixing(df=train_data, label_col="MIN_DESC", T=4, K=largeK)

# Re-index every mixed set, not only the first one, so all three are handed to the
# classifier with the same clean 0..n-1 index.
mixed_data_1 = mixed_data_1.reset_index(drop=True)
mixed_data_2 = mixed_data_2.reset_index(drop=True)
mixed_data_3 = mixed_data_3.reset_index(drop=True)
test_data = test_data.reset_index(drop=True)

### Dataset 2, unmixed

In [26]:
# Training settings shared with the mixed-data runs in the following cells.
# The max token length is 128 by default; it is raised to 256 here.
epochs = 4
max_len = 256

# Baseline for Dataset 2: train on the un-mixed training split.
clf3 = DistilBertMLClassifier(
    text_col="NOTE",
    label_col="LABELS",
    label_names=classes,
    train_data=train_data,
    test_data=test_data,
    epochs=epochs,
    max_len=max_len,
)

print(clf3)
# This banner precedes train(), so it is labelled as the training phase.
print("---TRAINING---")
clf3.train()
print("---VALIDATION---")
clf3.validate()

TRAIN Dataset: (3284, 6)
TEST Dataset: (821, 6)


3284it [00:01, 1821.45it/s]
821it [00:00, 1729.62it/s]
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Hyperparams:
 max_tokens_per_example: 256
 train_batch_size: 4
 valid_batch_size: 4
 epochs: 4
 learning rate: 0.000010
 train_size: 3284
 test_size: 821

---TESTING---


1it [00:00,  9.69it/s]

Epoch: 0, Loss: 0.712360


821it [00:57, 14.38it/s]
2it [00:00, 14.50it/s]

Epoch: 1, Loss: 0.129594


821it [00:57, 14.35it/s]
2it [00:00, 14.72it/s]

Epoch: 2, Loss: 0.051868


821it [00:59, 13.82it/s]
2it [00:00, 13.76it/s]

Epoch: 3, Loss: 0.022838


821it [01:01, 13.45it/s]


---VALIDATION---


206it [00:04, 45.11it/s]

Hamming Loss: 0.035544





Unnamed: 0,precision,recall,f1-score,support
Client Assessment Conducted,0.847458,0.684932,0.757576,73.0
Client Birth certificate,0.951613,0.797297,0.867647,148.0
Client DPS report,1.0,0.692308,0.818182,13.0
Client Texas State ID,0.888889,0.839506,0.863492,162.0
Client assigned DACC CSR,0.0,0.0,0.0,6.0
Coordinated Assessment Completed at DACC,0.953846,0.849315,0.898551,73.0
HMIS Release of Information - Agreed,0.666667,0.272727,0.387097,22.0
HMIS profile created/updated,0.925,0.513889,0.660714,72.0
Legal Issues,0.934286,0.921127,0.92766,355.0
Release of information forms signed for all appropriate vendors and agencies,0.77193,0.578947,0.661654,76.0


### Upsampling

### Dataset 2, T=1.5

In [27]:
# Show per-label counts before and after mixing at T=1.5, then the most and least
# frequent label in the mixed set.
compare_mixed_dataset(train_data, mixed_data_1, classes)
get_min_max_labels(pd.Series(onehot_label_counts(mixed_data_1, "LABELS", classes)))

# Same settings as the un-mixed run; only the training frame differs.
clf4 = DistilBertMLClassifier(
    text_col="NOTE",
    label_col="LABELS",
    label_names=classes,
    train_data=mixed_data_1,
    test_data=test_data,
    epochs=epochs,
    max_len=max_len,
)

print(clf4)
# This banner precedes train(), so it is labelled as the training phase.
print("---TRAINING---")
clf4.train()
print("---VALIDATION---")
clf4.validate()

Unnamed: 0,labels,original,mixed
0,Client Assessment Conducted,230.0,357.0
1,Client Birth certificate,539.0,538.0
2,Client DPS report,67.0,128.0
3,Client Texas State ID,607.0,605.0
4,Client assigned DACC CSR,14.0,45.0
5,Coordinated Assessment Completed at DACC,327.0,492.0
6,HMIS Release of Information - Agreed,79.0,152.0
7,HMIS profile created/updated,291.0,453.0
8,Legal Issues,1479.0,1118.0
9,Release of information forms signed for all appropriate vendors and agencies,314.0,481.0


Max Label: Legal Issues, 24.685361%
Min Label: Client assigned DACC CSR, 0.993597%
TRAIN Dataset: (3284, 6)
TEST Dataset: (821, 6)


3284it [00:02, 1619.22it/s]
821it [00:00, 1563.03it/s]
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Hyperparams:
 max_tokens_per_example: 256
 train_batch_size: 4
 valid_batch_size: 4
 epochs: 4
 learning rate: 0.000010
 train_size: 3284
 test_size: 821

---TESTING---


1it [00:00,  6.44it/s]

Epoch: 0, Loss: 0.696689


821it [01:01, 13.41it/s]
2it [00:00, 13.35it/s]

Epoch: 1, Loss: 0.203403


821it [01:00, 13.46it/s]
2it [00:00, 13.01it/s]

Epoch: 2, Loss: 0.088473


821it [01:00, 13.47it/s]
2it [00:00, 13.53it/s]

Epoch: 3, Loss: 0.170850


821it [01:01, 13.37it/s]


---VALIDATION---


206it [00:04, 43.29it/s]

Hamming Loss: 0.036873





Unnamed: 0,precision,recall,f1-score,support
Client Assessment Conducted,0.823529,0.767123,0.794326,73.0
Client Birth certificate,0.895105,0.864865,0.879725,148.0
Client DPS report,0.9,0.692308,0.782609,13.0
Client Texas State ID,0.902778,0.802469,0.849673,162.0
Client assigned DACC CSR,1.0,0.166667,0.285714,6.0
Coordinated Assessment Completed at DACC,0.955224,0.876712,0.914286,73.0
HMIS Release of Information - Agreed,0.75,0.272727,0.4,22.0
HMIS profile created/updated,0.785714,0.611111,0.6875,72.0
Legal Issues,0.974684,0.867606,0.918033,355.0
Release of information forms signed for all appropriate vendors and agencies,0.627907,0.710526,0.666667,76.0


### Dataset 2, T=2

In [28]:
# Show per-label counts before and after mixing at T=2, then the most and least
# frequent label in the mixed set.
compare_mixed_dataset(train_data, mixed_data_2, classes)
get_min_max_labels(pd.Series(onehot_label_counts(mixed_data_2, "LABELS", classes)))

# NOTE(review): `clf4` is also the name of the T=1.5 classifier, so this assignment
# replaces that model. The name is kept in case later cells rely on it; consider a
# distinct name if both models need to stay available.
clf4 = DistilBertMLClassifier(
    text_col="NOTE",
    label_col="LABELS",
    label_names=classes,
    train_data=mixed_data_2,
    test_data=test_data,
    epochs=epochs,
    max_len=max_len,
)

print(clf4)
# This banner precedes train(), so it is labelled as the training phase.
print("---TRAINING---")
clf4.train()
print("---VALIDATION---")
clf4.validate()

Unnamed: 0,labels,original,mixed
0,Client Assessment Conducted,230.0,444.0
1,Client Birth certificate,539.0,539.0
2,Client DPS report,67.0,182.0
3,Client Texas State ID,607.0,589.0
4,Client assigned DACC CSR,14.0,74.0
5,Coordinated Assessment Completed at DACC,327.0,581.0
6,HMIS Release of Information - Agreed,79.0,186.0
7,HMIS profile created/updated,291.0,576.0
8,Legal Issues,1479.0,894.0
9,Release of information forms signed for all appropriate vendors and agencies,314.0,552.0


Max Label: Legal Issues, 18.516984%
Min Label: Client assigned DACC CSR, 1.532726%
TRAIN Dataset: (3284, 6)
TEST Dataset: (821, 6)


3284it [00:02, 1575.30it/s]
821it [00:00, 1624.38it/s]
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Hyperparams:
 max_tokens_per_example: 256
 train_batch_size: 4
 valid_batch_size: 4
 epochs: 4
 learning rate: 0.000010
 train_size: 3284
 test_size: 821

---TESTING---


1it [00:00,  8.40it/s]

Epoch: 0, Loss: 0.740872


821it [01:02, 13.21it/s]
2it [00:00, 12.26it/s]

Epoch: 1, Loss: 0.131627


821it [01:05, 12.52it/s]
2it [00:00, 12.72it/s]

Epoch: 2, Loss: 0.091691


821it [01:06, 12.38it/s]
2it [00:00, 12.45it/s]

Epoch: 3, Loss: 0.086049


821it [01:05, 12.47it/s]


---VALIDATION---


206it [00:04, 43.05it/s]

Hamming Loss: 0.033883





Unnamed: 0,precision,recall,f1-score,support
Client Assessment Conducted,0.816901,0.794521,0.805556,73.0
Client Birth certificate,0.908451,0.871622,0.889655,148.0
Client DPS report,1.0,0.769231,0.869565,13.0
Client Texas State ID,0.876543,0.876543,0.876543,162.0
Client assigned DACC CSR,0.5,0.166667,0.25,6.0
Coordinated Assessment Completed at DACC,0.915493,0.890411,0.902778,73.0
HMIS Release of Information - Agreed,0.590909,0.590909,0.590909,22.0
HMIS profile created/updated,0.777778,0.680556,0.725926,72.0
Legal Issues,0.949704,0.904225,0.926407,355.0
Release of information forms signed for all appropriate vendors and agencies,0.693333,0.684211,0.688742,76.0


### Dataset 2, T=4

In [29]:
# Show per-label counts before and after mixing at T=4, then the most and least
# frequent label in the mixed set.
compare_mixed_dataset(train_data, mixed_data_3, classes)
get_min_max_labels(pd.Series(onehot_label_counts(mixed_data_3, "LABELS", classes)))

# NOTE(review): the name `clf2` breaks the clf3/clf4 numbering used in this part and
# may overwrite a classifier created earlier in the notebook — confirm before relying
# on `clf2` elsewhere. The name is kept so existing references keep working.
clf2 = DistilBertMLClassifier(
    text_col="NOTE",
    label_col="LABELS",
    label_names=classes,
    train_data=mixed_data_3,
    test_data=test_data,
    epochs=epochs,
    max_len=max_len,
)

print(clf2)
# This banner precedes train(), so it is labelled as the training phase.
print("---TRAINING---")
clf2.train()
print("---VALIDATION---")
clf2.validate()

Unnamed: 0,labels,original,mixed
0,Client Assessment Conducted,230.0,597.0
1,Client Birth certificate,539.0,475.0
2,Client DPS report,67.0,233.0
3,Client Texas State ID,607.0,508.0
4,Client assigned DACC CSR,14.0,149.0
5,Coordinated Assessment Completed at DACC,327.0,679.0
6,HMIS Release of Information - Agreed,79.0,234.0
7,HMIS profile created/updated,291.0,694.0
8,Legal Issues,1479.0,668.0
9,Release of information forms signed for all appropriate vendors and agencies,314.0,689.0


Max Label: HMIS profile created/updated, 13.259457%
Min Label: Client assigned DACC CSR, 2.846771%
TRAIN Dataset: (3284, 6)
TEST Dataset: (821, 6)


3284it [00:02, 1269.12it/s]
821it [00:00, 1104.23it/s]
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Hyperparams:
 max_tokens_per_example: 256
 train_batch_size: 4
 valid_batch_size: 4
 epochs: 4
 learning rate: 0.000010
 train_size: 3284
 test_size: 821

---TESTING---


0it [00:00, ?it/s]

Epoch: 0, Loss: 0.688893


821it [01:06, 12.33it/s]
2it [00:00, 12.76it/s]

Epoch: 1, Loss: 0.106751


821it [01:02, 13.16it/s]
2it [00:00, 14.40it/s]

Epoch: 2, Loss: 0.123304


821it [00:57, 14.36it/s]
2it [00:00, 15.10it/s]

Epoch: 3, Loss: 0.035911


821it [00:55, 14.72it/s]


---VALIDATION---


206it [00:04, 48.92it/s]

Hamming Loss: 0.035655





Unnamed: 0,precision,recall,f1-score,support
Client Assessment Conducted,0.857143,0.821918,0.839161,73.0
Client Birth certificate,0.919118,0.844595,0.880282,148.0
Client DPS report,0.733333,0.846154,0.785714,13.0
Client Texas State ID,0.829268,0.839506,0.834356,162.0
Client assigned DACC CSR,0.333333,0.166667,0.222222,6.0
Coordinated Assessment Completed at DACC,0.929577,0.90411,0.916667,73.0
HMIS Release of Information - Agreed,0.6875,0.5,0.578947,22.0
HMIS profile created/updated,0.859649,0.680556,0.75969,72.0
Legal Issues,0.970874,0.84507,0.903614,355.0
Release of information forms signed for all appropriate vendors and agencies,0.733333,0.578947,0.647059,76.0


In [None]:
# Qualitative check of clf3 (trained on the un-mixed Dataset 2 split):
# up to 3 randomly sampled test notes are displayed per label.
run_tests(clf=clf3, num_samples=3)