In [1]:
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

In [1]:
%pip install transformers[torch]

Collecting transformers[torch]
  Downloading transformers-4.44.2-py3-none-any.whl (9.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.5/9.5 MB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (751 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m751.2/751.2 KB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting huggingface-hub<1.0,>=0.23.2
  Downloading huggingface_hub-0.24.7-py3-none-any.whl (417 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m417.5/417.5 KB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting tokenizers<0.20,>=0.19
  Using cached tokenizers-0.19.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
Collecting filelock
  Downloading filelock-3.16.0-py3-none-any.whl (16 kB)
Collecting safetensors>=0.4.1
  Downloading safetens

In [2]:
%pip install torch

Note: you may need to restart the kernel to use updated packages.


In [2]:
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModel

model_name = "google-bert/bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
dataset_dir = "data/train_test_val"

dataset = {
    'train': pd.read_csv(dataset_dir + '/train.csv').reset_index(drop=True),  # """ encoding='cp1252' """ insert between train_data.csv and .reset index as parameter
    'test': pd.read_csv(dataset_dir + '/test.csv').reset_index(drop=True),  # """ encoding='cp1252' """ insert between train_data.csv and .reset index as parameter
    'val': pd.read_csv(dataset_dir + '/val.csv').reset_index(drop=True),  # """ encoding='cp1252' """ insert between train_data.csv and .reset index as parameter
}

dataset['val']
dataset['test']
dataset['train']

Unnamed: 0,ID,Text,Murder,Homicide,Robbery,Physical Injuries,Rape,Theft,Carnapping,Others
0,1495,i was preparing to go to bed when i heard a lo...,0,0,1,1,0,0,0,0
1,354,one evening i was watching television when i h...,1,0,0,0,0,0,0,0
2,1233,taking a shortcut home across a dark footbridg...,0,0,1,0,0,0,0,0
3,614,i was the designated driver that night so i wa...,0,1,0,0,0,0,0,0
4,3507,a cousin of mine was kidnapped as a baby by hi...,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...
2395,1131,late one night i was watching tv when i heard ...,0,0,1,0,0,0,0,0
2396,1295,while lounging by the pool i saw a man pacing ...,0,0,1,0,0,0,0,0
2397,861,the couple next door had always been volatile ...,0,1,0,0,0,0,0,0
2398,3508,a friend of mine was kidnapped while doing his...,0,0,0,0,0,0,0,1


In [27]:
LABELS = [label for label in dataset['train'].keys() if label not in ['ID', 'Text']]
id2label = {idx:label for idx, label in enumerate(LABELS)}
label2id = {label:idx for idx, label in enumerate(LABELS)}
LABELS

['Murder',
 'Homicide',
 'Robbery',
 'Physical Injuries',
 'Rape',
 'Theft',
 'Carnapping',
 'Others']

In [5]:
class CrimeDataset(Dataset):
    def __init__(self, data, tokenizer, labels, max_token_len=128):
        self.data = data
        self.tokenizer = tokenizer
        self.labels = labels
        self.max_token_len = max_token_len
        self.encoded_dataset = self.encode_dataset()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.encoded_dataset[index]
    

    def encode_dataset(self):
        encoded_dataset = []
        for index, data in tqdm(self.data.iterrows()):
            encoded_data = self.encode_data(data)
            encoded_dataset.append(encoded_data)
        return encoded_dataset

    def encode_data(self, data):
        text = data["Text"]

        #preprocess text
        
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            return_tensors='pt',
            padding='max_length',
            truncation=True,
            max_length=128,
            return_token_type_ids=False,
            return_attention_mask=True
        )
        labels = [data[label] for label in self.labels]

        representation = {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.FloatTensor(labels)
        }
        return representation
    


class CrimeDataLoader:
    def __init__(self, dataset, labels, tokenizer, batch_size=8):
        self.train_dataset = CrimeDataset(dataset['train'], tokenizer, labels)
        self.val_dataset = CrimeDataset(dataset['val'], tokenizer, labels)
        self.test_dataset = CrimeDataset(dataset['test'], tokenizer, labels)
        self.batch_size = batch_size

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, num_workers=0, shuffle=True)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size, num_workers=0, shuffle=False)

    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size, num_workers=0, shuffle=False)

In [6]:
dataloader = CrimeDataLoader(dataset=dataset, tokenizer=tokenizer, labels=LABELS)

def split_dataloader(dataloader):
    train_dataloader = dataloader.train_dataloader()
    val_dataloader = dataloader.val_dataloader()
    test_dataloader = dataloader.test_dataloader()

    return train_dataloader, val_dataloader, test_dataloader

train_dataloader, val_dataloader, test_dataloader = split_dataloader(dataloader)

2400it [00:01, 2086.17it/s]
400it [00:00, 2097.94it/s]
1200it [00:00, 2107.96it/s]


In [7]:
example = CrimeDataset(dataset['train'], tokenizer, LABELS).__getitem__(2)
print(example.keys())
print()
print("Decoded sentence")
print(tokenizer.decode(example['input_ids']))
print()
print("Label IDs of example 1")
print(example['labels'])
print()
print("IDs to Labels")
print([id2label[idx] for idx, label in enumerate(example['labels']) if label == 1.0])

2400it [00:01, 2033.62it/s]

dict_keys(['input_ids', 'attention_mask', 'labels'])

Decoded sentence
[CLS] taking a shortcut home across a dark footbridge a man suddenly blocked my path he pulled out a knife and demanded my wallet using menacing threats i was scared and handed it over without hesitation [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]

Label IDs of example 1
tensor([0., 0., 1., 0., 0., 0., 0., 0.])

IDs to Labels
['Robbery']





  from .autonotebook import tqdm as notebook_tqdm


In [9]:
train_dataset = CrimeDataset(dataset['train'], tokenizer, LABELS)
test_dataset = CrimeDataset(dataset['test'], tokenizer, LABELS)


2400it [00:01, 1905.14it/s]
1200it [00:00, 2000.11it/s]


In [10]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(model_name, 
                                                           problem_type="multi_label_classification", 
                                                           num_labels=len(LABELS),
                                                           id2label=id2label,
                                                           label2id=label2id)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
batch_size = 8
metric_name = "f1"

from transformers import TrainingArguments, Trainer

args = TrainingArguments(
    f"models/bert-classifier-wrapper",
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name,
    #push_to_hub=True,
)



In [13]:
%pip install scikit-learn

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Defaulting to user installation because normal site-packages is not writeable
Collecting scikit-learn
  Downloading scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.4/13.4 MB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting threadpoolctl>=3.1.0
  Downloading threadpoolctl-3.5.0-py3-none-any.whl (18 kB)
Collecting joblib>=1.2.0
  Downloading joblib-1.4.2-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m301.8/301.8 KB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hCollecting scipy>=1.6.0
  Downloading scipy-1.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (41.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.1/41.1 MB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: threadpoolctl, scipy, joblib, scikit-learn
Successfully in

In [15]:
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from transformers import EvalPrediction
import torch
import numpy as np
    
# source: https://jesusleal.io/2021/04/21/Longformer-multilabel-classification/
def multi_label_metrics(predictions, labels, threshold=0.5):
    # first, apply sigmoid on predictions which are of shape (batch_size, num_labels)
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(torch.Tensor(predictions))
    # next, use threshold to turn them into integer predictions
    y_pred = np.zeros(probs.shape)
    y_pred[np.where(probs >= threshold)] = 1
    # finally, compute metrics
    y_true = labels
    f1_micro_average = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
    roc_auc = roc_auc_score(y_true, y_pred, average = 'micro')
    accuracy = accuracy_score(y_true, y_pred)
    # return as dictionary
    metrics = {'f1': f1_micro_average,
               'roc_auc': roc_auc,
               'accuracy': accuracy}
    return metrics

def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions, 
            tuple) else p.predictions
    result = multi_label_metrics(
        predictions=preds, 
        labels=p.label_ids)
    return result

In [16]:
trainer = Trainer(
    model,
    args,
    train_dataset=CrimeDataset(dataset['train'], tokenizer, LABELS),
    eval_dataset=CrimeDataset(dataset['test'], tokenizer, LABELS),
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

2400it [00:01, 2082.01it/s]
1200it [00:00, 2182.61it/s]


In [17]:
trainer.train()

                                                  
 20%|██        | 300/1500 [03:27<11:42,  1.71it/s]

{'eval_loss': 0.11899271607398987, 'eval_f1': 0.9621740727139185, 'eval_roc_auc': 0.9651633291407689, 'eval_accuracy': 0.9166666666666666, 'eval_runtime': 31.4072, 'eval_samples_per_second': 38.208, 'eval_steps_per_second': 4.776, 'epoch': 1.0}


 33%|███▎      | 500/1500 [05:26<09:46,  1.70it/s]  

{'loss': 0.2095, 'grad_norm': 0.4453684091567993, 'learning_rate': 1.3333333333333333e-05, 'epoch': 1.67}


                                                  
 40%|████      | 600/1500 [06:56<08:45,  1.71it/s]

{'eval_loss': 0.05384119600057602, 'eval_f1': 0.9841954022988506, 'eval_roc_auc': 0.9864242630546592, 'eval_accuracy': 0.9683333333333334, 'eval_runtime': 31.4065, 'eval_samples_per_second': 38.209, 'eval_steps_per_second': 4.776, 'epoch': 2.0}


                                                    
 60%|██████    | 900/1500 [10:27<05:51,  1.71it/s]

{'eval_loss': 0.037070807069540024, 'eval_f1': 0.9824686940966011, 'eval_roc_auc': 0.9870021394830524, 'eval_accuracy': 0.97, 'eval_runtime': 31.4505, 'eval_samples_per_second': 38.155, 'eval_steps_per_second': 4.769, 'epoch': 3.0}


 67%|██████▋   | 1000/1500 [11:28<04:53,  1.70it/s] 

{'loss': 0.0467, 'grad_norm': 0.12222081422805786, 'learning_rate': 6.666666666666667e-06, 'epoch': 3.33}


                                                   
 80%|████████  | 1200/1500 [13:57<02:55,  1.70it/s]

{'eval_loss': 0.03159654140472412, 'eval_f1': 0.9824057450628366, 'eval_roc_auc': 0.9855304478809073, 'eval_accuracy': 0.9666666666666667, 'eval_runtime': 31.4643, 'eval_samples_per_second': 38.138, 'eval_steps_per_second': 4.767, 'epoch': 4.0}


100%|██████████| 1500/1500 [16:56<00:00,  1.70it/s]

{'loss': 0.0306, 'grad_norm': 0.1690213978290558, 'learning_rate': 0.0, 'epoch': 5.0}


                                                   
100%|██████████| 1500/1500 [17:30<00:00,  1.70it/s]

{'eval_loss': 0.030043931677937508, 'eval_f1': 0.9795918367346939, 'eval_roc_auc': 0.985042226228277, 'eval_accuracy': 0.9666666666666667, 'eval_runtime': 31.3409, 'eval_samples_per_second': 38.289, 'eval_steps_per_second': 4.786, 'epoch': 5.0}


100%|██████████| 1500/1500 [17:36<00:00,  1.42it/s]

{'train_runtime': 1056.8096, 'train_samples_per_second': 11.355, 'train_steps_per_second': 1.419, 'train_loss': 0.09562087059020996, 'epoch': 5.0}





TrainOutput(global_step=1500, training_loss=0.09562087059020996, metrics={'train_runtime': 1056.8096, 'train_samples_per_second': 11.355, 'train_steps_per_second': 1.419, 'total_flos': 789375688704000.0, 'train_loss': 0.09562087059020996, 'epoch': 5.0})

In [18]:
trainer.evaluate()

100%|██████████| 150/150 [00:30<00:00,  4.87it/s]


{'eval_loss': 0.05384119600057602,
 'eval_f1': 0.9841954022988506,
 'eval_roc_auc': 0.9864242630546592,
 'eval_accuracy': 0.9683333333333334,
 'eval_runtime': 30.9894,
 'eval_samples_per_second': 38.723,
 'eval_steps_per_second': 4.84,
 'epoch': 5.0}

In [19]:
text = "Parang gago tong mga pari na to"

encoding = tokenizer(text, return_tensors="pt")
encoding = {k: v.to(trainer.model.device) for k,v in encoding.items()}

outputs = trainer.model(**encoding)

In [20]:
output = outputs.logits
output.shape

torch.Size([1, 8])

In [21]:
# apply sigmoid + threshold
sigmoid = torch.nn.Sigmoid()
probs = sigmoid(output.squeeze().cpu())
predictions = np.zeros(probs.shape)
predictions[np.where(probs >= 0.5)] = 1
# turn predicted id's into actual label names
predicted_labels = [id2label[idx] for idx, label in enumerate(predictions) if label == 1.0]
print(predicted_labels)

[]


In [23]:
from transformers import AutoModelForSequenceClassification

# Load the best model
best_model = AutoModelForSequenceClassification.from_pretrained(args.output_dir + "/checkpoint-1500")


In [24]:
import numpy as np
from sklearn.metrics import multilabel_confusion_matrix

def calculate_metrics(confusion_matrix):
    TP = confusion_matrix[1, 1]  # True Positives
    FP = confusion_matrix[0, 1]  # False Positives0..
    FN = confusion_matrix[1, 0]  # False Negatives
    TN = confusion_matrix[0, 0]  # True Negatives

    precision = TP / (TP + FP) if TP + FP > 0 else 0
    recall = TP / (TP + FN) if TP + FN > 0 else 0
    f1_score = (2 * precision * recall) / (precision + recall) if precision + recall > 0 else 0

    return {
        "Precision": precision,
        "Recall": recall,
        "F1-Score": f1_score
    }

def hamming_loss(y_true, y_pred):
    xor_result = np.logical_xor(y_true, y_pred)
    xor_sum = np.sum(xor_result)
    hamming_loss = xor_sum / (y_true.shape[0] * y_true.shape[1])
    return hamming_loss

def multilabel_metrics(predictions, labels, mode="validation", threshold=0.5):
    sigmoid = torch.nn.Sigmoid()
    probabilities = sigmoid(torch.Tensor(predictions))

    y_pred = np.zeros(probabilities.shape)
    y_pred[np.where(probabilities >= threshold)] = 1

    y_true = np.zeros(labels.shape)
    y_true[np.where(labels == 1)] = 1

    confusion_matrix = multilabel_confusion_matrix(y_true, y_pred)

    print("Confusion Matrix")
    print(confusion_matrix)

    label_metrics = {}
    classes = ['Age', 'Gender', 'Physical', 'Race', 'Religion', 'Others']

    for i, class_name in enumerate(classes):
        metrics = calculate_metrics(confusion_matrix[i])
        label_metrics[class_name] = metrics

        precision = metrics['Precision'] * 100
        recall = metrics['Recall'] * 100
        f1 = metrics['F1-Score'] * 100

        print(f"{class_name}")
        print(f"    Precision: {precision:.2f}%")
        print(f"    Recall: {recall:.2f}%")
        print(f"    F-Measure: {f1:.2f}%")

    label_metrics['hamming_loss'] = hamming_loss(y_true, y_pred)
    print(f"\nHamming Loss ({mode}): {label_metrics['hamming_loss']}")

    return label_metrics


In [28]:
def get_cm_eval(y_pred, y_true): # TN, FP, FN, TP

    cm_eval = []

    for idx, label in enumerate(y_pred):
         
        if   y_pred[idx] == 0 and y_true[idx] == 0:
            cm_eval.append("TN") 
        elif y_pred[idx] == 1 and y_true[idx] == 0:
            cm_eval.append("FP") 
        elif y_pred[idx] == 0 and y_true[idx] == 1:
            cm_eval.append("FN") 
        elif y_pred[idx] == 1 and y_true[idx] == 1:
            cm_eval.append("TP") 

    return cm_eval
    

def test_model(model, test_dataset, labels=LABELS):

    model.eval()
    threshold = 0.5

    # initialize dataframe for each labels
    header_row = ['ID', 'Text', 'Actual Labels', 'Predicted Labels', 'Evaluation']
    df_set = {}
    for label in labels:
        df_set[label] = pd.DataFrame(columns=header_row) 

    # get rows
    for input_id, data in test_dataset.iterrows():

            input_sentence = data['Text']
            encoded_data = encode_data(data)

            # get true labels
            true_labels = encoded_data['labels']
            y_true = np.zeros(true_labels.shape)
            y_true[np.where(true_labels >= threshold)] = 1

            # get predictions
            with torch.no_grad():
                outputs = model(input_ids=encoded_data['input_ids'], attention_mask=encoded_data['attention_mask'])

            logits = outputs.logits
            sigmoid = torch.nn.Sigmoid()
            probabilities = sigmoid(logits.squeeze().cpu())
            y_pred = np.zeros(probabilities.shape)
            y_pred[np.where(probabilities >= threshold)] = 1

             # get evaluation
            for idx, label in enumerate(labels):

                df_row = [input_id, input_sentence]

                if   y_pred[idx] == 0 and y_true[idx] == 0:
                    y_eval = "TN" 
                elif y_pred[idx] == 1 and y_true[idx] == 0:
                    y_eval = "FP" 
                elif y_pred[idx] == 0 and y_true[idx] == 1:
                    y_eval = "FN" 
                elif y_pred[idx] == 1 and y_true[idx] == 1:
                    y_eval = "TP"  

                df_row.append(y_true[idx].astype(int))
                df_row.append(y_pred[idx].astype(int))
                df_row.append(y_eval)

                print(df_row)
                df_set[label].loc[len(df_set[label])] = df_row

            print()

    return df_set

# save dataframe to csv for experiment 1
df_set = test_model(trainer.model, dataset['test'])

NameError: name 'encode_data' is not defined

In [103]:
def get_metrics(eval_count: dict):
    TP = eval_count['TP'] # True Positives
    FP = eval_count['FP']  # False Positives
    FN = eval_count['FN']  # False Negatives
    TN = eval_count['TN']  # True Negatives

    precision = TP / (TP + FP) if TP + FP > 0 else 0
    recall = TP / (TP + FN) if TP + FN > 0 else 0
    f1_score = (2 * precision * recall) / (precision + recall) if precision + recall > 0 else 0

    return precision, recall, f1_score

# Experiment Paper 2
def exp2(df_set, labels=LABELS):
    
    header_row = ['Label', 'True Positives (TP)', 'True Negatives (TN)', 'False Positives (FP)', 'False Negatives (FN)', 'Precision', 'Recall', 'F-measure']
    exp2_df = pd.DataFrame(columns=header_row)

    for idx, label in enumerate(labels):

        # evaluation count for each label
        eval_count = {
            'TP' : 0,
            'TN' : 0,
            'FP' : 0,
            'FN' : 0
        }

        label_eval_column = df_set[label]['Evaluation']

        for eval in label_eval_column:
            eval_count[eval] += 1
        
        precision, recall, f1_score = get_metrics(eval_count) 

        exp2_df.loc[len(exp2_df)] = [
            label, 
            eval_count['TP'],
            eval_count['TN'],
            eval_count['FP'],
            eval_count['FN'],
            precision,
            recall,
            f1_score
        ]
    
    return exp2_df

exp2_df = exp2(df_set, labels=LABELS)
exp2_df

Unnamed: 0,Label,True Positives (TP),True Negatives (TN),False Positives (FP),False Negatives (FN),Precision,Recall,F-measure
0,Age,123,489,11,7,0.91791,0.946154,0.931818
1,Gender,118,481,15,16,0.887218,0.880597,0.883895
2,Physical,112,465,17,36,0.868217,0.756757,0.808664
3,Race,93,507,14,16,0.869159,0.853211,0.861111
4,Religion,112,511,5,2,0.957265,0.982456,0.969697
5,Others,73,519,15,23,0.829545,0.760417,0.793478
