In [1]:
!pip install datasets



In [2]:
#Load the Dataset
from datasets import load_dataset
import pandas as pd
import random
import re
import torch
import os

In [3]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
# Load the 'us_bills' subset
dataset = load_dataset("pile-of-law/pile-of-law", "us_bills")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/pile-of-law--pile-of-law/c1090502f95031ebfad49ede680394da5532909fa46b7a0452be8cddecc9fa60
INFO:datasets.info:Loading Dataset Infos from /root/.cache/huggingface/modules/datasets_modules/datasets/pile-of-law--pile-of-law/c1090502f95031ebfad49ede680394da5532909fa46b7a0452be8cddecc9fa60
Overwrite dataset info from restored data version if exists.
INFO:datasets.builder:Overwrite dataset info from restored data version if exists.
Loading Dataset info from /root/.cache/huggingface/datasets/pile-of-law__

In [7]:
# Number of bills
len(set(dataset['train']['text']))

84362

In [None]:
#dataset['train']['text'][10]

In [6]:
def clean_text(bills):
    clean_bills = []
    for bill in bills:
      # Remove all newline and tab characters
      text = bill.replace('\n', ' ').replace('\t', ' ')
      # Remove sequences of exactly three lowercase letters
      text = re.sub(r'\[([a-z]{1,3})\]', ' ', text, flags=re.IGNORECASE)
      clean_bills.append(text.lower())

    return clean_bills

In [None]:
print(clean_text(dataset['train']['text'][:2]))

['    113 s2875 is: national guard investigations transparency and improvement act of 2014 u.s. senate 2014-09-18 text/xml en pursuant to title 17 section 105 of the united states code, this file is not subject to copyright protection and is in the public domain.      ii   113th congress2d session   s. 2875   in the senate of the united states       september 18, 2014    mr. begich introduced the following bill; which was read twice and referred to the committee on armed services      a bill   to codify in law the establishment and duties of the office of complex administrative     investigations in the national guard bureau, and for other purposes.    1.short titlethis act may be cited as the national guard investigations transparency and improvement act of 2014.2.codification in law of establishment and duties of the office of complex administrative     investigations in the national guard bureau(a)in generalthere is in the office of the chief of the national guard bureau the office 

In [None]:
dataset['train']['text'][:2]

['\n\t\n\n113 S2875 IS: National Guard Investigations Transparency and Improvement Act of 2014\nU.S. Senate\n2014-09-18\ntext/xml\nEN\nPursuant to Title 17 Section 105 of the United States Code, this file is not subject to copyright protection and is in the public domain.\n\n\n\n\t\tII\n\t\t113th CONGRESS2d Session\n\t\tS. 2875\n\t\tIN THE SENATE OF THE UNITED STATES\n\t\t\n\t\t\tSeptember 18, 2014\n\t\t\tMr. Begich introduced the following bill; which was read twice and referred to the Committee on Armed Services\n\t\t\n\t\tA BILL\n\t\tTo codify in law the establishment and duties of the Office of Complex Administrative\n\t\t\t Investigations in the National Guard Bureau, and for other purposes.\n\t\n\t1.Short titleThis Act may be cited as the National Guard Investigations Transparency and Improvement Act of 2014.2.Codification in law of establishment and duties of the Office of Complex Administrative\n\t\t\t Investigations in the National Guard Bureau(a)In generalThere is in the Offi

In [7]:
# Load GPT-2 model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

In [8]:
def predict_next_word(text, model=model, tokenizer=tokenizer):
    """
    Predicts the next word for a given text using GPT-2.
    """
    # Tokenize the input text with truncation to max_length
    input_ids = text #.unsqueeze(0) #tokenizer.encode(text, return_tensors="pt", truncation=True, max_length=1024)

    #### print("Predict next word, input shape: ", input_ids.shape)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    #print(device)
    #print("input_ids device: ", input_ids.device)
    # Generate logits
    with torch.no_grad():
        outputs = model(input_ids.to(device))
        logits = outputs.logits

    # Predict the next token
    predicted_token_id = torch.argmax(logits[:, -1, :], dim=-1).item()
    predicted_token = tokenizer.decode(predicted_token_id)

    return predicted_token


    # Predict the next token
    predicted_token_id = torch.argmax(logits[:, -1, :], dim=-1).item()
    predicted_token = tokenizer.decode(predicted_token_id)

    return predicted_token


In [9]:
def predict_every_n_words(bill_text, bill_number, model, interval_min=20, interval_max=40):
    words = tokenizer.encode(bill_text, return_tensors="pt", truncation=True, max_length=1024) #bill_text.split()
    n = words.shape[1]
    results = []

    #### print(words.shape)
    #### print(words)

    random.seed(42)

    # Start from the first 20 words and predict every 20-40 words
    i = 15
    while i < n and i < 1024:
        # Prepare the input chunk (first i words)
        text_chunk = words[:,:i] #' '.join(words[:i])
        actual_next_word = words[:,i]
        #### print("Current text chunk: ",  tokenizer.decode(text_chunk[0], skip_special_tokens=True))
        # Predict the next word
        predicted_next_word = predict_next_word(text_chunk, model=model)
        english_actual_next_word = tokenizer.decode(actual_next_word, skip_special_tokens=True)
        # Store the results: actual, predicted, and the number of words in the input text
        res = {
            "bill_number": bill_number,
            "input_text_length": i,
            "actual_next_word": english_actual_next_word,
            "predicted_next_word": predicted_next_word,
            "correct_prediction": english_actual_next_word.lower() == predicted_next_word.lower()
        }
        results.append(res)

        #### print("Appending row: ", res)

        # Move to the next interval of 20-40 words
        interval = random.randint(interval_min, interval_max)
        i += interval

    return results


In [10]:
def evaluate_bills(bills, model):
    overall_predictions = []
    total_correct = 0
    total_total = 0

    for idx, bill_text in enumerate(bills):
        print(f"Processing bill {idx} of length {len(bill_text)} characters...")

        # Get the results from predict_every_n_words
        results = predict_every_n_words(bill_text, idx, model=model)
        display(pd.DataFrame(results))
        # Extend the overall predictions list with the results from this bill
        overall_predictions.extend(results)

        # Calculate accuracy for the current bill
        correct_predictions = sum(1 for result in results if result['correct_prediction'])
        bill_accuracy = correct_predictions / len(results) if len(results) > 0 else 0

        print(f"Bill {idx} accuracy: {bill_accuracy * 100:.2f}%\n")

        # Track the total correct and total predictions
        total_correct += correct_predictions
        total_total += len(results)

    df_overall_predictions = pd.DataFrame(overall_predictions) #dataframe

    # Calculate overall accuracy for all bills
    overall_accuracy = total_correct / total_total if total_total > 0 else 0
    print(f"Overall accuracy for all bills: {overall_accuracy * 100:.2f}%")

    return df_overall_predictions


In [None]:
# Clean text for both train and validation splits
#cleaned_train = clean_text(dataset["train"]["text"])
cleaned_validation = clean_text(dataset["validation"]["text"][:10]) #subset - 5 samples only

df_predictions_bills = evaluate_bills(cleaned_validation, model)

Processing bill 0 of length 6705 characters...


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument index in method wrapper_CUDA__index_select)

In [None]:
df_predictions_bills2 = evaluate_bills(cleaned_validation, model)

Processing bill 0 of length 48228 characters...


Unnamed: 0,bill_number,input_text_length,actual_next_word,predicted_next_word,correct_prediction
0,0,15,the,the,True
1,0,55,the,the,True
2,0,78,i,,False
3,0,98,united,united,True
4,0,126,second,second,True
5,0,153,,,True
6,0,180,budget,,False
7,0,204,for,for,True
8,0,227,(,,False
9,0,264,1,1,True


Bill 0 accuracy: 57.14%

Overall accuracy for all bills: 57.14%


In [13]:
#### Fine Tuning ####

In [11]:
!pip install wandb



In [12]:
from datasets import load_dataset
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from datasets import Dataset
from torch.utils.data import DataLoader

In [13]:
# Clean text for both train and validation splits
cleaned_train = clean_text(dataset["train"]["text"][:2])
cleaned_validation = clean_text(dataset["validation"]["text"][1:2]) #subset - 5 samples only
os.environ["WANDB_DISABLED"] = "true"

In [14]:
# Custom function to generate chunks from documents
def generate_chunks_from_text(text, min_length=100, max_length=1024, tokenizer = tokenizer):
    print("I be")
    tokens = tokenizer.encode(text, truncation=False, padding=False)  # Encode without truncation
    print("Doubting")
    chunks = []
    while len(tokens) >= min_length:
        # Generate a random chunk length between min_length and max_length (or remaining tokens)
        chunk_length = random.randint(min_length, min(max_length, len(tokens)))  # Random length for each chunk
        chunk = tokens[:chunk_length]
        chunks.append(chunk)
        tokens = tokens[chunk_length:]  # Remove the processed chunk from the tokens
    print("Generated n chunks: ", len(chunks))
    return chunks

# Function to manually expand the dataset by adding multiple rows for each document
def split_into_chunks(train_bills, min_length=100, max_length=1024, tokenizer=tokenizer):
    all_input_ids = []
    all_labels = []
    all_attention_masks = []  # To store attention masks

    for text in train_bills:
        # Split the document into multiple chunks
        chunks = generate_chunks_from_text(text, min_length, max_length, tokenizer)
        for chunk in chunks:
            all_input_ids.append(chunk)
            all_labels.append(chunk)  # For causal language modeling, labels are the same as input_ids

            # Generate attention mask: 1 for real tokens, 0 for padding (for now, we assume no padding in chunks)
            attention_mask = [1] * len(chunk)
            all_attention_masks.append(attention_mask)

    print("Raw input ids and label ids dims:", len(all_input_ids), " x ", len(all_input_ids[0]), " or ", len(all_input_ids[1]), ' and ', len(all_labels), ' x ', len(all_labels[0]), ' or ', len(all_labels[1]))

    # Now, pad the sequences to ensure they have consistent lengths
    input_ids_padded = tokenizer.pad(
        {"input_ids": all_input_ids},  # Only need to pad input_ids
        padding='max_length',  # Pad to the longest sequence in the batch
        max_length=max_length,  # Set max length
        return_tensors="pt"  # Return as pytorch tensors
    )

    # Pad labels as well
    labels_padded = tokenizer.pad(
        {"input_ids": all_labels},  # Same padding as input_ids
        padding='max_length',
        max_length=max_length,
        return_tensors="pt"
    )

    # Pad the attention masks (1 for real tokens, 0 for padding)
    attention_masks_padded = tokenizer.pad(
        {"input_ids": all_attention_masks},  # Same padding for attention masks
        padding='max_length',
        max_length=max_length,
        return_tensors="pt"
    )
    print("In split into chunks, size of all 3 columns: ", input_ids_padded['input_ids'].shape)
    # Return the padded dataset with attention masks
    return {
        "input_ids": input_ids_padded["input_ids"],
        "labels": labels_padded["input_ids"],
        "attention_mask": attention_masks_padded["input_ids"]  # Add the attention mask
    }

# Function to train the model with explicit parameters
def train_model(clean_tr, clean_val, tokenizer, model, epochs=3, output_dir="./drive/MyDrive/nlp_proj_results"):
    # Step 1: Clean the text data
    train_bills = clean_tr
    val_bills = clean_val

    # Set padding token to eos_token
    tokenizer.pad_token = tokenizer.eos_token

    # Step 2: Tokenize the cleaned data
    # random.seed(42)
    # def tokenize_function(examples):
    #     #return tokenizer(examples['text'], return_tensors="pt", truncation=True, padding="max_length", max_length=1024)
    #     min_length = 100
    #     max_length = 1024
    #     all_input_ids = []
    #     all_labels = []

    #     for text in examples['text']:
    #         tokens = tokenizer.encode(text, truncation=False, padding=False)  # Encode without truncation
    #         text_length = len(tokens)

    #         # Split the document into multiple chunks of random lengths
    #         chunks = []
    #         while len(tokens) >= min_length:
    #             # Generate a random chunk length between min_length and max_length (or remaining tokens)
    #             random_max_length = random.randint(min_length, min(max_length, len(tokens)))  # Random length for each chunk
    #             chunk = tokens[:random_max_length]
    #             chunks.append(chunk)
    #             tokens = tokens[random_max_length:]  # Remove the processed chunk from the tokens

    #         # Append the chunks for this text
    #         for chunk in chunks:
    #             all_input_ids.append(chunk)  # Add the chunk as an input example
    #             all_labels.append(chunk)

    #     # Return the tokenized chunks as input_ids
    #     res = {
    #         "input_ids": all_input_ids,
    #         "labels": all_labels  # For causal language modeling, labels are the same as input_ids
    #     }
    #     print("Tokenize fn returning res: ", len(res['input_ids']), 'by ', len(res['input_ids'][1]))
    #     #print("Input ids of shape: ", )
    #     return res

    # Convert lists into dataset format
    train_dataset = Dataset.from_dict({"text": train_bills})
    #val_dataset = Dataset.from_dict({"text": val_bills})

    # Tokenize the datasets
    train_dataset = Dataset.from_dict(split_into_chunks(train_bills, min_length=100, max_length=1024))
    #print("Train dataset size: ", train_dataset['input_ids'].shape)
    print("Train dataset: ", train_dataset)
    #train_dataset = train_dataset.map(tokenize_function, batched=True)
    #val_dataset = val_dataset.map(tokenize_function, batched=True)

    # Remove original text column since we now have tokenized data
    #train_dataset = train_dataset.remove_columns(["text"])
    #val_dataset = val_dataset.remove_columns(["text"])

    data_collator = DataCollatorForLanguageModeling(
      tokenizer=tokenizer,
      mlm=False  # This is not a masked language model task, so set to False
    )

    # Step 3: Define Training Arguments
    training_args = TrainingArguments(
        output_dir=output_dir,              # Directory to save model checkpoints
        #evaluation_strategy="epoch",         # Evaluate after every epoch
        #learning_rate=learning_rate,        # Learning rate
        #per_device_train_batch_size=batch_size,        # Batch size per device (adjust based on GPU memory)
        #per_device_eval_batch_size=batch_size,         # Batch size for evaluation
        num_train_epochs=epochs,                   # Number of training epochs
        #weight_decay=0.01,                    # Weight decay for regularization
        logging_dir="./logs",                 # Directory to save logs
        logging_steps=100,                    # Log every 100 steps
    )

    # Step 4: Initialize the Trainer
    trainer = Trainer(
        model=model,                          # The pre-trained model
        args=training_args,                   # The training arguments
        data_collator=data_collator,          # Data collator for batching
        train_dataset=train_dataset,          # The training dataset
    )

    #train_dataloader = DataLoader(train_dataset, batch_size=16, collate_fn=data_collator)
    #print("Train dataloader: ", train_dataloader.keys())
    # Inspect the first batch
    # for batch in train_dataloader:
    #     print(f"Batch keys: {batch.keys()}")  # Check the keys in the batch
    #     print(f"Batch input_ids shape: {batch['input_ids'].shape}")
    #     print(f"Batch attention_mask shape: {batch['attention_mask'].shape}")
    #     break  # Only check the first batch

    # print("Train dataset: ", train_dataloader)
    # # Fetch and print the first batch
    # for batch in train_dataloader:
    #     # print(f"Batch input:")

    #     # # Print input_ids
    #     # print("input_ids:", batch['input_ids'])
    #     # print("Shape of input_ids:", batch['input_ids'].shape)

    #     # # Decode the input_ids to see the actual text
    #     # decoded_inputs = [tokenizer.decode(ids, skip_special_tokens=True) for ids in batch['input_ids']]
    #     # print("Decoded inputs:", decoded_inputs)

    #     # # Print attention_mask (if it exists)
    #     # if 'attention_mask' in batch:
    #     #     print("attention_mask:", batch['attention_mask'])

    #     # # Print labels (they should be the same as input_ids for causal LM)
    #     # print("labels:", batch['labels'])
    #     # decoded_labels = [tokenizer.decode(ids, skip_special_tokens=True) for ids in batch['labels']]
    #     # print("Decoded labels:", decoded_labels)

    #     print("="*50)  # Separator for batch prints

    # Step 5: Start training
    trainer.train()

    # Step 6: Evaluate the model after training
    # eval_results = trainer.evaluate()
    # print(f"Evaluation Results: {eval_results}")

    # Step 7: Save the fine-tuned model
    model.save_pretrained(output_dir)
    #tokenizer.save_pretrained(output_dir)

    # Return only the fine-tuned model (no need to return the tokenizer)
    return model

In [15]:
#torch.cuda.empty_cache()
fine_tuned_gpt2 = train_model(cleaned_train, cleaned_validation, tokenizer, model, epochs=3)

Token indices sequence length is longer than the specified maximum sequence length for this model (1094 > 1024). Running this sequence through the model will result in indexing errors
Failed to cast a sequence to int8. Falling back to int64.
INFO:datasets.arrow_writer:Failed to cast a sequence to int8. Falling back to int64.
Failed to cast a sequence to int8. Falling back to int64.
INFO:datasets.arrow_writer:Failed to cast a sequence to int8. Falling back to int64.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


I be
Doubting
Generated n chunks:  2
I be
Doubting
Generated n chunks:  3
Raw input ids and label ids dims: 5  x  992  or  101  and  5  x  992  or  101
In split into chunks, size of all 3 columns:  torch.Size([5, 1024])
Train dataset:  Dataset({
    features: ['input_ids', 'labels', 'attention_mask'],
    num_rows: 5
})


Step,Training Loss


In [16]:
df_predictions_bills_finetuned = evaluate_bills(cleaned_validation, model=fine_tuned_gpt2)

Processing bill 0 of length 48228 characters...


Unnamed: 0,bill_number,input_text_length,actual_next_word,predicted_next_word,correct_prediction
0,0,15,the,fiscal,False
1,0,55,the,the,True
2,0,78,i,,False
3,0,98,united,united,True
4,0,126,second,second,True
5,0,153,,,True
6,0,180,budget,,False
7,0,204,for,for,True
8,0,227,(,,False
9,0,264,1,1,True


Bill 0 accuracy: 57.14%

Overall accuracy for all bills: 57.14%


In [1]:
# Experimenting to see the training input for fine tuning model

In [None]:
Tokenize fn returning res:  {'input_ids': [[220, 220, 220, 17318, 264, 2078, 2425, 318, 25, 2260, 4860, 10986, 13902, 290, 9025, 719, 286, 1946, 334, 13, 82, 13, 34548, 1946, 12, 2931, 12, 1507, 2420, 14, 19875, 551, 12997, 284, 3670, 1596, 2665, 13343, 286, 262, 16503, 2585, 2438, 11, 428, 2393, 318, 407, 2426, 284, 6634, 4800, 290, 318, 287, 262, 1171, 7386, 13, 220, 220, 220, 220, 220, 21065, 220, 220, 17318, 400, 8681, 17, 67, 6246, 220, 220, 264, 13, 2579, 2425, 220, 220, 287, 262, 34548, 286, 262, 16503, 2585, 220, 220, 220, 220, 220, 220, 384, 457, 1491, 1248, 11, 1946, 220, 220, 220, 285, 81, 13, 4123, 488, 5495, 262, 1708, 2855, 26, 543, 373, 1100, 5403, 290, 6412, 284, 262, 5583, 319, 6936, 2594, 220, 220, 220, 220, 220, 257, 2855, 220, 220, 284, 14873, 1958, 287, 1099, 262, 9323, 290, 10741, 286, 262, 2607, 286, 3716, 11553, 220, 220, 220, 220, 10986, 287, 262, 2260, 4860, 10583, 11, 290, 329, 584, 4959, 13, 220, 220, 220, 352, 13, 19509, 3670, 5661, 719, 743, 307, 9181, 355, 262, 2260, 4860, 10986, 13902, 290, 9025, 719, 286, 1946, 13, 17, 13, 19815, 2649, 287, 1099, 286, 9323, 290, 10741, 286, 262, 2607, 286, 3716, 11553, 220, 220, 220, 220, 10986, 287, 262, 2260, 4860, 10583, 7, 64, 8, 259, 2276, 8117, 318, 287, 262, 2607, 286, 262, 4039, 286, 262, 2260, 4860, 10583, 262, 2607, 286, 3716, 220, 220, 220, 220, 11553, 10986, 357, 259, 428, 2665, 6412, 284, 355, 262, 2607, 737, 7, 65, 8, 37295, 290, 20865, 1169, 2607, 2236, 307, 739, 262, 4571, 290, 20865, 286, 262, 4039, 286, 262, 2260, 4860, 220, 220, 220, 220, 10583, 12195, 66, 8, 67, 8249, 7, 16, 8, 259, 2276, 1169, 10741, 286, 262, 2607, 2236, 307, 284, 25340, 3716, 11553, 10986, 286, 6067, 220, 220, 220, 220, 11270, 284, 1866, 286, 262, 2260, 4860, 618, 287, 1181, 3722, 11, 1390, 220, 220, 220, 220, 10986, 286, 3206, 4641, 7411, 257, 2888, 286, 262, 2260, 4860, 220, 220, 220, 220, 287, 884, 3722, 11, 2402, 262, 2581, 286, 597, 286, 262, 1708, 37498, 64, 8, 1169, 4039, 286, 262, 2260, 4860, 10583, 12195, 65, 8, 272, 9224, 315, 415, 2276, 286, 257, 1181, 393, 7674, 393, 262, 4783, 286, 951, 2178, 544, 12195, 66, 8, 1169, 8153, 220, 286, 257, 1181, 393, 7674, 11, 393, 262, 25771, 2276, 286, 262, 2260, 4860, 286, 262, 220, 220, 220, 220, 4783, 286, 951, 2178, 544, 12195, 17, 8, 41887, 11553, 10986, 1640, 4959, 286, 428, 8371, 11, 257, 3716, 11553, 3645, 318, 597, 3645, 357, 292, 220, 220, 220, 220, 7368, 416, 262, 4039, 286, 262, 2260, 4860, 10583, 329, 4959, 220, 220, 220, 220, 286, 428, 2665, 8, 7411, 5087, 3501, 4485, 284, 8468, 13357, 287, 220, 220, 220, 220, 3645, 11, 1390, 262, 1708, 37498, 64, 8, 6138, 507, 286, 12934, 1022, 262, 16503, 2585, 290, 257, 1181, 393, 7674, 12195, 65, 8, 6759, 1010, 10616, 16976, 3047, 1871, 10240, 3790, 12195, 66, 8, 6759, 1010, 8620, 262, 761, 329, 281, 4795, 3645, 287, 1502, 284, 4155, 22692, 290, 220, 220, 220, 220, 32521, 414, 287, 3645, 12195, 18, 8, 6759, 1010, 11270, 284, 1866, 286, 262, 2260, 4860, 287, 1181, 1185, 436, 258, 12123, 1771, 393, 407, 257, 2300, 18436, 284, 257, 2888, 286, 262, 2260, 4860, 618, 287, 1181, 220, 220, 220, 220, 3722, 329, 4959, 286, 428, 2665, 2236, 307, 925, 416, 262, 4039, 286, 220, 220, 220, 220, 262, 2260, 4860, 10583, 287, 10213, 351, 9987, 7368, 416, 262, 220, 220, 220, 220, 4039, 286, 262, 2260, 4860, 10583, 220, 329, 4959, 286, 428, 2665, 12195, 67, 8, 17351, 286, 2260, 220, 4860, 10583, 3513, 286, 2457, 989, 301, 258, 4039, 286, 262, 2260, 4860, 10583, 2236, 220, 2190, 597, 2457, 989, 286, 262, 2607, 319, 257, 2300, 220, 220, 220, 220, 220, 739, 428, 2665, 355, 611, 884, 989, 547, 262, 989, 286, 281, 24110, 220, 220, 220, 220, 2276, 286, 262, 5011, 286, 3761, 393, 257, 2422, 5011, 319, 884, 220, 220, 220, 220, 2300, 12195, 68, 8, 48922, 284, 8681, 7, 16, 8, 7266, 20124, 282, 286, 2457, 3136, 284, 11702, 8570, 602, 27287, 262, 12695, 416, 262, 2607, 286, 257, 2457, 989, 319, 281, 3645, 21846, 416, 262, 2607, 220, 220, 220, 220, 12997, 284, 428, 2665, 11, 262, 4039, 286, 262, 2260, 4860, 10583, 2236, 220, 220, 220, 220, 9199, 884, 989, 357, 4480, 597, 7620, 13720, 1321, 220, 220, 220, 220, 20431, 44740, 8],
 [284, 262, 1866, 286, 8681, 422, 262, 1181, 393, 220, 220, 220, 220, 7674, 5213, 12195, 17, 8, 1236, 723, 989, 301, 258, 4039, 286, 262, 2260, 4860, 10583, 2236, 9199, 284, 8681, 1123, 614, 257, 989, 319, 262, 220, 220, 220, 220, 10986, 21846, 416, 262, 2607, 12997, 284, 428, 2665, 1141, 220, 220, 220, 220, 262, 18148, 614, 13, 1123, 989, 2236, 2291, 11, 329, 262, 614, 5017, 416, 220, 220, 220, 220, 884, 989, 11, 220, 262, 1708, 37498, 64, 8, 64, 10638, 6764, 286, 262, 220, 220, 220, 220, 10986, 21846, 1141, 884, 614, 11, 220, 220, 220, 220, 1390, 597, 11257, 287, 6067, 2426, 284, 3645, 290, 287, 6373, 220, 220, 220, 220, 355, 257, 1255, 286, 10986, 12195, 65, 8, 17018, 11],
  [900, 6071, 416, 1181, 290, 7674, 11, 319, 262, 10986, 21846, 1141, 884, 614, 220, 220, 220, 220, 7411, 7636, 286, 3206, 4641, 7411, 257, 2888, 286, 262, 2260, 220, 220, 220, 220, 4860, 12195, 66, 8, 10508, 584, 1321, 290, 6067, 319, 262, 10986, 21846, 1141, 884, 614, 355, 262, 4039, 220, 220, 220, 220, 286, 262, 2260, 4860, 10583, 14358, 5035, 12195, 69, 8, 6259, 4954, 290, 584, 1451, 397, 6392, 6386, 258, 4039, 286, 262, 2260, 4860, 10583, 2236, 4155, 326, 262, 2607, 16047, 262, 8213, 220, 220, 220, 220, 290, 584, 9889, 3306, 329, 262, 17655, 286, 262, 10741],
   [286, 262, 220, 220, 220, 220, 2607, 739, 428, 2665, 12195, 70, 8, 1676, 771, 942, 290, 12064, 301, 258, 4039, 286, 262, 2260, 4860, 10583, 2236, 2071, 11, 290, 743, 422, 640, 284, 640, 4296, 11, 220, 220, 220, 220, 9021, 290, 7729, 3306, 329, 262, 17655, 286, 262, 10741, 286, 220, 220, 220, 220, 262, 2607, 739, 428, 2665, 12195, 71, 8, 45956, 282, 286, 22754, 15395, 12064, 17351, 286, 262, 2260, 4860, 10583, 12064, 269, 782, 8482, 657, 7029, 13, 486, 11, 14567, 474, 2062, 1542, 11, 2321, 11, 2236, 423, 645, 220, 220, 220, 220, 2252, 2700, 393, 1245, 13]],
                             'labels': [[220, 220, 220, 17318, 264, 2078, 2425, 318, 25, 2260, 4860, 10986, 13902, 290, 9025, 719, 286, 1946, 334, 13, 82, 13, 34548, 1946, 12, 2931, 12, 1507, 2420, 14, 19875, 551, 12997, 284, 3670, 1596, 2665, 13343, 286, 262, 16503, 2585, 2438, 11, 428, 2393, 318, 407, 2426, 284, 6634, 4800, 290, 318, 287, 262, 1171, 7386, 13, 220, 220, 220, 220, 220, 21065, 220, 220, 17318, 400, 8681, 17, 67, 6246, 220, 220, 264, 13, 2579, 2425, 220, 220, 287, 262, 34548, 286, 262, 16503, 2585, 220, 220, 220, 220, 220, 220, 384, 457, 1491, 1248, 11, 1946, 220, 220, 220, 285, 81, 13, 4123, 488, 5495, 262, 1708, 2855, 26, 543, 373, 1100, 5403, 290, 6412, 284, 262, 5583, 319, 6936, 2594, 220, 220, 220, 220, 220, 257, 2855, 220, 220, 284, 14873, 1958, 287, 1099, 262, 9323, 290, 10741, 286, 262, 2607, 286, 3716, 11553, 220, 220, 220, 220, 10986, 287, 262, 2260, 4860, 10583, 11, 290, 329, 584, 4959, 13, 220, 220, 220, 352, 13, 19509, 3670, 5661, 719, 743, 307, 9181, 355, 262, 2260, 4860, 10986, 13902, 290, 9025, 719, 286, 1946, 13, 17, 13, 19815, 2649, 287, 1099, 286, 9323, 290, 10741, 286, 262, 2607, 286, 3716, 11553, 220, 220, 220, 220, 10986, 287, 262, 2260, 4860, 10583, 7, 64, 8, 259, 2276, 8117, 318, 287, 262, 2607, 286, 262, 4039, 286, 262, 2260, 4860, 10583, 262, 2607, 286, 3716, 220, 220, 220, 220, 11553, 10986, 357, 259, 428, 2665, 6412, 284, 355, 262, 2607, 737, 7, 65, 8, 37295, 290, 20865, 1169, 2607, 2236, 307, 739, 262, 4571, 290, 20865, 286, 262, 4039, 286, 262, 2260, 4860, 220, 220, 220, 220, 10583, 12195, 66, 8, 67, 8249, 7, 16, 8, 259, 2276, 1169, 10741, 286, 262, 2607, 2236, 307, 284, 25340, 3716, 11553, 10986, 286, 6067, 220, 220, 220, 220, 11270, 284, 1866, 286, 262, 2260, 4860, 618, 287, 1181, 3722, 11, 1390, 220, 220, 220, 220, 10986, 286, 3206, 4641, 7411, 257, 2888, 286, 262, 2260, 4860, 220, 220, 220, 220, 287, 884, 3722, 11, 2402, 262, 2581, 286, 597, 286, 262, 1708, 37498, 64, 8, 1169, 4039, 286, 262, 2260, 4860, 10583, 12195, 65, 8, 272, 9224, 315, 415, 2276, 286, 257, 1181, 393, 7674, 393, 262, 4783, 286, 951, 2178, 544, 12195, 66, 8, 1169, 8153, 220, 286, 257, 1181, 393, 7674, 11, 393, 262, 25771, 2276, 286, 262, 2260, 4860, 286, 262, 220, 220, 220, 220, 4783, 286, 951, 2178, 544, 12195, 17, 8, 41887, 11553, 10986, 1640, 4959, 286, 428, 8371, 11, 257, 3716, 11553, 3645, 318, 597, 3645, 357, 292, 220, 220, 220, 220, 7368, 416, 262, 4039, 286, 262, 2260, 4860, 10583, 329, 4959, 220, 220, 220, 220, 286, 428, 2665, 8, 7411, 5087, 3501, 4485, 284, 8468, 13357, 287, 220, 220, 220, 220, 3645, 11, 1390, 262, 1708, 37498, 64, 8, 6138, 507, 286, 12934, 1022, 262, 16503, 2585, 290, 257, 1181, 393, 7674, 12195, 65, 8, 6759, 1010, 10616, 16976, 3047, 1871, 10240, 3790, 12195, 66, 8, 6759, 1010, 8620, 262, 761, 329, 281, 4795, 3645, 287, 1502, 284, 4155, 22692, 290, 220, 220, 220, 220, 32521, 414, 287, 3645, 12195, 18, 8, 6759, 1010, 11270, 284, 1866, 286, 262, 2260, 4860, 287, 1181, 1185, 436, 258, 12123, 1771, 393, 407, 257, 2300, 18436, 284, 257, 2888, 286, 262, 2260, 4860, 618, 287, 1181, 220, 220, 220, 220, 3722, 329, 4959, 286, 428, 2665, 2236, 307, 925, 416, 262, 4039, 286, 220, 220, 220, 220, 262, 2260, 4860, 10583, 287, 10213, 351, 9987, 7368, 416, 262, 220, 220, 220, 220, 4039, 286, 262, 2260, 4860, 10583, 220, 329, 4959, 286, 428, 2665, 12195, 67, 8, 17351, 286, 2260, 220, 4860, 10583, 3513, 286, 2457, 989, 301, 258, 4039, 286, 262, 2260, 4860, 10583, 2236, 220, 2190, 597, 2457, 989, 286, 262, 2607, 319, 257, 2300, 220, 220, 220, 220, 220, 739, 428, 2665, 355, 611, 884, 989, 547, 262, 989, 286, 281, 24110, 220, 220, 220, 220, 2276, 286, 262, 5011, 286, 3761, 393, 257, 2422, 5011, 319, 884, 220, 220, 220, 220, 2300, 12195, 68, 8, 48922, 284, 8681, 7, 16, 8, 7266, 20124, 282, 286, 2457, 3136, 284, 11702, 8570, 602, 27287, 262, 12695, 416, 262, 2607, 286, 257, 2457, 989, 319, 281, 3645, 21846, 416, 262, 2607, 220, 220, 220, 220, 12997, 284, 428, 2665, 11, 262, 4039, 286, 262, 2260, 4860, 10583, 2236, 220, 220, 220, 220, 9199, 884, 989, 357, 4480, 597, 7620, 13720, 1321, 220, 220, 220, 220, 20431, 44740, 8],
 [284, 262, 1866, 286, 8681, 422, 262, 1181, 393, 220, 220, 220, 220, 7674, 5213, 12195, 17, 8, 1236, 723, 989, 301, 258, 4039, 286, 262, 2260, 4860, 10583, 2236, 9199, 284, 8681, 1123, 614, 257, 989, 319, 262, 220, 220, 220, 220, 10986, 21846, 416, 262, 2607, 12997, 284, 428, 2665, 1141, 220, 220, 220, 220, 262, 18148, 614, 13, 1123, 989, 2236, 2291, 11, 329, 262, 614, 5017, 416, 220, 220, 220, 220, 884, 989, 11, 220, 262, 1708, 37498, 64, 8, 64, 10638, 6764, 286, 262, 220, 220, 220, 220, 10986, 21846, 1141, 884, 614, 11, 220, 220, 220, 220, 1390, 597, 11257, 287, 6067, 2426, 284, 3645, 290, 287, 6373, 220, 220, 220, 220, 355, 257, 1255, 286, 10986, 12195, 65, 8, 17018, 11],
 [900, 6071, 416, 1181, 290, 7674, 11, 319, 262, 10986, 21846, 1141, 884, 614, 220, 220, 220, 220, 7411, 7636, 286, 3206, 4641, 7411, 257, 2888, 286, 262, 2260, 220, 220, 220, 220, 4860, 12195, 66, 8, 10508, 584, 1321, 290, 6067, 319, 262, 10986, 21846, 1141, 884, 614, 355, 262, 4039, 220, 220, 220, 220, 286, 262, 2260, 4860, 10583, 14358, 5035, 12195, 69, 8, 6259, 4954, 290, 584, 1451, 397, 6392, 6386, 258, 4039, 286, 262, 2260, 4860, 10583, 2236, 4155, 326, 262, 2607, 16047, 262, 8213, 220, 220, 220, 220, 290, 584, 9889, 3306, 329, 262, 17655, 286, 262, 10741],
  [286, 262, 220, 220, 220, 220, 2607, 739, 428, 2665, 12195, 70, 8, 1676, 771, 942, 290, 12064, 301, 258, 4039, 286, 262, 2260, 4860, 10583, 2236, 2071, 11, 290, 743, 422, 640, 284, 640, 4296, 11, 220, 220, 220, 220, 9021, 290, 7729, 3306, 329, 262, 17655, 286, 262, 10741, 286, 220, 220, 220, 220, 262, 2607, 739, 428, 2665, 12195, 71, 8, 45956, 282, 286, 22754, 15395, 12064, 17351, 286, 262, 2260, 4860, 10583, 12064, 269, 782, 8482, 657, 7029, 13, 486, 11, 14567, 474, 2062, 1542, 11, 2321, 11, 2236, 423, 645, 220, 220, 220, 220, 2252, 2700, 393, 1245, 13]]}
