# Import Packages

In [1]:
!pip install transformers loralib sentencepiece 



In [2]:
import os
from os.path import join, dirname, abspath
import matplotlib.pyplot as plt
import random
import copy

import numpy as np
import pandas as pd

import torch
from torch import nn

# Options

In [3]:
def seed_everything(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
    torch.backends.cudnn.benchmark = True  # type: ignore
seed_everything()

In [4]:
class PATH:
    root   = '/root/court-decision-prediction'
    yaml   = join(root, 'court_decision_prediction/configs.yaml')
    data   = join(root, 'data/open')
    train  = join(data, 'train.csv')
    test   = join(data, 'test.csv')
    sample = join(data, 'sample_submission.csv')
    submit = join(root, 'submission')

# 1. Load dataset

In [222]:
train_data = pd.read_csv(PATH.train)
train_data

Unnamed: 0,ID,first_party,second_party,facts,first_party_winner
0,TRAIN_0000,Phil A. St. Amant,Herman A. Thompson,"On June 27, 1962, Phil St. Amant, a candidate ...",1
1,TRAIN_0001,Stephen Duncan,Lawrence Owens,Ramon Nelson was riding his bike when he suffe...,0
2,TRAIN_0002,Billy Joe Magwood,"Tony Patterson, Warden, et al.",An Alabama state court convicted Billy Joe Mag...,1
3,TRAIN_0003,Linkletter,Walker,Victor Linkletter was convicted in state court...,0
4,TRAIN_0004,William Earl Fikes,Alabama,"On April 24, 1953 in Selma, Alabama, an intrud...",1
...,...,...,...,...,...
2473,TRAIN_2473,"HollyFrontier Cheyenne Refining, LLC, et al.","Renewable Fuels Association, et al.",Congress amended the Clean Air Act through the...,1
2474,TRAIN_2474,"Grupo Mexicano de Desarrollo, S. A.","Alliance Bond Fund, Inc.","Alliance Bond Fund, Inc., an investment fund, ...",1
2475,TRAIN_2475,Peguero,United States,"In 1992, the District Court sentenced Manuel D...",0
2476,TRAIN_2476,Immigration and Naturalization Service,St. Cyr,"On March 8, 1996, Enrico St. Cyr, a lawful per...",0


# 2. Modeling

In [8]:
from transformers import AutoTokenizer, LlamaForCausalLM, LlamaTokenizer
import loralib as lora

# model_select = 'AlekseyKorshuk/vicuna-7b'
model_select = 'lmsys/vicuna-13b-v1.3'
tokenizer    = LlamaTokenizer.from_pretrained(model_select)
model        = LlamaForCausalLM.from_pretrained(model_select).to(torch.bfloat16).cuda()

# model.requires_grad = False
# lora.mark_only_lora_as_trainable(model)

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/585 [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/33.4k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00003.bin:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00003.bin:   0%|          | 0.00/9.90G [00:00<?, ?B/s]

Downloading (…)l-00003-of-00003.bin:   0%|          | 0.00/6.18G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

In [233]:
def generate_input(first_party, second_party, facts):
    prompt = f"""
    A chat between a curious user and an artificial intelligence assistant.
    The assistant gives helpful and concise answers to the user's questions.
    
    USER: 
        - first_party: {first_party}
        - second_party: {second_party}
        - facts:
        {facts}
        
        - Question:
        Summarize actions and states of first_party and second_party with list.
            
    ASSISTANT: 
    """
#         Who wins the case, first_party or second_party?
#         Answer with listed four reasons.

    #     Tell me just the answer, without any detailed reasons.

    input_ids = tokenizer(prompt, return_tensors='pt').input_ids.cuda()
    return input_ids


def decode_output(outputs):
    rst = []
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
#     if 'first' in answer.lower():
#         rst.append(1)
#     else:
#         rst.append(0)
    rst.append(answer)
    return rst

In [234]:
with torch.no_grad():
    input_ids  = generate_input(first_party, second_party, facts)
    output_ids = model.generate(input_ids, max_new_tokens=128)
    answer     = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print(answer)


    A chat between a curious user and an artificial intelligence assistant.
    The assistant gives helpful and concise answers to the user's questions.
    
    USER: 
        - first_party: Leiter Minerals, Inc.,
        - second_party: United States, The California Company, Allen L. Lobrano
        - facts:
        In December 1938, Thomas Leiter conveyed approximately 8,711 acres of land in Plaquemines Parish, Louisiana to the United States. The deed contained a mineral reservation under which Leiter retained the right to mine and remove all valuable minerals until April 1, 1945. The deed allowed for the extension of this reservation for an additional five years so long as operations were conducted profitably during the previous five years for an average of fifty days a year. If at the end of the original term or an additional extended term the operation had not carried on for fifty days a year, Leiter’s right to mine would terminate, and complete title would become vested in the 

In [226]:
for idx in range(len(train_data)):
    row = train_data.values[idx]
    _, first_party, second_party, facts, label = row
    with torch.no_grad():
        input_ids  = generate_input(first_party, second_party, facts)
        output_ids = model.generate(input_ids, max_new_tokens=64)        
        answer     = tokenizer.decode(output_ids[0][len(input_ids[0]):], skip_special_tokens=True)
    print(label, answer)
    break

1 
    Second party, Herman A. Thompson, wins the case.
0 
    Second party, Lawrence Owens.
1 
    Second party (Tony Patterson, Warden, et al.) wins the case.
0 
    Based on the facts provided, it is likely that Victor Linkletter would win the case and be granted a retrial. The Mapp v. Ohio decision applied the exclusionary rule to state criminal proceedings, which means that illegally obtained evidence cannot be used at trial. Since Linkletter was conv
1 
    It is not clear from the information provided who wins the case between the first and second parties. The facts describe a criminal trial in which the defendant, William Earl Fikes, was convicted and sentenced to death. It is possible that the state, represented by Alabama, won the case, but it
1 
    It is not clear from the information provided who wins the case between the first and second parties. The information provided only states that Clarkstown sued Carbone in a New York Supreme Court, and that Carbone responded by su

1 
    It is not possible to determine who wins the case based on the information provided. The outcome of the case would depend on the specific arguments and evidence presented by both parties, as well as the decision of the court.
0 
    It is not clear from the information provided who wins the case between Gooding and Wilson. The information provided only mentions that Johnny Wilson was convicted of violating a state statute and that he successfully sought habeas corpus relief from a Georgia federal district court. It does not provide any information about the outcome
1 
    Second party (Michael H. Resh, et al.) wins the case.
1 
    Second-party (Planned Parenthood Federation of America, Inc., et al.) wins the case.
0 
    The second party, Gralike, wins the case.
0 
    The second party, Henri Tatro, et ex., wins the case.
0 
    The Supreme Court ultimately ruled in favor of the first party, Phillips.
1 
    The second party, John C. Tesmer, et al., wins the case.
1 
    The se

KeyboardInterrupt: 

In [183]:
# from torch.utils.data import Dataset, DataLoader

# class FactsDataset(Dataset):
#     def __init__(self, data):
#         self.data = data.values
#     def __len__(self):
#         return len(self.data)
#     def __getitem__(self, idx):
#         row = self.data[idx]
#         first_party  = row[1]
#         second_party = row[2]
#         facts        = row[3]
#         label        = row[4]
#         input_ids    = tokenizer(self._get_prompt(first_party, second_party, facts), return_tensors='pt').input_ids.cuda()
#         return input_ids, label
    
#     @staticmethod
#     def _get_prompt(first_party, second_party, facts):
#         return f"""
#             A chat between a curious user and an artificial intelligence assistant.
#             The assistant gives helpful and concise answers to the user's questions.

#             USER: 
#                 - first_party: {first_party}
#                 - second_party: {second_party}
#                 - facts:
#                 {facts}

#                 - Question:
#                 Who wins the case, first_party or second_party?
#                 Tell me just the answer, without any detailed reasons.

#             ASSISTANT: 
#             """

# ds = FactsDataset(train)
# # dl = DataLoader(ds, batch_size=1, shuffle=True)

---