In [1]:
import tqdm, json, os, re, operator
import string, pprint, itertools, seaborn, collections, numpy as np, csv, requests, nltk, random
import JudgmentsProcessor
import JudgmentsClassifier
import pickle

In [2]:
def process_judgment(judgment):
    data_to_query = (",").join(judgment)
    r = requests.post(data=data_to_query.encode("utf-8"), url="http://localhost:9200")
    response_text = r.text
    splited_response = response_text.splitlines()
    splited_response = [" ".join(x.replace("\t", " ").replace("none", "")[1:].split(":")[:2][:1]).replace(" ", ":")
                        for x in splited_response if ":" in x]
    splited_response = [x.split(":")[0] for x in splited_response]
    return splited_response

judgments = []
signatures = []
judgment_succeeded = []

def read_file(file_path, is_transforming=False):
   
    print ("Processing file" + file_path)
    with open (file_path) as file:
        json_content = json.load(file)
        item_count = 0
        for item in tqdm.tqdm(json_content):
            judgment = []
            if item['courtType'] in ['COMMON', 'SUPREME']:
                courtCases = item['courtCases']
                signatures_for_judgment = []
                for courtCase in courtCases:
                    signature = courtCase['caseNumber']
                    signatures_for_judgment.append(signature)
                signatures.append(signatures_for_judgment)
                item_count += 1
                text_content = re.sub("<.*?>", "", item["textContent"])
                text_content = text_content.replace('-\n', '')
                word_content = text_content.split()
                topicSpecificPunctuation = '„”–§…«»'
                translator = str.maketrans('', '', string.punctuation+topicSpecificPunctuation)

                for word in word_content:
                    word = word.translate(translator).lower()
                    if len(word)>0:
                        judgment.append(word)
                if not is_transforming:
                    judgments.append(judgment)
                else:
                    processed = process_judgment(judgment)
                    judgments.append(processed)
                    judgment_succeeded.append(len(judgment) > 0)

In [3]:
def read_all_judgments_from_2018(is_transforming=False):
        for filename in os.listdir("../data_filtered_2/"):
            read_file("../data_filtered_2/" + filename, is_transforming)

In [4]:
read_all_judgments_from_2018()

100%|██████████| 9/9 [00:00<00:00, 250.78it/s]
 47%|████▋     | 47/100 [00:00<00:00, 445.63it/s]

Processing file../data_filtered_2/judgments-3163.json
Processing file../data_filtered_2/judgments-3168.json


100%|██████████| 100/100 [00:00<00:00, 358.63it/s]
 28%|██▊       | 28/100 [00:00<00:00, 276.24it/s]

Processing file../data_filtered_2/judgments-3164.json


100%|██████████| 100/100 [00:00<00:00, 277.07it/s]
 40%|████      | 40/100 [00:00<00:00, 399.40it/s]

Processing file../data_filtered_2/judgments-3171.json


100%|██████████| 100/100 [00:00<00:00, 332.95it/s]
 24%|██▍       | 24/100 [00:00<00:00, 239.62it/s]

Processing file../data_filtered_2/judgments-3165.json


100%|██████████| 100/100 [00:00<00:00, 253.30it/s]
 30%|███       | 30/100 [00:00<00:00, 284.33it/s]

Processing file../data_filtered_2/judgments-3167.json


100%|██████████| 100/100 [00:00<00:00, 352.33it/s]
 31%|███       | 31/100 [00:00<00:00, 289.61it/s]

Processing file../data_filtered_2/judgments-3169.json


100%|██████████| 100/100 [00:00<00:00, 270.26it/s]
 42%|████▏     | 34/81 [00:00<00:00, 332.86it/s]

Processing file../data_filtered_2/judgments-3173.json


100%|██████████| 81/81 [00:00<00:00, 350.16it/s]
 34%|███▍      | 34/100 [00:00<00:00, 337.95it/s]

Processing file../data_filtered_2/judgments-3172.json


100%|██████████| 100/100 [00:00<00:00, 372.89it/s]
 36%|███▌      | 36/100 [00:00<00:00, 334.06it/s]

Processing file../data_filtered_2/judgments-3166.json


100%|██████████| 100/100 [00:00<00:00, 281.98it/s]
 31%|███       | 31/100 [00:00<00:00, 277.09it/s]

Processing file../data_filtered_2/judgments-3170.json


100%|██████████| 100/100 [00:00<00:00, 282.72it/s]


In [5]:
with open('judgment_succeeded.pickle', 'rb') as handle:
    judgment_succeeded = pickle.load(handle)

In [6]:
zipped = zip(judgments, signatures, judgment_succeeded)
zipped_filtered = [triple[:2] for triple in list(zipped) if triple[2]]

In [7]:
judgments, signatures = map(list, zip(*zipped_filtered))

In [8]:
signatures[0]

['VI Ca 651/17']

In [23]:
test_idx = random.sample(range(0, 782), int(0.25*781.0))
train_idx = list(range(0, 782))
train_idx = [el for el in train_idx if el not in test_idx]

In [24]:
judge_processor = JudgmentsProcessor.JudgmentsProcessor(judgments, signatures, 'filteredRank.csv')

In [25]:
X, Y, counter = judge_processor.process_judgments()

['na', 'do', 'nie', 'że', 'dnia', 'się', 'art', 'od', 'przez', 'sąd', 'roku', 'pracy', 'za', 'jest', 'oraz', 'zł', 'tym', 'to', 'sądu', 'co']


In [26]:
pprint.pprint (counter)

Counter({'sprawy z zakresu ubezpieczenia społecznego': 257,
         'sprawy cywilne': 214,
         'sprawy karne': 143,
         'sprawy gospodarcze': 107,
         'sprawy w zakresie prawa pracy': 72,
         'sprawy w zakresie prawa rodzinnego': 42,
         'sprawy o wykroczenia': 33,
         'sprawy w zakresie prawa konkurencji': 5})


In [27]:
print (len(X))
print (len(Y))

781
781


In [28]:
judgment_classifier = JudgmentsClassifier.JudgmentsClassifier(X, Y, train_idx, test_idx)
result_dict, clf, hyper_params = judgment_classifier.transform_and_train_classifier()

  y = column_or_1d(y, warn=True)
  .format(len(labels), len(target_names))


In [29]:
print (hyper_params)
print(result_dict['accuracy_score'])
print(result_dict['classification_report'])
print(result_dict['micro_report'])
print(result_dict['macro_report'])

{'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}
0.912371134021
                                            precision    recall  f1-score   support

                            sprawy cywilne       0.79      0.91      0.85        45
sprawy z zakresu ubezpieczenia społecznego       1.00      1.00      1.00        64
                              sprawy karne       1.00      0.94      0.97        31
                        sprawy gospodarcze       0.85      0.72      0.78        32
             sprawy w zakresie prawa pracy       0.89      0.80      0.84        10
        sprawy w zakresie prawa rodzinnego       1.00      1.00      1.00         8
                      sprawy o wykroczenia       0.80      1.00      0.89         4

                               avg / total       0.92      0.91      0.91       194

(0.91237113402061853, 0.91237113402061853, 0.91237113402061853, None)
(0.90417175417175411, 0.90933499743983615, 0.90324038005783869, None)


In [30]:
import pickle
with open('judgments.pickle', 'rb') as handle:
    judgments = pickle.load(handle)
with open('signatures.pickle', 'rb') as handle:
    signatures = pickle.load(handle)
    
with open('judgment_succeeded.pickle', 'rb') as handle:
    judgment_succeeded = pickle.load(handle)
    


In [31]:
zipped = zip(judgments, signatures, judgment_succeeded)
zipped_filtered = [triple[:2] for triple in list(zipped) if triple[2]]
judgments, signatures = map(list, zip(*zipped_filtered))

In [32]:
judge_processor = JudgmentsProcessor.JudgmentsProcessor(judgments, signatures, 'taggedRank.csv')

In [33]:
X, Y, counter = judge_processor.process_judgments()

['w', 'z', 'na', 'do', 'rok', 'być', 'dzień', 'on', 'sąd', 'praca', '1', 'ten', 'od', 'przez', 'który', 'o', 'art', 'k', 'sprawa', 'złoty']


In [34]:
print (len(X))

781


In [None]:
judgment_classifier = JudgmentsClassifier.JudgmentsClassifier(X, Y, train_idx, test_idx)
result_dict, clf, hyper_params = judgment_classifier.transform_and_train_classifier()

In [22]:
print (hyper_params)
print(result_dict['accuracy_score'])
print(result_dict['classification_report'])
print(result_dict['micro_report'])
print(result_dict['macro_report'])

{'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}
0.907692307692
                                            precision    recall  f1-score   support

                            sprawy cywilne       0.87      0.87      0.87        55
sprawy z zakresu ubezpieczenia społecznego       1.00      0.98      0.99        54
                              sprawy karne       0.97      0.94      0.95        32
                        sprawy gospodarcze       0.73      0.95      0.83        20
             sprawy w zakresie prawa pracy       0.92      0.67      0.77        18
        sprawy w zakresie prawa rodzinnego       1.00      1.00      1.00        11
                      sprawy o wykroczenia       0.67      0.80      0.73         5

                               avg / total       0.92      0.91      0.91       195

(0.90769230769230769, 0.90769230769230769, 0.90769230769230758, None)
(0.8801402898177092, 0.88691077441077437, 0.8776165232710379, None)
