In [61]:
import tqdm, json, os, re, operator
import string, pprint, itertools, seaborn, collections, numpy as np, csv, requests, nltk
import JudgmentsProcessor
import JudgmentsClassifier

In [62]:
def process_judgment(judgment, url="http://localhost:9200", timeout=None):
    """Send a tokenised judgment to an HTTP service and parse its text reply.

    The words are joined with commas, UTF-8 encoded and POSTed to ``url``.
    What the service does is not visible from this notebook (presumably a
    tagger/lemmatiser - TODO confirm against the service that listens there).

    Parameters
    ----------
    judgment : iterable of str
        Tokens of one judgment.
    url : str, optional
        Endpoint to POST to; defaults to the previously hard-coded address.
    timeout : float or tuple or None, optional
        Passed straight to ``requests.post``. ``None`` (the default) keeps the
        previous behaviour of waiting indefinitely.

    Returns
    -------
    list of str
        One entry per response line that contains a ``:``: the text before the
        first ``:`` and before the first space, after tab->space conversion,
        removal of the substring ``none`` and dropping the first character.

    Raises
    ------
    requests.HTTPError
        If the service answers with a 4xx/5xx status. Previously an error page
        would have been parsed as if it were a valid reply.
    """
    payload = ",".join(judgment).encode("utf-8")
    response = requests.post(url=url, data=payload, timeout=timeout)
    # Fail loudly instead of turning an error body into bogus tokens.
    response.raise_for_status()

    tokens = []
    for line in response.text.splitlines():
        if ":" not in line:
            continue
        # NOTE(review): "none" is removed anywhere in the line, including inside
        # a real word - confirm this matches the service's output format.
        cleaned = line.replace("\t", " ").replace("none", "")[1:]
        # Keep the text before the first ':' and, within it, before the first
        # space (equivalent to the former join/replace/split round trip).
        tokens.append(cleaned.split(":")[0].split(" ")[0])
    return tokens

# Module-level accumulators appended to by read_file(): one token list and one
# list of case numbers per accepted judgment, plus a success flag that is only
# recorded when read_file() runs with is_transforming=True.
judgments, signatures, judgment_succeeded = [], [], []

def read_file(file_path, is_transforming=False):
    """Load one JSON file of judgments and append its contents to the globals.

    Only items whose ``courtType`` is ``'COMMON'`` or ``'SUPREME'`` are used.
    For each such item:

    * the ``caseNumber`` of every entry in ``courtCases`` is collected and the
      resulting list is appended to the module-level ``signatures``;
    * ``textContent`` is stripped of ``<...>`` tags, line-end hyphenation
      (``'-\\n'``) is removed, the text is split on whitespace, punctuation is
      deleted from each word, words are lower-cased and empty ones dropped;
    * the token list (or, when ``is_transforming`` is true, the result of
      ``process_judgment`` on it) is appended to the module-level ``judgments``.

    ``judgment_succeeded`` is appended to only when ``is_transforming`` is
    true, so after a non-transforming run it is NOT aligned with ``judgments``.

    Parameters
    ----------
    file_path : str
        Path of the JSON file; its top level must be an iterable of items.
    is_transforming : bool, optional
        Whether to pass each token list through ``process_judgment``.
    """
    print ("Processing file" + file_path)
    # JSON is UTF-8 by specification and the text is Polish; do not depend on
    # the platform's default encoding.
    with open(file_path, encoding="utf-8") as file:
        json_content = json.load(file)

    # Loop-invariant: build the punctuation-stripping table once per file.
    topicSpecificPunctuation = '„”–§…«»'
    translator = str.maketrans('', '', string.punctuation + topicSpecificPunctuation)

    for item in tqdm.tqdm(json_content):
        if item['courtType'] not in ['COMMON', 'SUPREME']:
            continue

        signatures.append([courtCase['caseNumber'] for courtCase in item['courtCases']])

        # '.' does not match a newline here, so a tag spanning lines is kept.
        text_content = re.sub("<.*?>", "", item["textContent"])
        # Re-join words that were hyphenated across a line break.
        text_content = text_content.replace('-\n', '')

        cleaned_words = (word.translate(translator).lower() for word in text_content.split())
        judgment = [word for word in cleaned_words if len(word) > 0]

        if not is_transforming:
            judgments.append(judgment)
        else:
            judgments.append(process_judgment(judgment))
            # NOTE(review): this tests the raw token list, not the processed
            # result - confirm that is the intended meaning of "succeeded".
            judgment_succeeded.append(len(judgment) > 0)

In [63]:
def read_all_judgments_from_2018(is_transforming=False, data_dir="../data_filtered_2/"):
    """Run ``read_file`` on every regular file found directly in ``data_dir``.

    Parameters
    ----------
    is_transforming : bool, optional
        Forwarded unchanged to ``read_file``.
    data_dir : str, optional
        Directory to scan; defaults to the previously hard-coded location.

    Files are visited in ``os.listdir`` order, which is arbitrary. The order
    is deliberately left unsorted because pickled results produced by earlier
    runs are matched to judgments by position.
    """
    for filename in os.listdir(data_dir):
        file_path = os.path.join(data_dir, filename)
        # Skip sub-directories (e.g. .ipynb_checkpoints) that open() cannot read.
        if not os.path.isfile(file_path):
            continue
        read_file(file_path, is_transforming)

In [64]:
# Load every file with is_transforming left at its default (False): raw token
# lists go into `judgments`, case numbers into `signatures`, and
# `judgment_succeeded` is not appended to.
read_all_judgments_from_2018()

100%|██████████| 9/9 [00:00<00:00, 111.15it/s]
  0%|          | 0/100 [00:00<?, ?it/s]

Processing file../data_filtered_2/judgments-3163.json
Processing file../data_filtered_2/judgments-3168.json


100%|██████████| 100/100 [00:00<00:00, 241.02it/s]
 22%|██▏       | 22/100 [00:00<00:00, 209.63it/s]

Processing file../data_filtered_2/judgments-3164.json


100%|██████████| 100/100 [00:00<00:00, 190.61it/s]
 19%|█▉        | 19/100 [00:00<00:00, 170.38it/s]

Processing file../data_filtered_2/judgments-3171.json


100%|██████████| 100/100 [00:00<00:00, 242.83it/s]
 13%|█▎        | 13/100 [00:00<00:00, 128.80it/s]

Processing file../data_filtered_2/judgments-3165.json


100%|██████████| 100/100 [00:00<00:00, 195.59it/s]
 13%|█▎        | 13/100 [00:00<00:00, 126.97it/s]

Processing file../data_filtered_2/judgments-3167.json


100%|██████████| 100/100 [00:00<00:00, 244.07it/s]
 28%|██▊       | 28/100 [00:00<00:00, 279.38it/s]

Processing file../data_filtered_2/judgments-3169.json


100%|██████████| 100/100 [00:00<00:00, 249.90it/s]
 26%|██▌       | 21/81 [00:00<00:00, 201.36it/s]

Processing file../data_filtered_2/judgments-3173.json


100%|██████████| 81/81 [00:00<00:00, 139.22it/s]
 32%|███▏      | 32/100 [00:00<00:00, 302.34it/s]

Processing file../data_filtered_2/judgments-3172.json


100%|██████████| 100/100 [00:00<00:00, 299.25it/s]
 25%|██▌       | 25/100 [00:00<00:00, 244.68it/s]

Processing file../data_filtered_2/judgments-3166.json


100%|██████████| 100/100 [00:00<00:00, 239.95it/s]
 27%|██▋       | 27/100 [00:00<00:00, 267.05it/s]

Processing file../data_filtered_2/judgments-3170.json


100%|██████████| 100/100 [00:00<00:00, 233.90it/s]


In [65]:
# Within this notebook `pickle` is otherwise imported only by the cell marked
# `In [36]` further down, so on a fresh kernel this cell would raise NameError.
import pickle

# The non-transforming read above does not fill `judgment_succeeded`, so the
# flags are taken from a previously saved pickle instead.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open('judgment_succeeded.pickle', 'rb') as handle:
    judgment_succeeded = pickle.load(handle)

In [66]:
# Pair every judgment with its signatures and its success flag, then keep the
# (judgment, signatures) pairs whose flag is truthy. zip() stops at the
# shortest of the three inputs.
zipped = zip(judgments, signatures, judgment_succeeded)
zipped_filtered = [
    (judgment, signature_list)
    for judgment, signature_list, succeeded in zipped
    if succeeded
]

In [67]:
# Split the kept pairs back into two parallel lists. This rebinds `judgments`
# and `signatures`, so the cell is not idempotent, and it raises ValueError
# when `zipped_filtered` is empty.
judgments, signatures = (list(column) for column in zip(*zipped_filtered))

In [68]:
# Sanity check: show the case numbers attached to the first retained judgment.
signatures[0]

['VI Ca 651/17']

In [69]:
# Build the project-local processor from the filtered judgments/signatures and
# the 'filteredRank.csv' file name.
# NOTE(review): `lists` is not defined in any visible cell of this notebook, so
# this line depends on hidden kernel state - define it explicitly before use.
judge_processor = JudgmentsProcessor.JudgmentsProcessor(judgments, signatures, 'filteredRank.csv', lists)

In [70]:
# process_judgments() returns three values, unpacked here as X, Y and counter.
# The recorded output shows a list of 20 tokens printed during the call
# (presumably by the processor itself - confirm in JudgmentsProcessor).
X, Y, counter = judge_processor.process_judgments()

['na', 'do', 'nie', 'że', 'dnia', 'się', 'art', 'od', 'przez', 'sąd', 'roku', 'pracy', 'za', 'jest', 'oraz', 'zł', 'tym', 'to', 'sądu', 'co']


In [71]:
# Pretty-print `counter`; the recorded output is a Counter keyed by case
# category label.
pprint.pprint (counter)

Counter({'sprawy z zakresu ubezpieczenia społecznego': 257,
         'sprawy cywilne': 214,
         'sprawy karne': 143,
         'sprawy gospodarcze': 107,
         'sprawy w zakresie prawa pracy': 72,
         'sprawy w zakresie prawa rodzinnego': 42,
         'sprawy o wykroczenia': 33,
         'sprawy w zakresie prawa konkurencji': 5})


In [None]:
# Print the sizes of X and Y on separate lines.
print(len(X), len(Y), sep="\n")

777
777


In [None]:
# Hand X and Y to the project-local classifier wrapper and train it.
# transform_and_train_classifier() returns three values: a dict of results
# (indexed by the print cell below), an object bound to `clf`, and the
# hyper-parameters that are printed below.
judgment_classifier = JudgmentsClassifier.JudgmentsClassifier(X, Y)
result_dict, clf, hyper_params = judgment_classifier.transform_and_train_classifier()

In [None]:
# Show the hyper-parameters, then each stored evaluation result in turn.
print(hyper_params)
for report_key in ('accuracy_score', 'classification_report', 'micro_report', 'macro_report'):
    print(result_dict[report_key])

In [36]:
import pickle


def _unpickle(path):
    """Return the object stored in the pickle file at ``path``.

    NOTE: pickle.load can execute arbitrary code - only load trusted files.
    """
    with open(path, 'rb') as stream:
        return pickle.load(stream)


# Restore the three parallel lists saved by an earlier run.
judgments = _unpickle('judgments.pickle')
signatures = _unpickle('signatures.pickle')
judgment_succeeded = _unpickle('judgment_succeeded.pickle')
    


In [56]:
# Keep only the (judgment, signatures) pairs whose success flag is truthy, then
# split them back into two parallel lists. `judgments` and `signatures` are
# rebound, so re-running this cell filters the already-filtered data again.
zipped = zip(judgments, signatures, judgment_succeeded)
zipped_filtered = [
    (judgment, signature_list)
    for judgment, signature_list, succeeded in zipped
    if succeeded
]
judgments, signatures = (list(column) for column in zip(*zipped_filtered))

In [57]:
# Same construction as for the raw tokens, but with the 'taggedRank.csv' file
# name.
# NOTE(review): `lists` is not defined in any visible cell of this notebook, so
# this line depends on hidden kernel state - define it explicitly before use.
judge_processor = JudgmentsProcessor.JudgmentsProcessor(judgments, signatures, 'taggedRank.csv', lists)

In [58]:
# process_judgments() returns three values, unpacked here as X, Y and counter.
# The recorded output shows a list of 20 tokens printed during the call
# (presumably by the processor itself - confirm in JudgmentsProcessor).
X, Y, counter = judge_processor.process_judgments()

['w', 'z', 'na', 'do', 'rok', 'być', 'dzień', 'on', 'sąd', 'praca', '1', 'ten', 'od', 'przez', 'który', 'o', 'art', 'k', 'sprawa', 'złoty']


In [59]:
# Hand X and Y to the project-local classifier wrapper and train it.
# transform_and_train_classifier() returns three values: a dict of results
# (indexed by the print cell below), an object bound to `clf`, and the
# hyper-parameters that are printed below.
judgment_classifier = JudgmentsClassifier.JudgmentsClassifier(X, Y)
result_dict, clf, hyper_params = judgment_classifier.transform_and_train_classifier()

  y = column_or_1d(y, warn=True)
  .format(len(labels), len(target_names))


In [60]:
# Show the hyper-parameters, then each stored evaluation result in turn.
print(hyper_params)
for report_key in ('accuracy_score', 'classification_report', 'micro_report', 'macro_report'):
    print(result_dict[report_key])

{'C': 2000, 'gamma': 0.001, 'kernel': 'rbf'}
0.920454545455
                                            precision    recall  f1-score   support

                            sprawy cywilne       0.84      0.93      0.88        41
sprawy z zakresu ubezpieczenia społecznego       0.98      1.00      0.99        51
                              sprawy karne       1.00      0.93      0.96        40
                        sprawy gospodarcze       0.87      0.81      0.84        16
             sprawy w zakresie prawa pracy       0.91      0.71      0.80        14
        sprawy w zakresie prawa rodzinnego       1.00      1.00      1.00         6
                      sprawy o wykroczenia       0.70      0.88      0.78         8

                               avg / total       0.93      0.92      0.92       176

(0.92045454545454541, 0.92045454545454541, 0.92045454545454553, None)
(0.90013875013875011, 0.89337356893977105, 0.89307694408636773, None)
