In [275]:
from elasticsearch_dsl import connections, Index, analyzer, DocType, Date, Text, Keyword, Search, Nested, InnerDoc
from elasticsearch_dsl.analysis import TokenFilter
import json
import pprint
import os

In [276]:
# in case of problsems with "only allow delete api" then:
# curl -XPUT -H "Content-Type: application/json" http://localhost:9200/_all/_settings -d '{"index.blocks.read_only_allow_delete": null}'

Defines a default connection that will be used globally

In [277]:
connections.create_connection(hosts=['localhost'], timeout=20) 

<Elasticsearch([{'host': 'localhost'}])>

Defines an index

In [278]:
judgments = Index(name = 'judgments')

In [279]:
# TODO: resolve what should be datatype for judges
class Judge(InnerDoc):
    function = Text()
    name = Text()
    specialRoles = Text()


class Judgments(DocType):
    content = Text()
    date = Date()
    title = Keyword()
    judges = Nested(Judge)
    
    class Meta:
        index = 'title'
    
    def add_judge(self, function, name, specialRoles):
        self.judges.append(
            Judge(function=function, name=name, specialRoles=specialRoles))
    

In [280]:
judgments.doc_type(Judgments)

__main__.Judgments

In [281]:
default_analyzer = analyzer('default_analyzer',
                           tokenizer='standard')

In [282]:
morfologik_analyzer = analyzer('morfologik', tokenizer='standard', filter='morfologik_stem')

In [283]:
judgments.analyzer(default_analyzer)
# judgments.analyzer(morfologik_analyzer)

In [284]:
judgments.delete(ignore=404)
judgments.create()

In [285]:
def read_file(file_path):
    with open (file_path) as file:
        json_content = json.load(file)
        for judgment in json_content:
            content = judgment['textContent']
            date = judgment['judgmentDate']
            title = judgment['courtCases'][0]['caseNumber']
            judges  = judgment['judges']
            # pprint.pprint(judges)
            maping = Judgments(content=content, date=date, title=title)
            for judge in judges:
                maping.add_judge(function=judge['function'], 
                             name=judge['name'], 
                             specialRoles=judge['specialRoles'])
                maping.save()

In [286]:
def read_all_judgments_from_2018():
    for filename in os.listdir("../data_filtered/"):
        read_file("../data_filtered/" + filename)

In [287]:
read_all_judgments_from_2018()

In [288]:
query = Search().query("match", content="szkoda")
response = query.execute(ignore_cache=True)
print (query.to_dict())
print (f"Liczba orzeczen w ktorych wystepuje slowo szkoda wynosi: {query.count()}")

{'query': {'match': {'content': 'szkoda'}}}
Liczba orzeczen w ktorych wystepuje slowo szkoda wynosi: 740


In [289]:
# https://github.com/elastic/elasticsearch-dsl-py/issues/294
# Znajdź liczbę orzeczeń, w których występuje fraza trwały uszczerbek na zdrowiu, 
# dokładnie w tej kolejności ale w dowolnej formie fleksyjnej.
judgments.close()
judgments.analyzer(morfologik_analyzer)
judgments.open()

{'acknowledged': True, 'shards_acknowledged': True}

In [296]:
query = Search().query("match", content="trwały uszczerbek na zdrowiu").execute(ignore_cache=True)
print (query.to_dict())
print (f"Liczba orzeczen w ktorych wystepuje trwaly uczerbek na zdrowiu i jego odmiany fleksyjne wynosi: {query.count()}")

{'took': 3, 'timed_out': False, '_shards': {'total': 15, 'successful': 15, 'skipped': 0, 'failed': 0}, 'hits': {'total': 9352, 'max_score': 16.27461, 'hits': [{'_index': 'title', '_type': 'doc', '_id': 'fTiWCWIBgI6iz7Sf8MzO', '_score': 16.27461, '_source': {'content': '<p>\n<strong>\n<!-- --> Sygn. akt: I C 574/16 </strong>\n</p>\n    <div>\n      <h2>WYROK</h2>\n      <h5>W IMIENIU RZECZYPOSPOLITEJ POLSKIEJ</h5>\n      <p>      Dnia 18 stycznia 2018 r.</p>\n      <p>\n<strong>\n<!-- -->Sąd Rejonowy w Giżycku I Wydział Cywilny</strong>\n</p>\n      <p>w składzie następującym:</p>\n      <table>\n        <colgroup>\n          <col width="199"/>\n          <col width="512"/>\n        </colgroup>\n        <tr>\n          <td>\n            <p>\n              <strong>\n<!-- -->Przewodniczący:</strong>\n</p>\n          </td>\n          <td>\n            <p>SSR Anna Kurzynowska - Drzażdżewska</p>\n          </td>\n        </tr>\n        <tr>\n          <td>\n            <p>\n              <st

AttributeError: 'Response' object has no attribute 'count'