In [None]:
import pandas as pd
import os

# Create dataset

In [None]:
dataPath = os.getcwd() + '/court case data/testdata/'
caseCount = len(os.listdir(dataPath))
data = []
try:
    os.remove(dataPath + ".DS_Store")
except:
    print("No file DS_Store")
for filename in os.listdir(dataPath):
    f = open(os.path.join(dataPath, filename), encoding='utf-8')
    data.append([filename.replace('.txt', ''), f.read()])

verdict_df = pd.DataFrame(data, columns=["id", "case text"])
cases_df = pd.read_csv('./court case data/testdata.csv')
merged_df = cases_df.join(verdict_df.set_index('id'), on='id', how='left')

merged_df["verdict_date"] = pd.to_datetime(merged_df["verdict_date"])
merged_df["publication_date"] = pd.to_datetime(merged_df["publication_date"])

In [None]:
merged_df

# Split documents

### Old

In [None]:
# -*- coding: utf-8 -*-
import re
alphabets= "([A-Za-z])"
prefixes = "(Mr|St|Mrs|Ms|Dr|mr|mevr|mvr)[.]"
suffixes = "(Inc|Ltd|Jr|Sr|Co)"
starters = "(Mr|Mrs|Ms|Dr|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
acronyms = "([A-Z][.][A-Z][.](?:[A-Z][.])?)"
websites = "[.](com|net|org|io|gov|nl)"
articles = "[artikel ][0-9][.][0-9]"

def split_into_sentences2(text):
    text = " " + text + "  "
    text = text.replace("\n"," ")
    text = re.sub(prefixes,"\\1<prd>",text)
    text = re.sub(websites,"<prd>\\1",text)
    text = re.sub(articles,"[artikelnummer]",text)
    if "Ph.D" in text: text = text.replace("Ph.D.","Ph<prd>D<prd>")
    text = re.sub("\s" + alphabets + "[.] "," \\1<prd> ",text)
    text = re.sub(acronyms+" "+starters,"\\1<stop> \\2",text)
    text = re.sub(alphabets + "[.]" + alphabets + "[.]" + alphabets + "[.]","\\1<prd>\\2<prd>\\3<prd>",text)
    text = re.sub(alphabets + "[.]" + alphabets + "[.]","\\1<prd>\\2<prd>",text)
    text = re.sub(" "+suffixes+"[.] "+starters," \\1<stop> \\2",text)
    text = re.sub(" "+suffixes+"[.]"," \\1<prd>",text)
    text = re.sub(" " + alphabets + "[.]"," \\1<prd>",text)
    if "”" in text: text = text.replace(".”","”.")
    if "\"" in text: text = text.replace(".\"","\".")
    if "!" in text: text = text.replace("!\"","\"!")
    if "?" in text: text = text.replace("?\"","\"?")
    text = text.replace(".",".<stop>")
    text = text.replace("?","?<stop>")
    text = text.replace("!","!<stop>")
    text = text.replace("<prd>",".")
    sentences = text.split("<stop>")
    sentences = sentences[:-1]
    sentences = [s.strip() for s in sentences]
    return sentences

### New

In [None]:
def split_into_sentences(text):
    sentences = re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])|\n', text)
    sentences = [x for x in sentences if len(x) > 1]
    return sentences


Two ways of splitting the documents:
    For Word2Vec, we need sentences to be an array of words.
    For the rest, just the sentence is enough.

In [None]:
sentence_list_by_word = []
sentence_list = []

for i in range(len(merged_df)):
    doc = merged_df.iloc[i]['case text']
    sentences = split_into_sentences(doc)
    sentence_list.append(sentences)
    for j in sentences:
        word_list = [x for x in j.lower().rstrip().replace('.', '').split(' ') if len(x)>0]
        sentence_list_by_word.append(word_list)
        
print(len(sentence_list))
print(len(sentence_list_by_word))
# print(sentence_list[8])

Testers:

In [None]:
for i in sentence_list[:1]:
    for j in i:
        print(j, '\n')

In [None]:
for i in sentence_list_by_word[:1]:
    for j in i:
        print(j, '\n')

# Create Word2Vec model

In [None]:
from gensim.models import Word2Vec
from gensim.test.utils import common_texts

Create and save model

In [None]:
dutch_word2vec_model = Word2Vec(sentences=sentence_list_by_word, vector_size=100, window=5, min_count=1, workers=4)
dutch_word2vec_model.save("word2vec_dutch_court_cases.model")

Load model

In [None]:
dutch_word2vec_model = Word2Vec.load("word2vec_dutch_court_cases.model")

In [674]:
sims = dutch_word2vec_model.wv.most_similar('vanuit', topn=100)
print([i[0] for i in sims])
# print(sims)

['uit', 'binnengebracht', 'binnen', 'via', 'uitreed', 'in', 'terugstromen', 'uitgereisd', 'langs', 'drijven', 'ingereisd', 'buitenwijken', 'metershoge', 'binnengingen', 'airport', 'vandoor', 'naar', 'rond', 'costarica', 'afhalersgedeelte', 'verlaat', '4–methylamfetamine', 'daime', 'verliet', 'regelmatig', 'wedel', 'binnenkomt', 'naartoe', 'somatisch-medisch', 'aires', 'buenos', 'binnengaat', 'uitlopen', 'kasteelstraat', 'cana', '14-12-04\treis', 'quaciel?', 'verscheept', 'bergafwaarts', '(vanuit', 'hotelaccommodatie(s)', 'gereisd', 'uitgereden', 'havenstraat’', 'aangekomen', 'ontvlucht', "'oefenladingen'", 'gezien315', 'naast', 'istinye-nederland', 'dragging', 'aanvaard(37)', 'en/ofsuriname', '(niad)', 'plaza', 'schokkerweg', 'cocaïnewasserijen', 'getransporteerd', '(azm)', 'binnengereden', 'betreedt', 'rijksgrens', 'rica', 'bentheim', 'ingelopen', 'gevlogen', 'zeeschip', 'binnenging', 'minderjarige]’s', 'ok208', 'hato', 'kluis/zen', 'betreden3', 'uitgezet', 'verlieten', 'paulo', 'drev

### Create list of drugs, smuggle, quantity keywords with Word2Vec model

Drugs list:

In [237]:
list_of_drugs = ['xtc', 'mdma', 'cocaine', 'wiet', 'speed', 'bmk', 'pmk']
word2vec_drug_list = []
for drug in list_of_drugs:
    results = dutch_word2vec_model.wv.most_similar(drug, topn=100)
    for i in results:
        word2vec_drug_list.append(i[0])

word2vec_drug_list = list(set([i for i in word2vec_drug_list if word2vec_drug_list.count(i)>2]))
print(len(word2vec_drug_list))
print(word2vec_drug_list)

80
['(olie)', 'ghb', 'pillen', 'paracetamol', 'speed', 'ecstasy', '(mdma)', 'meth', 'ketamine', 'mdma/mda', 'amfetamineolie', 'manitol', 'mdma-poeder', 'hashish', 'eindproduct', 'cocaïne', 'pillen)', 'amfetamine', 'lsd', 'xtc-tabletten', 'lactose', 'amfetaminesulfaat', 'hci', 'methadon', 'azijnzuuranhydride', 'hennep', 'kunstmest', 'cocaine', 'gbl', 'lidocaïne', 'methamfetamine', 'xtc', 'amfetaminebase', 'crystal', 'mdma)', '(met)amfetamine', 'crack', 'heroïne)', 'xtc-pillen', 'hash', 'xtc/mdma', 'weed', 'opium', 'morfine', 'amfetaminepasta', '2cb', 'mefedron', 'oxazepam', 'hasjiesj', 'amfetamine)', 'platina', 'n-formylamfetamine', 'pep', 'amfetaminen', 'mdma-kristallen', 'fenacetine', 'hasj', '34-mdma', '2-cb', 'levamisol', 'harddrugs', '2c-b', 'heroïne', 'marihuana', 'temazepam', 'speed/amfetamine', 'metamfetamine', 'coke', 'poedervorm', 'd-metamfetamine', 'diazepam', '34-methyleendioxymethamfetamine', 'cafeïne', 'hennep)', 'kristallen', 'mdma', 'mapa', 'mdma-hcl', 'cannabis', 'inosi

Smuggle keyword list:

In [238]:
list_of_smuggle_words = ['smokkel', 'invoer', 'uitvoer', 'import', 'export', 'transport']
word2vec_smuggle_list = []
for word in list_of_smuggle_words:
    results = dutch_word2vec_model.wv.most_similar(word, topn=100)
    for i in results:
        word2vec_smuggle_list.append(i[0])

word2vec_smuggle_list = list(set([i for i in word2vec_smuggle_list if word2vec_smuggle_list.count(i)>3]))
print(len(word2vec_smuggle_list))
print(word2vec_smuggle_list)
word2vec_smuggle_list = word2vec_smuggle_list + list_of_smuggle_words 

39
['uithalen', 'importeren', 'productie', 'drugssmokkel', 'doorverkoop', 'aanvoer', 'vervaardiging', 'bronland', 'hennepteelt', 'import', 'cocaïnehandel', 'gesmokkelde', 'bronlanden', 'terugwinnen', 'produktie', 'verkoop', 'kweek', 'straatverkoop', 'verscheping', 'transsport', 'smokkel', 'versnijding', 'exporteren', 'invoer', 'doorvoer', 'levering', 'opzetten', 'invoeren', 'export', 'fabricage', '(invoer', 'transporteren', 'proefmonster', 'leverantie', 'hasjhandel', 'handel', 'leveranciers', 'productie/verwerking', 'bewerking']


Quantity keyword list:

In [239]:
list_of_quantity_words = ['tabletten', 'kilo', 'gram', 'pakketten']
word2vec_quantity_list = []
for word in list_of_quantity_words:
    results = dutch_word2vec_model.wv.most_similar(word, topn=100)
    for i in results:
        word2vec_quantity_list.append(i[0])

word2vec_quantity_list = list(set([i for i in word2vec_quantity_list if word2vec_quantity_list.count(i)>1]))
print(len(word2vec_quantity_list))
print(word2vec_quantity_list)


79
['centiliter', '1993/330', 'mg', 'planten', 'pillen', 'zakjes', 'potten', 'ton', 'mdma/mda', 'gripzakken', 'planten)', 'sealbags', 'brokken', 'gr', 'liter;', 'promille', 'pillen)', 'wikkels', 'potjes', 'm³', 'm2', 'joints', 'mg)', 'watt', 'mg/l', 'gram)', 'sealtjes', 'plakken', 'mdma-pillen', 'km', 'kilo;', 'kilo)', 'flacons', 'drums', 'bolletjes)', 'planten”', 'milliliter', 'bolletjes', 'liter)', 'cm)', 'kilo', 'xtc-pillen', '(verpakt', 'seals', 'kilogram)', 'hennepplanten;', 'gram”', 'meter)', 'gram;', 'kg', 'gripzakjes', 'kg;', 'blokjes', 'flessen', 'vaatjes', 'kg)', 'emmers', 'brokjes', 'stuks', 'tabletten/pillen', 'tabletten', 'bollen', 'liter', 'ponypacks', 'gram);', 'ml', 'db(a)', 'gram:', 'bakjes', '(xtc-)pillen', 'pakketten', 'kilogram;', '(totaalgewicht', 'blokken', 'mg/ml', 'kilogram', 'slikkersbollen', 'pillen/tabletten', 'm²']


Country list:

In [240]:
list_of_countries = ['duitsland', 'colombia']
word2vec_country_list = []
for word in list_of_countries:
    results = dutch_word2vec_model.wv.most_similar(word, topn=100)
    for i in results:
        word2vec_country_list.append(i[0])

word2vec_country_list = list(set([i for i in word2vec_country_list if word2vec_country_list.count(i)>1]))
print(len(word2vec_country_list))
print(word2vec_country_list)

81
['cyprus', 'madrid', 'parijs', 'afrika', 'bogota', 'sydney', 'syrië', 'berlijn', 'amsterdam-noord', 'venezuela', 'europa', 'ghana', 'marokko', 'servië', 'slowakije', 'rica', 'noorwegen', 'dubai', 'zwitserland', 'trinidad', 'brazilië', 'engeland', 'nigeria', 'congo', 'mexico', 'santa', 'groot-brittannië', 'malaga', 'oekraïne', 'lissabon', 'portugal', 'turkije', 'amerika', 'kroatië', 'lima', 'oostenrijk', 'griekenland', 'natal', 'costa', 'bulgarije', 'iran', 'suriname', 'tanger', 'peru', 'australië', 'paramaribo', 'zuid-afrika', 'spanje', 'guayaquil', 'zweden', 'denemarken', 'finland', 'jamaica', 'guatemala', 'thailand', 'zuid-amerika', 'polen', 'antwerpen', 'rusland', 'caracas', 'argentinië', 'ecuador', 'panama', 'buitenland', 'ierland', 'barcelona', 'albanië', 'pakistan', 'frankrijk', 'canada', 'italië', 'luxemburg', 'roemenië', 'curaçao', 'bosnië', 'hamburg', 'tsjechië', 'belgië', 'hongarije', 'dominicaanse', 'kenia']


# Create SpaCy model

In [241]:
import spacy
from spacy import displacy
# !python -m spacy download nl_core_news_md
nlp = spacy.load('nl_core_news_md')


In [242]:
my_file = open("drugs list.txt", "r", encoding='utf-8')
my_file = my_file.readlines()
drugs_list = []
for i in my_file:
    drugs_list.append(i.replace('\n', ''))

my_file = open("countries list.txt", "r", encoding='utf-8')
my_file = my_file.readlines()
countries_list = []
for i in my_file:
    countries_list.append(i.replace('\n', ''))
countries_list = countries_list + word2vec_country_list

In [243]:
complete_drugs_list = list(set(drugs_list + word2vec_drugs_list))

In [244]:
def configure_spacy_model():
    # Create dict of drug pattern and quantity pattern
    pattern_list = []
    
    drugs_ent_list = []
    for i in complete_drugs_list:
        pattern_list.append({"label": "DRUG", "pattern": [{"lower": i}]})
    
#     quantity_rule = {"label": "QUANTITY", "pattern": [{"IS_DIGIT": True}, {"LOWER": "gram"}]}
#     pattern_list.append(quantity_rule)
    for i in word2vec_quantity_list:
        pattern_list.append({"label": "QUANTITY", "pattern": [{"IS_DIGIT": True}, {"LOWER": i}]})
        pattern_list.append({"label": "QUANTITY", "pattern": [{"ENT_TYPE": "CARDINAL"}, {"LOWER": i}]})
    
    for i in countries_list:
        pattern_list.append({"label": "GPE", "pattern": [{"lower": i.replace(' ', '').lower()}]})
    
    # Add drug and quantity rules to the model
    config = {
   "phrase_matcher_attr": None,
   "validate": True,
   "overwrite_ents": True,
   "ent_id_sep": "||",
    }
    ruler = nlp.add_pipe("entity_ruler", config=config)

    #List of Entities and Patterns
#     patterns = drugs_ent_list
    ruler.add_patterns(pattern_list)

In [245]:
configure_spacy_model()

# Select cases and chunks to keep

In [246]:
print(len(sentence_list) == len(merged_df))

True


For every case, split the sentences. If a sentence in a case contains a drug, a smuggle word, and a location: keep chunk and save to trafficking_df

In [247]:
relevant_chunk_list = []
ecli_list = []

for index, case in enumerate(sentence_list):
    chunk_list = []
    trafficking_related = False
    for chunk in case:
        if any(drug in chunk for drug in word2vec_drug_list) and any(smuggle_word in chunk for smuggle_word in word2vec_smuggle_list):
            ents = nlp(chunk).ents
            if any(ent.label_ == "GPE" or ent.label_ == "LOC" for ent in ents):
                trafficking_related = True
                chunk_list.append(chunk)
    
    if trafficking_related:
        relevant_chunk_list.append(chunk_list)
        ecli_list.append(merged_df.iloc[index]['id'].replace('-', ':'))

trafficking_df = pd.DataFrame({'id': pd.Series(ecli_list), 'chunks': pd.Series(relevant_chunk_list)})

                

In [248]:
print(f"{len(trafficking_df)} cases kept from original {len(merged_df)} cases.")


3601 cases kept from original 18178 cases.


# Create rule-based NER & POS tagging model

### Old

In [249]:
def extract_chunk_info(txt):
    source_country = None
    total_info = []
    for token in nlp(txt):
        info = {}
        drug_info = {}
        countries = []
        
        if token.ent_type_ == "DRUG":
            info = {"drug": token.text}
            
            ## Get source and destination
            for ancestor in token.ancestors:
                for nephew in ancestor.children:
                    if nephew.ent_type_ == "GPE" or nephew.ent_type_ == "LOC":
                        countries.append(nephew)
                        for child in nephew.children:
                            if child.dep_ == "conj" and child.ent_type_ == "GPE" or child.ent_type_ == "LOC":
                                countries.append(child.text)
                            elif child.pos_ == "ADP" and child.dep_ == "case":
                                adj = child.text
            if len(countries) > 0 :
                try:
                    info[adj] = countries
                except:
                    info['land'] = countries
                        
            ## Get volume
            for ancestors in token.ancestors:
                for nephew in ancestors.children:
                    if nephew.ent_type_ == "QUANTITY" or nephew.ent_type_ == "CARDINAL":
                        for second_nephew in nephew.children:
                            if second_nephew.is_digit != nephew.is_digit:
                                if second_nephew.is_digit:
                                    info['volume'] = second_nephew.text
                                    info['volume_type'] = nephew.text
                                else:
                                    info['volume'] = nephew.text
                                    info['volume_type'] = second_nephew.text
            if 'volume' not in info:
                for child in token.children:
                    if (child.dep_ == "det" and child.like_num) or (child.dep_ == "nummod"):
                        info['volume'] = child.text
                                
        if len(info) > 1:
#             print(info)
            total_info.append(info)
    return total_info

### New

In [662]:
# Get linguistic distance between token a and token b. After iter 10 it is deemed a too far distance.
def get_linguistic_distance(a, b):
    tokens_to_consider = [b]
    found = False
    iters = 0
    while not found:
        for token in tokens_to_consider:
            tokens_to_add = []
            for ancestor in token.ancestors:
                if ancestor not in tokens_to_add and ancestor not in tokens_to_consider:
                    tokens_to_add.append(ancestor)
            for child in token.children:
                if child not in tokens_to_add and child not in tokens_to_consider:
                    tokens_to_add.append(child)
            tokens_to_consider = tokens_to_consider + tokens_to_add
        for x in tokens_to_consider:
            if a.orth == x.orth:
                found = True
        iters += 1
        if iters == 10:
            found = True
    return iters

In [663]:
def get_adposition_from_loc(token):
    for child in token.children:
        if child.pos_ == "ADP" and child.dep_ == "case":
            return child.text

In [693]:
def extract_chunk_info(txt):
    result = {}
    for token in nlp(txt):
        if token.ent_type_ == "DRUG":
            print(f"Extracting info for {token.text}.")
            result = extract_info_from_drug(token, txt)
            result[token.text] = result
    return result
            
            
def extract_info_from_drug(drug, txt):
    volumes = []
    locations = {}
    for token in nlp(txt):
        
        # Extract countries
        if token.ent_type_ == "GPE" or token.ent_type_ == "LOC":
            dist = get_linguistic_distance(drug, token)
            if dist < 10:
                adj = get_adposition_from_loc(token)
                print(f"    {adj}: {token.text}, dist: {dist}, conj: {token.conjuncts}")
                locs = [token.text]
                for loc in token.conjuncts:
                    locs.append(loc.text)
                locations[adj] = locs
            else:
                print(f"{token.text} is irrelevant.")
        
        # Extract volume
        if token.ent_type_ == "QUANTITY":
            volume = {}
            dist = get_linguistic_distance(drug, token)
            second_token = ""
            if dist < 10:
                quantity = {}
                for ancestor in token.ancestors:
                    if ancestor.ent_type_ == "QUANTITY":
                        second_token = ancestor
                for child in token.children:
                    if child.ent_type_ == "QUANTITY":
                        second_token = child
                
                ## Decide volume and volume_type
                if nlp(token.text)[0].ent_type_ == "CARDINAL":
                    volume['volume'] = token.text
                    volume['volume_type'] = second_token.text
                elif nlp(second_token.text)[0].ent_type_ == "CARDINAL":
                    volume['volume'] = second_token.text
                    volume['volume_type'] = token.text
                
                #Only append when not already in volumes
                if volume not in volumes:
                    volumes.append(volume)
            else:
                print(f"{token.text} is irrelevant.")
                
        
        
            
    print(volumes)

    result = {}
    if bool(locations):
        result['locations'] = locations
    if len(volumes) > 0:
        result["volume"] = volumes
    
    return result
    
        


# Results

In [694]:
from_case = 30
to_case = 40

result_list = []

for index in range(len(trafficking_df[:from_case+to_case])):
    if index >= from_case:
        chunks = trafficking_df.iloc[index]['chunks']
        for chunk in chunks:
            
            print(trafficking_df.iloc[index]['id'])
#             displacy.render(nlp(chunk), style="ent")
            result_list.append(extract_chunk_info(chunk))
            print('\n')


ECLI:NL:RBMAA:2010:BM8942
Extracting info for amfetamine.
    te: Landgraaf, dist: 4, conj: ()
    te: Landgraaf, dist: 4, conj: ()
[{}]


ECLI:NL:RBMAA:2010:BM8942
Extracting info for wiet.
    naar: Duitsland, dist: 4, conj: ()
[]
Extracting info for amfetamine.
    naar: Duitsland, dist: 5, conj: ()
[]


ECLI:NL:RBMAA:2010:BM8942
Extracting info for amfetamine.
    None: Bonn, dist: 6, conj: ()
[{'volume': 'vijftig', 'volume_type': 'kilo'}]


ECLI:NL:RBMAA:2010:BM8942


ECLI:NL:RBMAA:2010:BM8942
Extracting info for amfetamine.
    naar: Duitsland, dist: 2, conj: ()
[]


ECLI:NL:RBMAA:2010:BM8942
Extracting info for amfetamine.
    te: Landgraaf, dist: 3, conj: ()
    te: Heerlen, dist: 3, conj: ()
    te: Landgraaf, dist: 3, conj: ()
[]
Extracting info for amfetamine.
    te: Landgraaf, dist: 3, conj: ()
    te: Heerlen, dist: 3, conj: ()
    te: Landgraaf, dist: 3, conj: ()
[]


ECLI:NL:RBMAA:2010:BM8942
Extracting info for amfetamine.
    naar: Duitsland, dist: 3, conj: ()
[]
Extr

[{'volume': '60', 'volume_type': 'kilogram'}, {'volume': '80', 'volume_type': 'kilogram'}]
Extracting info for hasj.
    in: Engeland, dist: 2, conj: ()
[{'volume': '60', 'volume_type': 'kilogram'}, {'volume': '80', 'volume_type': 'kilogram'}]
Extracting info for hasj.
    in: Engeland, dist: 2, conj: ()
[{'volume': '60', 'volume_type': 'kilogram'}, {'volume': '80', 'volume_type': 'kilogram'}]
Extracting info for hasj.
    in: Engeland, dist: 2, conj: ()
[{'volume': '60', 'volume_type': 'kilogram'}, {'volume': '80', 'volume_type': 'kilogram'}]


ECLI:NL:RBUTR:2007:BC0812


ECLI:NL:RBUTR:2007:BC0812
Extracting info for hasj.
Engeland is irrelevant.
Engeland is irrelevant.
[]


ECLI:NL:PHR:2003:AK3618
Extracting info for cocaïne.
    te: Schiphol, dist: 2, conj: ()
    in: gemeente, dist: 2, conj: (Amsterdam,)
    None: Haarlemmermeer, dist: 2, conj: ()
    te: Amsterdam, dist: 2, conj: (gemeente,)
    van: Nederland, dist: 2, conj: ()
    vanuit: Zuid-Amerika, dist: 2, conj: ()
    van:

    vanuit: Zuid-Amerika, dist: 2, conj: ()
    van: Nederland, dist: 2, conj: ()
[{'volume': '116', 'volume_type': 'kilogram'}]
Extracting info for cocaïne.
    te: Schiphol, dist: 2, conj: ()
    in: gemeente, dist: 2, conj: (Amsterdam,)
    None: Haarlemmermeer, dist: 2, conj: ()
    te: Amsterdam, dist: 2, conj: (gemeente,)
    van: Nederland, dist: 2, conj: ()
    vanuit: Zuid-Amerika, dist: 2, conj: ()
    van: Nederland, dist: 2, conj: ()
[{'volume': '116', 'volume_type': 'kilogram'}]
Extracting info for cocaïne.
    te: Schiphol, dist: 2, conj: ()
    in: gemeente, dist: 2, conj: (Amsterdam,)
    None: Haarlemmermeer, dist: 2, conj: ()
    te: Amsterdam, dist: 2, conj: (gemeente,)
    van: Nederland, dist: 2, conj: ()
    vanuit: Zuid-Amerika, dist: 2, conj: ()
    van: Nederland, dist: 2, conj: ()
[{'volume': '116', 'volume_type': 'kilogram'}]
Extracting info for cocaïne.
    te: Schiphol, dist: 2, conj: ()
    in: gemeente, dist: 2, conj: (Amsterdam,)
    None: Haarlemmermeer

Extracting info for cocaïne.
    van: €, dist: 3, conj: ()
[]


ECLI:NL:RBAMS:2016:9038
Extracting info for methamfetamine.
    None: slammen, dist: 1, conj: (hebben,)
[]
Extracting info for methamfetamine.
    None: slammen, dist: 1, conj: (hebben,)
[]


ECLI:NL:GHAMS:2008:BC5072
Extracting info for cocaïne.
    in: Nederland, dist: 2, conj: ()
[{'volume': '24', 'volume_type': 'kilogram'}]


ECLI:NL:RBSGR:2004:AR2800
Extracting info for cocaïne.
    naar: Duitsland, dist: 2, conj: ()
[]


ECLI:NL:RBZWB:2019:2735
Extracting info for harddrugs.
    te: Tilburg, dist: 3, conj: (doorzoekingen,)
    te: Tilburg, dist: 3, conj: ()
[]
Extracting info for Crystal.
Tilburg is irrelevant.
Tilburg is irrelevant.
[]
Extracting info for Meth.
Tilburg is irrelevant.
Tilburg is irrelevant.
[]
Extracting info for XTC.
Tilburg is irrelevant.
Tilburg is irrelevant.
[]
Extracting info for pillen.
Tilburg is irrelevant.
Tilburg is irrelevant.
[]
Extracting info for MDMA.
Tilburg is irrelevant.
Tilburg is

In [689]:
for i in result_list:
    print(i)

{'locations': {'te': ['Landgraaf']}, 'volume': [{}], 'amfetamine': {...}}
{'locations': {'naar': ['Duitsland']}, 'amfetamine': {...}}
{'locations': {None: ['Bonn']}, 'volume': [{'volume': 'vijftig', 'volume_type': 'kilo'}], 'amfetamine': {...}}
{}
{'locations': {'naar': ['Duitsland']}, 'amfetamine': {...}}
{'locations': {'te': ['Landgraaf']}, 'amfetamine': {...}}
{'locations': {'naar': ['Duitsland']}, 'wiet': {...}}
{'locations': {'te': ['Landgraaf']}, 'amfetamine': {...}}
{'locations': {'te': ['Landgraaf']}, 'amfetamine': {...}}
{'locations': {'te': ['Landgraaf']}, 'volume': [{}], 'amfetamine': {...}}
{'locations': {'naar': ['Duitsland']}, 'amfetamine': {...}}
{'locations': {'te': ['Landgraaf']}, 'amfetamine': {...}}
{'locations': {'in': ['Duitsland']}, 'amfetamine': {...}}
{'volume': [{'volume': 'vijftig', 'volume_type': 'kilo'}], 'amfetamine': {...}}
{}
{'locations': {'naar': ['Duitsland']}, 'amfetamine': {...}}
{'locations': {'te': ['Landgraaf']}, 'amfetamine': {...}}
{'locations':