# **Using Topic Modelling and LDA for Extractive Summarization of Legal Documents**

In [1]:
import gensim
import json
import math
import nltk
import pprint
from gensim import corpora, models
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS
from google.colab import drive
from nltk.stem import WordNetLemmatizer, SnowballStemmer
from nltk.stem.porter import *
from functools import reduce
from termcolor import colored
from scipy import spatial

# Mount Google Drive (Colab only) so the labelled dataset stored there is readable.
drive.mount('/content/drive', force_remount=True)
FILE_PATH = "/content/drive/My Drive/Colab Notebooks/topic_modelling_data/"
FILE_NAME = "drug_labeled_20190926.json"

# Parse the dataset inside a context manager so the file handle is closed as
# soon as loading finishes (it used to stay open for the whole session).
# FILE remains bound, to the now-closed handle, so the name is still defined.
with open(FILE_PATH + FILE_NAME, encoding='utf8') as FILE:
  DATA = json.load(FILE)

# Keep only the two fields of each record that this notebook reads:
# the list of labelled spans ("annotation") and the full text ("content").
all_data = [
    {"annotation": entry["annotation"], "content": entry["content"]}
    for entry in DATA["data"]
]

# NLTK resources: 'punkt' is needed by nltk.tokenize.sent_tokenize;
# 'wordnet' / 'omw-1.4' back WordNetLemmatizer.
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt')

Mounted at /content/drive
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Unzipping corpora/omw-1.4.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

## Function to filter and pre-process data entries (called passages), one at a time

In [2]:
# Maps every summary aspect to the annotation-label prefixes that belong to it.
# A label is assigned to an aspect when it starts with one of these prefixes
# (compared case-insensitively by label_starts_with_matches).
label_to_data = dict(
    # --- Offences ---
    offence_type_quant_of_drugs=["OT", "OQ", "OQT"],
    offence_motive=["OM"],
    offence_role=["ORC", "ORM", "ORO", "ORF"],

    # --- Aggravating factors ---
    # ("commited" is spelled this way in the key; kept because keys are runtime identifiers)
    aggravating_factors_commited_while_bail=["ABC"],
    aggravating_factors_persistence=["APC"],
    aggravating_factors_international=["AIC"],
    aggravating_factors_other=["AXC"],

    # --- Mitigating factors ---
    mitigating_factors_remorse=["MRC"],
    mitigating_factors_self_consumption=["MSC"],
    mitigating_factors_assist_authorities=["MCC", "MYC"],
    mitigating_factors_characters_others=["MGC", "MXC"],

    # --- Background ---
    background_relationship_family=["BR", "BI"],
    background_criminal_drug_record=["BP", "BL", "BD"],
    background_personality=["BN"],
    background_health=["BH"],
    background_experience=["BE", "BO"],
)

# Number of LDA topics per aspect. Deliberately empty at this point: entries
# are assigned later in this cell. Earlier hand-tuned values (1 topic for the
# offence and aggravating aspects, 5 for background_relationship_family) are
# currently disabled.
num_topics = dict()

def label_starts_with_matches(label: str):
  """Return the aspect whose prefix list matches the start of ``label``.

  Aspects and their prefixes are scanned in the order they appear in
  ``label_to_data``; the first prefix equal to the upper-cased leading
  characters of ``label`` wins.

  Args:
    label: An annotation label.

  Returns:
    The matching aspect name (a key of ``label_to_data``), or ``None`` when
    no prefix matches.
  """
  matches = (
      aspect
      for aspect, prefixes in label_to_data.items()
      for prefix in prefixes
      if label[:len(prefix)].upper() == prefix
  )
  return next(matches, None)

# One (initially empty) bucket of pre-processed passages per aspect.
selected = {aspect: [] for aspect in label_to_data}

# set all models to only have 1 topic for now
num_topics.update((aspect, 1) for aspect in label_to_data)

# gensim's built-in English stop words plus extra terms specific to this corpus.
stopwords = [
    *gensim.parsing.preprocessing.STOPWORDS,
    'whilst', 'hong', 'kong', 'drug', 'drugs', 'section', 'offence',
    'defendant', 'ordinance', 'dangerous', 'trafficking',
]

def preprocess(passage):
  """Tokenise a passage and drop uninformative tokens.

  The text is tokenised with ``gensim.utils.simple_preprocess``; tokens that
  appear in the module-level ``stopwords`` list or are shorter than three
  characters are then discarded.

  Stemming and lemmatisation are NOT applied: a SnowballStemmer /
  WordNetLemmatizer variant was tried and disabled, so the stemmer that used
  to be constructed on every call (and never used) has been removed.

  Args:
    passage: Raw text of one passage.

  Returns:
    The remaining tokens as a list of strings, in their original order.
  """
  return [
      word
      for word in gensim.utils.simple_preprocess(passage)
      if word not in stopwords and len(word) >= 3
  ]

def select_data(json_data: dict):
  """Collect the labelled passages of one record into the ``selected`` buckets.

  For every annotation in ``json_data['annotation']`` and every label of that
  annotation that maps to an aspect (see ``label_starts_with_matches``), the
  text of each annotated point is pre-processed and appended to
  ``selected[aspect]``. Labels that map to no aspect are ignored.

  Args:
    json_data: A record with an ``'annotation'`` list; each annotation has a
      ``'label'`` list and a ``'points'`` list whose items carry ``'text'``.

  Returns:
    None. The module-level ``selected`` dict is mutated in place.
  """
  for entry in json_data['annotation']:
    for tag in entry['label']:
      aspect = label_starts_with_matches(tag)
      if aspect is None:
        continue
      for span in entry['points']:
        selected[aspect].append(preprocess(span['text']))

## Visualizing data entries

In [3]:
# Pretty-print one record (index 3126) to inspect its raw structure: each
# record holds an 'annotation' list of labelled text spans and the 'content'.
pp = pprint.PrettyPrinter(indent=2)
pp.pprint(all_data[3126])

{ 'annotation': [ { 'label': ['PIC1D1_INDIVIDUAL_PENALTY_charge1_defendant1'],
                    'points': [ { 'end': 7433,
                                  'start': 7414,
                                  'text': '4 years and 8 months'}]},
                  { 'label': ['PSC1D1_PLEA_SENTENCE_charge1_defendant1'],
                    'points': [ { 'end': 7347,
                                  'start': 7339,
                                  'text': 'one-third'}]},
                  { 'label': ['SPC1D1_STARTING_TARIFF_charge1_defendant1'],
                    'points': [ { 'end': 7309,
                                  'start': 7303,
                                  'text': '7 years'}]},
                  { 'label': ['BID1_FAMILY_defendant1'],
                    'points': [ { 'end': 6787,
                                  'start': 6737,
                                  'text': 'she does have a\n'
                                          'close bond with her adult '
              

## Training the Word2Vec Model

In [4]:
# No longer used by this cell (simple_preprocess already strips punctuation);
# retained only so the name stays defined.
punctuation = [",", ".", ":", ">"]

# Training corpus for Word2Vec: one list of tokens per sentence.
all_sentences = []

for data in all_data:
  # Remove the quote markers (">"), lower-case, then split into sentences.
  cleaned = data['content'].replace(">", "").lower()
  for sentence in nltk.tokenize.sent_tokenize(cleaned):
    tokens = gensim.utils.simple_preprocess(sentence)
    # Skip sentences that tokenise to nothing. The previous filter compared
    # whole token lists against punctuation strings, so it never removed
    # anything and empty sentences were passed on to Word2Vec.
    if tokens:
      all_sentences.append(tokens)

# min_count=4: words seen fewer than four times get no embedding.
word2vec_model = gensim.models.Word2Vec(all_sentences, min_count=4)

## Feeding Data to the Topic Model

In [6]:
class TopicModel:
  """LDA topic model trained on the passages of a single summary aspect.

  Attributes:
    aspect_name: Name of the aspect this model was trained for.
    dictionary: ``gensim.corpora.Dictionary`` built from the passages and
      pruned with ``filter_extremes``.
    lda_model: The trained ``gensim.models.LdaMulticore`` instance.
  """

  def __init__(self, aspect_name, passages, num_topics=4, no_below=6, no_above=0.6,
               passes=2, workers=2, random_state=None):
    """Build the dictionary and train the LDA model.

    Args:
      aspect_name: Name of the aspect (stored for reference only).
      passages: Tokenised passages (a re-iterable sequence of token lists);
        it is iterated twice, once for the dictionary and once for the
        bag-of-words corpus.
      num_topics: Number of LDA topics.
      no_below: Passed to ``Dictionary.filter_extremes`` (minimum number of
        passages a token must appear in).
      no_above: Passed to ``Dictionary.filter_extremes`` (maximum fraction of
        passages a token may appear in).
      passes: Number of training passes over the corpus.
      workers: Number of worker processes used by ``LdaMulticore``.
      random_state: Optional seed forwarded to ``LdaMulticore`` so that
        training can be made repeatable; ``None`` keeps the previous
        unseeded behaviour.
    """
    self.aspect_name = aspect_name

    self.dictionary = corpora.Dictionary(passages)
    self.dictionary.filter_extremes(no_below=no_below, no_above=no_above)

    bow_corpus = [self.dictionary.doc2bow(passage) for passage in passages]

    # The model is trained on raw bag-of-words counts. A TF-IDF model and
    # corpus used to be computed here but were never used, so that dead work
    # has been dropped.
    self.lda_model = gensim.models.LdaMulticore(bow_corpus, num_topics=num_topics,
                                                id2word=self.dictionary, passes=passes,
                                                workers=workers, random_state=random_state)

  def __str__(self):
    """Return every topic as ``Topic: <id>`` / ``Words: <weighted terms>`` lines."""
    return "".join(
        f"Topic: {idx}\nWords: {topic}\n"
        for idx, topic in self.lda_model.print_topics(-1)
    )

  def similarity(self, sentence):
    """Return the topic distribution of a tokenised sentence.

    Despite the name, this returns whatever ``get_document_topics`` yields
    for the sentence's bag-of-words vector, not a single similarity score.

    Args:
      sentence: List of tokens (e.g. the output of ``preprocess``).
    """
    return self.lda_model.get_document_topics(self.dictionary.doc2bow(sentence))
  

def get_lda_models():
  """Train one ``TopicModel`` per aspect from the labelled passages.

  Every record in ``all_data`` is routed into the module-level ``selected``
  buckets via ``select_data``; a ``TopicModel`` is then trained for each
  aspect with ``num_topics[aspect]`` topics and printed.

  Returns:
    dict mapping aspect name to its trained ``TopicModel``.
  """
  # `selected` is module-level state that select_data only ever appends to.
  # Empty the buckets first so that calling this function again (for example
  # by re-running the cell) does not train on duplicated passages.
  for passages in selected.values():
    passages.clear()

  for data in all_data:
    select_data(data)

  lda_models = {}

  for aspect, passages in selected.items():
    print("ASPECT", aspect.upper())

    lda_models[aspect] = TopicModel(aspect, passages, num_topics[aspect])

    print(lda_models[aspect])

  return lda_models

In [7]:
# Train the per-aspect LDA models; each aspect's topics are printed as it is trained.
lda_models = get_lda_models()

ASPECT OFFENCE_TYPE_QUANT_OF_DRUGS
Topic: 0
Words: 0.373*"grammes" + 0.142*"ketamine" + 0.129*"hydrochloride" + 0.098*"cocaine" + 0.075*"methamphetamine" + 0.061*"heroin" + 0.023*"kilogrammes" + 0.022*"ice" + 0.012*"kilogramme" + 0.010*"cannabis"

ASPECT OFFENCE_MOTIVE
Topic: 0
Words: 0.089*"money" + 0.061*"financial" + 0.031*"earn" + 0.029*"committed" + 0.025*"quick" + 0.022*"debt" + 0.021*"family" + 0.019*"present" + 0.014*"support" + 0.012*"pressure"

ASPECT OFFENCE_ROLE
Topic: 0
Words: 0.056*"reward" + 0.046*"deliver" + 0.033*"courier" + 0.025*"delivering" + 0.024*"ketamine" + 0.022*"admitted" + 0.017*"delivery" + 0.016*"sell" + 0.016*"person" + 0.016*"said"

ASPECT AGGRAVATING_FACTORS_COMMITED_WHILE_BAIL
Topic: 0
Words: 0.384*"present" + 0.247*"police" + 0.164*"case" + 0.110*"court" + 0.096*"offences"

ASPECT AGGRAVATING_FACTORS_PERSISTENCE
Topic: 0
Words: 0.093*"previous" + 0.073*"record" + 0.073*"conviction" + 0.060*"offender" + 0.048*"convicted" + 0.045*"months" + 0.043*"commit

## Testing the Topic Model on an Example

In [8]:
test = "The defendant has pleaded guilty to trafficking in a dangerous drug, namely 11.14 grammes of crystalline solid containing 10.90 grammes of\nmethamphetamine hydrochloride.\n\n> At around 1850 hours on 4 July 2011, police officers on duty outside Flat\nF2, 16th Floor, Kingland Apartments, No. 737\u2011741C Nathan Road, Mong Kok,\nobserved the defendant exiting from this flat in a furtive manner. When the\npolice started to approach the defendant to make inquiry of her, she\nimmediately tried to return to her flat. One of the officers revealed his\nidentity and then executed a search warrant of the premises.\n\n>\n\n> At around 1905 hours, when the police started to conduct the house search,\nthe defendant told a police officer that the dangerous drugs and packaging\nparaphernalia were kept inside a drawer of a white cabinet near the\nrefrigerator. Inside this drawer the police officer seized a number of items,\namongst which were:\n\n>\n\n>> (a) eight plastic bags containing the \u201cIce\u201d that is the subject of the\ncharge;\n\n>>\n\n>> (b) one bundle of empty resealable transparent plastic bags;\n\n>>\n\n>> (c) one electronic scale; and\n\n>>\n\n>> (d) one pair of scissors.\n\n>\n\n> Between 2356 hours on 4 July 2011 and 0700 hours on 5 July 2011, the\ndefendant was interviewed on video. 
Under caution she admitted the following:\n\n>\n\n>> (a) the \u201cIce\u201d seized was for sale;\n\n>>\n\n>> (b) the packaging paraphernalia was for packing the \u201cIce\u201d; and\n\n>>\n\n>> (c) she rented the flat.\n\n>\n\n> The Government Chemist certified that the eight bags of \u201cIce\u201d were 11.14\ngrammes of a crystalline solid containing 10.90 grammes of methamphetamine\nhydrochloride, a salt of methamphetamine.\n\n>\n\n> The total retail value of the drugs seized is estimated by a police drug\nexpert to be $7,820.\n\n>\n\n> In a number of different cases the Hong Kong Court of Appeal has set out the\nprinciples and sentencing guidelines for judges to follow when sentencing a\ndefendant convicted of a drug trafficking offence.\n\n>\n\n> The first is that the sentencing ranges for trafficking in \u201cIce\u201d are to be\ntaken from the guidelines laid down by the Court of Appeal in Attorney-General\nand Ching Kwok Hung [1991] 2 HKLRD 125as amended by the Court of Appeal in the\nsubsequent decision of HKSAR v Capitania CACC28/2004. As a result of this\nsubsequent amendment the first sentencing band relates to quantities up to 10\ngrammes and so in the present case, as the quantity in which this defendant\ntrafficked was 10.90 grammes of \u201cIce\u201d, this defendant is placed in the 7 to 10\nyears band for sentencing purposes.\n\n>\n\n> The existence of these sentencing tariffs leaves little room for mitigation,\nbut notwithstanding this, the sentencing tariffs play a very important role\nfor two main reasons. Firstly, they lead to consistency in sentencing and\navoid distinctions being drawn between drug traffickers based upon the nature\nof their trafficking, whether they be couriers, storekeepers, or play some\nother role. Whatever the role played by the trafficker, the sentence will\ngenerally be the same. 
See HKSAR v Manalo [2001] 1 HKLRD 557 at 560 G-H.\n\n>\n\n> The purpose of these sentencing bands is to lead to consistency in\nsentencing, and secondly, to deter those who might otherwise be tempted by\nwhat they perceive as an easy and low\u2011risk means to substantial profits. See\nSecretary for Justice v Chan Ka Wah [2005] 4 HKLR 904 at 908, paragraph 20.\n\n>\n\n> The Court of Appeal has made it clear that it requires exceptional\ncircumstances to justify a departure from the guidelines, and that whilst a\nsentencing judge may retain some degree of discretion, the sentence imposed\nshould normally be within the suggested bands. See the Chan Ka Wah case at\npage 908, paragraph 22.\n\n>\n\n> No such circumstances are present in this case. Nevertheless, I still have\nsome sentencing discretion, and I can take into account the amount of packets,\nthe type of mixture containing the narcotic, the degree of the involvement of\nthe offender and any previous involvement he or she may have had in drug\noffences. See HKSAR v Abdallah [2009] 2 HKLRD 437 and HKSAR v Chong Heung Sang\n[2010] 3 HKC 88.\n\n>\n\n> The defendant is 40 years of age and has received education only up to\nsecondary Form 1 level. She was unemployed at the time of her arrest. She is\nsingle, and lived alone. She claims to have been addicted to \u201cIce\u201d for the\npast 10 years.\n\n>\n\n> She has a criminal record dating back to 1989, when she would have been only\n18 years of age. She was then convicted and fined for possession of dangerous\ndrugs. Then in 1993 she was convicted of robbery and sentenced to 12 months\u2019\nimprisonment. At around the same time, she was convicted of a further two\ncharges of possession of dangerous drugs. In 1994 she was convicted in the\nHigh Court of 3 counts of trafficking in drugs, and sentenced to a total of 10\nyears\u2019 imprisonment.\n\n>\n\n> After her release from prison she has continued to be involved with\ndangerous drugs. 
She was convicted in 2002 of possession of dangerous drugs,\nand in 2005 of trafficking in dangerous drugs. For the trafficking offence she\nwas sentenced to 13 months\u2019 imprisonment. She then had convictions in 2007 for\ntheft and possession of dangerous drugs, and mostly recently in 2008 for\ntrafficking in dangerous drugs, when she was again sent to prison, this time\nfor 16 months.\n\n>\n\n> In identifying appropriate penalties when assessing an appropriate starting\npoint, I cannot ignore the defendant\u2019s criminal record. Her past offending\nreveals a failure to be deterred by the criminal justice process from\nreoffending. This failure to be deterred, this persistent reoffending, and\nthis continued involvement in drug trafficking constitutes an aggravating\nfactor justifying the adoption of a higher starting point. See HKSAR v Chan\nPui Chi [1999] 2 HKLRD 830.\n\n>\n\n> However, her counsel asks for one last chance for her client. Amongst all\nthe gloom, she points to a glimmer of hope in that the defendant did at one\nstage prior to these offences engage in some employment, and she does have a\nclose bond with her adult daughter. These two factors may provide some hope\nthat the defendant will try and break the cycle of drug addiction and drug\ntrafficking which has dogged her life.\n\n>\n\n> In the present case, the quantity of drugs is 10.90 grammes, just above the\n10 grammes lower limit of the sentencing band. I have decided to extend to the\ndefendant the last chance and the leniency that has been asked, and instead of\nincreasing the starting point within this band, I will adopt the lower limit\nof the band as my starting point. That is, 7 years. I discount her sentence by\none-third to allow for her plea of guilty.\n\n>\n\n> The resulting sentence is 4 years and 8 months, and that is the sentence I\nimpose.\n\n>\n\n> Miss Kwok, your case is one which is of great concern to me and which\ndisturbs me greatly. 
You have wasted your youth, and the only issue that\nremains is whether you can make something of the life that is left to you.\nYour life has been a cycle of drug addiction and drug trafficking interrupted\nby periods in prison because of these addictions. I fully appreciate that you\nmay find drug trafficking an easy means of sourcing an income, since you do\nnot have qualifications which would enable you to get a highly-paid job.\nHowever, you should be well aware that continued involvement in drug\ntrafficking can only lead to further periods of imprisonment and the\ndestruction of your life.\n\n>\n\n> You are very fortunate indeed that you still have some closeness with your\ndaughter. If you want to enjoy that relationship, then try and do something to\nbreak the cycle of drug addiction and drug trafficking. You will be well aware\nthat the Commissioner for Correctional Services has the power to reduce your\nsentence further by remitting part of it should you be of good behaviour. You\nwill also be aware of what facilities exist in prison that might enable you to\nacquire some kind of skill or qualification that you can make use of on your\nrelease.\n\n>\n\n> What you do with your life after your release is entirely in your hands. You\nare fortunate indeed that I have not started at a higher starting point than\nthe one I adopted. You can be confident that courts in future will not extend\nyou any leniency should you appear again on drug\u2011trafficking charges.\n\n>\n\n> Prison provides you with an opportunity to try and break away from drug\ntrafficking and acquire a skill. 
I urge you to take advantage of that\nopportunity, otherwise you will find yourself regularly appearing in court and\nregularly being sent to prison.isonment.\n\n > \n\n > After her release from prison she has continued to be involved with\ndangerous drugs.She was convicted in 2002 of possession of dangerous drugs, \nand in 2005 of trafficking in dangerous drugs.For the trafficking offence she\nwas sentenced to 13 months\u2019 imprisonment.She then had convictions in 2007 for \ntheft and possession of dangerous drugs, and mostly recently in 2008 for \ntrafficking in dangerous drugs, when she was again sent to prison, this time\nfor 16 months. "
# Split the judgment into paragraphs after removing the quote markers.
sentences = test.replace(">", "").split("\n\n")


def _weighted_embedding(weighted_words):
  """Sum ``weight * embedding`` over (word, weight) pairs.

  Words that have no Word2Vec embedding are skipped.

  Returns:
    The summed vector, or ``None`` when none of the words has an embedding.
  """
  vectors = [
      weight * word2vec_model.wv[word]
      for word, weight in weighted_words
      if word in word2vec_model.wv
  ]
  return sum(vectors) if vectors else None


for sentence in sentences:

  # Pre-process once per paragraph instead of once per aspect.
  tokens = preprocess(sentence)
  if not tokens:
    continue

  print("Original: " + sentence.replace("\n", " "))
  print(colored("Preprocessed: " + " ".join(tokens), "blue"))

  # The paragraph embedding does not depend on the aspect, so build it once.
  # Cosine distance ignores vector length, so an unnormalised sum is enough.
  sentence_vector = _weighted_embedding((token, 1.0) for token in tokens)

  curr_lowest = math.inf
  closest_aspect = None

  # Without a single embeddable token there is nothing to compare.
  if sentence_vector is not None:
    for aspect, topic_model in lda_models.items():

      bow_vector = topic_model.dictionary.doc2bow(tokens)

      # Skip aspects whose dictionary shares no word with the paragraph.
      if not bow_vector:
        continue

      topic_scores = topic_model.lda_model[bow_vector]
      if not topic_scores:
        continue

      # Most probable topic for this paragraph. Its terms are used below;
      # topic 0 used to be hard-coded, which is only right for 1-topic models.
      chosen_topic = max(topic_scores, key=lambda tup: tup[1])[0]

      words_and_probabilities = topic_model.lda_model.get_topic_terms(chosen_topic, topn=20)

      # Probability-weighted embedding of the topic's top terms. Terms with
      # no Word2Vec embedding are skipped instead of raising KeyError.
      topic_vector = _weighted_embedding(
          (topic_model.dictionary[word_id], probability)
          for word_id, probability in words_and_probabilities
      )
      if topic_vector is None:
        continue

      result = spatial.distance.cosine(sentence_vector, topic_vector)

      print(colored(aspect + ":", "cyan"), end=" ")

      # Diagnostic: an exact zero distance is suspicious, so show both vectors.
      if result == 0:
        print(sentence_vector, topic_vector)

      if result < curr_lowest:
        curr_lowest = result
        closest_aspect = aspect

      print(colored(result, "blue"))

  print(closest_aspect)

Original: The defendant has pleaded guilty to trafficking in a dangerous drug, namely 11.14 grammes of crystalline solid containing 10.90 grammes of methamphetamine hydrochloride.
[34mPreprocessed: pleaded guilty grammes crystalline solid containing grammes methamphetamine hydrochloride[0m
[36moffence_type_quant_of_drugs:[0m [34m0.1654769778251648[0m
[36moffence_role:[0m [34m0.9653696306049824[0m
[36mmitigating_factors_remorse:[0m [34m1.0103004230186343[0m
[36mmitigating_factors_self_consumption:[0m [34m0.761973574757576[0m
[36mmitigating_factors_characters_others:[0m [34m1.1888915300369263[0m
[36mbackground_criminal_drug_record:[0m [34m0.7424896657466888[0m
offence_type_quant_of_drugs
Original:  At around 1850 hours on 4 July 2011, police officers on duty outside Flat F2, 16th Floor, Kingland Apartments, No. 737‑741C Nathan Road, Mong Kok, observed the defendant exiting from this flat in a furtive manner. When the police started to approach the defendant to m

In [None]:
text = """

The defendant has pleaded guilty to trafficking in a dangerous drug, namely 11.14 grammes of crystalline solid containing 10.90 grammes of methamphetamine hydrochloride.

At around 1850 hours on 4 July 2011, police officers on duty outside Flat F2,  16th Floor, Kingland Apartments , No. 737‑741C Nathan Road, Mong Kok, observed the defendant exiting from this flat in a furtive manner. When the police started to approach the defendant to make inquiry of her, she immediately tried to return to her flat. One of the officers revealed his identity and then executed a search warrant of the premises.

At around 1905 hours, when the police started to conduct the house search, the defendant told a police officer that the dangerous drugs and packaging paraphernalia were kept inside a drawer of a white cabinet near the refrigerator. Inside this drawer the police officer seized a number of items, amongst which were:

(a) eight plastic bags containing the “Ice” that is the subject of the charge;

(b) one bundle of empty resealable transparent plastic bags;

(c) one electronic scale; and

(d) one pair of scissors.

Between 2356 hours on 4 July 2011 and 0700 hours on 5 July 2011, the defendant was interviewed on video. Under caution she admitted the following:

(a) the “Ice” seized was for sale;

(b) the packaging paraphernalia was for packing the “Ice”; and

(c) she rented the flat.

The Government Chemist certified that the eight bags of “Ice” were 11.14 grammes of a crystalline solid containing 10.90 grammes of methamphetamine hydrochloride, a salt of methamphetamine.

The total retail value of the drugs seized is estimated by a police drug expert to be $7,820.

In a number of different cases the Hong Kong Court of Appeal has set out the principles and sentencing guidelines for judges to follow when sentencing a defendant convicted of a drug trafficking offence.

The first is that the sentencing ranges for trafficking in “Ice” are to be taken from the guidelines laid down by the Court of Appeal in Attorney-General and Ching Kwok Hung [1991] 2 HKLRD 125as amended by the Court of Appeal in the subsequent decision of HKSAR v Capitania CACC28/2004. As a result of this subsequent amendment the first sentencing band relates to quantities up to 10 grammes and so in the present case, as the quantity in which this defendant trafficked was 10.90 grammes of “Ice”, this defendant is placed in the 7 to 10 years band for sentencing purposes.

The existence of these sentencing tariffs leaves little room for mitigation, but notwithstanding this, the sentencing tariffs play a very important role for two main reasons. Firstly, they lead to consistency in sentencing and avoid distinctions being drawn between drug traffickers based upon the nature of their trafficking, whether they be couriers, storekeepers, or play some other role. Whatever the role played by the trafficker, the sentence will generally be the same. See HKSAR v Manalo [2001] 1 HKLRD 557 at 560 G-H.

The purpose of these sentencing bands is to lead to consistency in sentencing, and secondly, to deter those who might otherwise be tempted by what they perceive as an easy and low‑risk means to substantial profits. See Secretary for Justice v Chan Ka Wah [2005] 4 HKLR 904 at 908, paragraph 20.

The Court of Appeal has made it clear that it requires exceptional circumstances to justify a departure from the guidelines, and that whilst a sentencing judge may retain some degree of discretion, the sentence imposed should normally be within the suggested bands. See the Chan Ka Wah case at page 908, paragraph 22.

No such circumstances are present in this case. Nevertheless, I still have some sentencing discretion, and I can take into account the amount of packets, the type of mixture containing the narcotic, the degree of the involvement of the offender and any previous involvement he or she may have had in drug offences. See HKSAR v Abdallah [2009] 2 HKLRD 437 and HKSAR v Chong Heung Sang [2010] 3 HKC 88.

The defendant is 40 years of age and has received education only up to secondary Form 1 level. She was unemployed at the time of her arrest. She is single, and lived alone. She claims to have been addicted to “Ice” for the past 10 years.

She has a criminal record dating back to 1989, when she would have been only 18 years of age. She was then convicted and fined for possession of dangerous drugs. Then in 1993 she was convicted of robbery and sentenced to 12 months’ imprisonment. At around the same time, she was convicted of a further two charges of possession of dangerous drugs. In 1994 she was convicted in the High Court of 3 counts of trafficking in drugs, and sentenced to a total of 10 years’ imprisonment.

After her release from prison she has continued to be involved with dangerous drugs. She was convicted in 2002 of possession of dangerous drugs, and in 2005 of trafficking in dangerous drugs. For the trafficking offence she was sentenced to 13 months’ imprisonment. She then had convictions in 2007 for theft and possession of dangerous drugs, and mostly recently in 2008 for trafficking in dangerous drugs, when she was again sent to prison, this time for 16 months.

In identifying appropriate penalties when assessing an appropriate starting point, I cannot ignore the defendant’s criminal record. Her past offending reveals a failure to be deterred by the criminal justice process from reoffending. This failure to be deterred, this persistent reoffending, and this continued involvement in drug trafficking constitutes an aggravating factor justifying the adoption of a higher starting point. See HKSAR v Chan Pui Chi [1999] 2 HKLRD 830.

However, her counsel asks for one last chance for her client. Amongst all the gloom, she points to a glimmer of hope in that the defendant did at one stage prior to these offences engage in some employment, and she does have a close bond with her adult daughter. These two factors may provide some hope that the defendant will try and break the cycle of drug addiction and drug trafficking which has dogged her life.

In the present case, the quantity of drugs is 10.90 grammes, just above the 10 grammes lower limit of the sentencing band. I have decided to extend to the defendant the last chance and the leniency that has been asked, and instead of increasing the starting point within this band, I will adopt the lower limit of the band as my starting point. That is, 7 years. I discount her sentence by one-third to allow for her plea of guilty.

The resulting sentence is 4 years and 8 months, and that is the sentence I impose.

Miss Kwok, your case is one which is of great concern to me and which disturbs me greatly. You have wasted your youth, and the only issue that remains is whether you can make something of the life that is left to you. Your life has been a cycle of drug addiction and drug trafficking interrupted by periods in prison because of these addictions. I fully appreciate that you may find drug trafficking an easy means of sourcing an income, since you do not have qualifications which would enable you to get a highly-paid job. However, you should be well aware that continued involvement in drug trafficking can only lead to further periods of imprisonment and the destruction of your life.

You are very fortunate indeed that you still have some closeness with your daughter. If you want to enjoy that relationship, then try and do something to break the cycle of drug addiction and drug trafficking. You will be well aware that the Commissioner for Correctional Services has the power to reduce your sentence further by remitting part of it should you be of good behaviour. You will also be aware of what facilities exist in prison that might enable you to acquire some kind of skill or qualification that you can make use of on your release.

What you do with your life after your release is entirely in your hands. You are fortunate indeed that I have not started at a higher starting point than the one I adopted. You can be confident that courts in future will not extend you any leniency should you appear again on drug‑trafficking charges.

Prison provides you with an opportunity to try and break away from drug trafficking and acquire a skill. I urge you to take advantage of that opportunity, otherwise you will find yourself regularly appearing in court and regularly being sent to prison.

"""





In [None]:
# Scratch check: a weighted combination of two word embeddings.
# (Disabled probe: word2vec_model.wv.most_similar("years of imprisonment"))
5 * word2vec_model.wv["imprisonment"] + 3 * word2vec_model.wv["skill"]

array([ -9.462629  , -18.404139  ,  -8.426258  ,  -0.99144614,
        -3.1612248 ,   6.6138897 ,   3.4411068 ,  10.961947  ,
         7.574908  ,  -9.069915  ,   0.59963787,  -7.169389  ,
        12.39362   ,   1.995619  ,   4.3024845 ,   5.582766  ,
         4.411383  ,   3.4278934 ,   7.966709  , -10.625965  ,
       -26.951757  ,  19.978394  ,  -6.6005497 ,  -2.9069376 ,
       -11.899344  ,  -1.0757685 ,  -0.84889686, -12.8987465 ,
       -10.787942  ,   0.3023861 ,   6.3125176 ,  12.784238  ,
        -0.2117269 ,   3.19437   , -10.850562  ,   3.41884   ,
         4.4756026 ,  -5.5148163 , -14.053297  ,  -1.9419237 ,
         4.745144  ,  -0.54399425,   3.5719552 ,  10.0396185 ,
        -6.497223  ,  -3.7446046 ,   9.462566  ,  -0.90779823,
        -0.9959442 ,  -2.9331176 ,  11.473785  ,   1.0496285 ,
       -15.681624  , -11.269348  ,   5.7783475 ,   0.10609275,
         2.212152  ,  -4.375989  ,   2.6229682 ,   6.8963447 ,
         5.2809253 , -12.947457  , -11.770961  ,   5.20

In [None]:
# Rebinds `text` to a single sample sentence (the opening sentence of the
# judgment); the commented-out lines that follow are scratch probes of the
# offence_motive model.
text = "The defendant has pleaded guilty to trafficking in a dangerous drug, namely 11.14 grammes of crystalline solid containing 10.90 grammes of methamphetamine hydrochloride."




# print(lda_models["offence_motive"].lda_model.get_topic_terms(0, topn=20))

# print(lda_models["offence_motive"].dictionary[10])

# print(list(map(lambda x: x[0], lda_models["offence_motive"].dictionary.doc2bow(["committed", "family"]))))


[-0.7037809  -1.4865761   0.96001655  0.74256635  0.02258958  0.09871833
  2.5635993   2.237731   -0.22575304 -1.3240633   0.63807815  1.1514897
  0.94312835 -1.3376623   0.50615644 -1.0943182   0.27222097  2.741008
  0.70129085 -2.1133573  -1.2169962   1.6650866  -1.4835291   0.19273236
  0.48846966 -1.0365473   0.5379564  -0.5187251   0.64966434  1.686719
  2.0091708   0.43115664 -0.00381713 -0.54823405 -0.15451986  1.1908305
 -0.28028634 -0.12362666 -0.65262246 -2.3809776   0.16119276  0.34824502
  1.7731658   0.36839    -0.93991536 -0.3077803  -0.05061044 -0.56036174
 -0.1441168   0.57661873  1.0668329   0.67739904  0.13642591 -0.64004487
 -0.08782891  0.04519963  0.99354297 -1.7364101  -0.6636822  -1.458984
  0.6552676  -1.5719066   0.3127681   0.589803   -0.14864776 -1.4916607
  0.27721587 -1.3904781  -0.60537875 -0.42147553  0.75302243 -1.0442442
  0.09257434 -2.0778825  -0.3179809   1.2230405   0.18583694  0.20629078
  1.7416749  -0.68807846 -2.413048    0.7026432  -1.5527589  