In [1]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
import numpy as np
import pandas as pd
import re
import os.path
import csv
from datetime import datetime

## Part I. Web Scraping

In [2]:
# helper function
def is_year(s):
    '''
    Input:
        s - (string) text to test, e.g. the label of a link
    Output:
        True if int() can parse the string, False otherwise.
    '''
    try:
        int(s)
    except ValueError:
        # not parseable as an integer, so it cannot be a year label
        return False
    else:
        return True

In [3]:
# helper function
def get_level_info(soup):
    '''
    Work out which (category, subcategory, year) the current page is showing.

    Input:
        soup - (beautifulsoup) parsed page
    Return:
        category, subcategory, year at current level; each one is the label of
        the last matching link seen, or np.nan when no such link was found.

    The label of a level is the text of the link that sits immediately before
    a link whose markup contains '#H' (category) or '#A' (subcategory, or year
    when the label is numeric). Scanning stops at the last "ThemeXPRowAnchor"
    link; if the page has none, every link in the slice is scanned.
    '''
    # Same slice as the crawler uses: skip the first 25 links and the last one
    # (presumably page chrome -- the offset is tied to the site's layout).
    all_links = soup.find_all('a')[25:-1]
    # Look the row anchors up once instead of re-searching the document on
    # every loop iteration, and tolerate pages that have none.
    row_anchors = soup.find_all("a", {"class": "ThemeXPRowAnchor"})
    last_row_anchor = row_anchors[-1] if row_anchors else None

    last_cat = last_subcat = last_year = np.nan
    for i, elem in enumerate(all_links):
        # The first link has no predecessor; all_links[i-1] would silently
        # wrap around to the last link of the slice.
        if i > 0:
            label = all_links[i-1].text
            if len(label) > 0:
                markup = str(elem)
                if '#H' in markup:
                    last_cat = label
                if '#A' in markup:
                    if is_year(label):
                        last_year = label
                    else:
                        last_subcat = label
        # Links after the last listed item are not part of the current level.
        if last_row_anchor is not None and elem == last_row_anchor:
            break

    return last_cat, last_subcat, last_year

In [4]:
# helper function
def get_items(driver, file_name = "data.csv"):
    '''
    Scrape the case listing on the driver's current page and append it to a CSV file.

    Input:
        driver - (webdriver) browser whose current page is parsed
        file_name - (string) CSV file the rows are appended to; it is created
                    with a header row if it does not exist yet
    Output:
        current level - (tuple) current category, subcategory, year
        items - (list) [dates, case_ids, names, case_urls]: date, case number,
                name of appellant and respondant, url of detailed document;
                parallel lists with one entry per listing table
        page information - (list)
                           page_nums - numpy array of the page numbers linked from the current webpage,
                           pages - list of selenium elements for the page links
    '''
    # parse the HTML of the page the driver is currently showing
    soup = BeautifulSoup(driver.page_source, "html.parser")
    current_level = get_level_info(soup)
    last_cat, last_subcat, last_year = current_level

    dates, case_ids, names, case_urls = [], [], [], []

    # Every listing table contributes exactly one entry to each list so the
    # four lists stay aligned; a field that is not found exactly once is NaN.
    for table_tag in soup.find_all('table', {"width":"99%"}):
        date_tag = table_tag.find_all('font', {'color':'#006633'})
        id_tag = table_tag.find_all('a', {'class':'ThemeXPRowAnchor'})
        name_tag = table_tag.find_all('td', {'valign':'top', 'width':'55%'})
        if len(date_tag) == 1:
            # dates are rendered in parentheses, e.g. "(04/07/2018)"
            date_time_obj = datetime.strptime(date_tag[0].text, '(%d/%m/%Y)')
            dates.append(date_time_obj.date())
        else:
            dates.append(np.nan)
        if len(id_tag) == 1:
            case_ids.append(id_tag[0].text)
            # the document url is the first single-quoted argument inside the href
            case_urls.append(id_tag[0].get("href").split("'")[1])
        else:
            case_ids.append(np.nan)
            case_urls.append(np.nan)
        if len(name_tag) == 1:
            # keep only the text before the first non-breaking space
            names.append(name_tag[0].text.split('\xa0')[0])
        else:
            names.append(np.nan)

    print("At current level: {}, {} of documents were crawled.".format(current_level, len(case_urls)))

    items = [dates, case_ids, names, case_urls]

    # get all page links: numbers from the parsed HTML, clickable elements from the driver
    page_links = soup.find_all('a', {"class":"pagefont"})
    # find_elements(By.XPATH, ...) replaces find_elements_by_xpath, which Selenium 4 removed
    # NOTE(review): page_nums and pages are matched by position -- this assumes
    # both queries return the same elements in the same order; confirm.
    pages = driver.find_elements(By.XPATH, "//*[@class='pagefont']")
    page_nums = np.array([int(page_link.get("href").split("'")[1]) for page_link in page_links])

    page_info = [page_nums, pages]

    fieldnames = ["Category", "Subcategory", "Year", "Date", "Case_ID", "Respondent_and_Appellant", "Document_URL"]
    write_header = not os.path.isfile(file_name)
    # newline='' is what the csv module expects (avoids blank rows on Windows);
    # utf-8 keeps the Chinese case titles readable by pd.read_csv's default.
    with open(file_name, "a", newline="", encoding="utf-8") as outcsv:
        writer = csv.DictWriter(outcsv, fieldnames = fieldnames)
        if write_header:
            writer.writeheader()
        for row in zip(dates, case_ids, names, case_urls):
            writer.writerow(dict(zip(fieldnames, (last_cat, last_subcat, last_year) + row)))
    return current_level, items, page_info
        

In [5]:
# web crawler using depth-first-search algorithm
def DFS_traverse(driver, current_url, visited_urls, visited_levels, file_name = "data.csv", save_log = False):
    '''
    Crawl the site depth-first starting from current_url.

    Input: driver - (webdriver)
           current_url - (string)
           visited_urls - (set) urls already followed; updated in place
           visited_levels - (list) (category, subcategory, year) tuples already scraped; updated in place
           file_name - (string) output file where scraped content are stored
           save_log - (boolean) export visited url to a log file (for debug purpose)
    Return: visited_levels - (list) the same list that was passed in
    '''
    # prevent Chrome Webdriver from producing timeout in Selenium by refreshing
    # https://stackoverflow.com/questions/40514022/chrome-webdriver-produces-timeout-in-selenium
    try:
        driver.get(current_url)
    except TimeoutException:
        driver.refresh()
    # parse the HTML of the loaded page
    soup = BeautifulSoup(driver.page_source, "html.parser")
    # find all targeted tags
    item_list = soup.find_all("a", {"class": "ThemeXPRowAnchor"})

    # base case: at the base level (year), ThemeXPRowAnchor items are not empty
    if len(item_list) > 0:
        current_level = get_level_info(soup)
        # if current level has not been visited, proceed with crawling content from this page
        if current_level not in visited_levels:
            _, _, [page_nums, pages] = get_items(driver, file_name = file_name)
            curr_page = 1

            while len(pages) > 0:
                next_page = curr_page + 1
                idx = np.where(page_nums == next_page)[0]
                if len(idx) != 1:
                    # no (unique) link to the next page: this level is done
                    break
                attempts = 0
                while attempts < 3:
                    try:
                        # turn page
                        pages[idx[0]].click()
                        break
                    except TimeoutException:
                        print("Refresh attempt: ", attempts)
                        attempts += 1
                        # NOTE(review): after a refresh the elements in `pages`
                        # may be stale, so the retried click could raise -- confirm.
                        driver.refresh()
                # save content on this page
                _, _, [page_nums, pages] = get_items(driver, file_name = file_name)
                curr_page += 1
            # mark current level as visited (only once per level)
            visited_levels.append(current_level)
        return visited_levels

    # relevant links to be crawled starts from index 25
    urls = soup.find_all('a', href=True)[25:-1]
    # traverse to the next level until the content contains html tag of class "ThemeXPRowAnchor"
    for link in urls:
        # clean html codes to a string of link
        url = link.get("href")
        if url not in visited_urls:
            # mark current url as visited
            visited_urls.add(url)
            # for debug purpose, each url should be visited once
            if save_log is True:
                with open("log.txt", "a") as logfile:
                    logfile.write(url+'\n')
            # pass file_name down so nested pages write to the caller's file
            DFS_traverse(driver, url, visited_urls, visited_levels, file_name = file_name, save_log = save_log)
    # always hand the list back: returning None here used to clobber the
    # caller's visited_levels and break the next membership check
    return visited_levels

In [10]:
def main():
    '''
    Start a Chrome session, crawl the judgment index depth-first from the seed
    url, and shut the browser down afterwards (also when the crawl fails).
    '''
    # initialize a Chrome driver
    chrome_options = Options()
    chrome_options.add_argument("--dns-prefetch-disable")
    driver = webdriver.Chrome(options=chrome_options)
    try:
        # create an empty set to flag visited urls
        visited_urls = set()
        visited_levels = []
        # initialize the seed node
        current_url = 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp'
        # alternative seeds for debugging a single branch:
        #current_url = 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=T&L1=CA&L2=CV&L3=2001&AR=3_18#A3_18'
        #current_url = 'https://legalref.judiciary.hk/lrs/common/ju/judgment.jsp?EX=&L1=CA&L2=AR&L3=2018&AR=1_1#A1_1'
        # traverse all urls using DFS
        DFS_traverse(driver, current_url, visited_urls, visited_levels, save_log = True)
    finally:
        # quit() ends the whole browser session; close() only closes the
        # current window and was skipped entirely if the crawl raised
        driver.quit()
main()

At current level: ('District Court ', 'Civil Action', '2018'), 24 of documents were crawled.
At current level: ('District Court ', 'Civil Action', '2017'), 50 of documents were crawled.
At current level: ('District Court ', 'Civil Action', '2017'), 9 of documents were crawled.
At current level: ('District Court ', 'Civil Action', '2016'), 50 of documents were crawled.
At current level: ('District Court ', 'Civil Action', '2016'), 50 of documents were crawled.
At current level: ('District Court ', 'Civil Action', '2016'), 43 of documents were crawled.
At current level: ('District Court ', 'Civil Action', '2015'), 50 of documents were crawled.
At current level: ('District Court ', 'Civil Action', '2015'), 50 of documents were crawled.
At current level: ('District Court ', 'Civil Action', '2015'), 50 of documents were crawled.
At current level: ('District Court ', 'Civil Action', '2015'), 50 of documents were crawled.
At current level: ('District Court ', 'Civil Action', '2015'), 13 of do

At current level: ('District Court ', 'Civil Action', '2002'), 50 of documents were crawled.
At current level: ('District Court ', 'Civil Action', '2002'), 50 of documents were crawled.
At current level: ('District Court ', 'Civil Action', '2002'), 35 of documents were crawled.
At current level: ('District Court ', 'Civil Action', '2001'), 50 of documents were crawled.
At current level: ('District Court ', 'Civil Action', '2001'), 50 of documents were crawled.
At current level: ('District Court ', 'Civil Action', '2001'), 50 of documents were crawled.
At current level: ('District Court ', 'Civil Action', '2001'), 50 of documents were crawled.
At current level: ('District Court ', 'Civil Action', '2001'), 50 of documents were crawled.
At current level: ('District Court ', 'Civil Action', '2001'), 50 of documents were crawled.
At current level: ('District Court ', 'Civil Action', '2001'), 50 of documents were crawled.
At current level: ('District Court ', 'Civil Action', '2001'), 14 of d

At current level: ('District Court ', 'Distraint Case', '2014'), 4 of documents were crawled.
At current level: ('District Court ', 'Distraint Case', '2013'), 2 of documents were crawled.
At current level: ('District Court ', 'Distraint Case', '2012'), 3 of documents were crawled.
At current level: ('District Court ', 'Distraint Case', '2011'), 2 of documents were crawled.
At current level: ('District Court ', 'Distraint Case', '2010'), 5 of documents were crawled.
At current level: ('District Court ', 'Distraint Case', '2009'), 5 of documents were crawled.
At current level: ('District Court ', 'Distraint Case', '2008'), 7 of documents were crawled.
At current level: ('District Court ', 'Distraint Case', '2007'), 1 of documents were crawled.
At current level: ('District Court ', 'Distraint Case', '2006'), 5 of documents were crawled.
At current level: ('District Court ', 'Distraint Case', '2005'), 4 of documents were crawled.
At current level: ('District Court ', 'Distraint Case', '200

At current level: ('District Court ', "Employee's Compensation Case", '1988'), 5 of documents were crawled.
At current level: ('District Court ', "Employee's Compensation Case", '1987'), 3 of documents were crawled.
At current level: ('District Court ', "Employee's Compensation Case", '1985'), 1 of documents were crawled.
At current level: ('District Court ', "Employee's Compensation Case", '1984'), 9 of documents were crawled.
At current level: ('District Court ', "Employee's Compensation Case", '1983'), 4 of documents were crawled.
At current level: ('District Court ', "Employee's Compensation Case", '1982'), 2 of documents were crawled.
At current level: ('District Court ', "Employee's Compensation Case", '1981'), 2 of documents were crawled.
At current level: ('District Court ', "Employee's Compensation Case", '1980'), 5 of documents were crawled.
At current level: ('District Court ', "Employee's Compensation Case", '1979'), 3 of documents were crawled.
At current level: ('District

At current level: ('District Court ', 'Miscellaneous Proceedings', '1974'), 4 of documents were crawled.
At current level: ('District Court ', 'Miscellaneous Proceedings', '1973'), 1 of documents were crawled.
At current level: ('District Court ', 'Miscellaneous Proceedings', '1972'), 1 of documents were crawled.
At current level: ('District Court ', 'Miscellaneous Proceedings', '1970'), 2 of documents were crawled.
At current level: ('District Court ', 'Miscellaneous Proceedings', '1969'), 1 of documents were crawled.
At current level: ('District Court ', 'Miscellaneous Proceedings', '1948'), 2 of documents were crawled.
At current level: ('District Court ', 'Miscellaneous Proceedings', '1947'), 6 of documents were crawled.
At current level: ('District Court ', 'Miscellaneous Proceedings', '1946'), 9 of documents were crawled.
At current level: ('District Court ', 'Miscellaneous Proceedings', '1945'), 2 of documents were crawled.
At current level: ('District Court ', 'Occupational Dea

At current level: ('Family Court ', 'Matrimonial Causes', '2014'), 12 of documents were crawled.
At current level: ('Family Court ', 'Matrimonial Causes', '2013'), 50 of documents were crawled.
At current level: ('Family Court ', 'Matrimonial Causes', '2013'), 50 of documents were crawled.
At current level: ('Family Court ', 'Matrimonial Causes', '2013'), 18 of documents were crawled.
At current level: ('Family Court ', 'Matrimonial Causes', '2012'), 50 of documents were crawled.
At current level: ('Family Court ', 'Matrimonial Causes', '2012'), 50 of documents were crawled.
At current level: ('Family Court ', 'Matrimonial Causes', '2012'), 11 of documents were crawled.
At current level: ('Family Court ', 'Matrimonial Causes', '2011'), 50 of documents were crawled.
At current level: ('Family Court ', 'Matrimonial Causes', '2011'), 33 of documents were crawled.
At current level: ('Family Court ', 'Matrimonial Causes', '2010'), 50 of documents were crawled.
At current level: ('Family Cou

At current level: ('Lands Tribunal ', 'Building Management Application', '2010'), 33 of documents were crawled.
At current level: ('Lands Tribunal ', 'Building Management Application', '2009'), 30 of documents were crawled.
At current level: ('Lands Tribunal ', 'Building Management Application', '2008'), 46 of documents were crawled.
At current level: ('Lands Tribunal ', 'Building Management Application', '2007'), 32 of documents were crawled.
At current level: ('Lands Tribunal ', 'Building Management Application', '2006'), 25 of documents were crawled.
At current level: ('Lands Tribunal ', 'Building Management Application', '2005'), 49 of documents were crawled.
At current level: ('Lands Tribunal ', 'Building Management Application', '2004'), 50 of documents were crawled.
At current level: ('Lands Tribunal ', 'Building Management Application', '2004'), 39 of documents were crawled.
At current level: ('Lands Tribunal ', 'Building Management Application', '2003'), 50 of documents were c

At current level: ('Lands Tribunal ', 'Land Resumption Application', '2008'), 2 of documents were crawled.
At current level: ('Lands Tribunal ', 'Land Resumption Application', '2007'), 3 of documents were crawled.
At current level: ('Lands Tribunal ', 'Land Resumption Application', '2006'), 16 of documents were crawled.
At current level: ('Lands Tribunal ', 'Land Resumption Application', '2005'), 5 of documents were crawled.
At current level: ('Lands Tribunal ', 'Land Resumption Application', '2004'), 4 of documents were crawled.
At current level: ('Lands Tribunal ', 'Land Resumption Application', '2002'), 5 of documents were crawled.
At current level: ('Lands Tribunal ', 'Land Resumption Application', '2001'), 2 of documents were crawled.
At current level: ('Lands Tribunal ', 'Land Resumption Application', '2000'), 4 of documents were crawled.
At current level: ('Lands Tribunal ', 'Land Resumption Application', '1999'), 3 of documents were crawled.
At current level: ('Lands Tribunal '

At current level: ('Lands Tribunal ', 'Part I Possession Application', '1995'), 1 of documents were crawled.
At current level: ('Lands Tribunal ', 'Part I Possession Application', '1986'), 2 of documents were crawled.
At current level: ('Lands Tribunal ', 'Part I Possession Application', '1985'), 3 of documents were crawled.
At current level: ('Lands Tribunal ', 'Part I Possession Application', '1983'), 1 of documents were crawled.
At current level: ('Lands Tribunal ', 'Part I Possession Application', '1982'), 4 of documents were crawled.
At current level: ('Lands Tribunal ', 'Part I Possession Application', '1981'), 1 of documents were crawled.
At current level: ('Lands Tribunal ', 'Part II Possession Application', '2000'), 1 of documents were crawled.
At current level: ('Lands Tribunal ', 'Part II Possession Application', '1998'), 1 of documents were crawled.
At current level: ('Lands Tribunal ', 'Part II Possession Application', '1997'), 2 of documents were crawled.
At current level

At current level: ('Lands Tribunal ', 'Rating Appeal', '1984'), 10 of documents were crawled.
At current level: ('Lands Tribunal ', 'Rating Appeal', '1983'), 50 of documents were crawled.
At current level: ('Lands Tribunal ', 'Rating Appeal', '1983'), 22 of documents were crawled.
At current level: ('Lands Tribunal ', 'Rating Appeal', '1982'), 1 of documents were crawled.
At current level: ('Lands Tribunal ', 'Rating Appeal', '1980'), 5 of documents were crawled.
At current level: ('Miscellaneous  ', "Coroner's Court Death Inquest", '2013'), 9 of documents were crawled.
At current level: ('Miscellaneous  ', "Eastern Magistrates' Court Charge Case", '2016'), 2 of documents were crawled.
At current level: ('Miscellaneous  ', "Eastern Magistrates' Court Charge Case", '2015'), 5 of documents were crawled.
At current level: ('Miscellaneous  ', "Eastern Magistrates' Court Charge Case", '2011'), 1 of documents were crawled.
At current level: ('Miscellaneous  ', "Eastern Magistrates' Court Sum

## Part II. Data Analysis

In [9]:
def process_name(s):
    '''
    Split a case title into the parties on either side of the separator.

    Input:
        s - (string) case title such as 'A v. B', 'A 對 B' or 'A 訴 B'
    Output:
        (first party, second party), both stripped of surrounding whitespace.
        The second element is '' when no separator is found; both are '' when
        s is not a string (e.g. NaN for a missing title).
    '''
    if not isinstance(s, str):
        return '', ''
    # The dot is escaped and "v" must start a word, so only a standalone "v. "
    # splits the title -- not e.g. the "ve " inside "Steve Li".
    parts = re.split(r"\bv\. | 對 | 訴", s)
    if len(parts) > 1:
        return parts[0].strip(), parts[1].strip()
    return parts[0].strip(), ''

In [6]:
# Load the scraped records; 'data.csv' is the default output file of get_items().
df = pd.read_csv('data.csv')

In [7]:
# Inspect column dtypes and non-null counts of the loaded records.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88752 entries, 0 to 88751
Data columns (total 7 columns):
Category                    88752 non-null object
Subcategory                 88752 non-null object
Year                        88752 non-null int64
Date                        88559 non-null object
Case_ID                     88752 non-null object
Respondent_and_Appellant    88752 non-null object
Document_URL                88752 non-null object
dtypes: int64(1), object(6)
memory usage: 4.7+ MB


In [8]:
# Preview the first rows of the loaded records.
df.head()

Unnamed: 0,Category,Subcategory,Year,Date,Case_ID,Respondent_and_Appellant,Document_URL
0,Court of Final Appeal,Final Appeal (Civil),2018,2018-07-04,FACV1/2018,DIRECTOR OF IMMIGRATION v. QT,https://legalref.judiciary.hk/lrs/common/ju/ju...
1,Court of Final Appeal,Final Appeal (Civil),2018,2018-04-30,FACV1/2018,ABN AMRO BANK N.V. AND OTHERS v. QT,https://legalref.judiciary.hk/lrs/common/ju/ju...
2,Court of Final Appeal,Final Appeal (Civil),2018,2018-09-13,FACV2/2018,ESTHER CHAN PUI KWAN（陳佩君）also known as CHAN PU...,https://legalref.judiciary.hk/lrs/common/ju/ju...
3,Court of Final Appeal,Final Appeal (Civil),2018,2018-07-11,FACV2/2018,ESTHER CHAN PUI KWAN（陳佩君）also known as CHAN PU...,https://legalref.judiciary.hk/lrs/common/ju/ju...
4,Court of Final Appeal,Final Appeal (Civil),2018,2018-09-13,FACV3/2018,CHANG WA SHAN v. ESTHER CHAN PUI KWAN（陳佩君）al...,https://legalref.judiciary.hk/lrs/common/ju/ju...


In [10]:
# Split every case title once, then unzip the pairs into two parallel lists:
# A holds the first-named party, B the second (or '' when no separator was found).
split_names = [process_name(text) for text in df['Respondent_and_Appellant']]
A = [first_party for first_party, _ in split_names]
B = [second_party for _, second_party in split_names]

In [11]:
# Attach the two parsed party names as new columns (A and B have one entry per row of df).
df['Appellant'] = A
df['Respondant'] = B
# Preview the first rows only instead of rendering the whole frame.
df.head(10)

Unnamed: 0,Category,Subcategory,Year,Date,Case_ID,Respondent_and_Appellant,Document_URL,Appellant,Respondant
0,Court of Final Appeal,Final Appeal (Civil),2018,2018-07-04,FACV1/2018,DIRECTOR OF IMMIGRATION v. QT,https://legalref.judiciary.hk/lrs/common/ju/ju...,DIRECTOR OF IMMIGRATION,QT
1,Court of Final Appeal,Final Appeal (Civil),2018,2018-04-30,FACV1/2018,ABN AMRO BANK N.V. AND OTHERS v. QT,https://legalref.judiciary.hk/lrs/common/ju/ju...,ABN AMRO BANK N.V. AND OTHERS,QT
2,Court of Final Appeal,Final Appeal (Civil),2018,2018-09-13,FACV2/2018,ESTHER CHAN PUI KWAN（陳佩君）also known as CHAN PU...,https://legalref.judiciary.hk/lrs/common/ju/ju...,ESTHER CHAN PUI KWAN（陳佩君）also known as CHAN PU...,CHANG WA SHAN
3,Court of Final Appeal,Final Appeal (Civil),2018,2018-07-11,FACV2/2018,ESTHER CHAN PUI KWAN（陳佩君）also known as CHAN PU...,https://legalref.judiciary.hk/lrs/common/ju/ju...,ESTHER CHAN PUI KWAN（陳佩君）also known as CHAN PU...,CHANG WA SHAN
4,Court of Final Appeal,Final Appeal (Civil),2018,2018-09-13,FACV3/2018,CHANG WA SHAN v. ESTHER CHAN PUI KWAN（陳佩君）al...,https://legalref.judiciary.hk/lrs/common/ju/ju...,CHANG WA SHAN,ESTHER CHAN PUI KWAN（陳佩君）also known as CHAN PU...
5,Court of Final Appeal,Final Appeal (Civil),2018,2018-07-11,FACV3/2018,CHANG WA SHAN v. ESTHER CHAN PUI KWAN（陳佩君）al...,https://legalref.judiciary.hk/lrs/common/ju/ju...,CHANG WA SHAN,ESTHER CHAN PUI KWAN（陳佩君）also known as CHAN PU...
6,Court of Final Appeal,Final Appeal (Civil),2018,2018-07-06,FACV4/2018,DESIGNING HONG KONG LTD v. THE TOWN PLANNING B...,https://legalref.judiciary.hk/lrs/common/ju/ju...,DESIGNING HONG KONG LTD,THE TOWN PLANNING BOARD
7,Court of Final Appeal,Final Appeal (Civil),2018,2018-05-15,FACV4/2018,DESIGNING HONG KONG LTD v. THE TOWN PLANNING B...,https://legalref.judiciary.hk/lrs/common/ju/ju...,DESIGNING HONG KONG LTD,"THE TOWN PLANNING BOARD, SECRETARY FOR JUSTICE"
8,Court of Final Appeal,Final Appeal (Civil),2018,2018-10-12,FACV5/2018,SECURITIES AND FUTURES COMMISSION v. YIU HOI Y...,https://legalref.judiciary.hk/lrs/common/ju/ju...,SECURITIES AND FUTURES COMMISSION,YIU HOI YING CHARLES AND OTHERS
9,Court of Final Appeal,Final Appeal (Civil),2018,2018-10-11,FACV6/2018,MOODY’S INVESTORS SERVICE HONG KONG LIMITED v....,https://legalref.judiciary.hk/lrs/common/ju/ju...,MOODY’S INVESTORS SERVICE HONG KONG LIMITED,SECURITIES AND FUTURES COMMISSION


In [16]:
# A row counts as "processed" when the title split produced a non-empty second party.
has_respondant = df['Respondant'] != ''
df_processed = df[has_respondant]
df_unprocessed = df[~has_respondant]
process_rate = len(df_processed) / len(df) * 100
print('There are {} of records where appellant/respondant are successfully identified, consisting of {} % of original data'.format(len(df_processed), process_rate))

There are 83996 of records where appellant/respondant are successfully identified, consisting of 94.64124752118262 % of original data


In [39]:
def _count_cases_by(party_column):
    '''Count non-null Case_ID values per distinct value of party_column in df_processed, most frequent first.'''
    per_party = df_processed[['Case_ID', party_column]].groupby(by = party_column).count()
    per_party = per_party.sort_values(by = 'Case_ID', ascending = False)
    return per_party.rename(columns={'Case_ID': 'Count'})

appellant_freq = _count_cases_by('Appellant')
respondant_freq = _count_cases_by('Respondant')

### The most frequent appellants in lawsuits

In [40]:
# Ten most frequent values of the 'Appellant' column (appellant_freq is sorted by descending count).
appellant_freq.head(10)

Unnamed: 0_level_0,Count
Appellant,Unnamed: 1_level_1
HKSAR,9142
香港特別行政區,8048
THE QUEEN,3213
R.,1796
香港特別行政區政府,1247
SECRETARY FOR JUSTICE,614
ATTORNEY GENERAL,548
SECURITIES AND FUTURES COMMISSION,131
COMMISSIONER OF INLAND REVENUE,122
律政司司長,120


### The most frequent respondents/defendants in lawsuits

In [41]:
# Ten most frequent values of the 'Respondant' column (respondant_freq is sorted by descending count).
respondant_freq.head(10)

Unnamed: 0_level_0,Count
Respondant,Unnamed: 1_level_1
入境事務處處長,4621
THE QUEEN,2464
HKSAR,480
TORTURE CLAIMS APPEAL BOARD / NON-REFOULEMENT CLAIMS PETITION OFFICE,369
R.,277
SECRETARY FOR JUSTICE,266
DIRECTOR OF IMMIGRATION,262
COMMISSIONER OF RATING AND VALUATION,243
ATTORNEY GENERAL,237
DIRECTOR OF LANDS,159
