# Metric Plots

In [32]:
import pandas as pd
import ast

def read_csv_with_lists(file_path):
    # Read the CSV file
    df = pd.read_csv(file_path)
    
    # Iterate over each column
    for column in df.columns:
        # Check if the column contains strings that represent lists
        if df[column].dtype == object:
            try:
                # Convert the strings to lists using ast.literal_eval
                df[column] = df[column].apply(ast.literal_eval)
            except (ValueError, SyntaxError):
                # Skip the column if it cannot be converted to a list
                pass
    
    return df

In [33]:
partner_gold_df = read_csv_with_lists('data/gold_labels_with_files.csv')
partner_gold_df['file_number'] = partner_gold_df['file_number_gold_cleaned']#.columns
partner_gold_df.head(2)

Unnamed: 0,What is the file number of the case?,file_number_gold_cleaned,raw_file_text,raw_file_name,Timestamp,Email Address,What was the date of the hearing? [mm/dd/yyyy],What was the date of the decision? [mm/dd/yyyy],Who was the member adjudicating the decision?,What was the location of the landlord tenant board?,...,"If yes to the previous question, which of the following were applicable to the tenant?",Did the decision state the tenant was given prior notice for the eviction?,"If the tenant was given prior notice for the eviction, how much notice was given?",Did the decisions state postponement would result in the tenant accruing additional arrears?,Which other specific applications of the landlord or the tenant were mentioned?,Did the decision mention the validity of an N4 eviction notice?,Were there detail(s) in the decision not captured by this questionnaire that should be included?,Exec Review,Review Status,file_number
0,CEL-62600-16,CEL-62600-16,Metadata:\nDate:\t2017-01-18\nFile number:\t\n...,CEL-62600-16.txt,1/31/2021 22:26:19,jac.huang@mail.utoronto.ca,1/5/2017,1/18/2017,Avril Cardoso,Mississauga,...,,Yes,Not stated,No,L1: Application to Evict a Tenant for Non-paym...,No,,,,CEL-62600-16
1,CEL-62852-16,CEL-62852-16,Metadata:\nDate:\t2017-01-09\nFile number:\t\n...,CEL-62852-16.txt,1/31/2021 22:35:03,jac.huang@mail.utoronto.ca,12/14/2016,1/9/2017,Tiisetso Russell,Mississauga,...,"reliance on social assistance, children's scho...",Yes,Not stated,No,No other specific applications were mentioned,No,,,,CEL-62852-16


In [34]:
gold_df = read_csv_with_lists('data/outcome_extraction_testing.csv')
print(gold_df.value_counts('new_case_outcome', normalize = True))
# gold_df['new_hearing_date'] = partner_gold_df
gold_df.head(1)

new_case_outcome
No relief            0.639881
Relief               0.349702
Conditional Order    0.010417
dtype: float64


Unnamed: 0,raw_file_text,raw_file_name,full_cleaned,metadata,content,case_citation,file_number,language,year,ltb_location,decision_date,hearing_date,url,adjudicating_member,new_case_outcome
0,Metadata:\nDate:\t2017-01-18\nFile number:\t\n...,CEL-62600-16.txt,"[Metadata:, Date: 2017-01-18, File number:, CE...","[Date: 2017-01-18, File number:, CEL-62600-16,...",[Arrears Worksheet File Number: CEL-62600-16 T...,"CEL-62600-16 (Re), 2017 CanLII 9545 (ON LTB)",CEL-62600-16,English,2016,Mississauga,01/18/2017,01/30/2017,https://canlii.ca/t/gxq6n,Avril Cardoso,No relief


In [35]:
# Assuming "raw_file_name" is a column in both partner_gold_df and gold_df

# Perform inner join based on "raw_file_name" column
merged_df = gold_df.merge(partner_gold_df[["raw_file_name", "What was the date of the hearing? [mm/dd/yyyy]"]], on = "raw_file_name", how = "inner")

# Copy the values from column A of partner_gold_df to a new column in gold_df
gold_df["new_hearing_date"] = merged_df["What was the date of the hearing? [mm/dd/yyyy]"]
gold_df.head(1)

Unnamed: 0,raw_file_text,raw_file_name,full_cleaned,metadata,content,case_citation,file_number,language,year,ltb_location,decision_date,hearing_date,url,adjudicating_member,new_case_outcome,new_hearing_date
0,Metadata:\nDate:\t2017-01-18\nFile number:\t\n...,CEL-62600-16.txt,"[Metadata:, Date: 2017-01-18, File number:, CE...","[Date: 2017-01-18, File number:, CEL-62600-16,...",[Arrears Worksheet File Number: CEL-62600-16 T...,"CEL-62600-16 (Re), 2017 CanLII 9545 (ON LTB)",CEL-62600-16,English,2016,Mississauga,01/18/2017,01/30/2017,https://canlii.ca/t/gxq6n,Avril Cardoso,No relief,1/5/2017


In [36]:
# Assuming "file_number" is a column in both partner_gold_df and gold_df

# Perform inner join based on "file_number" column
merged_df = gold_df.merge(partner_gold_df[["raw_file_name", "What was the date of the decision? [mm/dd/yyyy]"]], on = "raw_file_name", how = "inner")

# Copy the values from column A of partner_gold_df to a new column in gold_df
gold_df["new_decision_date"] = merged_df["What was the date of the decision? [mm/dd/yyyy]"]
gold_df.head(1)

Unnamed: 0,raw_file_text,raw_file_name,full_cleaned,metadata,content,case_citation,file_number,language,year,ltb_location,decision_date,hearing_date,url,adjudicating_member,new_case_outcome,new_hearing_date,new_decision_date
0,Metadata:\nDate:\t2017-01-18\nFile number:\t\n...,CEL-62600-16.txt,"[Metadata:, Date: 2017-01-18, File number:, CE...","[Date: 2017-01-18, File number:, CEL-62600-16,...",[Arrears Worksheet File Number: CEL-62600-16 T...,"CEL-62600-16 (Re), 2017 CanLII 9545 (ON LTB)",CEL-62600-16,English,2016,Mississauga,01/18/2017,01/30/2017,https://canlii.ca/t/gxq6n,Avril Cardoso,No relief,1/5/2017,1/18/2017


# Creating `silver_df` for eval

In [37]:
silver_df = gold_df.drop(columns = [col for col in gold_df.columns if col not in ['full_cleaned', 'metadata', 'content']])
silver_df['metadata'] = silver_df["metadata"].apply(lambda x: " ".join(x))
silver_df['content'] = silver_df["content"].apply(lambda x: " ".join(x))
silver_df

Unnamed: 0,full_cleaned,metadata,content
0,"[Metadata:, Date: 2017-01-18, File number:, CE...",Date: 2017-01-18 File number: CEL-62600-16 CEL...,Arrears Worksheet File Number: CEL-62600-16 Ti...
1,"[Metadata:, Date: 2017-01-09, File number:, CE...",Date: 2017-01-09 File number: CEL-62852-16 CEL...,Arrears Worksheet File Number: CEL-62852-16 Ti...
2,"[Metadata:, Date: 2017-01-09, File number:, CE...",Date: 2017-01-09 File number: CEL-63024-16 CEL...,Arrears Worksheet File Number: CEL-63024-16 Ti...
3,"[Metadata:, Date: 2017-01-20, File number:, CE...",Date: 2017-01-20 File number: CEL-63056-16 CEL...,Arrears Worksheet File Number: CEL-63056-16 Ti...
4,"[Metadata:, Date: 2017-02-03, File number:, CE...",Date: 2017-02-03 File number: CEL-63193-16 CEL...,Arrears Worksheet File Number: CEL-63193-16 Ti...
...,...,...,...
667,"[Metadata:, Date: 2018-12-13, File number:, TS...",Date: 2018-12-13 File number: TSL-98918-18-RV ...,Order under Section 21.2 of the Statutory Powe...
668,"[Metadata:, Date: 2018-11-23, File number:, TS...",Date: 2018-11-23 File number: TSL-99691-18 TSL...,Order under Section 69 Residential Tenancies A...
669,"[Metadata:, Date: 2018-11-29, File number:, TS...",Date: 2018-11-29 File number: TSL-99824-18 TSL...,Order under Section 69 Residential Tenancies A...
670,"[Metadata:, Date: 2018-12-12, File number:, TS...",Date: 2018-12-12 File number: TSL-99900-18 TSL...,Order under Section 69 Residential Tenancies A...


# File Number + Citation

In [38]:
import re

def get_case_citation(metadata_list):
    """
    Extracts the case citation from a list of metadata lines.

    This function searches through the metadata lines for a line containing "Citation:" or "Référence:"
    and extracts the citation information from that line.

    Parameters
    ----------
    metadata_list : list of str
        A list of metadata lines.

    Returns
    -------
    str or None
        The extracted case citation, or None if no citation is found.

    Examples
    --------
    >>> metadata = ["Title: Example Case", "Citation: ABC123 (LTB)"]
    >>> get_case_citation(metadata)
    'ABC123 (LTB)'

    >>> metadata = ["Title: Another Case", "Référence: XYZ789 (LTB)"]
    >>> get_case_citation(metadata)
    'XYZ789 (LTB)'
    """
    if isinstance(metadata_list, str):
        metadata_list = metadata_list.split("\n")

    for line in metadata_list:
        if "Citation:" in line:
            citation_start = line.find("Citation: ")
            citation_end = line.find("LTB)") + 4
            return line[citation_start:citation_end].replace("Citation: ", "").strip()
        elif "Référence: " in line:
            citation_start = line.find("Référence: ")
            citation_end = line.find("LTB)") + 4
            return line[citation_start:citation_end].replace("Référence: ", "").strip()
    return None

def get_file_number(metadata_list):
    """
    Extracts the file number from a list of metadata lines.

    This function concatenates the metadata lines into a single string and extracts the file number
    from that string. The file number is obtained either after "File number:" or "Numéro de dossier:".

    Parameters
    ----------
    metadata_list : list of str
        A list of metadata lines.

    Returns
    -------
    str or None
        The extracted file number, or None if no file number is found.

    Examples
    --------
    >>> metadata = ["File number: TNL-10001-18", "Citation: ABC123 (LTB)"]
    >>> get_file_number(metadata)
    'TNL-10001-18'

    >>> metadata = ["Numéro de dossier: XYZ789", "Référence: DEF456 (LTB)"]
    >>> get_file_number(metadata)
    'XYZ789'
    """
    if isinstance(metadata_list, list):
        metadata_str = " ".join(metadata_list)
    else:
        metadata_str = metadata_list

    if "Citation: " in metadata_str:
        file_nums = metadata_str[metadata_str.find("File number: ") + len("File number: ") : metadata_str.find("Citation:")].strip()
    elif "Référence: " in metadata_str:
        file_nums = metadata_str[metadata_str.find("Numéro de dossier: ") + len("Numéro de dossier: ") : metadata_str.find("Référence")].strip()

    if len(file_nums) == 0:
        return None

    file_nums = file_nums.replace(";", " ")

    file_num = list(set(file_nums.split()))
    file_num = ";".join(file_num)
    file_num = re.sub(r'[^\w\s]$', '', file_num)

    if ";" in file_num:
        file_num = list(set(file_num.split(";")))
        file_num = [re.sub(r'[\(\)]', '', num) for num in file_num]
        file_num = ";".join(file_num)

    file_num = re.sub(r'[\(\)]', '', file_num)

    return file_num

In [39]:
for row in silver_df.index:
    silver_df.loc[row, 'citation'] = get_case_citation(silver_df.loc[row, 'metadata'])
    silver_df.loc[row, 'file_number'] = get_file_number(silver_df.loc[row, 'metadata'])

silver_df

Unnamed: 0,full_cleaned,metadata,content,citation,file_number
0,"[Metadata:, Date: 2017-01-18, File number:, CE...",Date: 2017-01-18 File number: CEL-62600-16 CEL...,Arrears Worksheet File Number: CEL-62600-16 Ti...,"CEL-62600-16 (Re), 2017 CanLII 9545 (ON LTB)",CEL-62600-16
1,"[Metadata:, Date: 2017-01-09, File number:, CE...",Date: 2017-01-09 File number: CEL-62852-16 CEL...,Arrears Worksheet File Number: CEL-62852-16 Ti...,"CEL-62852-16 (Re), 2017 CanLII 9535 (ON LTB)",CEL-62852-16
2,"[Metadata:, Date: 2017-01-09, File number:, CE...",Date: 2017-01-09 File number: CEL-63024-16 CEL...,Arrears Worksheet File Number: CEL-63024-16 Ti...,"CEL-63024-16 (Re), 2017 CanLII 9543 (ON LTB)",CEL-63024-16
3,"[Metadata:, Date: 2017-01-20, File number:, CE...",Date: 2017-01-20 File number: CEL-63056-16 CEL...,Arrears Worksheet File Number: CEL-63056-16 Ti...,"CEL-63056-16 (Re), 2017 CanLII 9537 (ON LTB)",CEL-63056-16
4,"[Metadata:, Date: 2017-02-03, File number:, CE...",Date: 2017-02-03 File number: CEL-63193-16 CEL...,Arrears Worksheet File Number: CEL-63193-16 Ti...,"CEL-63193-16 (Re), 2017 CanLII 30828 (ON LTB)",CEL-63193-16
...,...,...,...,...,...
667,"[Metadata:, Date: 2018-12-13, File number:, TS...",Date: 2018-12-13 File number: TSL-98918-18-RV ...,Order under Section 21.2 of the Statutory Powe...,"TSL-98918-18-RV (Re), 2018 CanLII 141679 (ON LTB)",TSL-98918-18-RV
668,"[Metadata:, Date: 2018-11-23, File number:, TS...",Date: 2018-11-23 File number: TSL-99691-18 TSL...,Order under Section 69 Residential Tenancies A...,"TSL-99691-18 (Re), 2018 CanLII 141675 (ON LTB)",TSL-99691-18
669,"[Metadata:, Date: 2018-11-29, File number:, TS...",Date: 2018-11-29 File number: TSL-99824-18 TSL...,Order under Section 69 Residential Tenancies A...,"TSL-99824-18 (Re), 2018 CanLII 141673 (ON LTB)",TSL-99824-18
670,"[Metadata:, Date: 2018-12-12, File number:, TS...",Date: 2018-12-12 File number: TSL-99900-18 TSL...,Order under Section 69 Residential Tenancies A...,"TSL-99900-18 (Re), 2018 CanLII 140403 (ON LTB)",TSL-99900-18


In [40]:
from sklearn.metrics import classification_report, f1_score, accuracy_score, precision_score, recall_score
from jellyfish import jaro_winkler_similarity
import plotly.express as px

In [41]:
def eval(y_pred: list, y_true: list, metric: str = "accuracy"):

    assert len(y_true) == len(y_pred), f"The two lists must be the same length.\n\tlen(y_pred) == {len(y_pred)}, len(y_true) == {len(y_true)}"

    if metric == "accuracy": # accuracy
        return(accuracy_score(y_true = y_true, y_pred = y_pred))

    if metric == "precision": # recall
        return(precision_score(y_true = y_true, y_pred = y_pred, average = "weighted", zero_division = 1))

    if metric == "recall": # recall
        return(recall_score(y_true = y_true, y_pred = y_pred, average = "weighted", zero_division = 1))

    if metric == "f1 score": # f1 score
        return(f1_score(y_true = y_true, y_pred = y_pred, average = "weighted"))

    if metric == "cr": # classification report
        return(classification_report(y_true = y_true, y_pred = y_pred))

    if metric == "Jaro-Winkler Similarity": # Jaro-Winkler Similarity similarity with 0.7 threshold
        total_above_thresh = 0
        threshold = 0.7
        for true, pred in zip(y_true, y_pred):
            jw_score = jaro_winkler_similarity(true, pred)
            if jw_score >= threshold:
                total_above_thresh += 1
        return(total_above_thresh / len(y_true)) # number above threshold

In [42]:
print(gold_df.columns)
print(silver_df.columns)

Index(['raw_file_text', 'raw_file_name', 'full_cleaned', 'metadata', 'content',
       'case_citation', 'file_number', 'language', 'year', 'ltb_location',
       'decision_date', 'hearing_date', 'url', 'adjudicating_member',
       'new_case_outcome', 'new_hearing_date', 'new_decision_date'],
      dtype='object')
Index(['full_cleaned', 'metadata', 'content', 'citation', 'file_number'], dtype='object')


In [43]:
results = {}
results['extraction'] = []
results['metric'] = []
results['score'] = []

In [44]:
for metric in ["accuracy", "precision", "recall", "f1 score", "Jaro-Winkler Similarity"]:
    results["extraction"].append("file number")
    results['metric'].append(metric)
    results['score'].append(eval(y_pred = silver_df['file_number'], y_true = gold_df['file_number'], metric = metric))

# pd.DataFrame(results)

# Create the bar chart using Plotly Express
fig = px.bar(pd.DataFrame(results),
             x = 'metric',
             y = 'score',
             title = 'Extraction Metrics',
             color = "extraction")

# Display the chart
fig.show()

# LTB Location

In [45]:
import re

def find_all_positions(text: str, keyword: str):
    """
    Finds all positions of a keyword in a given text.

    This function searches for a keyword in a given text and returns a list of positions where the keyword is found.

    Parameters
    ----------
    text : str
        The text to search within.
    keyword : str
        The keyword to find in the text.

    Returns
    -------
    list
        A list of integers representing the positions of the keyword in the text.

    Examples
    --------
    >>> find_all_positions("This is an example sentence.", "example")
    [11]
    """
    positions = []
    start = 0
    while True:
        index = text.find(keyword, start)
        if index == -1:
            break
        positions.append(index)
        start = index + 1
    return positions

def get_postal_code(text: str):
    """
    Finds a postal code in the format "L4Z2G5" within the given text.

    Args:
        text (str): The input text to search for a postal code.

    Returns:
        str: The postal code found in the text. Returns an empty string if no postal code is found.

    Examples:
        >>> find_postal_code("This is a sample text with a postal code L4Z2G5.")
        "L4Z2G5"
    """

    pattern = r"\b[A-Za-z]\d[A-Za-z]\d[A-Za-z]\d\b"
    match = re.search(pattern, text)

    if match:
        return match.group()
    else:
        return None

def find_closest_subset(text: str, keywords: list):
    """
    Finds a subset of the given text where a date and any of the given keywords appear with the smallest distance between them,
    but only if the subset appears before the word "determination" in the lowercase text and does not contain the word "member".

    Args:
        text (str): The input text to search for the subset.
        keywords (list): The list of keywords to search for.

    Returns:
        tuple: A tuple containing the subset of the text where the date and keyword appear with the smallest distance between them,
               and the corresponding keyword. Returns an empty string and None if no match is found or if the subset appears after "determination"
               or contains the word "member".

    Examples:
        >>> find_closest_subset("The event will take place on April 23, 2018. The application was heard on April 25, 2018.", ["heard", "event"])
        ("The event will take place on April 23, 2018.", "event")

    """

    pattern = r"\b[A-Z][a-z]+ \d{1,2}, \d{4}\b"
    date_matches = re.findall(pattern, text)
    keyword_positions = [(m.start(), m.end(), keyword) for keyword in keywords for m in re.finditer(keyword, text)]

    if not date_matches or not keyword_positions:
        return "", None

    smallest_distance = float('inf')
    best_subset = ""
    best_keyword = None
    
    for date in date_matches:
        for start, end, keyword in keyword_positions:
            distance = abs(start - text.find(date))
            subset = text[min(start, text.find(date)): max(end, text.find(date))]

            if distance < smallest_distance and text.lower().find(best_subset.lower()) < (text.lower().find("determination") or text.lower().find("it is determinatined that")) and ("member" or "with the request to review") not in subset.lower():
                smallest_distance = distance
                best_subset = subset
                best_keyword = keyword

    if text.lower().find(best_subset.lower()) >= text.lower().find("determination") or "member" in best_subset.lower():
        return "", None

    return best_subset, best_keyword


def get_ltb_location_by_postal_code(case_content_str: str):
    """
    Helps to extract the location information from the given case content string using postal code lookup.

    Args:
        case_content_str (str): The case content string to extract the location from.

    Returns:
        str or None: Subset of text from the passed case string wherein the location appears near the postal code.

    Examples:
        >>> get_ltb_location_by_postal_code("The application was heard at L4Z 2G5.")
        "Mississauga"
    """

    # if there isn't a postal code, return None right away
    if not get_postal_code(case_content_str):
        return None

    pc_idx = case_content_str.find(get_postal_code(case_content_str))
    subset = case_content_str[pc_idx - 30 : pc_idx]

    if "ON" in subset:
        subset = subset.split("ON")[:-1]
    elif "Ontario" in subset:
        subset = subset.split("Ontario")[:-1]

    subset = " ".join(subset)
    
    if "floor" in subset.lower():
        floor_idx = subset.lower().find("floor")
        # print(floor_idx)
        subset = subset[floor_idx + len("floor") :].strip()
    
    return subset

def get_ltb_location(case_content_str: str):
    """
    Extracts the location information from the given case content string.

    Args:
        case_content_str (str): The case content string to extract the location from.

    Returns:
        str or None: The extracted location information if found, otherwise None.

    Examples:
        >>> get_ltb_location("The application was heard in Newmarket.")
        "Newmarket"
    """

    keywords = ["application was heard", "applications were heard", "was heard", "were heard together",
                "was held", "set to be heard",
                # "heard by telephone", "heard by teleconference", "heard via teleconference",
                "heard by", "heard by", "heard via",
                "motion were heard", "motion was heard", "came before the board in",
                "was then heard in", "were then heard in"]

    subset, keyword = find_closest_subset(text = case_content_str, keywords = keywords)

    if subset:
        subset = subset.replace(keyword, "")
        subset = subset.split()
        subset = [tok for tok in subset if tok not in ['in', 'on', 'via', 'together', 'by']]
        subset = " ".join(subset).strip()
        subset = subset.replace("With The Request To Review", "")

    if subset: # sometimes the hearing location is redacted and replaced with [CITY]
        if str(subset) != "[CITY]":
            return subset.title().replace("And Avenue, Unit 2 ", "").strip()

    # otherwise, go by postal code
    subset = get_ltb_location_by_postal_code(case_content_str = case_content_str)
    if subset:
        return subset.title().replace("And Avenue, Unit 2 ", "").strip()
    else:
        return None

In [46]:
for row in silver_df.index:
    silver_df.loc[row, 'ltb_location'] = get_ltb_location(silver_df.loc[row, 'content'])

silver_df.head(1)

Unnamed: 0,full_cleaned,metadata,content,citation,file_number,ltb_location
0,"[Metadata:, Date: 2017-01-18, File number:, CE...",Date: 2017-01-18 File number: CEL-62600-16 CEL...,Arrears Worksheet File Number: CEL-62600-16 Ti...,"CEL-62600-16 (Re), 2017 CanLII 9545 (ON LTB)",CEL-62600-16,Mississauga


In [47]:
print(gold_df.columns)
print(silver_df.columns)

Index(['raw_file_text', 'raw_file_name', 'full_cleaned', 'metadata', 'content',
       'case_citation', 'file_number', 'language', 'year', 'ltb_location',
       'decision_date', 'hearing_date', 'url', 'adjudicating_member',
       'new_case_outcome', 'new_hearing_date', 'new_decision_date'],
      dtype='object')
Index(['full_cleaned', 'metadata', 'content', 'citation', 'file_number',
       'ltb_location'],
      dtype='object')


In [48]:
gold_df['ltb_location'] = gold_df['ltb_location'].apply(lambda x: str(x))
silver_df['ltb_location'] = silver_df['ltb_location'].apply(lambda x: str(x))

# results

for metric in ["accuracy", "precision", "recall", "f1 score", "Jaro-Winkler Similarity"]:
    results["extraction"].append("ltb location")
    results['metric'].append(metric)
    results['score'].append(eval(y_pred = silver_df['ltb_location'], y_true = gold_df['ltb_location'], metric = metric))

# results

# pd.DataFrame(results)

# Create the bar chart using Plotly Express
fig = px.bar(pd.DataFrame(results),
             x = 'metric',
             y = 'score',
             title = 'Extraction Metrics',
             color = "extraction",
             barmode  = 'group')

# Display the chart
fig.show()

# Hearing Date

In [49]:
def find_date(text: str):
    """
    Finds a date in the format "Month Day, Year" within the given text.

    Args:
        text (str): The input text to search for a date.

    Returns:
        str: The date found in the text. Returns an empty string if no date is found.

    Examples:
        >>> find_date("The event will take place on April 23, 2018.")
        "April 23, 2018"
    """

    pattern = r"\b[A-Z][a-z]+ \d{1,2}, \d{4}\b"
    match = re.search(pattern, text)

    if match:
        return match.group()
    else:
        return ""

def get_hearing_date(case_content_str: str):
    """
    Extracts the hearing date from the given case content string.

    Args:
        case_content_str (str): The case content string to extract the hearing date from.

    Returns:
        str or None: The extracted hearing date in the format "Month Day, Year" if found, otherwise None.

    Examples:
        >>> get_hearing_date("The application was heard on April 23, 2018. It is determined that...")
        "April 23, 2018"
    """

    for keyword in ["determinations:", "it is determined"]:
        if keyword in case_content_str.lower():
            kw_idx = case_content_str.find(keyword)
            break
        else:
            kw_idx = -1

    subset = case_content_str[case_content_str.lower().find("application") : kw_idx].strip()
    date = find_date(subset)

    if date:
        return date.strip()
        
    # otherwise return None
    return None

In [50]:
from datetime import datetime

def convert_date_format(date_str):
    try:
        # Parse the date
        date = datetime.strptime(date_str, "%B %d, %Y")
    except ValueError:
        # If parsing fails, try parsing with abbreviated month name
        date = datetime.strptime(date_str, "%b %d, %Y")

    # Convert to new format
    month = date.strftime("%m").lstrip('0')
    day = date.strftime("%d").lstrip('0')
    year = date.strftime("%Y")

    new_format_date = f"{month}/{day}/{year}"

    return new_format_date

# Usage
print(convert_date_format("February 13, 2018"))  # Returns: 2/13/2018
print(convert_date_format("Aug 1, 2018"))  # Returns: 8/1/2018

2/13/2018
8/1/2018


In [51]:
for row in silver_df.itertuples():

    try:
        silver_df.at[row.Index, 'hearing_date'] = convert_date_format(get_hearing_date(silver_df.loc[row.Index, 'content']))
    except Exception as any_error:
        silver_df.at[row.Index, 'hearing_date'] = "HEARING DATE NOT FOUND"

silver_df.head()

Unnamed: 0,full_cleaned,metadata,content,citation,file_number,ltb_location,hearing_date
0,"[Metadata:, Date: 2017-01-18, File number:, CE...",Date: 2017-01-18 File number: CEL-62600-16 CEL...,Arrears Worksheet File Number: CEL-62600-16 Ti...,"CEL-62600-16 (Re), 2017 CanLII 9545 (ON LTB)",CEL-62600-16,Mississauga,1/5/2017
1,"[Metadata:, Date: 2017-01-09, File number:, CE...",Date: 2017-01-09 File number: CEL-62852-16 CEL...,Arrears Worksheet File Number: CEL-62852-16 Ti...,"CEL-62852-16 (Re), 2017 CanLII 9535 (ON LTB)",CEL-62852-16,Mississauga,12/14/2016
2,"[Metadata:, Date: 2017-01-09, File number:, CE...",Date: 2017-01-09 File number: CEL-63024-16 CEL...,Arrears Worksheet File Number: CEL-63024-16 Ti...,"CEL-63024-16 (Re), 2017 CanLII 9543 (ON LTB)",CEL-63024-16,Mississauga,1/4/2017
3,"[Metadata:, Date: 2017-01-20, File number:, CE...",Date: 2017-01-20 File number: CEL-63056-16 CEL...,Arrears Worksheet File Number: CEL-63056-16 Ti...,"CEL-63056-16 (Re), 2017 CanLII 9537 (ON LTB)",CEL-63056-16,Mississauga,1/4/2017
4,"[Metadata:, Date: 2017-02-03, File number:, CE...",Date: 2017-02-03 File number: CEL-63193-16 CEL...,Arrears Worksheet File Number: CEL-63193-16 Ti...,"CEL-63193-16 (Re), 2017 CanLII 30828 (ON LTB)",CEL-63193-16,Mississauga,1/10/2017


In [52]:
gold_df['ltb_location'] = gold_df['ltb_location'].apply(lambda x: str(x))
silver_df['ltb_location'] = silver_df['ltb_location'].apply(lambda x: str(x))

# results

for metric in ["accuracy", "precision", "recall", "f1 score", "Jaro-Winkler Similarity"]:
    results["extraction"].append("hearing date")
    results['metric'].append(metric)
    results['score'].append(eval(y_pred = silver_df['hearing_date'], y_true = gold_df['new_hearing_date'], metric = metric))

results

# pd.DataFrame(results)

# Create the bar chart using Plotly Express
fig = px.bar(pd.DataFrame(results),
             x = 'metric',
             y = 'score',
             title = 'Extraction Metrics',
             color = "extraction",
             barmode  = 'group')

# Display the chart
fig.show()

# Decision Date

In [53]:
import re
from dateutil.parser import parse
import spacy
nlp = spacy.load("en_core_web_sm")

def find_date(text: str):
    """
    Finds a date in the format "Month Day, Year" within the given text.

    Args:
        text (str): The input text to search for a date.

    Returns:
        str: The date found in the text. Returns an empty string if no date is found.

    Examples:
        >>> find_date("The event will take place on April 23, 2018.")
        "April 23, 2018"
    """

    pattern = r"\b[A-Z][a-z]+ \d{1,2}, \d{4}\b"
    match = re.search(pattern, text)

    if match:
        return match.group()
    else:
        return ""

def extract_date(text, nlp = nlp):
    """
    Extracts a date from a string of text using spaCy's entity recognition.

    Args:
        text (str): The text to extract the date from.

    Returns:
        str: The extracted date string, or an empty string if no date is found.

    Examples:
        >>> extract_date("The event will take place on April 23, 2018.")
        "April 23, 2018"
    """

    doc = nlp(text)

    for entity in doc.ents:
        if entity.label_ == "DATE":
            return entity.text

    return ""

def convert_date(date_str):
    """
    Parses a date string in any format and converts it to the format "Month Day, Year".

    Args:
        date_str (str): The date string to parse.

    Returns:
        str: The parsed date string in the format "Month Day, Year", or an empty string if parsing fails.

    Examples:
        >>> convert_date("2022-05-31")
        "May 31, 2022"

        >>> convert_date("05/31/2018")
        "May 31, 2018"
    """

    try:
        parsed_date = parse(date_str)
        formatted_date = parsed_date.strftime("%B %d, %Y")
        return formatted_date
    except ValueError:
        return ""

def get_decision_date(case_content_str: str):
    """
    Extracts the decision date from the given case content string.

    Args:
        case_content_str (str): The case content string to extract the decision date from.

    Returns:
        str or None: The extracted decision date in the format "Month Day, Year" if found, otherwise None.

    Examples:
        >>> get_decision_date("The date order issued on April 23, 2018 states...")
        "April 23, 2018"
    """

    # intentionally searches these in this order. Any amendment would be the most recent date
    for keyword in ['date order amended', 'date issued', 'date order issued']: 
        if keyword in case_content_str.lower():
            di_idx = case_content_str.lower().find(keyword)
            subset = case_content_str[di_idx - 18 : di_idx].strip().split(". ")[-1]
            return subset.strip()
    
    else:
        if "date" in case_content_str.lower()[: 500]:
            subset = case_content_str[: 500]
            date_idx = case_content_str.lower().find('date')
            subset = case_content_str[date_idx + len('date') : date_idx + len('date') + 50].strip()
            subset = extract_date(subset).strip()
            return convert_date(subset).strip()
    
    # otherwise return None
    return None

In [54]:
from datetime import datetime

def convert_date_format(date_str):
    try:
        # Parse the date
        date = datetime.strptime(date_str, "%B %d, %Y")
    except ValueError:
        # If parsing fails, try parsing with abbreviated month name
        date = datetime.strptime(date_str, "%b %d, %Y")

    # Convert to new format
    month = date.strftime("%m").lstrip('0')
    day = date.strftime("%d").lstrip('0')
    year = date.strftime("%Y")

    new_format_date = f"{month}/{day}/{year}"

    return new_format_date

# Usage
print(convert_date_format("February 13, 2018"))  # Returns: 2/13/2018
print(convert_date_format("Aug 1, 2018"))  # Returns: 8/1/2018

2/13/2018
8/1/2018


In [55]:
for row in silver_df.itertuples():

    try:
        silver_df.at[row.Index, 'decision_date'] = convert_date_format(get_decision_date(silver_df.loc[row.Index, 'content']))
    except Exception as any_error:
        silver_df.at[row.Index, 'decision_date'] = "DECISION DATE NOT FOUND"

silver_df.head()

Unnamed: 0,full_cleaned,metadata,content,citation,file_number,ltb_location,hearing_date,decision_date
0,"[Metadata:, Date: 2017-01-18, File number:, CE...",Date: 2017-01-18 File number: CEL-62600-16 CEL...,Arrears Worksheet File Number: CEL-62600-16 Ti...,"CEL-62600-16 (Re), 2017 CanLII 9545 (ON LTB)",CEL-62600-16,Mississauga,1/5/2017,1/18/2017
1,"[Metadata:, Date: 2017-01-09, File number:, CE...",Date: 2017-01-09 File number: CEL-62852-16 CEL...,Arrears Worksheet File Number: CEL-62852-16 Ti...,"CEL-62852-16 (Re), 2017 CanLII 9535 (ON LTB)",CEL-62852-16,Mississauga,12/14/2016,1/9/2017
2,"[Metadata:, Date: 2017-01-09, File number:, CE...",Date: 2017-01-09 File number: CEL-63024-16 CEL...,Arrears Worksheet File Number: CEL-63024-16 Ti...,"CEL-63024-16 (Re), 2017 CanLII 9543 (ON LTB)",CEL-63024-16,Mississauga,1/4/2017,1/9/2017
3,"[Metadata:, Date: 2017-01-20, File number:, CE...",Date: 2017-01-20 File number: CEL-63056-16 CEL...,Arrears Worksheet File Number: CEL-63056-16 Ti...,"CEL-63056-16 (Re), 2017 CanLII 9537 (ON LTB)",CEL-63056-16,Mississauga,1/4/2017,1/9/2017
4,"[Metadata:, Date: 2017-02-03, File number:, CE...",Date: 2017-02-03 File number: CEL-63193-16 CEL...,Arrears Worksheet File Number: CEL-63193-16 Ti...,"CEL-63193-16 (Re), 2017 CanLII 30828 (ON LTB)",CEL-63193-16,Mississauga,1/10/2017,2/3/2017


In [56]:
# gold_df['ltb_location'] = gold_df['ltb_location'].apply(lambda x: str(x))
# silver_df['ltb_location'] = silver_df['ltb_location'].apply(lambda x: str(x))

# results

for metric in ["accuracy", "precision", "recall", "f1 score", "Jaro-Winkler Similarity"]:
    results["extraction"].append("decision date")
    results['metric'].append(metric)
    results['score'].append(eval(y_pred = silver_df['decision_date'], y_true = gold_df['new_decision_date'], metric = metric))

# results

pd.DataFrame(results)

# Create the bar chart using Plotly Express
fig = px.bar(pd.DataFrame(results),
             x = 'extraction',
             y = 'score',
             title = 'Extraction Metrics',
             color = "metric",
             barmode  = 'group')

# Display the chart
fig.show()

# Adjudicating Member

In [57]:
def get_adj_member(case_content_str: str):
    """
    Retrieves the adjudicating member(s) mentioned in the given case content string.

    Args:
        case_content_str (str): The input string containing the case content.

    Returns:
        str: The adjudicating member(s) mentioned in the case content. If no adjudicating member is found, returns "nan".

    Examples:
        >>> get_adjudicating_member("This is the entire case file. There are sentences and other text.")
        "Name of Adjudicating Member"

    Notes:
        The function looks for specific keywords in the `case_content_str` to identify the adjudicating member(s).
        The keywords are evaluated in the following order: "date issued", "date of reasons", and "date order issued".
        If multiple instances of the same keyword are found, the function extracts the adjacent text and processes it to retrieve the member(s).
        If only one instance of the keyword is found, the function extracts the adjacent text and processes it to retrieve the member(s).
        If no adjudicating member is found, the function returns "nan".

    Raises:
        TypeError: If `case_content_str` is not a string.

    """

    keyword_1 = "date issued" # this is the most reliable one
    keyword_2 = "date of reasons" # first fallback
    keyword_3 = "date order issued" # second fallback

    # find which is best for the case (in order of best option to worst option)
    if keyword_1 in case_content_str.lower():
        keyword = keyword_1
        # kw_idx = case_content_str.lower().find(keyword_1)
    
    elif keyword_2 in case_content_str.lower():
        keyword = keyword_2
        # kw_idx = case_content_str.lower().find(keyword_2)

    elif keyword_3 in case_content_str.lower():
        keyword = keyword_3
        # kw_idx = case_content_str.lower().find(keyword_3)

    # if nothing is found, better to return nothing than to return something clearly incorrect
    if not keyword:
        return "nan"
    
    # getting index of whichever keyword was found first
    kw_idxs = find_all_positions(text = case_content_str.lower(), keyword = keyword)
    
    
    ### If there are multiple members found ###

    if len(kw_idxs) > 1:

        adj_membs = []

        for kw_idx in kw_idxs:
                
            subset = case_content_str[kw_idx + len(keyword): kw_idx + 100] # subsetting to an arbitrary distance after the keyword location
            subset = subset.split(", ")[0].strip()

            # removing "member" if applicable
            if "member" in subset.lower():
                memb_idx = subset.lower().find("member")
                subset = subset[: memb_idx].strip()

            # removing "vice chair" if applicable
            if "vice chair" in subset.lower():
                memb_idx = subset.lower().find("vice chair")
                subset = subset[: memb_idx].strip()

            # removing "vice chair" if applicable
            if "vice-chair" in subset.lower():
                memb_idx = subset.lower().find("vice-chair")
                subset = subset[: memb_idx].strip()

            # return subset
            adj_membs.append(subset)

        return ", ".join(list(set([memb for memb in adj_membs if memb != ""]))) # removing empty and duplicate items
    
    ### If there's only one member found ###

    kw_idx = case_content_str.lower().find(keyword)

    subset = case_content_str[kw_idx + len(keyword): kw_idx + 100] # subsetting to an arbitrary distance after the keyword location
    subset = subset.split(", ")[0].strip()

    # removing "member" if applicable
    if "member" in subset.lower():
        memb_idx = subset.lower().find("member")
        subset = subset[: memb_idx].strip()

    # removing "vice chair" if applicable
    if "vice chair" in subset.lower():
        memb_idx = subset.lower().find("vice chair")
        subset = subset[: memb_idx].strip()

    # removing "vice chair" if applicable
    if "vice-chair" in subset.lower():
        memb_idx = subset.lower().find("vice-chair")
        subset = subset[: memb_idx].strip()

    return subset

In [58]:
for row in silver_df.itertuples():

    try:
        silver_df.at[row.Index, 'adjudicating_member'] = get_adj_member(silver_df.loc[row.Index, 'content']).replace("Vice Chair", "").replace("Vice-Chair", "").strip()
    except Exception as any_error:
        silver_df.at[row.Index, 'adjudicating_member'] = "MEMBER NOT FOUND"

silver_df.head(1)

Unnamed: 0,full_cleaned,metadata,content,citation,file_number,ltb_location,hearing_date,decision_date,adjudicating_member
0,"[Metadata:, Date: 2017-01-18, File number:, CE...",Date: 2017-01-18 File number: CEL-62600-16 CEL...,Arrears Worksheet File Number: CEL-62600-16 Ti...,"CEL-62600-16 (Re), 2017 CanLII 9545 (ON LTB)",CEL-62600-16,Mississauga,1/5/2017,1/18/2017,Avril Cardoso


In [59]:
gold_df['adjudicating_member'] = gold_df['adjudicating_member'].apply(lambda x: str(x))
silver_df['adjudicating_member'] = silver_df['adjudicating_member'].apply(lambda x: str(x))

# for metric in ["accuracy", "precision", "recall", "f1 score", "Jaro-Winkler Similarity"]:
#     results["extraction"].append("adjudicating member")
#     results['metric'].append(metric)
#     results['score'].append(eval(y_pred = silver_df['adjudicating_member'], y_true = gold_df['adjudicating_member'], metric = metric))

# results

pd.DataFrame(results)

Unnamed: 0,extraction,metric,score
0,file number,accuracy,0.96875
1,file number,precision,1.0
2,file number,recall,0.96875
3,file number,f1 score,0.96875
4,file number,Jaro-Winkler Similarity,0.997024
5,ltb location,accuracy,0.952381
6,ltb location,precision,0.982185
7,ltb location,recall,0.952381
8,ltb location,f1 score,0.951221
9,ltb location,Jaro-Winkler Similarity,0.953869


In [60]:
final_results_df = pd.DataFrame(results)
final_results_df["Extraction"] = final_results_df["extraction"]
final_results_df["Metric"] = final_results_df["metric"]
final_results_df["Score"] = final_results_df["score"]

final_results_df["Extraction"] = final_results_df["extraction"].apply(lambda x: x.title())
final_results_df["Metric"] = final_results_df["metric"].apply(lambda x: x.title())
final_results_df["Score"] = final_results_df["score"].apply(lambda x: round(x * 100, 2))

final_results_df = final_results_df.drop(columns = ['extraction', 'metric', 'score'])
final_results_df

Unnamed: 0,Extraction,Metric,Score
0,File Number,Accuracy,96.88
1,File Number,Precision,100.0
2,File Number,Recall,96.88
3,File Number,F1 Score,96.88
4,File Number,Jaro-Winkler Similarity,99.7
5,Ltb Location,Accuracy,95.24
6,Ltb Location,Precision,98.22
7,Ltb Location,Recall,95.24
8,Ltb Location,F1 Score,95.12
9,Ltb Location,Jaro-Winkler Similarity,95.39


In [63]:
color_palette = [
    "#f79256",
    "#fbd1a2",
    "#7dcfb6",
    "#00b2ca",
    # "#736FF6", # darker purple
    '#B2B1F6', # lighter purple
    #"#1d4e89"] # my own custom
    ]

color_palette = [
    "#f0f9e8",
    "#bae4bc",
    "#7bccc4",
    "#43a2ca",
    # "#0868ac"
    "#B2B1F6"
]

# color_palette = ['#222222', '#AAAAAA', '#FFFFFF']  # Classic Neutrals
# color_palette = ['#336699', '#6699CC', '#99CCFF']  # Corporate Blues
# color_palette = ['#336633', '#66CC99', '#CCFFDD']  # Subtle Greens
# color_palette = ['#333333', '#777777', '#CCCCCC']  # Sophisticated Grays
# color_palette = ['#663399', '#9966CC', '#CC99FF']  # Professional Purples
# color_palette = ['#444444', '#888888', '#DDDDDD']  # Elegant Monochrome

# Create the bar chart with side-by-side bars and bar values
fig = px.bar(final_results_df, x = 'Extraction', y = 'Score', color = 'Metric',
             title = 'Bar Chart with Side-by-Side Bars', barmode = 'group',
             text = 'Score',
             color_discrete_sequence = color_palette)

# Configure text position and format
fig.update_traces(textposition = 'inside', textfont = dict(size = 10, color = 'black', family = 'Times'),
                  insidetextfont = dict(size = 13, color = 'black', family = 'Times'))

# updating title font 
fig.update_layout(
    xaxis_title = "Extracted Information",
    yaxis_title = "Score (%)", 
    title = {
        'text': 'Evaluation of Rule-Based Extraction Methods on Labelled Dataset (672 cases)',
        'font': {'size': 24},
        'x': 0.5,  # Set x position to center
        'xanchor': 'center',  # Set x anchor to center
        'y': 0.95  # Adjust y position as needed
    },
    xaxis = {'title': 'Extraction', 'tickfont': {'size': 15}, 'title_font': {'size': 20}},
    yaxis = {'title': 'Score', 'tickfont': {'size': 15}, 'title_font': {'size': 20}},
    # coloraxis = {'colorbar': {'tickfont': {'family': 'times', 'weight': 'bold'}}}
    coloraxis = {'colorbar': {'tickfont': {'size': 15}, 'title_font': {'size': 20}}}
)

# Display the chart
fig.show()

In [64]:
silver_df

Unnamed: 0,full_cleaned,metadata,content,citation,file_number,ltb_location,hearing_date,decision_date,adjudicating_member
0,"[Metadata:, Date: 2017-01-18, File number:, CE...",Date: 2017-01-18 File number: CEL-62600-16 CEL...,Arrears Worksheet File Number: CEL-62600-16 Ti...,"CEL-62600-16 (Re), 2017 CanLII 9545 (ON LTB)",CEL-62600-16,Mississauga,1/5/2017,1/18/2017,Avril Cardoso
1,"[Metadata:, Date: 2017-01-09, File number:, CE...",Date: 2017-01-09 File number: CEL-62852-16 CEL...,Arrears Worksheet File Number: CEL-62852-16 Ti...,"CEL-62852-16 (Re), 2017 CanLII 9535 (ON LTB)",CEL-62852-16,Mississauga,12/14/2016,1/9/2017,Tiisetso Russell
2,"[Metadata:, Date: 2017-01-09, File number:, CE...",Date: 2017-01-09 File number: CEL-63024-16 CEL...,Arrears Worksheet File Number: CEL-63024-16 Ti...,"CEL-63024-16 (Re), 2017 CanLII 9543 (ON LTB)",CEL-63024-16,Mississauga,1/4/2017,1/9/2017,Tiisetso Russell
3,"[Metadata:, Date: 2017-01-20, File number:, CE...",Date: 2017-01-20 File number: CEL-63056-16 CEL...,Arrears Worksheet File Number: CEL-63056-16 Ti...,"CEL-63056-16 (Re), 2017 CanLII 9537 (ON LTB)",CEL-63056-16,Mississauga,1/4/2017,1/9/2017,Tiisetso Russell
4,"[Metadata:, Date: 2017-02-03, File number:, CE...",Date: 2017-02-03 File number: CEL-63193-16 CEL...,Arrears Worksheet File Number: CEL-63193-16 Ti...,"CEL-63193-16 (Re), 2017 CanLII 30828 (ON LTB)",CEL-63193-16,Mississauga,1/10/2017,2/3/2017,Karen Wallace
...,...,...,...,...,...,...,...,...,...
667,"[Metadata:, Date: 2018-12-13, File number:, TS...",Date: 2018-12-13 File number: TSL-98918-18-RV ...,Order under Section 21.2 of the Statutory Powe...,"TSL-98918-18-RV (Re), 2018 CanLII 141679 (ON LTB)",TSL-98918-18-RV,Toronto,11/8/2018,12/13/2018,Nancy Henderson
668,"[Metadata:, Date: 2018-11-23, File number:, TS...",Date: 2018-11-23 File number: TSL-99691-18 TSL...,Order under Section 69 Residential Tenancies A...,"TSL-99691-18 (Re), 2018 CanLII 141675 (ON LTB)",TSL-99691-18,Toronto,11/20/2018,11/23/2018,David Lee
669,"[Metadata:, Date: 2018-11-29, File number:, TS...",Date: 2018-11-29 File number: TSL-99824-18 TSL...,Order under Section 69 Residential Tenancies A...,"TSL-99824-18 (Re), 2018 CanLII 141673 (ON LTB)",TSL-99824-18,Toronto,11/23/2018,11/29/2018,Renée Lang
670,"[Metadata:, Date: 2018-12-12, File number:, TS...",Date: 2018-12-12 File number: TSL-99900-18 TSL...,Order under Section 69 Residential Tenancies A...,"TSL-99900-18 (Re), 2018 CanLII 140403 (ON LTB)",TSL-99900-18,Toronto,12/4/2018,12/12/2018,David Mungovan


In [66]:
silver_df.loc[0, 'content'].split(". ")

["Arrears Worksheet File Number: CEL-62600-16 Time period for Arrears Owing From: September 1, 2016 to November 5, 2016 (From the commencement of arrears to the termination date in the notice, or the end of the rental period if the tenancy is not being terminated.) Part 1 - Calculations of Arrears Owing (A) Rent Period (monthly, weekly, etc.) (B) Rent Charged (C) Lawful Rent (if issue raised) (D) Lower of (B) and (C) (E) Rent Paid (F) Amount Owing (D-E) 01/09/2016 - 30/09/2016 $1,760.00 $1,760.00 $1,760.00 01/10/2016 - 31/10/2016 $1,760.00 $1,760.00 $1,760.00 01/11/2016 - 05/11/2016 $289.32 $289.32 $289.32 **Part Month ** To calculate the Rent for part of a month, use the following formula for columns (B), (C) and (D): Monthly Rent X 12 X # Days 365 (F) Total Rent Owing $3,809.32 (G1) Arrears Owing $3,809.32 [From (F)] (G2) Arrears Claimed $5,450.00 (G3) Include whichever is less when Calculation Total Arrears Owing $3,809.32 (H1) Payment to Landlord $5,280.00 (I) Total Amount Owing -$

In [69]:
gold_df.columns

Index(['raw_file_text', 'raw_file_name', 'full_cleaned', 'metadata', 'content',
       'case_citation', 'file_number', 'language', 'year', 'ltb_location',
       'decision_date', 'hearing_date', 'url', 'adjudicating_member',
       'new_case_outcome', 'new_hearing_date', 'new_decision_date'],
      dtype='object')

In [75]:
gold_df['new_decision_date'].value_counts(normalize = True)

2/12/2018     0.014881
5/4/2018      0.010417
3/22/2018     0.010417
6/6/2018      0.010417
5/16/2018     0.010417
                ...   
8/7/2020      0.001488
6/25/2020     0.001488
5/8/2020      0.001488
1/16/2017     0.001488
11/20/2018    0.001488
Name: new_decision_date, Length: 353, dtype: float64