# Loading and Categorizing Court Data

## Imports

In [1]:
import pandas as pd
import tqdm as notebook_tqdm
from tqdm import tqdm
from datasets import load_dataset
import datetime
import re

  from .autonotebook import tqdm as notebook_tqdm


## Functions for Filtering and Categorizing the data

In [2]:
def immigration_cases(text):
    """
    Checks whether the given text contains references to immigration-related ministries 
    within the first 10 lines.

    This function is typically used to filter legal case documents that mention 
    either "Citizenship and Immigration" or "Citoyenneté et Immigration" early in the text.

    Parameters:
    ----------
    text : str or None
        The textual content of a legal case, potentially containing multiple lines.

    Returns:
    -------
    bool
        True if either phrase appears in the first 10 lines of the text; False otherwise.
    """
    if pd.isna(text):
        return False
    lines = text.splitlines()[:10]
    joined_lines = ' '.join(lines)
    return (
        "Citizenship and Immigration" in joined_lines or
        "Citoyenneté et Immigration" in joined_lines
    )

In [3]:
def remove_translated_cases(df, citation_col='citation', lang_col='language', lang_primary='en', lang_secondary='fr'):
    """
    Removes rows in the secondary language (e.g., French) that are translations of cases already 
    present in the primary language (e.g., English), based on normalized court citations.

    Parameters:
    -----------
    df : pd.DataFrame
        The DataFrame containing legal case data.
    citation_col : str, optional
        The name of the column containing case citations. Default is 'citation'.
    lang_col : str, optional
        The name of the column containing language information. Default is 'language'.
    lang_primary : str, optional
        The language code to be considered as the primary version (e.g., 'en'). Default is 'en'.
    lang_secondary : str, optional
        The language code to be considered as the translated version to remove (e.g., 'fr'). Default is 'fr'.

    Returns:
    --------
    pd.DataFrame
        A filtered DataFrame with translated cases removed when the same case exists in the primary language.
    """
    court_acronyms = ['SCC', 'CSC', 'FCA', 'CAF', 'FC', 'CF'] 
    pattern = r'\b(' + '|'.join(court_acronyms) + r')\b'

    def normalize(citation):
        return re.sub(pattern, 'COURT', citation)

    df = df.copy()
    df['normalized_citation'] = df[citation_col].apply(normalize)

    primary_citations = set(df[df[lang_col] == lang_primary]['normalized_citation'])

    filtered_df = df[~((df[lang_col] == lang_secondary) & (df['normalized_citation'].isin(primary_citations)))]

    return filtered_df.drop(columns=['normalized_citation'])

In [4]:
def categorize_document(text):
    """
    Categorizes a legal document into one or more predefined categories based on regex pattern matches.

    If the document matches a specific exclusion pattern related to refugee cases, it is immediately 
    categorized as ['refugee']. Otherwise, it is checked against a dictionary of regular expression 
    patterns (`RE_patterns`), and all matching categories are returned.

    Parameters:
    ----------
    text : str
        The textual content of the document to be categorized.

    Returns:
    -------
    list of str
        A list of category labels that apply to the document. If no categories match, returns ['other'].
    """
    if re.search(RE_exclude_refugee, text):
        return ['refugee']

    matched_categories = [
        category for category, pattern in RE_patterns.items()
        if re.search(pattern, text)
    ]

    return matched_categories if matched_categories else ['other']

## Regular Expression for Categorizing the data

In [5]:
RE_exclude_refugee = re.compile(
    r'\b(convention refugees?|persons? in need of protection|refugee claimants?|protected persons?|réfugiés?)\b',
    re.IGNORECASE
)

RE_patterns = {
    'security': re.compile(
        r'\b(espionages?|against canada|canada[’\'‘`s]* interests?|subversions?|democratic governments?|terrorisms?|dangers? to security|violences?|endangerments?|memberships?|complicity|reasonable grounds? to believe)\b',
        re.IGNORECASE
    ),
    'human_rights': re.compile(
        r'\b(human rights?|international rights?|violations?|senior officials?|governments?|regimes?|genocides?|war crimes?|crimes? against humanity|participations?|contributions?|reasonable grounds? to believe|terrorisms?)\b',
        re.IGNORECASE
    ),
    'serious_criminality': re.compile(
        r'\b(criminal convictions?|foreign convictions?|imprisonments?|10 years|ten years|sentences?|over (6|six) months|serious indictable offences?|commissions?|reasonable grounds? to believe)\b',
        re.IGNORECASE
    ),
    'criminality': re.compile(
        r'\b(criminal convictions?|foreign convictions?|indictments?|indictable offences?|summary offences?|commissions?)\b',
        re.IGNORECASE
    ),
    'organized_criminality': re.compile(
        r'\b(memberships?|criminal activities?|organized crimes?|acting in concert|people smuggling|traffickings?|money launderings?|proceeds? of crime|reasonable grounds? to believe)\b',
        re.IGNORECASE
    ),
    'health_grounds': re.compile(
        r'\b(dangers? to public health|dangers? to public safety|excessive demands? on health services|excessive demands? on social services)\b',
        re.IGNORECASE
    ),
    'financial_reasons': re.compile(
        r'\b(unable or unwilling to support (oneself|dependents?)|arrangements? for care and support|social assistances?)\b',
        re.IGNORECASE
    ),
    'misrepresentation': re.compile(
        r'\b(misrepresenting|withholding|material facts?|errors? in administration|non-disclosures?|omissions?|false statements?|false information)\b',
        re.IGNORECASE
    ),
    'non_compliance': re.compile(
        r'\b(contraventions?|non-compliances?|failures? to comply)\b',
        re.IGNORECASE
    ),
    'inadmissible_family': re.compile(
        r'\b(inadmissible family members?|accompanying family members?)\b',
        re.IGNORECASE
    )
}

## Processing the Datasets

### SCC Data

In [6]:
dataset = load_dataset("refugee-law-lab/canadian-legal-data", "SCC", split="train")
SCC = dataset.to_pandas()
SCC_2014_2024 = SCC.query("year >= 2014")
SCC_2014_2024

Unnamed: 0,citation,citation2,dataset,year,name,language,document_date,source_url,scraped_timestamp,unofficial_text,other
10139,2014 SCC 1,[2014] 1 SCR 3,SCC,2014,Vivendi Canada Inc. v. Dell’Aniello,en,2014-01-16,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2022-09-01,Vivendi Canada Inc. v. Dell’Aniello\nCollectio...,
10140,2014 SCC 10,[2014] 1 SCR 140,SCC,2014,R. v. Yelle,en,2014-01-22,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2022-09-01,R. v. Yelle\nCollection\nSupreme Court Judgmen...,
10141,2014 SCC 11,[2014] 1 SCR 142,SCC,2014,Telecommunications Employees Association of Ma...,en,2014-01-30,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2022-09-01,Telecommunications Employees Association of Ma...,
10142,2014 SCC 12,[2014] 1 SCR 177,SCC,2014,A.I. Enterprises Ltd. v. Bram Enterprises Ltd.,en,2014-01-31,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2022-09-01,A.I. Enterprises Ltd. v. Bram Enterprises Ltd....,
10143,2014 SCC 13,[2014] 1 S. C.R. 227,SCC,2014,Bernard v. Canada (Attorney General),en,2014-02-07,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2022-09-01,Bernard v. Canada (Attorney General)\nCollecti...,
...,...,...,...,...,...,...,...,...,...,...,...
15702,2024 CSC 5,,SCC,2024,Renvoi relatif à la Loi concernant les enfants...,fr,2024-02-09,https://decisions.scc-csc.ca/scc-csc/scc-csc/f...,2024-04-10,Renvoi relatif à la Loi concernant les enfants...,
15703,2024 CSC 6,,SCC,2024,R. c. Bykovets,fr,2024-03-01,https://decisions.scc-csc.ca/scc-csc/scc-csc/f...,2024-07-12,R. c. Bykovets\nCollection\nJugements de la Co...,
15704,2024 CSC 7,,SCC,2024,R. c. Kruk,fr,2024-03-08,https://decisions.scc-csc.ca/scc-csc/scc-csc/f...,2024-07-12,R. c. Kruk\nCollection\nJugements de la Cour s...,
15705,2024 CSC 8,,SCC,2024,Yatar c. TD Assurance Meloche Monnex,fr,2024-03-15,https://decisions.scc-csc.ca/scc-csc/scc-csc/f...,2024-07-12,Yatar c. TD Assurance Meloche Monnex\nCollecti...,


In [7]:
SCC_immigration = remove_translated_cases(SCC_2014_2024)
SCC_immigration

Unnamed: 0,citation,citation2,dataset,year,name,language,document_date,source_url,scraped_timestamp,unofficial_text,other
10139,2014 SCC 1,[2014] 1 SCR 3,SCC,2014,Vivendi Canada Inc. v. Dell’Aniello,en,2014-01-16,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2022-09-01,Vivendi Canada Inc. v. Dell’Aniello\nCollectio...,
10140,2014 SCC 10,[2014] 1 SCR 140,SCC,2014,R. v. Yelle,en,2014-01-22,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2022-09-01,R. v. Yelle\nCollection\nSupreme Court Judgmen...,
10141,2014 SCC 11,[2014] 1 SCR 142,SCC,2014,Telecommunications Employees Association of Ma...,en,2014-01-30,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2022-09-01,Telecommunications Employees Association of Ma...,
10142,2014 SCC 12,[2014] 1 SCR 177,SCC,2014,A.I. Enterprises Ltd. v. Bram Enterprises Ltd.,en,2014-01-31,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2022-09-01,A.I. Enterprises Ltd. v. Bram Enterprises Ltd....,
10143,2014 SCC 13,[2014] 1 S. C.R. 227,SCC,2014,Bernard v. Canada (Attorney General),en,2014-02-07,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2022-09-01,Bernard v. Canada (Attorney General)\nCollecti...,
...,...,...,...,...,...,...,...,...,...,...,...
10754,2024 SCC 6,,SCC,2024,R. v. Bykovets,en,2024-03-01,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2024-07-12,R. v. Bykovets\nCollection\nSupreme Court Judg...,
10755,2024 SCC 7,,SCC,2024,R. v. Kruk,en,2024-03-08,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2024-07-12,R. v. Kruk\nCollection\nSupreme Court Judgment...,
10756,2024 SCC 8,,SCC,2024,Yatar v. TD Insurance Meloche Monnex,en,2024-03-15,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2024-07-12,Yatar v. TD Insurance Meloche Monnex\nCollecti...,
10757,2024 SCC 9,,SCC,2024,R. v. Boudreau,en,2024-03-20,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2024-07-12,R. v. Boudreau\nCollection\nSupreme Court Judg...,


In [8]:
SCC_immigration = SCC_immigration[SCC_immigration['unofficial_text'].apply(immigration_cases)]
SCC_immigration

Unnamed: 0,citation,citation2,dataset,year,name,language,document_date,source_url,scraped_timestamp,unofficial_text,other
10169,2014 SCC 37,[2014] 2 SCR 33,SCC,2014,Canada (Citizenship and Immigration) v. Harkat,en,2014-05-14,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2022-09-01,Canada (Citizenship and Immigration) v. Harkat...,
10203,2014 SCC 68,[2014] 3 SCR 431,SCC,2014,Febles v. Canada (Citizenship and Immigration),en,2014-10-30,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2022-09-01,Febles v. Canada (Citizenship and Immigration)...,
10269,2015 SCC 58,[2015] 3 SCR 704,SCC,2015,B010 v. Canada (Citizenship and Immigration),en,2015-11-27,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2022-09-01,B010 v. Canada (Citizenship and Immigration)\n...,
10273,2015 SCC 61,[2015] 3 SCR 909,SCC,2015,Kanthasamy v. Canada (Citizenship and Immigrat...,en,2015-12-10,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2022-09-01,Kanthasamy v. Canada (Citizenship and Immigrat...,
10522,2019 SCC 65,[2019] 4 SCR 653,SCC,2019,Canada (Minister of Citizenship and Immigratio...,en,2019-12-19,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2024-04-10,Canada (Minister of Citizenship and Immigratio...,
10689,2023 SCC 17,,SCC,2023,Canadian Council for Refugees v. Canada (Citiz...,en,2023-06-16,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2023-12-01,Canadian Council for Refugees v. Canada (Citiz...,
10694,2023 SCC 21,,SCC,2023,Mason v. Canada (Citizenship and Immigration),en,2023-09-27,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2023-12-01,Mason v. Canada (Citizenship and Immigration)\...,


In [9]:
SCC_immigration['inadmissibility_reason'] = SCC_immigration['unofficial_text'].apply(categorize_document)
SCC_immigration

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  SCC_immigration['inadmissibility_reason'] = SCC_immigration['unofficial_text'].apply(categorize_document)


Unnamed: 0,citation,citation2,dataset,year,name,language,document_date,source_url,scraped_timestamp,unofficial_text,other,inadmissibility_reason
10169,2014 SCC 37,[2014] 2 SCR 33,SCC,2014,Canada (Citizenship and Immigration) v. Harkat,en,2014-05-14,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2022-09-01,Canada (Citizenship and Immigration) v. Harkat...,,"[security, human_rights, serious_criminality, ..."
10203,2014 SCC 68,[2014] 3 SCR 431,SCC,2014,Febles v. Canada (Citizenship and Immigration),en,2014-10-30,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2022-09-01,Febles v. Canada (Citizenship and Immigration)...,,[refugee]
10269,2015 SCC 58,[2015] 3 SCR 704,SCC,2015,B010 v. Canada (Citizenship and Immigration),en,2015-11-27,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2022-09-01,B010 v. Canada (Citizenship and Immigration)\n...,,[refugee]
10273,2015 SCC 61,[2015] 3 SCR 909,SCC,2015,Kanthasamy v. Canada (Citizenship and Immigrat...,en,2015-12-10,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2022-09-01,Kanthasamy v. Canada (Citizenship and Immigrat...,,[refugee]
10522,2019 SCC 65,[2019] 4 SCR 653,SCC,2019,Canada (Minister of Citizenship and Immigratio...,en,2019-12-19,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2024-04-10,Canada (Minister of Citizenship and Immigratio...,,"[security, human_rights, serious_criminality, ..."
10689,2023 SCC 17,,SCC,2023,Canadian Council for Refugees v. Canada (Citiz...,en,2023-06-16,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2023-12-01,Canadian Council for Refugees v. Canada (Citiz...,,[refugee]
10694,2023 SCC 21,,SCC,2023,Mason v. Canada (Citizenship and Immigration),en,2023-09-27,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,2023-12-01,Mason v. Canada (Citizenship and Immigration)\...,,[refugee]


In [10]:
final_SCC = SCC_immigration.reset_index().loc[:, ['citation', 'dataset', 'year', 'language', 'document_date', 'source_url', 'unofficial_text','inadmissibility_reason']]
final_SCC

Unnamed: 0,citation,dataset,year,language,document_date,source_url,unofficial_text,inadmissibility_reason
0,2014 SCC 37,SCC,2014,en,2014-05-14,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,Canada (Citizenship and Immigration) v. Harkat...,"[security, human_rights, serious_criminality, ..."
1,2014 SCC 68,SCC,2014,en,2014-10-30,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,Febles v. Canada (Citizenship and Immigration)...,[refugee]
2,2015 SCC 58,SCC,2015,en,2015-11-27,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,B010 v. Canada (Citizenship and Immigration)\n...,[refugee]
3,2015 SCC 61,SCC,2015,en,2015-12-10,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,Kanthasamy v. Canada (Citizenship and Immigrat...,[refugee]
4,2019 SCC 65,SCC,2019,en,2019-12-19,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,Canada (Minister of Citizenship and Immigratio...,"[security, human_rights, serious_criminality, ..."
5,2023 SCC 17,SCC,2023,en,2023-06-16,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,Canadian Council for Refugees v. Canada (Citiz...,[refugee]
6,2023 SCC 21,SCC,2023,en,2023-09-27,https://decisions.scc-csc.ca/scc-csc/scc-csc/e...,Mason v. Canada (Citizenship and Immigration)\...,[refugee]


In [11]:
len(final_SCC.query("language != 'fr'"))

7

In [12]:
final_SCC.query("language != 'fr'")['inadmissibility_reason'].value_counts()

inadmissibility_reason
[refugee]                                                                                               5
[security, human_rights, serious_criminality, criminality, organized_criminality, misrepresentation]    2
Name: count, dtype: int64

In [13]:
final_SCC.to_csv("../data/processed/SCC_Regex.csv")

### FCA Data

In [14]:
dataset = load_dataset("refugee-law-lab/canadian-legal-data", "FCA", split="train")
FCA = dataset.to_pandas()
FCA_2014_2024 = FCA.query("year >= 2014")
FCA_2014_2024

Unnamed: 0,citation,citation2,dataset,year,name,language,document_date,source_url,scraped_timestamp,unofficial_text,other
4979,2014 FCA 1,,FCA,2014,Jolivet v. Canada (Correctional Service),en,2014-01-07,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2022-08-29,Jolivet v. Canada (Correctional Service)\nCour...,
4980,2014 FCA 10,,FCA,2014,Soft-Moc Inc. v. Canada (National Revenue),en,2014-01-21,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2022-08-29,Soft-Moc Inc. v. Canada (National Revenue)\nCo...,
4981,2014 FCA 101,,FCA,2014,Canada (Indian Affairs) v. Daniels,en,2014-04-17,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2022-08-29,Canada (Indian Affairs) v. Daniels\nCourt (s) ...,
4982,2014 FCA 102,,FCA,2014,Canada (Attorney General) v. Vorobyov,en,2014-04-17,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2022-08-29,Canada (Attorney General) v. Vorobyov\nCourt (...,
4983,2014 FCA 103,,FCA,2014,Canada v. Lehigh Cement Limited,en,2014-04-23,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2022-08-29,Canada v. Lehigh Cement Limited\nCourt (s) Dat...,
...,...,...,...,...,...,...,...,...,...,...,...
14646,2024 CAF 90,,FCA,2024,Marquis c. Canada (Procureur général),fr,2024-05-07,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2025-01-03,Marquis c. Canada (Procureur général)\nBase de...,
14647,2024 CAF 91,,FCA,2024,Interpro Distributeurs de Viandes Inc. c. Cana...,fr,2024-05-08,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2025-01-03,Interpro Distributeurs de Viandes Inc. c. Cana...,
14648,2024 CAF 93,,FCA,2024,Gutierrez c. Canada,fr,2024-05-13,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2025-01-03,Gutierrez c. Canada\nBase de données – Cour (s...,
14649,2024 CAF 98,,FCA,2024,Chartier c. Canada,fr,2024-05-22,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2025-01-03,Chartier c. Canada\nBase de données – Cour (s)...,


In [15]:
FCA_immigration = remove_translated_cases(FCA_2014_2024)
FCA_immigration

Unnamed: 0,citation,citation2,dataset,year,name,language,document_date,source_url,scraped_timestamp,unofficial_text,other
4979,2014 FCA 1,,FCA,2014,Jolivet v. Canada (Correctional Service),en,2014-01-07,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2022-08-29,Jolivet v. Canada (Correctional Service)\nCour...,
4980,2014 FCA 10,,FCA,2014,Soft-Moc Inc. v. Canada (National Revenue),en,2014-01-21,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2022-08-29,Soft-Moc Inc. v. Canada (National Revenue)\nCo...,
4981,2014 FCA 101,,FCA,2014,Canada (Indian Affairs) v. Daniels,en,2014-04-17,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2022-08-29,Canada (Indian Affairs) v. Daniels\nCourt (s) ...,
4982,2014 FCA 102,,FCA,2014,Canada (Attorney General) v. Vorobyov,en,2014-04-17,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2022-08-29,Canada (Attorney General) v. Vorobyov\nCourt (...,
4983,2014 FCA 103,,FCA,2014,Canada v. Lehigh Cement Limited,en,2014-04-23,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2022-08-29,Canada v. Lehigh Cement Limited\nCourt (s) Dat...,
...,...,...,...,...,...,...,...,...,...,...,...
14645,2024 CAF 89,,FCA,2024,Chelsea (Municipalité) c. Canada (Procureur gé...,fr,2024-05-06,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2025-01-03,Chelsea (Municipalité) c. Canada (Procureur gé...,
14646,2024 CAF 90,,FCA,2024,Marquis c. Canada (Procureur général),fr,2024-05-07,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2025-01-03,Marquis c. Canada (Procureur général)\nBase de...,
14647,2024 CAF 91,,FCA,2024,Interpro Distributeurs de Viandes Inc. c. Cana...,fr,2024-05-08,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2025-01-03,Interpro Distributeurs de Viandes Inc. c. Cana...,
14649,2024 CAF 98,,FCA,2024,Chartier c. Canada,fr,2024-05-22,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2025-01-03,Chartier c. Canada\nBase de données – Cour (s)...,


In [16]:
FCA_immigration = FCA_immigration[FCA_immigration['unofficial_text'].apply(immigration_cases)]
FCA_immigration

Unnamed: 0,citation,citation2,dataset,year,name,language,document_date,source_url,scraped_timestamp,unofficial_text,other
4993,2014 FCA 113,,FCA,2014,Kanthasamy v. Canada (Citizenship and Immigra...,en,2014-05-02,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2022-08-29,Kanthasamy v. Canada (Citizenship and Immigrat...,
4994,2014 FCA 114,,FCA,2014,Lemus v. Canada (Citizenship and Immigration),en,2014-05-02,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2022-08-29,Lemus v. Canada (Citizenship and Immigration)\...,
5001,2014 FCA 126,,FCA,2014,Kinsel v. Canada (Citizenship and Immigration),en,2014-05-14,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2023-04-13,Kinsel v. Canada (Citizenship and Immigration)...,
5027,2014 FCA 157,,FCA,2014,Sanchez v. Canada (Citizenship and Immigration),en,2014-06-10,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2022-08-29,Sanchez v. Canada (Citizenship and Immigration...,
5030,2014 FCA 160,,FCA,2014,Canada (Citizenship and Immigration) v. Savin,en,2014-06-12,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2022-08-29,Canada (Citizenship and Immigration) v. Savin\...,
...,...,...,...,...,...,...,...,...,...,...,...
7175,2023 FCA 36,,FCA,2023,Lukács v. Canada (Citizenship and Immigration),en,2023-02-16,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2025-01-03,Lukács v. Canada (Citizenship and Immigration)...,
7284,2024 FCA 165,,FCA,2024,Nanhar v. Canada (Citizenship and Immigration),en,2024-10-08,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2025-01-03,Nanhar v. Canada (Citizenship and Immigration)...,
7291,2024 FCA 174,,FCA,2024,Li v. Canada (Citizenship and Immigration),en,2024-10-24,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2025-01-03,Li v. Canada (Citizenship and Immigration)\nCo...,
13832,2020 CAF 126,,FCA,2020,Canada (Citoyenneté et Immigration) c. Solmaz,fr,2020-07-28,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2022-08-30,Canada (Citoyenneté et Immigration) c. Solmaz\...,


In [17]:
FCA_immigration['inadmissibility_reason'] = FCA_immigration['unofficial_text'].apply(categorize_document)
FCA_immigration

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  FCA_immigration['inadmissibility_reason'] = FCA_immigration['unofficial_text'].apply(categorize_document)


Unnamed: 0,citation,citation2,dataset,year,name,language,document_date,source_url,scraped_timestamp,unofficial_text,other,inadmissibility_reason
4993,2014 FCA 113,,FCA,2014,Kanthasamy v. Canada (Citizenship and Immigra...,en,2014-05-02,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2022-08-29,Kanthasamy v. Canada (Citizenship and Immigrat...,,[refugee]
4994,2014 FCA 114,,FCA,2014,Lemus v. Canada (Citizenship and Immigration),en,2014-05-02,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2022-08-29,Lemus v. Canada (Citizenship and Immigration)\...,,[refugee]
5001,2014 FCA 126,,FCA,2014,Kinsel v. Canada (Citizenship and Immigration),en,2014-05-14,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2023-04-13,Kinsel v. Canada (Citizenship and Immigration)...,,"[security, human_rights, serious_criminality, ..."
5027,2014 FCA 157,,FCA,2014,Sanchez v. Canada (Citizenship and Immigration),en,2014-06-10,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2022-08-29,Sanchez v. Canada (Citizenship and Immigration...,,"[serious_criminality, criminality]"
5030,2014 FCA 160,,FCA,2014,Canada (Citizenship and Immigration) v. Savin,en,2014-06-12,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2022-08-29,Canada (Citizenship and Immigration) v. Savin\...,,[human_rights]
...,...,...,...,...,...,...,...,...,...,...,...,...
7175,2023 FCA 36,,FCA,2023,Lukács v. Canada (Citizenship and Immigration),en,2023-02-16,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2025-01-03,Lukács v. Canada (Citizenship and Immigration)...,,[refugee]
7284,2024 FCA 165,,FCA,2024,Nanhar v. Canada (Citizenship and Immigration),en,2024-10-08,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2025-01-03,Nanhar v. Canada (Citizenship and Immigration)...,,[refugee]
7291,2024 FCA 174,,FCA,2024,Li v. Canada (Citizenship and Immigration),en,2024-10-24,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2025-01-03,Li v. Canada (Citizenship and Immigration)\nCo...,,"[human_rights, serious_criminality, criminality]"
13832,2020 CAF 126,,FCA,2020,Canada (Citoyenneté et Immigration) c. Solmaz,fr,2020-07-28,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,2022-08-30,Canada (Citoyenneté et Immigration) c. Solmaz\...,,[refugee]


In [18]:
final_FCA = FCA_immigration.reset_index().loc[:, ['citation', 'dataset', 'year', 'language', 'document_date', 'source_url', 'unofficial_text','inadmissibility_reason']]
final_FCA

Unnamed: 0,citation,dataset,year,language,document_date,source_url,unofficial_text,inadmissibility_reason
0,2014 FCA 113,FCA,2014,en,2014-05-02,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,Kanthasamy v. Canada (Citizenship and Immigrat...,[refugee]
1,2014 FCA 114,FCA,2014,en,2014-05-02,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,Lemus v. Canada (Citizenship and Immigration)\...,[refugee]
2,2014 FCA 126,FCA,2014,en,2014-05-14,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,Kinsel v. Canada (Citizenship and Immigration)...,"[security, human_rights, serious_criminality, ..."
3,2014 FCA 157,FCA,2014,en,2014-06-10,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,Sanchez v. Canada (Citizenship and Immigration...,"[serious_criminality, criminality]"
4,2014 FCA 160,FCA,2014,en,2014-06-12,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,Canada (Citizenship and Immigration) v. Savin\...,[human_rights]
...,...,...,...,...,...,...,...,...
94,2023 FCA 36,FCA,2023,en,2023-02-16,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,Lukács v. Canada (Citizenship and Immigration)...,[refugee]
95,2024 FCA 165,FCA,2024,en,2024-10-08,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,Nanhar v. Canada (Citizenship and Immigration)...,[refugee]
96,2024 FCA 174,FCA,2024,en,2024-10-24,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,Li v. Canada (Citizenship and Immigration)\nCo...,"[human_rights, serious_criminality, criminality]"
97,2020 CAF 126,FCA,2020,fr,2020-07-28,https://decisions.fca-caf.gc.ca/fca-caf/decisi...,Canada (Citoyenneté et Immigration) c. Solmaz\...,[refugee]


In [19]:
len(final_FCA.query("language != 'fr'"))

97

In [20]:
final_FCA.query("language != 'fr'")['inadmissibility_reason'].value_counts()

inadmissibility_reason
[refugee]                                                                                                               29
[other]                                                                                                                 28
[serious_criminality, criminality]                                                                                       4
[human_rights]                                                                                                           4
[human_rights, serious_criminality, criminality]                                                                         4
[misrepresentation]                                                                                                      3
[security, human_rights, serious_criminality, criminality, organized_criminality, misrepresentation]                     3
[security, human_rights, serious_criminality, organized_criminality, misrepresentation, non_compliance]             

In [21]:
final_FCA.to_csv("../data/processed/FCA_Regex.csv")

### FC Data

In [22]:
dataset = load_dataset("refugee-law-lab/canadian-legal-data", "FC", split="train")
FC = dataset.to_pandas()
FC_2014_2024 = FC.query("year >= 2014")
FC_2014_2024

Unnamed: 0,citation,citation2,dataset,year,name,language,document_date,source_url,scraped_timestamp,unofficial_text,other
18485,2014 FC 1,,FC,2014,Telus Communications Company v. Canada (Attorn...,en,2014-01-02,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2022-08-23,Telus Communications Company v. Canada (Attorn...,
18486,2014 FC 10,,FC,2014,Singh v. Canada (Citizenship and Immigration),en,2014-01-07,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2022-08-23,Singh v. Canada (Citizenship and Immigration)\...,
18487,2014 FC 100,,FC,2014,Higgins v. Canada (Attorney General),en,2014-01-29,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2022-08-23,Higgins v. Canada (Attorney General)\nCourt (s...,
18488,2014 FC 1000,,FC,2014,Vinat v. Canada (Citizenship and Immigration),en,2014-10-21,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2022-08-23,Vinat v. Canada (Citizenship and Immigration)\...,
18489,2014 FC 1001,,FC,2014,Khadr v. Canada,en,2014-11-04,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2022-08-23,Khadr v. Canada\nCourt (s) Database\nFederal C...,
...,...,...,...,...,...,...,...,...,...,...,...
63563,2024 CF 982,,FC,2024,Peralta Rojas c. Canada (Citoyenneté et Immigr...,fr,2024-07-02,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2024-07-12,Peralta Rojas c. Canada (Citoyenneté et Immigr...,
63564,2024 CF 983,,FC,2024,Butt et al v. Canada,fr,2024-06-25,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2024-09-07,Butt c. Canada\nBase de données – Cour (s)\nDé...,
63565,2024 CF 984,,FC,2024,Sanchez Herrera c. Canada (Sécurité Publique e...,fr,2024-06-25,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2024-07-12,Sanchez Herrera c. Canada (Sécurité publique e...,
63566,2024 CF 99,,FC,2024,Nooristani c. Canada (Citoyenneté et Immigration),fr,2024-01-22,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2025-01-03,Nooristani c. Canada (Citoyenneté et Immigrati...,


In [23]:
FC_immigration = remove_translated_cases(FC_2014_2024)
FC_immigration

Unnamed: 0,citation,citation2,dataset,year,name,language,document_date,source_url,scraped_timestamp,unofficial_text,other
18485,2014 FC 1,,FC,2014,Telus Communications Company v. Canada (Attorn...,en,2014-01-02,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2022-08-23,Telus Communications Company v. Canada (Attorn...,
18486,2014 FC 10,,FC,2014,Singh v. Canada (Citizenship and Immigration),en,2014-01-07,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2022-08-23,Singh v. Canada (Citizenship and Immigration)\...,
18487,2014 FC 100,,FC,2014,Higgins v. Canada (Attorney General),en,2014-01-29,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2022-08-23,Higgins v. Canada (Attorney General)\nCourt (s...,
18488,2014 FC 1000,,FC,2014,Vinat v. Canada (Citizenship and Immigration),en,2014-10-21,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2022-08-23,Vinat v. Canada (Citizenship and Immigration)\...,
18489,2014 FC 1001,,FC,2014,Khadr v. Canada,en,2014-11-04,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2022-08-23,Khadr v. Canada\nCourt (s) Database\nFederal C...,
...,...,...,...,...,...,...,...,...,...,...,...
63537,2024 CF 88,,FC,2024,Matas c. Canada (Affaires mondiales),fr,2024-01-23,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2024-04-10,Matas c. Canada (Affaires mondiales)\nBase de ...,
63554,2024 CF 966,,FC,2024,Quintero c. Canada (Citoyenneté et Immigration),fr,2024-06-21,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2024-07-12,Blandon Quintero c. Canada (Citoyenneté et Imm...,
63563,2024 CF 982,,FC,2024,Peralta Rojas c. Canada (Citoyenneté et Immigr...,fr,2024-07-02,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2024-07-12,Peralta Rojas c. Canada (Citoyenneté et Immigr...,
63565,2024 CF 984,,FC,2024,Sanchez Herrera c. Canada (Sécurité Publique e...,fr,2024-06-25,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2024-07-12,Sanchez Herrera c. Canada (Sécurité publique e...,


In [24]:
FC_immigration = FC_immigration[FC_immigration['unofficial_text'].apply(immigration_cases)]
FC_immigration

Unnamed: 0,citation,citation2,dataset,year,name,language,document_date,source_url,scraped_timestamp,unofficial_text,other
18486,2014 FC 10,,FC,2014,Singh v. Canada (Citizenship and Immigration),en,2014-01-07,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2022-08-23,Singh v. Canada (Citizenship and Immigration)\...,
18488,2014 FC 1000,,FC,2014,Vinat v. Canada (Citizenship and Immigration),en,2014-10-21,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2022-08-23,Vinat v. Canada (Citizenship and Immigration)\...,
18490,2014 FC 1002,,FC,2014,Almrei v. Canada (Citizenship and Immigration),en,2014-10-23,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2022-08-23,Almrei v. Canada (Citizenship and Immigration)...,
18491,2014 FC 1003,,FC,2014,Avagyan v. Canada (Citizenship and Immigration),en,2014-10-22,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2022-08-23,Avagyan v. Canada (Citizenship and Immigration...,
18492,2014 FC 1004,,FC,2014,Avagyan v. Canada (Citizenship and Immigration),en,2014-10-22,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2022-08-23,Avagyan v. Canada (Citizenship and Immigration...,
...,...,...,...,...,...,...,...,...,...,...,...
63487,2024 CF 677,,FC,2024,Ramirez c. Canada (Citoyenneté et Immigration),fr,2024-05-02,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2024-07-12,Betanzos Ramirez c. Canada (Citoyenneté et Imm...,
63502,2024 CF 74,,FC,2024,Elnour c. Canada (Citoyenneté et Immigration),fr,2024-01-18,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2024-04-10,Abdallah Elnour El Senoussi c. Canada (Citoyen...,
63515,2024 CF 79,,FC,2024,Callado c. Canada (Citoyenneté et Immigration),fr,2024-01-18,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2024-04-10,Contreras Callado c. Canada (Citoyenneté et Im...,
63554,2024 CF 966,,FC,2024,Quintero c. Canada (Citoyenneté et Immigration),fr,2024-06-21,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2024-07-12,Blandon Quintero c. Canada (Citoyenneté et Imm...,


In [25]:
FC_immigration['inadmissibility_reason'] = FC_immigration['unofficial_text'].apply(categorize_document)
FC_immigration

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  FC_immigration['inadmissibility_reason'] = FC_immigration['unofficial_text'].apply(categorize_document)


Unnamed: 0,citation,citation2,dataset,year,name,language,document_date,source_url,scraped_timestamp,unofficial_text,other,inadmissibility_reason
18486,2014 FC 10,,FC,2014,Singh v. Canada (Citizenship and Immigration),en,2014-01-07,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2022-08-23,Singh v. Canada (Citizenship and Immigration)\...,,[refugee]
18488,2014 FC 1000,,FC,2014,Vinat v. Canada (Citizenship and Immigration),en,2014-10-21,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2022-08-23,Vinat v. Canada (Citizenship and Immigration)\...,,[refugee]
18490,2014 FC 1002,,FC,2014,Almrei v. Canada (Citizenship and Immigration),en,2014-10-23,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2022-08-23,Almrei v. Canada (Citizenship and Immigration)...,,[refugee]
18491,2014 FC 1003,,FC,2014,Avagyan v. Canada (Citizenship and Immigration),en,2014-10-22,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2022-08-23,Avagyan v. Canada (Citizenship and Immigration...,,[refugee]
18492,2014 FC 1004,,FC,2014,Avagyan v. Canada (Citizenship and Immigration),en,2014-10-22,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2022-08-23,Avagyan v. Canada (Citizenship and Immigration...,,[refugee]
...,...,...,...,...,...,...,...,...,...,...,...,...
63487,2024 CF 677,,FC,2024,Ramirez c. Canada (Citoyenneté et Immigration),fr,2024-05-02,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2024-07-12,Betanzos Ramirez c. Canada (Citoyenneté et Imm...,,[refugee]
63502,2024 CF 74,,FC,2024,Elnour c. Canada (Citoyenneté et Immigration),fr,2024-01-18,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2024-04-10,Abdallah Elnour El Senoussi c. Canada (Citoyen...,,[refugee]
63515,2024 CF 79,,FC,2024,Callado c. Canada (Citoyenneté et Immigration),fr,2024-01-18,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2024-04-10,Contreras Callado c. Canada (Citoyenneté et Im...,,[refugee]
63554,2024 CF 966,,FC,2024,Quintero c. Canada (Citoyenneté et Immigration),fr,2024-06-21,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,2024-07-12,Blandon Quintero c. Canada (Citoyenneté et Imm...,,[refugee]


In [26]:
final_FC = FC_immigration.reset_index().loc[:, ['citation', 'dataset', 'year', 'language', 'document_date', 'source_url', 'unofficial_text','inadmissibility_reason']]
final_FC

Unnamed: 0,citation,dataset,year,language,document_date,source_url,unofficial_text,inadmissibility_reason
0,2014 FC 10,FC,2014,en,2014-01-07,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Singh v. Canada (Citizenship and Immigration)\...,[refugee]
1,2014 FC 1000,FC,2014,en,2014-10-21,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Vinat v. Canada (Citizenship and Immigration)\...,[refugee]
2,2014 FC 1002,FC,2014,en,2014-10-23,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Almrei v. Canada (Citizenship and Immigration)...,[refugee]
3,2014 FC 1003,FC,2014,en,2014-10-22,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Avagyan v. Canada (Citizenship and Immigration...,[refugee]
4,2014 FC 1004,FC,2014,en,2014-10-22,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Avagyan v. Canada (Citizenship and Immigration...,[refugee]
...,...,...,...,...,...,...,...,...
8688,2024 CF 677,FC,2024,fr,2024-05-02,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Betanzos Ramirez c. Canada (Citoyenneté et Imm...,[refugee]
8689,2024 CF 74,FC,2024,fr,2024-01-18,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Abdallah Elnour El Senoussi c. Canada (Citoyen...,[refugee]
8690,2024 CF 79,FC,2024,fr,2024-01-18,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Contreras Callado c. Canada (Citoyenneté et Im...,[refugee]
8691,2024 CF 966,FC,2024,fr,2024-06-21,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Blandon Quintero c. Canada (Citoyenneté et Imm...,[refugee]


In [27]:
len(final_FC.query("language != 'fr'"))

8546

In [28]:
final_FC.query("language != 'fr'")['inadmissibility_reason'].value_counts()

inadmissibility_reason
[refugee]                                                                4217
[other]                                                                  1395
[human_rights]                                                            511
[misrepresentation]                                                       265
[security]                                                                155
                                                                         ... 
[serious_criminality, criminality, health_grounds, misrepresentation]       1
[human_rights, organized_criminality, inadmissible_family]                  1
[human_rights, misrepresentation, inadmissible_family]                      1
[security, serious_criminality, misrepresentation]                          1
[security, human_rights, financial_reasons, non_compliance]                 1
Name: count, Length: 165, dtype: int64

In [29]:
final_FC.to_csv("../data/processed/FC_Regex.csv")