In [1]:
import subprocess
from pathlib import Path

import pandas as pd

In [6]:
import subprocess
import pandas as pd

def classify_inadmissibility(df, text_column="unofficial_text", model="llama3"):
    """
    Classify each court case with a local Ollama model (LLaMA 3 by default).

    For every row the function:
    1. Extracts the run of numbered paragraphs that begins at the first line
       containing "[1]".
    2. Asks the model for a one-sentence summary of that extract.
    3. Asks the model to classify the summary as 'Inadmissibility' or
       'Not Inadmissibility'.

    The model's classification reply is stored as-is (whitespace-stripped);
    it is not normalized or validated against the two expected labels.

    Parameters
    ----------
    df : pandas.DataFrame
        The DataFrame containing court case texts. It is not modified.
    text_column : str, optional
        The name of the column containing the court text (default is "unofficial_text").
    model : str, optional
        The name of the language model to use with Ollama (default is "llama3").

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with an added column 'inadmissibility'. Each value is one of:
        - the raw model output for the classification prompt;
        - "subprocess_error" if ``ollama`` could not be started or exited with a
          non-zero status during either the summary or the classification step;
        - None if the row has no usable text (the cell is missing / not a string,
          or no numbered paragraphs were found), in which case the model is not called.
    """

    # Sentinel stored for any row whose model call failed.
    error_marker = "subprocess_error"

    def extract_numbered_lines(text):
        """Return the block of consecutive numbered lines starting at the first "[1]" line.

        Extraction begins at the first line that contains "[1]" anywhere and
        continues while each following line starts (after stripping) with
        "[<digits>]" or with a digit. The first line that does neither,
        including a blank line, ends the block. Non-string input yields "".
        """
        if not isinstance(text, str):
            # Missing cells arrive as NaN/None; there is nothing to extract.
            return ""

        extracted = []
        for line in text.splitlines():
            stripped = line.strip()

            if not extracted:
                # Still searching for the opening paragraph marker.
                if "[1]" in stripped:
                    extracted.append(line)
                continue

            closing = stripped.find("]")
            # closing > 1 rejects both a missing "]" (find() == -1, which would
            # otherwise slice off the last character and accept e.g. "[12") and "[]".
            is_bracketed = (
                stripped.startswith("[")
                and closing > 1
                and stripped[1:closing].isdigit()
            )
            starts_with_digit = bool(stripped) and stripped[0].isdigit()

            if not (is_bracketed or starts_with_digit):
                break
            extracted.append(line)

        return "\n".join(extracted)

    def run_ollama(prompt):
        """Send ``prompt`` to ``ollama run <model>`` on stdin and return its stripped stdout.

        Returns the "subprocess_error" sentinel when the process cannot be
        started or exits with a non-zero status.
        """
        try:
            result = subprocess.run(
                ["ollama", "run", model],
                input=prompt.encode(),
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=False,
            )
        except (OSError, ValueError, subprocess.SubprocessError) as e:
            print(f"Subprocess error: {e}")
            return error_marker

        if result.returncode != 0:
            # A failed run usually leaves stdout empty; surface stderr instead of
            # silently recording an empty classification.
            detail = result.stderr.decode(errors="replace").strip()
            print(f"Subprocess error: ollama exited with code {result.returncode}: {detail}")
            return error_marker

        return result.stdout.decode(errors="replace").strip()

    def generate_summary(text):
        """Ask the model for a one-sentence summary of ``text``."""
        prompt = f"""
You are a legal analyst specializing in Canadian immigration law.

Summarize the following court case in one sentence, clearly stating what the case is about.

Case Text:
{text}

Summary:
"""
        return run_ollama(prompt)

    def classify_summary(summary):
        """Ask the model to label ``summary`` as inadmissibility-related or not."""
        prompt = f"""
You are a Canadian immigration law expert.

Based on the following summary of a legal case, classify whether it relates to 'inadmissibility' under Canadian immigration law.

Respond only with one of the following:
Inadmissibility
Not Inadmissibility

Summary:
{summary}

Classification:
"""
        return run_ollama(prompt)

    df = df.copy()

    # Collect results positionally and assign once, so a non-unique index
    # cannot cause one row's result to be written to several rows.
    outputs = []
    for idx, row in df.iterrows():
        limited_text = extract_numbered_lines(row[text_column])

        if not limited_text.strip():
            # Nothing to summarize: prompting with an empty case text would
            # only produce a made-up classification.
            outputs.append(None)
            print(f"Row {idx} skipped: no numbered paragraphs found.")
            continue

        summary = generate_summary(limited_text)
        if summary == error_marker:
            # Do not classify the error sentinel as if it were a summary.
            raw_output = error_marker
        else:
            raw_output = classify_summary(summary)

        outputs.append(raw_output)
        print(f"Row {idx} classified.")

    # dtype=object keeps None as None instead of coercing it to NaN.
    df["inadmissibility"] = pd.Series(outputs, index=df.index, dtype=object)

    return df


In [7]:
# Input workbook, resolved against the current user's home directory instead of
# a hard-coded /Users/<name> prefix so the cell is not tied to one account.
CASES_PATH = Path.home() / "Downloads" / "inadmissible_cases_verification.xlsx"
# Number of leading rows to keep as the working sample.
SAMPLE_SIZE = 20

df = pd.read_excel(CASES_PATH)
df_20 = df.head(SAMPLE_SIZE)

In [8]:
# Classify the 20-row sample; the keyword values spell out the function's defaults.
inadmissibility_df = classify_inadmissibility(
    df=df_20,
    text_column="unofficial_text",
    model="llama3",
)

Row 0 classified.
Row 1 classified.
Row 2 classified.
Row 3 classified.
Row 4 classified.
Row 5 classified.
Row 6 classified.
Row 7 classified.
Row 8 classified.
Row 9 classified.
Row 10 classified.
Row 11 classified.
Row 12 classified.
Row 13 classified.
Row 14 classified.
Row 15 classified.
Row 16 classified.
Row 17 classified.
Row 18 classified.
Row 19 classified.


In [9]:
# Display the classified sample; the 'inadmissibility' column holds the model's raw reply per row.
inadmissibility_df

Unnamed: 0,citation,dataset,year,language,document_date,source_url,unofficial_text,judges,locations,outcome,inadmissibility_ground,inadmissible/bility_count,inadmissibility
0,2021 FC 1003,FC,2021,en,2021-09-27,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Li v. Canada (Citizenship and Immigration)\nCo...,['McDonald'],Fredericton,allowed,['non_compliance'],1,Not Inadmissibility
1,2021 FC 1004,FC,2021,en,2021-09-27,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Malik v. Canada (Citizenship and Immigration)\...,['Strickland'],Ottawa,dismissed,['misrepresentation'],11,Inadmissibility
2,2021 FC 1031,FC,2021,en,2021-10-05,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Abu v. Canada (Citizenship and Immigration)\nC...,['Norris'],Ottawa,allowed,['human_rights'],6,Not Inadmissibility
3,2021 FC 1045,FC,2021,en,2021-10-07,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Dale v. Canada (Citizenship and Immigration)\n...,['McDonald'],Fredericton,dismissed,"['security', 'serious_criminality']",1,Not Inadmissibility
4,2021 FC 1072,FC,2021,en,2021-10-15,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Ketjinganda v. Canada (Citizenship and Immigra...,['Rochester'],Ottawa,dismissed,['human_rights'],1,Not Inadmissibility
5,2021 FC 1087,FC,2021,en,2021-10-18,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Camacho Valera v. Canada (Citizenship and Immi...,['Manson'],Ottawa,allowed,"['security', 'human_rights', 'organized_crimin...",1,Not Inadmissibility
6,2021 FC 1089,FC,2021,en,2021-10-18,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Adeosun v. Canada (Citizenship and Immigration...,['Andrew D. Little'],Toronto,dismissed,['misrepresentation'],28,Inadmissibility
7,2021 FC 1098,FC,2021,en,2021-10-18,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Zhao v. Canada (Citizenship and Immigration)\n...,['Walker'],Ottawa,dismissed,['misrepresentation'],3,Not Inadmissibility
8,2021 FC 1104,FC,2021,en,2021-10-20,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Adams v. Canada (Citizenship and Immigration)\...,['Furlanetto'],Ottawa,dismissed,['criminality'],3,Inadmissibility
9,2021 FC 1145,FC,2021,en,2021-10-27,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Quraishi v. Canada (Citizenship and Immigratio...,['McHaffie'],Ottawa,allowed,['other'],1,Not Inadmissibility
