# Instructions

To run this notebook you will need to:
1. Download and install Ollama from [ollama.com](https://ollama.com/).
2. In your terminal, run `ollama pull llama3` to download the llama3 model to your local computer.

## Imports

In [1]:
import ast
import re
import subprocess
import warnings

import pandas as pd

## Function to extract location name

In [2]:
def extract_locations_from_text(text, model="llama3"):
    """
    Ask a local Ollama model for the city where a case was heard.

    The text is embedded in a fixed prompt and piped to ``ollama run <model>``
    on standard input; whatever the model prints on standard output is
    returned after stripping surrounding whitespace.

    Parameters
    ----------
    text : str
        The court text to analyze. Missing values (``None``/``NaN``) and
        empty or whitespace-only strings are answered with 'NA' without
        invoking the model.
    model : str, optional
        The name of the language model to use (default is "llama3").

    Returns
    -------
    str
        The model's stripped output (expected to be a city name), or 'NA' if
        there is no text to analyze, the ``ollama`` command exits with a
        non-zero status, or the model prints nothing.

    Warns
    -----
    RuntimeWarning
        If ``ollama`` exits with a non-zero status. The warning carries the
        exit status and the command's stderr so that a broken setup is not
        mistaken for "no location found".
    """
    # Nothing to analyze: skip the model call rather than send an empty
    # prompt, which invites a made-up answer.
    if not isinstance(text, str) or not text.strip():
        return "NA"

    prompt = f"""
You are a legal assistant. Identify the city where the case was heard from the following court text.

Instructions:
- Extract the city where the case was heard.
- Just provide the city name, nothing else.
- If no location is found, return NA

Court Text:
{text}
"""
    result = subprocess.run(
        ["ollama", "run", model],
        input=prompt.encode("utf-8"),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # A failed run is reported instead of being silently folded into 'NA';
    # we still return 'NA' so a long batch over many rows is not aborted.
    if result.returncode != 0:
        details = result.stderr.decode("utf-8", errors="replace").strip()
        warnings.warn(
            f"ollama exited with status {result.returncode}: {details}",
            RuntimeWarning,
            stacklevel=2,
        )
        return "NA"

    # errors="replace" keeps one malformed byte from raising mid-batch.
    output = result.stdout.decode("utf-8", errors="replace").strip()

    return output if output else "NA"

def extract_locations_from_dataframe(df, startline, endline, text_column="unofficial_text", model="llama3"):
    """
    Extract city names from a DataFrame containing court texts.

    For every row, the text in `text_column` is split into lines, the window
    ``lines[startline:endline]`` is re-joined with newlines, and that excerpt
    is passed to `extract_locations_from_text`.

    Parameters
    ----------
    df : pandas.DataFrame
        The DataFrame containing court text data. It is not modified.
    startline : int
        0-based index of the first line of each text to consider (inclusive).
    endline : int
        0-based index of the line at which to stop (exclusive).
    text_column : str, optional
        The column in `df` that contains the court text (default is "unofficial_text").
    model : str, optional
        The language model to use (default is "llama3").

    Returns
    -------
    pandas.DataFrame
        A copy of the input DataFrame with an additional 'locations' column
        containing extracted city names. Rows whose text is missing
        (``None``/``NaN``) or whose selected line window is empty or blank
        get 'NA' without the model being called.
    """
    def extract_slice_and_location(text):
        # Missing cells arrive as None/NaN rather than str and have no
        # .splitlines(); treat them as "no location".
        if not isinstance(text, str):
            return "NA"

        selected_text = "\n".join(text.splitlines()[startline:endline])

        # A text shorter than `startline` (or a blank window) leaves nothing
        # for the model to read.
        if not selected_text.strip():
            return "NA"

        return extract_locations_from_text(selected_text, model=model)

    result = df.copy()
    result["locations"] = result[text_column].apply(extract_slice_and_location)

    return result



## Federal Court

In [None]:
# Load the processed Federal Court decisions table; the court text used for
# location extraction lives in the `unofficial_text` column.
FC = pd.read_csv(filepath_or_buffer="../data/processed/FC_judges.csv")
FC

Unnamed: 0.1,Unnamed: 0,citation,dataset,year,language,document_date,source_url,unofficial_text,inadmissibility_reason,judges
0,11922,2014 FC 1,FC,2014,en,2014-01-02,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Telus Communications Company v. Canada (Attorn...,"['human_rights', 'serious_criminality', 'crimi...",['Strickland']
1,11930,2014 FC 1008,FC,2014,en,2014-10-22,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Bautista v. Canada (Citizenship and Immigratio...,['misrepresentation'],['Diner']
2,11933,2014 FC 1011,FC,2014,en,2014-10-23,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Canada (Public Safety and Emergency Preparedne...,['non_compliance'],"['Shore', 'Gauthier']"
3,11934,2014 FC 1012,FC,2014,en,2014-10-23,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Po v. Canada (Citizenship and Immigration)\nCo...,['human_rights'],['Tremblay-Lamer']
4,11935,2014 FC 1015,FC,2014,en,2014-10-24,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Gordon v. Canada (Citizenship and Immigration)...,"['human_rights', 'misrepresentation']",['Locke']
...,...,...,...,...,...,...,...,...,...,...
4826,23170,2022 CF 882,FC,2022,fr,2022-06-16,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Jules c. Canada (Citoyenneté et Immigration)\n...,"['human_rights', 'serious_criminality', 'crimi...",['St-Louis']
4827,23173,2023 CF 1619,FC,2023,fr,2023-12-01,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Salhi c. Canada (Citoyenneté et Immigration)\n...,"['serious_criminality', 'criminality']",['Azmudeh']
4828,23177,2024 CF 151,FC,2024,fr,2024-01-30,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Salazar Vargas c. Canada (Citoyenneté et Immig...,"['serious_criminality', 'criminality']",['Heneghan']
4829,23178,2024 CF 327,FC,2024,fr,2024-02-28,https://decisions.fct-cf.gc.ca/fc-cf/decisions...,Rashidian c. Canada (Procureur général)\nBase ...,['human_rights'],"['Ngo', 'Tabib', 'Steele']"


In [None]:
# Run the extractor on lines[10:25] of each decision's text.
# This makes one `ollama run` call per row, so expect it to take a while.
FC_city = extract_locations_from_dataframe(
    FC,
    startline=10,
    endline=25,
    text_column="unofficial_text",
    model="llama3",
)
FC_city

In [None]:
# Frequency of each distinct string the model returned, most common first.
FC_city["locations"].value_counts()

In [None]:
# Save the table with the new `locations` column.
# index=False keeps the positional row index out of the file, so reloading it
# does not add yet another "Unnamed: 0"-style column on top of the ones the
# input already carries.
FC_city.to_csv("../data/processed/FC_city.csv", index=False)