# Automatic legal file formatter

To specify which file should be formatted, change the path in the cell below:

In [1]:
# Input file to format; this name is passed to convert_legal_description in the last cell.
file_path = 'legal3.txt'

In [2]:
import re


def standardize_legal_description(text):
    """Normalize the punctuation of a raw legal description.

    Four passes run in order:

    1. Parenthesised text is deleted (non-greedy; ``.`` does not cross a
       newline, so only single-line parentheticals are removed).
    2. A period directly after an upper-case N, S, E or W becomes a space.
    3. Degree signs, straight single/double quotes and the right single
       quotation mark become dashes; commas become spaces.
    4. A single non-digit that is preceded by two digits and followed by
       two digits, an optional non-digit and two more digits becomes a
       dash (repairs a mistyped first separator).

    Returns the rewritten string.
    """
    # Every symbol listed here maps to a dash; the comma alone maps to a space.
    dash_marks = ('°', "'", '"', '’')
    char_map = str.maketrans({**dict.fromkeys(dash_marks, '-'), ',': ' '})

    without_notes = re.sub(r"\(.*?\)", "", text)
    without_dots = re.sub(r'(?<=[NSEW])\.', ' ', without_notes)
    dashed = without_dots.translate(char_map)
    return re.sub(r"(?<=\d{2})\D(?=\d{2}\D?\d{2})", "-", dashed)


def group_text(text):
    """Extract every bearing-and-distance call from *text*.

    A call is: an upper-case ``N`` or ``S``, three 2-digit groups (degrees,
    minutes, seconds) each separated by one non-digit, an upper-case ``W``
    or ``E``, and a distance.

    Args:
        text: Description text, normally the output of
            ``standardize_legal_description``.

    Returns:
        All calls found, each rendered as ``"N DD:MM:SS E distance"`` and
        joined by single spaces; an empty string when nothing matches.
    """
    pattern = re.compile(
        r'([NS])\s*(\d{2})\D(\d{2})\D(\d{2})'
        # Between the seconds and the E/W letter accept any run of
        # punctuation/whitespace: a seconds mark followed by a space
        # (e.g. `15" E`, standardized to `15- E`) is two characters, which
        # a single optional \D would reject and silently drop the call.
        # The `|\D` alternative keeps the previously accepted case of one
        # arbitrary non-digit (including a letter).
        r'(?:[^\dA-Za-z]*|\D)'
        # The distance is a well-formed decimal, so a sentence-ending
        # period after it ("100.50.") is not captured as part of the number.
        r'([WE])\s*(\d+(?:\.\d+)?|\.\d+)'
    )

    return ' '.join(
        f"{ns} {degrees}:{minutes}:{seconds} {ew} {distance}"
        for ns, degrees, minutes, seconds, ew, distance in pattern.findall(text)
    )


def final_mesh(text):
    """Split colon-formatted bearing text into a list of calls.

    Scans *text* for ``N|S DD:MM:SS`` followed by optional whitespace and
    stray colons, then ``W|E`` and a run of digits/periods as the distance.

    Returns a list with one ``"N DD:MM:SS E distance"`` string per match,
    in order of appearance (empty list when nothing matches).
    """
    bearing_re = re.compile(r'([NS])\s*(\d{2}):(\d{2}):(\d{2})\s*:{0,}\s*([WE])\s*([\d.]+)')

    # Each findall hit is a 6-tuple of the captured groups, in pattern order.
    return [
        f"{ns} {deg}:{mins}:{secs} {ew} {dist}"
        for ns, deg, mins, secs, ew, dist in bearing_re.findall(text)
    ]



def convert_legal_description(file_path, output_path='formatted_legal.txt', encoding=None):
    """Format the bearings in a legal-description file, one call per line.

    The file is read in full, passed through
    ``standardize_legal_description``, ``group_text`` and ``final_mesh``,
    and the resulting lines are written to *output_path*.

    Args:
        file_path: Path of the text file to read.
        output_path: Path of the file to write. Defaults to
            ``'formatted_legal.txt'`` in the current directory.
        encoding: Text encoding used for both files. ``None`` (the default)
            uses the platform default; pass e.g. ``"utf-8"`` when the
            degree sign or curly quotes in the input are not decoded
            correctly.

    Raises:
        OSError: If the input cannot be opened or the output cannot be
            written.
    """
    with open(file_path, 'r', encoding=encoding) as file:
        legal_description = file.read()

    legal_description = standardize_legal_description(legal_description)
    grouped_text = group_text(legal_description)
    formatted_lines = final_mesh(grouped_text)

    # One buffered writelines call instead of one write per line.
    with open(output_path, 'w', encoding=encoding) as output_file:
        output_file.writelines(f"{line}\n" for line in formatted_lines)

    print(f"Conversion complete! Check the '{output_path}' file.")

### Run the cell below to execute the program

In [3]:
# Read file_path, extract the bearings, and write them to 'formatted_legal.txt'.
convert_legal_description(file_path)

Conversion complete! Check the 'formatted_legal.txt' file.
