# Exploratory Analysis of Legal Documents

This notebook performs exploratory analysis on legal documents before summarization: it loads a sample raw document, preprocesses it, and generates a summary.

In [None]:
# Import necessary libraries
import os
import json
import pandas as pd

### Load and inspect a raw legal document

In [None]:
# Load a sample raw legal document from the raw-data directory
raw_data_path = '../data/raw/'
sample_file = os.path.join(raw_data_path, 'unique_legal_document_1.txt')

# Decode explicitly: without `encoding`, open() falls back to the platform's
# locale encoding, so the same file could load differently (or fail) on
# another machine.
# NOTE(review): assumes the raw documents are stored as UTF-8 -- confirm.
with open(sample_file, 'r', encoding='utf-8') as file:
    raw_text = file.read()

print('Raw Document Text:', raw_text[:500])  # Show first 500 characters

### Preprocess the document

In [None]:
# Preprocess the loaded document with the project's DataPreprocessor:
# clean the raw text, then tokenize and split the cleaned text into sections.
from src.data_preprocessing import DataPreprocessor

preprocessor = DataPreprocessor()

cleaned_text = preprocessor.clean_text(raw_text)
tokens = preprocessor.tokenize_text(cleaned_text)
sections = preprocessor.extract_sections(cleaned_text)

# Preview the results; the long outputs are truncated to keep the cell short.
text_preview = cleaned_text[:500]  # first 500 characters
print("Cleaned Text:", text_preview)

token_preview = tokens[:20]  # first 20 items
print("Tokens:", token_preview)

print("Sections:", sections)

### Summarize the document

In [None]:
# Summarize the cleaned document text with the project's LegalSummarizer
from src.summarization import LegalSummarizer

summarizer = LegalSummarizer()

# The summarizer is given the cleaned text, not the raw file contents.
summary = summarizer.summarize(cleaned_text)

print("Summary:", summary)