In [None]:
# pip install pandas sentence-transformers

In [None]:
import re
import pandas as pd
from sentence_transformers import SentenceTransformer, util

# Sentence-BERT encoder, loaded once and shared by the query and all chunks
_MODEL_NAME = 'all-MiniLM-L6-v2'
model = SentenceTransformer(_MODEL_NAME)

# Topic every chunk is ranked against; embedded once up front so that
# extract_semantic_chunks() does not have to re-encode it on each call
query = "wrongful termination of employment contract"
query_embedding = model.encode(query, convert_to_tensor=True)

# Lightweight sentence splitter using regex
def split_into_sentences(text):
    """Break *text* into sentences using a regex heuristic.

    A boundary is any run of whitespace that follows '.', '!' or '?' and is
    immediately followed by an ASCII capital letter. Abbreviations such as
    "Dr. Smith" are therefore split as well; this is a lightweight splitter,
    not a full tokenizer.

    Returns a list of stripped, non-empty sentence strings (empty list for
    blank input).
    """
    boundary = r'(?<=[.!?])\s+(?=[A-Z])'
    pieces = re.split(boundary, text.strip())
    # filter(None, ...) drops the single '' piece produced by blank input
    return [piece.strip() for piece in filter(None, pieces)]

# Combine sentences into multi-sentence chunks
def create_sentence_chunks(sentences, chunk_size=3):
    """Group consecutive sentences into space-joined chunks.

    Args:
        sentences: Iterable of sentence strings, in document order.
        chunk_size: Maximum number of sentences per chunk; must be >= 1.

    Returns:
        A list of strings, each the " "-join of up to ``chunk_size``
        consecutive sentences. The final chunk may be shorter. An empty
        input yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is less than 1. (Previously a negative
            value silently produced no chunks and zero surfaced as an
            unrelated ``range()`` error.)
    """
    if chunk_size < 1:
        raise ValueError(
            f"chunk_size must be a positive integer, got {chunk_size!r}"
        )

    # Materialize so generators and other non-sliceable iterables work too.
    sentences = list(sentences)
    return [
        " ".join(sentences[start:start + chunk_size])
        for start in range(0, len(sentences), chunk_size)
    ]

# Extract top-k semantically relevant sentence chunks
def extract_semantic_chunks(paragraph, top_k=2, chunk_size=3):
    """Return the chunks of *paragraph* most similar to the module-level query.

    The paragraph is split into sentences, grouped into chunks of
    ``chunk_size`` sentences, embedded with the module-level ``model`` and
    ranked by cosine similarity against ``query_embedding``.

    Args:
        paragraph: Text to search. Non-string values (e.g. NaN/None cells
            coming from a DataFrame column) are treated as empty.
        top_k: Number of best-scoring chunks to keep. Values below 1 yield
            an empty result.
        chunk_size: Number of sentences per chunk, forwarded to
            ``create_sentence_chunks``.

    Returns:
        The selected chunks joined by single spaces, ordered from most to
        least similar, with duplicates removed. ``""`` when there is
        nothing to rank.
    """
    # Guard before any encoding work: a missing cell would otherwise crash
    # in the splitter, and a negative top_k would make the [:top_k] slice
    # below drop chunks from the *end* instead of selecting the best ones.
    if not isinstance(paragraph, str) or top_k < 1:
        return ""

    sentences = split_into_sentences(paragraph)
    sentence_chunks = create_sentence_chunks(sentences, chunk_size=chunk_size)

    if not sentence_chunks:
        return ""

    chunk_embeddings = model.encode(sentence_chunks, convert_to_tensor=True)
    # cos_sim returns a (queries x chunks) matrix; row 0 is the single query.
    similarities = util.cos_sim(query_embedding, chunk_embeddings)[0]
    # .tolist() converts the tensor indices to plain ints for list indexing.
    top_indices = similarities.argsort(descending=True)[:top_k].tolist()
    top_chunks = [sentence_chunks[i] for i in top_indices]

    return " ".join(dict.fromkeys(top_chunks))  # Remove duplicates, preserve order

# Sample paragraphs (legal examples)
# Each paragraph is built from adjacent string literals (one sentence per
# literal) rather than backslash continuations inside a single literal:
# a backslash-newline within a string keeps the next line's indentation,
# which would inject runs of spaces into the text.
paragraphs = [
    (
        "The employee, who had served the company for over ten years, was dismissed "
        "without prior notice or documented warnings. "
        "The termination letter cited vague allegations of underperformance. "
        "However, no performance reviews or formal complaints had been filed. "
        "According to the labor regulations in effect, employees are entitled to a fair hearing before termination. "
        "Legal counsel argued that the dismissal constituted a breach of the employment contract and lacked procedural fairness, rendering it unlawful."
    ),
    (
        "In this case, the defendant employer terminated the claimant on grounds of gross misconduct, alleging multiple instances of insubordination. "
        "The claimant, however, provided evidence that all instructions in question had been followed precisely as given. "
        "An internal HR investigation was conducted post-termination rather than beforehand. "
        "The tribunal noted that due process was not followed, and that the employer had failed to observe its own disciplinary procedures outlined in the employee handbook."
    ),
    (
        "The plaintiff was removed from her position two weeks after filing a formal harassment complaint against a senior manager. "
        "The employer justified the termination by citing 'department restructuring,' though no similar positions were impacted. "
        "Emails showed that management discussed termination shortly after the complaint was lodged. "
        "The court found this to be retaliatory action, in violation of both the company’s whistleblower policy and federal employment protection statutes."
    ),
    (
        "The employer claimed economic downsizing as the reason for the layoffs, affecting over twenty staff members. "
        "However, it was revealed during litigation that the plaintiff's role was filled by a newly hired candidate within a month of termination. "
        "Moreover, no financial documentation was submitted to support the downsizing claim. "
        "The judge ruled the termination unjustified and awarded compensatory damages for wrongful dismissal."
    ),
    (
        "Under the terms of the employment agreement, either party was required to provide 30 days’ written notice prior to termination. "
        "The defendant employer terminated the plaintiff via email, effective immediately, citing breach of confidentiality. "
        "No hearing was conducted, and the accused breach was never substantiated. "
        "The plaintiff argued that the lack of notice and due process invalidated the termination, and the court agreed, awarding damages and legal costs."
    ),
]

# Create DataFrame and apply
# Never truncate cell text, so each extracted sentence prints in full
pd.set_option('display.max_colwidth', None)


def _most_relevant_sentence(text):
    """Return the single sentence of *text* closest to the query."""
    return extract_semantic_chunks(text, top_k=1, chunk_size=1)


# One row per paragraph; 'relevant_part' holds its best-matching sentence
df = pd.DataFrame({'paragraph': paragraphs})
df['relevant_part'] = df['paragraph'].apply(_most_relevant_sentence)

# Show result
print(df[['relevant_part']])


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

                                                                                                                                       relevant_part
0     Legal counsel argued that the dismissal constituted a breach of the employment contract and lacked procedural fairness, rendering it unlawful.
1       In this case, the defendant employer terminated the claimant on grounds of gross misconduct, alleging multiple instances of insubordination.
2                                                        Emails showed that management discussed termination shortly after the complaint was lodged.
3                                               The judge ruled the termination unjustified and awarded compensatory damages for wrongful dismissal.
4  The plaintiff argued that the lack of notice and due process invalidated the termination, and the court agreed, awarding damages and legal costs.
