<a href="https://colab.research.google.com/github/adarsh912/Finaly-Yr-Project/blob/main/FYP1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
!pip install datasets



In [4]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
from datasets import DatasetDict, Dataset
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np

Data Cleaning

In [5]:
import os
# List files in the directory to verify the correct path
os.listdir('/content/drive/MyDrive')

# Step 4: Load the CSV file using pandas
file_path = "/content/drive/MyDrive/Final Year Project/ipc_sections.csv"
df = pd.read_csv(file_path)
print("File loaded successfully!")


print("Dataset Preview:")
print(df.head())


print("\nMissing Values Check:")
print(df.isnull().sum())

# Handle missing values by replacing them with "Unknown"
df.fillna("Unknown", inplace=True)

# hf_dataset = Dataset.from_pandas(df)

# takinig only 10 entries ...subset training
hf_dataset = Dataset.from_pandas(df.head(10))

# Step  display the Hugging Face dataset preview
print("\nHugging Face Dataset Preview:")
print(hf_dataset)


File loaded successfully!
Dataset Preview:
                                         Description  \
0  Description of IPC Section 140\nAccording to s...   
1  Description of IPC Section 127\nAccording to s...   
2  Description of IPC Section 128\nAccording to s...   
3  Description of IPC Section 129\nAccording to s...   
4  Description of IPC Section 130\nAccording to s...   

                                             Offense  \
0  Wearing the dress or carrying any token used b...   
1  Receiving property taken by war or depredation...   
2  Public servant voluntarily allowing prisoner o...   
3  Public servant negligently suffering prisoner ...   
4  Aiding escape of, rescuing or harbouring, such...   

                                 Punishment  Section  
0                  3 Months or Fine or Both  IPC_140  
1   7 Years + Fine + forfeiture of property  IPC_127  
2  Imprisonment for Life or 10 Years + Fine  IPC_128  
3        Simple Imprisonment 3 Years + Fine  IPC_129  
4  Impri

In [6]:
# Step 10: Encode labels based on the 'Section' column
hf_dataset = hf_dataset.map(lambda x: {'label': pd.Categorical(x['Section']).codes}, batched=True)

# Check if 'label' is successfully added
print("Label Column in DataFrame:", hf_dataset.column_names)

# Step 11: Split dataset into training and testing sets
# Convert to pandas for train_test_split (to ensure splitting by pandas functionality)
train_df, test_df = train_test_split(hf_dataset.to_pandas(), test_size=0.2, random_state=42)

# Convert back to Hugging Face Datasets
train_dataset = Dataset.from_pandas(train_df[["Description", "label"]])
test_dataset = Dataset.from_pandas(test_df[["Description", "label"]])

# Create a DatasetDict
data = DatasetDict({
    "train": train_dataset,
    "test": test_dataset,
})




# Step 12: Preprocess function for tokenization
def preprocess_function(examples, tokenizer):
    return tokenizer(examples["Description"], truncation=True, padding="max_length", max_length=512)

# Step 13: Metrics for evaluation
def compute_metrics(pred):
    predictions = np.argmax(pred.predictions, axis=1)
    labels = pred.label_ids
    return {
        "accuracy": accuracy_score(labels, predictions),
        "f1": f1_score(labels, predictions, average="weighted"),
        "precision": precision_score(labels, predictions, average="weighted"),
        "recall": recall_score(labels, predictions, average="weighted"),
    }

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Label Column in DataFrame: ['Description', 'Offense', 'Punishment', 'Section', 'label']


In [7]:
# Model evaluation function
def evaluate_model(model_name, num_labels, tokenizer, data):
    print(f"\nEvaluating model: {model_name}")

    # Load model and tokenizer
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)

    # Tokenize dataset
    tokenized_datasets = data.map(lambda x: preprocess_function(x, tokenizer), batched=True)

    # Training arguments
    training_args = TrainingArguments(
        output_dir=f"./results_{model_name.replace('/', '_')}",  # Directory to save model
        evaluation_strategy="epoch",                             # Evaluate after each epoch
        save_strategy="epoch",                                   # Save model after each epoch
        learning_rate=2e-5,                                      # Learning rate
        per_device_train_batch_size=16,                          # Batch size per device
        num_train_epochs=3,                                      # Number of training epochs
        weight_decay=0.01,                                       # Weight decay for optimization
        save_total_limit=2,                                      # Limit the number of saved checkpoints
        load_best_model_at_end=True,                             # Load the best model at the end
        metric_for_best_model="f1",                              # Metric for best model selection
        report_to=None                                           # Disable WandB logging
    )


    # Initialize Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["test"],
        processing_class=tokenizer,  # Replace `tokenizer` with `processing_class`
        compute_metrics=compute_metrics,
    )

    # Train model
    trainer.train()

    # Evaluate model
    results = trainer.evaluate()
    print(f"Results for {model_name}: {results}")
    return results


In [8]:
# Step 3: Evaluate multiple models
models = [
    "nlpaueb/legal-bert-base-uncased",
    "bert-base-uncased",
    "roberta-base",
]

# Load the tokenizer once (can be adjusted for other models)
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # Use a default model or set a variable


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [9]:
# Ensure the DataFrame has a 'label' column based on the 'Section' column
if 'label' not in df.columns:
    try:
        df['label'] = pd.Categorical(df['Section']).codes
        print("Label column created based on 'Section'.")
    except KeyError:
        raise KeyError("The 'Section' column is missing in the DataFrame. Unable to create 'label' column.")
else:
    print("Label column already exists.")

# Confirm 'label' is now part of the DataFrame columns
print("Columns in DataFrame:", df.columns)


Label column created based on 'Section'.
Columns in DataFrame: Index(['Description', 'Offense', 'Punishment', 'Section', 'label'], dtype='object')


In [10]:

# Initialize an empty list to store results
results_summary = []

# Loop over each model in the list of models
for model_name in models:
    try:
        # Number of unique labels
        num_labels = len(df["label"].unique())

        # Evaluate the model and store the results
        results = evaluate_model(model_name, num_labels, tokenizer, data)

        # Append results to the summary list
        results_summary.append((model_name, results))
    except Exception as e:
        print(f"Error evaluating model {model_name}: {e}")

# Step 4: Compare Results
print("\nSummary of Results:")
if results_summary:
    for model_name, results in results_summary:
        print(f"{model_name}: {results}")
else:
    print("No results to summarize. Check for errors during model evaluation.")



Evaluating model: nlpaueb/legal-bert-base-uncased


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpaueb/legal-bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8 [00:00<?, ? examples/s]

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33madarsh_2021bite067[0m ([33madarsh_2021bite067-nit-srinagar[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,6.289034,0.0,0.0,0.0,0.0
2,No log,6.232118,0.0,0.0,0.0,0.0
3,No log,6.090025,0.0,0.0,0.0,0.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Results for nlpaueb/legal-bert-base-uncased: {'eval_loss': 6.289033889770508, 'eval_accuracy': 0.0, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 3.7914, 'eval_samples_per_second': 0.528, 'eval_steps_per_second': 0.264, 'epoch': 3.0}

Evaluating model: bert-base-uncased


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8 [00:00<?, ? examples/s]

Map:   0%|          | 0/2 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,6.20583,0.0,0.0,0.0,0.0
2,No log,6.312693,0.0,0.0,0.0,0.0
3,No log,6.368663,0.0,0.0,0.0,0.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Results for bert-base-uncased: {'eval_loss': 6.205829620361328, 'eval_accuracy': 0.0, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 4.7958, 'eval_samples_per_second': 0.417, 'eval_steps_per_second': 0.209, 'epoch': 3.0}

Evaluating model: roberta-base


config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/8 [00:00<?, ? examples/s]

Map:   0%|          | 0/2 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,5.885453,0.0,0.0,0.0,0.0
2,No log,5.892273,0.0,0.0,0.0,0.0
3,No log,5.894209,0.0,0.0,0.0,0.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Results for roberta-base: {'eval_loss': 5.885453224182129, 'eval_accuracy': 0.0, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 4.8155, 'eval_samples_per_second': 0.415, 'eval_steps_per_second': 0.208, 'epoch': 3.0}

Summary of Results:
nlpaueb/legal-bert-base-uncased: {'eval_loss': 6.289033889770508, 'eval_accuracy': 0.0, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 3.7914, 'eval_samples_per_second': 0.528, 'eval_steps_per_second': 0.264, 'epoch': 3.0}
bert-base-uncased: {'eval_loss': 6.205829620361328, 'eval_accuracy': 0.0, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 4.7958, 'eval_samples_per_second': 0.417, 'eval_steps_per_second': 0.209, 'epoch': 3.0}
roberta-base: {'eval_loss': 5.885453224182129, 'eval_accuracy': 0.0, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 4.8155, 'eval_samples_per_second': 0.415, 'eval_steps_per_second': 0.208, 'epoch': 3.0}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
