In [17]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [18]:
!pip install -U accelerate
!pip install -U transformers
!pip install datasets
!pip install -q -U google-generativeai

  pid, fd = os.forkpty()




In [19]:
import os
os.environ['GOOGLE_API_KEY'] = 'AIzaSyCZUO7rZzftrySdxwu7E6HQ3aKBnTz8mC4'

In [20]:
from datasets import load_dataset, Dataset
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import Dataset, DataLoader
import torch
import numpy as np
import pandas as pd

In [21]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)


Using device: cuda


In [22]:
dataset = load_dataset("nguha/legalbench", 'cuad_audit_rights')

In [23]:
dataset

DatasetDict({
    train: Dataset({
        features: ['answer', 'index', 'text', 'document_name'],
        num_rows: 6
    })
    test: Dataset({
        features: ['answer', 'index', 'text', 'document_name'],
        num_rows: 1216
    })
})

In [24]:
# Convert to DataFrame for easier handling
df_train = pd.DataFrame(dataset['test'])
df_test = pd.DataFrame(dataset['train'])

# Preprocess text
df_train['cleaned_text'] = df_train['text'].apply(lambda text: text.strip().lower())
df_test['cleaned_text'] = df_test['text'].apply(lambda text: text.strip().lower())

#To split the data better
df_combined = pd.concat([df_train, df_test])
df_combined.drop(columns=['index'])

# Shuffle the data
df_combined_shuffled = df_combined.sample(frac=1).reset_index(drop=True)
df_combined_shuffled.drop(columns=['index'])

# Split the data into training, validation, and test sets
train_data, test_data = train_test_split(df_combined_shuffled, test_size=0.2, stratify = df_combined_shuffled['answer'])
val_data, test_data = train_test_split(test_data, test_size=0.4, stratify = test_data['answer'])

print(f"Training set size: {train_data.shape}")
print(f"Validation set size: {val_data.shape}")
print(f"Test set size: {test_data.shape}")
     

Training set size: (977, 5)
Validation set size: (147, 5)
Test set size: (98, 5)


In [25]:
# Initialize the tokenizer and model
tokenizer = BertTokenizer.from_pretrained('nlpaueb/legal-bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('nlpaueb/legal-bert-base-uncased', num_labels=2)
model.to(device)

# Tokenize the inputs
train_encodings = tokenizer(train_data['cleaned_text'].tolist(), truncation=True, padding=True, max_length=512)
val_encodings = tokenizer(val_data['cleaned_text'].tolist(), truncation=True, padding=True, max_length=512)

# Convert labels to tensor
train_labels = torch.tensor(train_data['answer'].apply(lambda x: 1 if x.lower() == "yes" else 0).tolist())
val_labels = torch.tensor(val_data['answer'].apply(lambda x: 1 if x.lower() == "yes" else 0).tolist())

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpaueb/legal-bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# Finetuning Checkpoint

In [26]:
# Create dataset class
class LegalDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = self.labels[idx]
        return item

    def __len__(self):
        return len(self.labels)

# Create datasets
train_dataset = LegalDataset(train_encodings, train_labels)
val_dataset = LegalDataset(val_encodings, val_labels)

In [27]:
def train_and_evaluate(train_texts, train_labels, val_texts, val_labels, model, tokenizer):

    # Tokenize the inputs
    train_encodings = tokenizer(train_texts.tolist(), truncation=True, padding=True, max_length=512)
    val_encodings = tokenizer(val_texts.tolist(), truncation=True, padding=True, max_length=512)

    # Convert labels to tensor
    train_labels = torch.tensor(train_labels.apply(lambda x: 1 if x.lower() == "yes" else 0).tolist())
    val_labels = torch.tensor(val_labels.apply(lambda x: 1 if x.lower() == "yes" else 0).tolist())

    # Create datasets
    train_dataset = LegalDataset(train_encodings, train_labels)
    val_dataset = LegalDataset(val_encodings, val_labels)

    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        eval_strategy="epoch"
    )

    trainer = Trainer(
        model = model,
        args = training_args,
        train_dataset = train_dataset,
        eval_dataset = val_dataset,
    )

    trainer.train()
    predictions = trainer.predict(val_dataset)
    preds = predictions.predictions.argmax(-1)
    labels = predictions.label_ids

    accuracy = accuracy_score(labels, preds)
    precision = precision_score(labels, preds)
    recall = recall_score(labels, preds)
    f1 = f1_score(labels, preds)

    return accuracy, precision, recall, f1

# K -Fold Validation

In [28]:
kf = StratifiedKFold(n_splits=5)
accuracies, precisions, recalls, f1s = [], [], [], []
texts = train_data['cleaned_text']
labels = train_data['answer']
i=0
for train_index, val_index in kf.split(texts, labels):
    train_texts = texts[texts.index.isin(train_index)]
    val_texts = texts[texts.index.isin(val_index)]
    train_labels = labels[labels.index.isin(train_index)]
    val_labels = labels[labels.index.isin(val_index)]

    #Initializing a new model
    model = BertForSequenceClassification.from_pretrained('nlpaueb/legal-bert-base-uncased', num_labels=2)
    tokenizer = BertTokenizer.from_pretrained('nlpaueb/legal-bert-base-uncased')
    model.to(device)

    accuracy, precision, recall, f1 = train_and_evaluate(train_texts, train_labels, val_texts, val_labels, model, tokenizer)

    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    f1s.append(f1)

    model.save_pretrained('fine-tuned-legal-bert-fold'+str(i))
    tokenizer.save_pretrained('fine-tuned-legal-bert-fold'+str(i))
    i+=1

# Print average metrics
print(f"Average Accuracy: {sum(accuracies) / len(accuracies)}")
print(f"Average Precision: {sum(precisions) / len(precisions)}")
print(f"Average Recall: {sum(recalls) / len(recalls)}")
print(f"Average F1 Score: {sum(f1s) / len(f1s)}")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpaueb/legal-bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112818500000938, max=1.0…

Epoch,Training Loss,Validation Loss
1,0.1269,0.050131
2,0.0007,0.048007
3,0.118,0.123608


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpaueb/legal-bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,0.2582,0.198906
2,0.1304,0.015291
3,0.0002,0.041073


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpaueb/legal-bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,0.1982,0.106642
2,0.0009,0.10062
3,0.0003,0.106083


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpaueb/legal-bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,0.1735,0.04554
2,0.0009,0.034061
3,0.0002,0.000115


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpaueb/legal-bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,0.2349,0.079451
2,0.001,0.140555
3,0.0002,0.004194


Average Accuracy: 0.9923131620053658
Average Precision: 0.9877628778203491
Average Recall: 0.9974999999999999
Average F1 Score: 0.9925855769916836


In [29]:
# Define the test function
def test_model(test_texts, test_labels, model_path, tokenizer_path):
    # Load the fine-tuned model and tokenizer
    model = BertForSequenceClassification.from_pretrained(model_path)
    tokenizer = BertTokenizer.from_pretrained(tokenizer_path)
    model.to(device)

    # Tokenize the test texts
    test_encodings = tokenizer(test_texts.tolist(), truncation=True, padding=True, max_length=512)

    # Convert labels to tensor
    test_labels_tensor = torch.tensor(test_labels.apply(lambda x: 1 if x.lower() == "yes" else 0).tolist())

    # Create a test dataset
    test_dataset = LegalDataset(test_encodings, test_labels_tensor)

    # Create a DataLoader for the test dataset
    test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)

    # Evaluate the model
    model.eval()
    preds = []
    labels = []
    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels.extend(batch['labels'].cpu().numpy())
            outputs = model(input_ids, attention_mask=attention_mask)
            preds.extend(torch.argmax(outputs.logits, dim=-1).cpu().numpy())

    # Calculate metrics
    accuracy = accuracy_score(labels, preds)
    precision = precision_score(labels, preds)
    recall = recall_score(labels, preds)
    f1 = f1_score(labels, preds)

    return accuracy, precision, recall, f1

In [30]:
# Now, we will try loading the test data. 
test_texts = test_data['cleaned_text']
test_labels = test_data['answer']

# Iterate over the saved models and evaluate them
for i in range(5):  # I'm using 5 models here 
    model_path = f'fine-tuned-legal-bert-fold{i}'
    tokenizer_path = f'fine-tuned-legal-bert-fold{i}'

    accuracy, precision, recall, f1 = test_model(test_texts, test_labels, model_path, tokenizer_path)
    print(f"Model {i} - Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1 Score: {f1}")

accuracy, precision, recall, f1 = test_model(test_texts, test_labels,'nlpaueb/legal-bert-base-uncased', 'nlpaueb/legal-bert-base-uncased')
print(f"Model (untrained Legal-BERT) - Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1 Score: {f1}")

Model 0 - Accuracy: 0.9795918367346939, Precision: 0.9607843137254902, Recall: 1.0, F1 Score: 0.98
Model 1 - Accuracy: 0.9795918367346939, Precision: 0.9607843137254902, Recall: 1.0, F1 Score: 0.98
Model 2 - Accuracy: 0.9795918367346939, Precision: 0.9607843137254902, Recall: 1.0, F1 Score: 0.98
Model 3 - Accuracy: 0.9795918367346939, Precision: 0.9607843137254902, Recall: 1.0, F1 Score: 0.98
Model 4 - Accuracy: 0.9897959183673469, Precision: 0.98, Recall: 1.0, F1 Score: 0.98989898989899


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpaueb/legal-bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model (untrained Legal-BERT) - Accuracy: 0.45918367346938777, Precision: 0.3333333333333333, Recall: 0.08163265306122448, F1 Score: 0.13114754098360654


In [31]:
import os

# Directory to save models
save_dir = '/kaggle/working/fine-tuned-legal-bert-5folds'
os.makedirs(save_dir, exist_ok=True)

for i in range(5):  # Since we have five models. 
    model_path = f'fine-tuned-legal-bert-fold{i}'
    tokenizer_path = f'fine-tuned-legal-bert-fold{i}'

    model.save_pretrained(os.path.join(save_dir, model_path))
    tokenizer.save_pretrained(os.path.join(save_dir, tokenizer_path))

print(f"Models saved to {save_dir}")

Models saved to /kaggle/working/fine-tuned-legal-bert-5folds


In [32]:
from transformers import BertForSequenceClassification, BertTokenizer

# Path to the dataset on Kaggle
model_path = "/kaggle/working/fine-tuned-legal-bert-fold2"

# Load model and tokenizer
model = BertForSequenceClassification.from_pretrained(model_path)
tokenizer = BertTokenizer.from_pretrained(model_path)

# Move model to the specified device
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [33]:
# Load model
model = BertForSequenceClassification.from_pretrained("/kaggle/working/fine-tuned-legal-bert-fold0")
tokenizer = BertTokenizer.from_pretrained("/kaggle/working/fine-tuned-legal-bert-fold0")
model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [34]:
# Function for classification using Legal-BERT
def classify_clause_legal_bert(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
    outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=-1)
    return predictions.item()

test_clause= "The Recipient shall not advertise or otherwise make public the fact that it has a confidential relationship with UNHCR nor shall the Recipient in any manner whatsoever use the name emblem or official seal of the United Nations or UNHCR or any abbreviation of the name of the United Nations or UNHCR in connection with its business or otherwise"

# Get the combined result
response = classify_clause_legal_bert(test_clause)

# Print the combined result
print(response)

0


# Risk Analysis 

In [35]:
import os
import google.generativeai as genai

genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

# Create the model
generation_config = {
  "temperature": 1,
  "top_p": 0.95,
  "top_k": 40,
  "max_output_tokens": 2000,
  "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
  model_name="gemini-1.5-flash",
  generation_config=generation_config,
)

chat_session = model.start_chat(
  history=[]
)

def run_riskAnalysis(clause):
    risk_template = "You are a legal advisor. Identify any definite risks in the clauses given to you. Mention that there are none if there are no risks. "
    response = chat_session.send_message(f"{risk_template}\n\n{clause}")
    return response.text

# Accept input from the user for the clause
test_clause = input("Enter the clause you would like to analyze for risks: ")

# Get the combined result
response = run_riskAnalysis(test_clause)

# Print the combined result along with the sentences that lead to that conclusion
print(response)


Enter the clause you would like to analyze for risks:  The Recipient shall not be precluded from disclosing the Confidential Information that is (i) obtained by the Recipient without restriction from a third party who is not in breach of any obligation as to confidentiality to the owner of such Confidential Information or any other person, or


The clause you provided presents several potential risks:

**1. Ambiguity of "obtained without restriction":**

* **Defining "restriction":**  What constitutes a "restriction" on disclosure is unclear.  Is it a formal contract, a verbal agreement, or even an understanding between parties? This ambiguity could lead to disputes about whether information was truly "obtained without restriction."
* **Publicly available information:** The clause doesn't explicitly address information that is publicly available.  If information is publicly available, it is generally not considered confidential. This lack of clarity could lead to the Recipient claiming they obtained the information publicly, even if it was shared in a confidential setting.

**2. Reliance on third-party obligations:**

* **Due diligence on third party:** The Recipient's ability to rely on a third party's non-breach of confidentiality is problematic. The Recipient bears the burden of ensuring the third party is not in breach. T

In [59]:
import os
import google.generativeai as genai

# Configure the API key
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

# Create the model with generation configuration
generation_config = {
  "temperature": 1,
  "top_p": 0.95,
  "top_k": 40,
  "max_output_tokens": 2000,
  "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
  model_name="gemini-1.5-flash",
  generation_config=generation_config,
)

chat_session = model.start_chat(
  history=[]
)

def run_gemini_integration(classification_label, risk_analysis, clause):
    # Create the prompt based on classification and risks provided
    prompt = (
        f"Here is a contract clause that has been classified as '{classification_label}'. "
        f"Explain what that means:\n\n'{clause}'\n\n"
        f"The potential risks identified in this clause are:\n{risk_analysis}\n\n"
    )

    # Send message to chat session
    response = chat_session.send_message(
        "You are a legal advisor. Please provide an integrated, cohesive explanation of this clause, "
        "its classification, and the identified risks. Provide the response in the following template:\n\n" + prompt
    )
    return response.text

# Sample inputs
classification_label = "High Liability"
risk_analysis = "This clause may expose the company to significant legal liability due to the lack of limitation on indemnification."
clause = "The supplier agrees to indemnify and hold harmless the purchaser against any and all claims, damages, and expenses arising from the performance of this agreement."

# Get the combined result
response = run_gemini_integration(classification_label, risk_analysis, clause)

# Print the combined result
print(response)



## Contract Clause Analysis: 

**Clause:** "The supplier agrees to indemnify and hold harmless the purchaser against any and all claims, damages, and expenses arising from the performance of this agreement."

**Classification:** **High Liability**

**Explanation:** This clause, classified as "High Liability," is a broad indemnity provision. It obligates the supplier to protect the purchaser from any and all financial consequences stemming from the agreement's execution. This means the supplier is responsible for covering:

* **Claims:**  Any legal action brought against the purchaser related to the agreement.
* **Damages:** Financial losses incurred by the purchaser due to the supplier's actions or omissions.
* **Expenses:**  Costs incurred by the purchaser in defending against claims or lawsuits. 

**Potential Risks Identified:**

This clause exposes the company to significant legal liability due to the lack of limitation on indemnification. Here's why:

* **Unlimited Scope:** The cla

In [82]:
import os
import google.generativeai as genai
from transformers import BertForSequenceClassification, BertTokenizer
import torch

# Load Legal-BERT model and tokenizer
model_path = "/kaggle/working/fine-tuned-legal-bert-fold0"
model = BertForSequenceClassification.from_pretrained(model_path)
tokenizer = BertTokenizer.from_pretrained(model_path)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Configure API for generative AI
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

# Create generative model configuration
generation_config = {
  "temperature": 1,
  "top_p": 0.95,
  "top_k": 40,
  "max_output_tokens": 8192,
  "response_mime_type": "text/plain",
}

generative_model = genai.GenerativeModel(
  model_name="gemini-1.5-flash",
  generation_config=generation_config,
)

chat_session = generative_model.start_chat(history=[])

# Function to classify clause using Legal-BERT
def classify_clause_legal_bert(clause):
    inputs = tokenizer(clause, return_tensors="pt").to(device)
    outputs = model(**inputs)
    logits = outputs.logits
    prediction = torch.argmax(logits, dim=-1).item()
    return prediction

# Function to analyze risks using generative AI
def run_riskAnalysis(clause):
    risk_template = "You are a legal advisor. Identify any definite risks in the clauses given to you. Mention that there are none if there are no risks."
    response = chat_session.send_message(f"{risk_template}\n\n{clause}")
    return response.text

# Function to run prompt using Gemini model
def run_gemini_integration(classification_label, risk_analysis, clause):
    prompt = (
        f"Here is a contract clause that has been classified as '{classification_label}.Name what type of clause it is and explain what this clause means for the user.':\n\n"
        f"'{clause}'\n\n"
        f"The potential risks identified in this clause are:\n{risk_analysis}. Give the exact lines that explain this risk. \n\n"
    )

    response = chat_session.send_message(
        "You are a legal advisor. First, mention the legal name of the claus and explain how it works. Please provide an integrated, cohesive explanation of this clause, "
        "its classification, and the identified risks. Don't give the recommendations to mitigate the clause. Provide the response in the following template:\n\n" + prompt
    )
    return response.text

# Combined function to classify and analyze a clause
def classify_and_analyze_clause(clause):
    classification_result = classify_clause_legal_bert(clause)
    classification_label = "Audit Clause" if classification_result == 1 else "Not an Audit Clause"
    risk_analysis = run_riskAnalysis(clause)
    integrated_response = run_gemini_integration(classification_label, risk_analysis, clause)
    return integrated_response

# Test with a sample clause
test_clause = input("Please enter a contract clause to analyze: ")

# Get the result
response = classify_and_analyze_clause(test_clause)

# Print the result
print(response)


Please enter a contract clause to analyze:  


Please provide me with the clause you want me to review. I need the specific text of the clause to identify any potential risks. Once you provide the clause, I will:

1. **Classify the clause:** I will determine the type of clause (e.g., Audit Clause, Termination Clause, Indemnification Clause).
2. **Explain its meaning:** I will provide a clear and concise explanation of what the clause means for the user in the context of the agreement.
3. **Identify potential risks:** I will analyze the clause for any potential legal issues, such as:
    * **Ambiguity:** Unclear language that could be interpreted in multiple ways, leading to disputes.
    * **Unenforceability:** Clauses that violate laws or established legal principles, making them unenforceable.
    * **Liability:** Provisions that could expose a party to financial or other legal consequences.
    * **Breach of contract:** Clauses that create grounds for one party to terminate the agreement or seek damages.
    * **Misrepresentatio