In [76]:
import numpy as np 
import pandas as pd 
import spacy 

import nltk
from nltk.tokenize import sent_tokenize, word_tokenize 
from nltk.parse.corenlp import CoreNLPParser 

from dotenv import load_dotenv
import os
import google.generativeai as genai

In [46]:
# Load the requirement sentences to analyse. Later cells read the "sentence"
# column of this frame.
df = pd.read_csv(filepath_or_buffer="Sentences.csv")
print(df)

                                               sentence
0     The system shall refresh the display every 60 ...
1     The application shall match the color of the s...
2              If projected  the data must be readable.
3     On a 10x10 projection screen  90% of viewers m...
4      The product shall be available during normal ...
...                                                 ...
1285                                                â€‹
1286  Texts sent to that number will be sent to the ...
1287  If a question is not understood by our API, th...
1288  Upon the USB being plugged in the system shall...
1289  The system shall be able to handle 1000 custom...

[1290 rows x 1 columns]


In [84]:
class LingAnalyser:
    """Heuristic sentence-complexity checker built on a spaCy dependency parse.

    A sentence is measured by its spaCy token count and by the number of
    tokens whose dependency label is listed in ``CLAUSE_DEPS``. Sentences that
    exceed ``MAX_SENT_LEN`` tokens or ``MAX_CLAUSE`` counted clauses are
    flagged for simplification; ``getSimplerVersion`` asks a Gemini model to
    rewrite them.
    """

    # Bounds on the token count of a sentence (len() of the spaCy Doc).
    MIN_SENT_LEN = 3
    MAX_SENT_LEN = 40
    # Bounds on the number of tokens carrying a label from CLAUSE_DEPS.
    MIN_CLAUSE = 1
    MAX_CLAUSE = 3
    # Dependency labels that are counted as one clause each.
    CLAUSE_DEPS = ("ROOT", "advcl", "ccomp", "xcomp", "relcl", "csubj")
    # Name of the generative model used by getSimplerVersion.
    GEMINI_MODEL = "gemini-2.5-flash"
    # spaCy pipeline, loaded once when the class is defined and shared by all
    # instances. Methods must reach it through ``self.nlp``: class attributes
    # are not visible as bare names inside method bodies.
    nlp = spacy.load("en_core_web_sm")
    # Lazily created Gemini client, cached so repeated calls do not re-read
    # the .env file and re-configure the SDK every time.
    _model = None

    def analyseSent(self, txt):
        """Classify one sentence by length and clause count.

        Args:
            txt: The sentence text. Non-string values (for example the NaN
                pandas produces for an empty CSV cell) are reported as
                invalid instead of being passed to spaCy.

        Returns:
            dict with the keys
            ``isValid`` (bool): False when the input is not a string, has
                fewer than ``MIN_SENT_LEN`` tokens or fewer than
                ``MIN_CLAUSE`` counted clauses.
            ``Complexity`` (str): "simple" for invalid input, "complex" when
                either upper bound is exceeded, otherwise "medium".
            ``sent_len`` (int): number of spaCy tokens (0 for non-strings).
            ``clause_count`` (int): number of tokens whose ``dep_`` is in
                ``CLAUSE_DEPS`` (0 for non-strings).
            ``SimplificationSuggested`` (bool): True only for "complex".
        """
        result = {
            "isValid": True,
            "Complexity": "simple",
            "sent_len": 0,
            "clause_count": 0,
            "SimplificationSuggested": False,
        }

        if not isinstance(txt, str):
            result["isValid"] = False
            return result

        words = self.nlp(txt)
        len_sent = len(words)
        len_clause = sum(1 for word in words if word.dep_ in self.CLAUSE_DEPS)
        result["sent_len"] = len_sent
        result["clause_count"] = len_clause

        if len_sent < self.MIN_SENT_LEN or len_clause < self.MIN_CLAUSE:
            result["isValid"] = False
            return result

        # NOTE(review): every valid sentence within the limits is labelled
        # "medium", so "simple" is only ever returned alongside
        # isValid=False. Kept as-is to preserve existing results; confirm
        # whether a separate "simple" tier was intended.
        if len_sent > self.MAX_SENT_LEN or len_clause > self.MAX_CLAUSE:
            result["Complexity"] = "complex"
            result["SimplificationSuggested"] = True
        else:
            result["Complexity"] = "medium"

        return result

    def printDepTree(self, txt):
        """Render the dependency parse of ``txt`` inline with displaCy."""
        words = self.nlp(txt)
        spacy.displacy.render(words, style="dep", jupyter=True)

    def print_tree(self, token, level=0):
        """Print ``token`` and, recursively, its children as an indented tree.

        Args:
            token: spaCy token to start from.
            level: Current depth; each level adds one " - -" prefix.
        """
        print(" - -" * level + f"{token.text}({token.dep_})")
        for child in token.children:
            self.print_tree(child, level + 1)

    def printParseTree(self, txt):
        """Print a text tree for every token labelled ROOT in ``txt``."""
        words = self.nlp(txt)
        for word in words:
            if word.dep_ == "ROOT":
                self.print_tree(word)

    def _get_model(self):
        """Return the shared Gemini model, configuring the SDK on first use.

        The API key is read from the ``GEMINI_API_KEY`` environment variable
        after loading a ``.env`` file, exactly as before; it is passed to
        ``genai.configure`` unchanged (including when it is unset).
        """
        cls = type(self)
        if cls._model is None:
            load_dotenv()  # loads .env automatically
            api_key = os.getenv("GEMINI_API_KEY")
            genai.configure(api_key=api_key)
            cls._model = genai.GenerativeModel(cls.GEMINI_MODEL)
        return cls._model

    def getSimplerVersion(self, txt):
        """Ask the Gemini model for a simplified rewrite of ``txt``.

        The clause and length limits quoted in the prompt come from
        ``MAX_CLAUSE`` and ``MAX_SENT_LEN`` so the prompt stays in step with
        the thresholds used by ``analyseSent``.

        Args:
            txt: The sentence to simplify.

        Returns:
            The ``text`` attribute of the model response.

        Note:
            This performs a network request per call; errors raised by the
            SDK (or by accessing ``response.text``) are not caught here.
        """
        model = self._get_model()

        prompt = f"""You are a text simplification agent. Your job is to simplify text provided and return the simplified sentence.
        The provided text might contain multilple clauses. The simplified sentences shouldn't contain more than {self.MAX_CLAUSE} clauses per sentence and
        the sentence lenght shouldn't be more than {self.MAX_SENT_LEN}. You are allowed to split sentences to multiple sentences to achieve this.
        provide no other text with the response.
        Input_text:\"{txt}\""""

        response = model.generate_content(prompt)
        return response.text

The Disputes System must allow the users to select disputable transactions (based on the age of the transaction) from a user interface and initiate a dispute (ticket retrieval request or chargeback notification) on the selected transaction.
Simplified: [ The Disputes System lets users select transactions.
Users choose based on transaction age.
They use a user interface.
They can then start a dispute.
This might be a ticket request.
Or it could be a chargeback notice. ]
A portfolio consists of documentation that would provide proof of a purchase such as the documentation that is received from a car rental agency that is more than a sales receipt.
Simplified: [ A portfolio holds documents. These documents prove purchases. An example is car rental paperwork. It is more than a sales receipt. ]
 The Disputes System must provide a confirmation to the user upon the creation of ticket retrieval request that contains the following information; the dispute case number  the type of retrieval requ

KeyboardInterrupt: 

In [None]:
la = LingAnalyser()
# Walk the first 100 sentences; only those flagged as needing simplification
# are sent to the model, and each is printed followed by its rewrite.
for s in df["sentence"][0:100]:
    verdict = la.analyseSent(s)
    if not verdict["SimplificationSuggested"]:
        continue
    print(s)
    print("Simplified: [",la.getSimplerVersion(s),"]")

In [85]:
# Load the SRS document rows; later cells read its "sentence" column.
srs = pd.read_csv(filepath_or_buffer="srs.csv")

                                                sentence
0                E-GOVERNANCE MISSION MODE PROJECT (MMP)
1      CRIME & CRIMINAL TRACKING NETWORK AND SYSTEMS ...
2      FUNCTIONAL REQUIREMENTS SPECIFICATION V1.0 (DR...
3           MINISTRY OF HOME AFFAIRS GOVERNMENT OF INDIA
4                                                    1.0
...                                                  ...
13034  The system shall provide a Glossary for indust...
13035                                         Interfaces
13036                               Applicable Standards
13037  The system shall retain existing HIPAA complia...
13038  The system shall follow HIPAA compliance capab...

[13039 rows x 1 columns]


In [87]:
la = LingAnalyser()

# Show the analysis dict for each of the first ten SRS rows.
for s in srs["sentence"][:10]:
    report = la.analyseSent(s)
    print(report)

{'isValid': True, 'Complexity': 'medium', 'SimplificationSuggested': False}
{'isValid': True, 'Complexity': 'medium', 'SimplificationSuggested': False}
{'isValid': True, 'Complexity': 'medium', 'SimplificationSuggested': False}
{'isValid': True, 'Complexity': 'medium', 'SimplificationSuggested': False}
{'isValid': False, 'Complexity': 'simple', 'SimplificationSuggested': False}
{'isValid': False, 'Complexity': 'simple', 'SimplificationSuggested': False}
{'isValid': True, 'Complexity': 'medium', 'SimplificationSuggested': False}
{'isValid': True, 'Complexity': 'complex', 'SimplificationSuggested': True}
{'isValid': False, 'Complexity': 'simple', 'SimplificationSuggested': False}
{'isValid': True, 'Complexity': 'medium', 'SimplificationSuggested': False}
