In [2]:
import re
import language_tool_python  # For grammar and spelling correction
import nltk  # For more advanced text processing (optional)
from nltk.corpus import stopwords #For stop words removal
from nltk.stem import WordNetLemmatizer #For lemmatization

nltk.download('words', quiet=True) #download the words corpus
nltk.download('wordnet', quiet=True) #download the wordnet corpus
nltk.download('stopwords', quiet=True) #download stopwords corpus
nltk.download('omw-1.4', quiet=True) #download omw corpus



True

In [3]:
tool = language_tool_python.LanguageTool('en-US')  # Initialize the language tool
stop_words = set(stopwords.words('english')) #set of english stopwords
lemmatizer = WordNetLemmatizer() #initialize lemmatizer


def preprocess_text(text):
    """Preprocesses text for CV data cleaning."""

    if not isinstance(text, str): #Check input type
        return ""

    # 1. Lowercasing
    text = text.lower()

    # 2. Remove special characters and punctuation (except apostrophes)
    text = re.sub(r"[^a-zA-Z0-9\s']", "", text)  # Keep apostrophes

    # 3. Correct grammar and spelling mistakes
    matches = tool.check(text)
    corrected_text = tool.correct(text)

    # 4. Remove extra whitespace
    corrected_text = re.sub(r"\s+", " ", corrected_text).strip()

    # 5. Remove Stop words
    words = corrected_text.split()
    filtered_words = [word for word in words if word not in stop_words]
    corrected_text = " ".join(filtered_words)

    # 6. Lemmatization (convert words to base form)
    words = corrected_text.split()
    lemmatized_words = [lemmatizer.lemmatize(word) for word in words]
    corrected_text = " ".join(lemmatized_words)

    return corrected_text


# Example usage:
text = """
This is an Examplle text with some Grammer and speling Mistakez.  It also has   extra   whitespace.  And some stop words like 'the' and 'a'. It also has some words in different cases like 'Example' and 'TEXT'.
"""

cleaned_text = preprocess_text(text)
print(cleaned_text)

text2 = 123 # Example of non-string input
cleaned_text2 = preprocess_text(text2)
print(cleaned_text2) # Output should be empty string


text3 = "This is a test sentence with some repeating words words words."
cleaned_text3 = preprocess_text(text3)
print(cleaned_text3) # Output should be "test sentence repeating word word word"

text4 = "This is a test sentence with some contractions like it's and can't."
cleaned_text4 = preprocess_text(text4)
print(cleaned_text4) # Output should be "test sentence contraction like it's can't"

This example text grammar spelling mistake also extra whitespace stop word like 'the' 'a' also word different case like 'example' 'text'

This test sentence repeating word word word
This test sentence contraction like can't


In [6]:
text = '''Buisness DEVELOPMENT DIRECTOR
 Summary
 Business Development Director driven to exceed sales goals and build long-term relationships with customers. Creates a positive shopping
 experience through high-quality customer care.
 Highlights
 Flut in English and Spanish
 Action-oriented, results-oriented, "take charge" Sales and
 Customer Service Professional
 Superior communication skils
 Superb folow up skils
 Accomplishments
 Exceptional relationship building that leads to gaining the trust and
 credibility of individuals and groups
 Efective team player
 Capable of delivering a strong business case for client action with the
 skil set necessary to bring it to close
 Created strategies to develop and expand existing customer sales, which resulted in a 154% increase in monthly sales.
 Increased sales volume by adding 34 new accounts in the assigned territory.
 Managed a portfolio of 21 accounts totaling $14.5MM in sales.
 Experience
 Business Development Director 11/2012 to Current Company Name City , State
 Managed 3 Franchise , Kia , Hyundai , CDJR ( Chrysler,Dodge,Jeep & Ram )
 Trained/Developed Sales Team on engagement w/customers Trained/Developed Sales Team on CRM usage
 Met Weekly/Monthly with GM and Service Director for any emails and mail out Marketing campaigns
 Built Strong Relationships with Fleet Organizations
 Developed a two man team that handles al heat cases
 Sales Representative 01/2009 to 05/2012 Company Name City , State
 Developed and managed five Home Depot accounts, generating over $5MM in annual revenue
 Built strong relationships with Home Depot Management, Associates, Professional Contractors, ISD's, Government Agencies, Cities,
 Municipalities, Multi - Family Apartment Groups, Investors, and DIY'ers
 Monitored and forecast suficient inventory levels of Behr/Kilz product ready for demand
 Exceled at partnering with al core business operations to significantly increase Behr/Kilz footprint, expand market share, and generated
 sustainable revenue
 Won the Sales Driver Award, FY 2009
 Successfuly executed al initiatives and new product rol outs, Behr Ultra, Premium Plus SP Low VOC, Int/Ext Oil Base, Ext Wood Stains,
 Floor Coatings, and Kilz Pro X
 Trained and developed Home Depot associates to excel at driving both Home Depot and Behr/Kilz value while increasing sales through
 non-stop hands on training
 Won "Rookie of The Year" Award, 2010
 Serviced 2 territories due to lack of human capitol, total of 10 accounts, keeping them profitable & constantly growing
 Won "Ultimate Team Player" Award, 2011
 New Home Counselor 01/2006 to 07/2007 Company Name City , State
 Sold 28 new homes and produced $5,236,000 in revenue in first six months
 Achieved 97% capture rate for in-house lending company
 Sustained high profit margin through tactful and diplomatic negotiations
 Built strong strategic aliances with investor community, which led to multiple revenue streams and increased profitability
 Held first-time home buyer workshops to build comfort and demand
 Minimized sales cancelations by meticulous customer qualification and realistic expectation se ting
 New Home Counselor 10/2004 to 01/2006 Company Name City , State
 Sold 58 new homes and produced $8,700,000 in revenue
 Used blueprints without a model to sel 39 homes
 Participated in numerous TV commercials for Mi CasaTV
 Protected profits from margin degradation through advanced negotiation skils
 Won Beazer Homes National Marketing Award for best idea contributing to 14 new home sales in one day
 Orchestrated community outreach programs to create demand for move-ups and first-time buyers
 Achieved lowest cancelation rate in Dalas Division
 Built loyal relationships with a large, diverse Realtor base
 Developed strong relationships with a large portion of the investment community
Completed al requirements at Beazer University
 Won numerous sales contests
 New Business Development Sales Manager 11/1992 to 07/2004 Company Name City , State
 Achieved 154% of FY 2000 sales quota
 Implemented weekly training with sales team
 Achieved 125% of 1999 YTD sales quota
 Managed 21 wholesale distributer accounts
 Increased territory sales by 14% in Q2 and Q3 of 1998
 Preserved margin by seling at an average of four percentage points above industry standard
 Broadened market base by identifying new opportunities to expand the focus of wholesalers
 Tracked customer acquisition cost versus profitability
 Created and negotiated contracts, agreements, proposals, and purchases
 Education
 Superstar Seling, Brian Tracy , Ft. Worth, Texas, 2005 How to Build a Complete Sales Person , Plano, Texas 2005 Sales Success, Je frey
 Gitmer- Ft. Worth, Texas 2005 CSRE : Psychology 2011 P.E.E.R.S City , State , USA
 Psychology of Sales, Brian Tracy : Sales and Marketing 2005 Beazer University City , State , USA
 CFNI : Theology 2003 CSRE City , State , USA
 Associate of Arts : Theology 2003 CFNI City , State , USA 
Skils
 Sales and Marketing Strategy, Execution, Account Management, Business Development, Client Relations, Cold Caling, Computer Literate,
 Creative Problem Solving, CRM Systems, Customer Needs Assessment, Customer Satisfaction, Customer Service, Lead Development,
 Marketing'''
print(preprocess_text(text))

Buisness development director summary business development director driven exceed sale goal build long term relationship customer creates positive shopping experience high quality customer care highlight flat English Spanish action oriented result oriented take charge sale customer service professional superior communication skill superb flow skill accomplishment exceptional relationship building lead gaining trust credibility individual group effective team player capable delivering strong business case client action skin set necessary bring close created strategy develop expand existing customer sale resulted 154 increase monthly sale increased sale volume adding 34 new account assigned territory managed portfolio 21 account totaling 145mm sale experience business development director 112012 current company name city state managed 3 franchise KIA Hyundai CDR chryslerdodgejeep ram trained developed sale team engagement customer trained developed sale team CRM usage met weekly monthly 