In [3]:
import re
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer

In [5]:
sample_text="As a dedicated Consultant at TCS, I specialize in utilizing SAP HANA applications to drive transformative solutions for the Aditya Birla Fashion and Retail project. With a background in computer science engineering and extensive experience in the IT industry, I bring a unique blend of technical expertise and strategic thinking to my role. I am passionate about sharing my knowledge and experience, as evidenced by my positive reviews and recommendations from peers and clients alike. Outside of work, I enjoy exploring new places and experiencing diverse cultures."

In [6]:
import nltk
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [7]:
sentences = sent_tokenize(sample_text)
words = [word_tokenize(sentence) for sentence in sentences]

In [12]:
sentences

['As a dedicated Consultant at TCS, I specialize in utilizing SAP HANA applications to drive transformative solutions for the Aditya Birla Fashion and Retail project.',
 'With a background in computer science engineering and extensive experience in the IT industry, I bring a unique blend of technical expertise and strategic thinking to my role.',
 'I am passionate about sharing my knowledge and experience, as evidenced by my positive reviews and recommendations from peers and clients alike.',
 'Outside of work, I enjoy exploring new places and experiencing diverse cultures.']

In [13]:
words

[['As',
  'a',
  'dedicated',
  'Consultant',
  'at',
  'TCS',
  ',',
  'I',
  'specialize',
  'in',
  'utilizing',
  'SAP',
  'HANA',
  'applications',
  'to',
  'drive',
  'transformative',
  'solutions',
  'for',
  'the',
  'Aditya',
  'Birla',
  'Fashion',
  'and',
  'Retail',
  'project',
  '.'],
 ['With',
  'a',
  'background',
  'in',
  'computer',
  'science',
  'engineering',
  'and',
  'extensive',
  'experience',
  'in',
  'the',
  'IT',
  'industry',
  ',',
  'I',
  'bring',
  'a',
  'unique',
  'blend',
  'of',
  'technical',
  'expertise',
  'and',
  'strategic',
  'thinking',
  'to',
  'my',
  'role',
  '.'],
 ['I',
  'am',
  'passionate',
  'about',
  'sharing',
  'my',
  'knowledge',
  'and',
  'experience',
  ',',
  'as',
  'evidenced',
  'by',
  'my',
  'positive',
  'reviews',
  'and',
  'recommendations',
  'from',
  'peers',
  'and',
  'clients',
  'alike',
  '.'],
 ['Outside',
  'of',
  'work',
  ',',
  'I',
  'enjoy',
  'exploring',
  'new',
  'places',
  'and',

In [8]:
cleaned_words = [[re.sub(r'[^a-zA-Z0-9]', '', word.lower()) for word in sentence] for sentence in words]

In [14]:
cleaned_words

[['as',
  'a',
  'dedicated',
  'consultant',
  'at',
  'tcs',
  '',
  'i',
  'specialize',
  'in',
  'utilizing',
  'sap',
  'hana',
  'applications',
  'to',
  'drive',
  'transformative',
  'solutions',
  'for',
  'the',
  'aditya',
  'birla',
  'fashion',
  'and',
  'retail',
  'project',
  ''],
 ['with',
  'a',
  'background',
  'in',
  'computer',
  'science',
  'engineering',
  'and',
  'extensive',
  'experience',
  'in',
  'the',
  'it',
  'industry',
  '',
  'i',
  'bring',
  'a',
  'unique',
  'blend',
  'of',
  'technical',
  'expertise',
  'and',
  'strategic',
  'thinking',
  'to',
  'my',
  'role',
  ''],
 ['i',
  'am',
  'passionate',
  'about',
  'sharing',
  'my',
  'knowledge',
  'and',
  'experience',
  '',
  'as',
  'evidenced',
  'by',
  'my',
  'positive',
  'reviews',
  'and',
  'recommendations',
  'from',
  'peers',
  'and',
  'clients',
  'alike',
  ''],
 ['outside',
  'of',
  'work',
  '',
  'i',
  'enjoy',
  'exploring',
  'new',
  'places',
  'and',
  'exp

In [9]:
stop_words = set(stopwords.words('english'))
filtered_words = [[word for word in sentence if word not in stop_words] for sentence in cleaned_words]

In [16]:
print (filtered_words)
print (stop_words)

[['dedicated', 'consultant', 'tcs', '', 'specialize', 'utilizing', 'sap', 'hana', 'applications', 'drive', 'transformative', 'solutions', 'aditya', 'birla', 'fashion', 'retail', 'project', ''], ['background', 'computer', 'science', 'engineering', 'extensive', 'experience', 'industry', '', 'bring', 'unique', 'blend', 'technical', 'expertise', 'strategic', 'thinking', 'role', ''], ['passionate', 'sharing', 'knowledge', 'experience', '', 'evidenced', 'positive', 'reviews', 'recommendations', 'peers', 'clients', 'alike', ''], ['outside', 'work', '', 'enjoy', 'exploring', 'new', 'places', 'experiencing', 'diverse', 'cultures', '']]
{"haven't", 'be', "shouldn't", "wouldn't", 'not', 'into', 'how', 'll', "should've", "needn't", 'more', "you'll", 'has', 'doesn', 'them', 'only', 's', 'down', 'these', 'through', 't', 'isn', 'wouldn', 'hasn', 'aren', "she's", 'will', 'such', 'haven', 'very', "mightn't", "shan't", 'were', 'a', 'as', 'where', 'his', "you're", 'ma', 'herself', 'your', 'yourself', 'do

In [10]:
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

stemmed_words = [[stemmer.stem(word) for word in sentence] for sentence in filtered_words]
lemmatized_words = [[lemmatizer.lemmatize(word) for word in sentence] for sentence in filtered_words]

In [11]:
print("Original Sentences:")
for sentence in sentences:
    print(sentence)

print("\nProcessed Sentences (Lemmatized):")
for sentence in lemmatized_words:
    print(' '.join(sentence))

Original Sentences:
As a dedicated Consultant at TCS, I specialize in utilizing SAP HANA applications to drive transformative solutions for the Aditya Birla Fashion and Retail project.
With a background in computer science engineering and extensive experience in the IT industry, I bring a unique blend of technical expertise and strategic thinking to my role.
I am passionate about sharing my knowledge and experience, as evidenced by my positive reviews and recommendations from peers and clients alike.
Outside of work, I enjoy exploring new places and experiencing diverse cultures.

Processed Sentences (Lemmatized):
dedicated consultant tc  specialize utilizing sap hana application drive transformative solution aditya birla fashion retail project 
background computer science engineering extensive experience industry  bring unique blend technical expertise strategic thinking role 
passionate sharing knowledge experience  evidenced positive review recommendation peer client alike 
outside 