In [None]:
import nltk
import spacy
from nltk.stem import PorterStemmer,SnowballStemmer
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet


# Download the NLTK data packages this notebook asks for (tokenizer tables
# and WordNet data). nltk.download reports packages that are already present.
for resource in ("punkt_tab", "wordnet", "omw-1.4"):
    nltk.download(resource)


# Shared NLP objects reused by the cells below.
Porter = PorterStemmer()
Snowball = SnowballStemmer(language="english")
WordNet = WordNetLemmatizer()
nlp = spacy.load("en_core_web_sm")

# One short sample text per domain, keyed by domain name.
corpus = {
    "news": (
        "The economic downturn is affecting global markets. "
        "Investors are reconsidering their portfolios."
    ),
    "wikipedia": "The mitochondrion is often referred to as the powerhouse of the cell.",
    "science_paper": (
        "In recent studies, convolutional neural networks have demonstrated "
        "state-of-the-art performance in image classification tasks."
    ),
}

# Same keys as `corpus`; each value is the list of tokens NLTK produces for that text.
tokenized_corpus = {
    domain: nltk.word_tokenize(text) for domain, text in corpus.items()
}

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [None]:
#Stemming
def apply_stemming(token_text, stemmer):
    """Stem every item of ``token_text`` with ``stemmer``.

    Args:
        token_text: Iterable of word tokens.
        stemmer: Any object exposing a ``stem(word)`` method.

    Returns:
        A list with ``stemmer.stem(word)`` for each word, in input order.
    """
    stems = []
    for word in token_text:
        stems.append(stemmer.stem(word))
    return stems

# Print the Porter and Snowball stems of every tokenized document.
# `token` holds the full token list of the current document.
for domain, token in tokenized_corpus.items():
    print(f"** {domain.upper()} DOMAIN**")
    print("\n")
    # One report line per stemmer: label first, then the stemmed tokens.
    for label, stemmer in (("porter stemmin", Porter), ("snowball stemmin", Snowball)):
        print(label, apply_stemming(token, stemmer))
    print("\n")

# Reference copy of the `corpus` dict defined in the first cell (kept commented out):
# corpus = {
#     "news": "The economic downturn is affecting global markets. Investors are reconsidering their portfolios.",
#     "wikipedia": "The mitochondrion is often referred to as the powerhouse of the cell.",
#     "science_paper": "In recent studies, convolutional neural networks have demonstrated state-of-the-art performance in image classification tasks."
# }

** NEWS DOMAIN**


porter stemmin ['the', 'econom', 'downturn', 'is', 'affect', 'global', 'market', '.', 'investor', 'are', 'reconsid', 'their', 'portfolio', '.']
snowball stemmin ['the', 'econom', 'downturn', 'is', 'affect', 'global', 'market', '.', 'investor', 'are', 'reconsid', 'their', 'portfolio', '.']


** WIKIPEDIA DOMAIN**


porter stemmin ['the', 'mitochondrion', 'is', 'often', 'refer', 'to', 'as', 'the', 'powerhous', 'of', 'the', 'cell', '.']
snowball stemmin ['the', 'mitochondrion', 'is', 'often', 'refer', 'to', 'as', 'the', 'powerhous', 'of', 'the', 'cell', '.']


** SCIENCE_PAPER DOMAIN**


porter stemmin ['in', 'recent', 'studi', ',', 'convolut', 'neural', 'network', 'have', 'demonstr', 'state-of-the-art', 'perform', 'in', 'imag', 'classif', 'task', '.']
snowball stemmin ['in', 'recent', 'studi', ',', 'convolut', 'neural', 'network', 'have', 'demonstr', 'state-of-the-art', 'perform', 'in', 'imag', 'classif', 'task', '.']




In [None]:
# Lemmatizing
def apply_lemmetizer(token_text, lemmatizer, pos=wordnet.VERB):
    """Lemmatize every item of ``token_text`` with ``lemmatizer``.

    Args:
        token_text: Iterable of word tokens.
        lemmatizer: Object exposing ``lemmatize(word, pos)``.
        pos: Part-of-speech tag handed to every ``lemmatize`` call. The same
            tag is applied to all tokens; it defaults to ``wordnet.VERB``.

    Returns:
        A list with one lemma per input token, in input order.
    """
    lemmas = []
    for word in token_text:
        lemmas.append(lemmatizer.lemmatize(word, pos))
    return lemmas

# Print the WordNet lemmas of every tokenized document, using
# apply_lemmetizer's default part-of-speech tag.
# `token` holds the full token list of the current document.
for domain, token in tokenized_corpus.items():
    print(f"** {domain.upper()} DOMAIN**")
    print("\n")
    lemmas = apply_lemmetizer(token, WordNet)
    print("wordnet lemmatizer", lemmas)
    print("\n")


** NEWS DOMAIN**


wordnet lemmatizer ['The', 'economic', 'downturn', 'be', 'affect', 'global', 'market', '.', 'Investors', 'be', 'reconsider', 'their', 'portfolios', '.']


** WIKIPEDIA DOMAIN**


wordnet lemmatizer ['The', 'mitochondrion', 'be', 'often', 'refer', 'to', 'as', 'the', 'powerhouse', 'of', 'the', 'cell', '.']


** SCIENCE_PAPER DOMAIN**


wordnet lemmatizer ['In', 'recent', 'study', ',', 'convolutional', 'neural', 'network', 'have', 'demonstrate', 'state-of-the-art', 'performance', 'in', 'image', 'classification', 'task', '.']




In [None]:
# Lemmatizer using spaCy

def spacy_lemma(text):
    """Return the lemma of every token the module-level ``nlp`` finds in ``text``.

    Args:
        text: Raw (untokenized) string; tokenization is done by ``nlp``.

    Returns:
        A list of ``lemma_`` strings, one per token of the parsed document.
    """
    lemmas = []
    for token in nlp(text):
        lemmas.append(token.lemma_)
    return lemmas

# Print the spaCy lemmas of every raw document; spacy_lemma tokenizes the
# text itself, so this loop iterates `corpus` rather than `tokenized_corpus`.
for domain, text in corpus.items():
    print(f"** {domain.upper()} Domain**")
    print("\n")
    lemmas = spacy_lemma(text)
    print("Spacy Lemmetizer", lemmas)
    print("\n")

** NEWS Domain**


Spacy Lemmetizer ['the', 'economic', 'downturn', 'be', 'affect', 'global', 'market', '.', 'investor', 'be', 'reconsider', 'their', 'portfolio', '.']


** WIKIPEDIA Domain**


Spacy Lemmetizer ['the', 'mitochondrion', 'be', 'often', 'refer', 'to', 'as', 'the', 'powerhouse', 'of', 'the', 'cell', '.']


** SCIENCE_PAPER Domain**


Spacy Lemmetizer ['in', 'recent', 'study', ',', 'convolutional', 'neural', 'network', 'have', 'demonstrate', 'state', '-', 'of', '-', 'the', '-', 'art', 'performance', 'in', 'image', 'classification', 'task', '.']




In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer


# word_tokenize (used by translate_mode below) needs the Punkt tokenizer data.
# NLTK releases that ship the 'punkt_tab' package load it instead of the older
# 'punkt' models, so fetch both to keep this cell runnable on its own.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('wordnet')

# NOTE(review): `lemmatizer` is not referenced by the code visible in this
# cell; kept because later cells may rely on it.
lemmatizer=WordNetLemmatizer()


# Word-substitution tables, one per "voice". All three share the same keys
# (plain lowercase words); each value is the replacement phrase.

# Gen-Z slang.
GEN_Z_DICT = {
    "money": "bread",
    "work": "grind",
    "friend": "bestie",
    "tired": "exhausted af",
    "smart": "big-brain",
    "cool": "vibe check passed",
    "boring": "mid",
    "excited": "hyped",
    "love": "obsessed",
    "understand": "get it, bestie",
    "bad": "sus",
    "good": "fire",
    "funny": "lowkey hilarious",
    "amazing": "goated",
}

# Boomer phrasing.
BOOMER_DICT = {
    "money": "hard-earned cash",
    "work": "a 9-to-5 job",
    "friend": "pal",
    "tired": "worn out",
    "smart": "book smart",
    "cool": "neat",
    "boring": "dull",
    "excited": "thrilled",
    "love": "truly admire",
    "understand": "comprehend",
    "bad": "not up to the mark",
    "good": "decent",
    "funny": "a real hoot",
    "amazing": "astonishing",
}

# Corporate jargon.
CORPORATE_DICT = {
    "money": "capital",
    "work": "workflow optimization",
    "friend": "stakeholder",
    "tired": "resource depletion",
    "smart": "data-driven",
    "cool": "innovative",
    "boring": "low engagement",
    "excited": "strategically motivated",
    "love": "synergize with",
    "understand": "leverage insights",
    "bad": "off-track",
    "good": "best-in-class",
    "funny": "engagement-boosting",
    "amazing": "disruptive",
}

def translate_mode(text, dictionary):
    """Rewrite ``text`` using the word substitutions in ``dictionary``.

    The text is lowercased and split with ``word_tokenize``; every token that
    is a key of ``dictionary`` is replaced by its value and all other tokens
    are kept as they are (in lowercase).

    Args:
        text: Sentence to translate.
        dictionary: Mapping from lowercase word to replacement phrase, e.g.
            ``GEN_Z_DICT``, ``BOOMER_DICT`` or ``CORPORATE_DICT``.

    Returns:
        The resulting tokens joined by single spaces (so punctuation tokens
        are also space-separated).
    """
    translated = []
    for token in word_tokenize(text.lower()):
        translated.append(dictionary.get(token, token))
    return " ".join(translated)

def translate(text=None):
    """Print a sentence in the Gen-Z, Boomer and Corporate styles.

    Args:
        text: Sentence to translate. When ``None`` (the default) the sentence
            is read interactively with ``input()``; passing it explicitly
            lets the cell run without waiting for keyboard input.

    Returns:
        None. The original sentence and its three translations are printed.
    """
    if text is None:
        # The trailing ": " keeps the typed sentence from running into the prompt.
        text = input("\n enter text: ")
    print(f"\n normal text :{text}")
    # One output line per style: label first, then the translated sentence.
    for label, dictionary in (
        ("**GEN_Z mode**", GEN_Z_DICT),
        ("**BOOMER mode**", BOOMER_DICT),
        ("**CORPORATE mode**", CORPORATE_DICT),
    ):
        print(label, translate_mode(text, dictionary))

# Run the demo: reads a sentence from standard input and prints its translations.
translate()


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!



 enter textI love my cat, but she is not cool and smart

 normal text :I love my cat, but she is not cool and smart
**GEN_Z mode** i obsessed my cat , but she is not vibe check passed and big-brain
**BOOMER mode** i truly admire my cat , but she is not neat and book smart
**CORPARATE mode** i synergize with my cat , but she is not innovative and data-driven
