<a href="https://colab.research.google.com/github/Vaibhavvs7/Tic-Tac-Toe-React-/blob/main/Exp3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords, wordnet
from nltk.stem import PorterStemmer, WordNetLemmatizer
import json

In [None]:
# Fetch the NLTK data packages used by this notebook. The captured log below
# shows packages that are already installed being reported as up-to-date.
# The value returned by the final call is what the cell displays.
nltk.download('punkt')      # tokenizer data; presumably backs word_tokenize (confirm)
nltk.download('stopwords')  # read later through stopwords.words('english')
nltk.download('wordnet')    # presumably the lexicon behind WordNetLemmatizer (confirm)
nltk.download('omw-1.4')    # NOTE(review): nothing visible here reads this directly; confirm it is required
nltk.download('punkt_tab')  # NOTE(review): likely needed by newer NLTK tokenizers; confirm against the installed version

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [None]:
# Load CSV
# The path is relative to the notebook's working directory. Later cells read
# the "Poem" column of this frame and add the processed columns to it.
df = pd.read_csv("poem.csv")

In [None]:
# 1. Tokenization and Stopword Removal
def tokenize_text(text):
    """Lower-case ``text``, split it into word tokens, and keep alphabetic ones.

    Args:
        text: Raw text of one poem. Any non-string value (for example ``None``
            or the float NaN that pandas produces for an empty CSV field) is
            treated as empty text instead of raising ``AttributeError``.

    Returns:
        list[str]: Lower-cased tokens for which ``str.isalpha()`` is true.
        Punctuation, numbers, and tokens containing apostrophes or digits
        are therefore dropped.
    """
    # Missing cells arrive as float NaN, which has no .lower(); return an
    # empty token list so downstream list-based steps keep working.
    if not isinstance(text, str):
        return []
    tokens = word_tokenize(text.lower())
    return [word for word in tokens if word.isalpha()]

In [None]:
# Build the English stopword collection once as a set, so the
# `word not in stop_words` check in remove_stopwords is a hash lookup
# rather than a scan of a list.
stop_words = set(stopwords.words('english'))

In [None]:
def remove_stopwords(tokens, stopword_set=None):
    """Return ``tokens`` with stopwords filtered out, preserving order.

    Args:
        tokens: Iterable of word tokens (as produced by ``tokenize_text``).
        stopword_set: Optional collection of words to drop. When omitted
            (``None``), the notebook-level ``stop_words`` set is used, so
            existing single-argument calls behave exactly as before.

    Returns:
        list[str]: The tokens that are not members of the stopword collection.
    """
    if stopword_set is None:
        # Resolved at call time so the notebook-level set can be rebuilt
        # without redefining this function.
        stopword_set = stop_words
    return [word for word in tokens if word not in stopword_set]

In [None]:
# Add two columns to df: "tokenized" holds tokenize_text's output for each
# poem, and "no_stopwords" is derived from "tokenized" (not from the raw text),
# so the order of these two lines matters.
df["tokenized"] = df["Poem"].apply(tokenize_text)
df["no_stopwords"] = df["tokenized"].apply(remove_stopwords)

In [None]:
# 2. Stemming
stemmer = PorterStemmer()

def stem_words(tokens):
    """Return the Porter stem of each token, in the original order."""
    stems = []
    for token in tokens:
        stems.append(stemmer.stem(token))
    return stems

# Stems are computed from the stopword-filtered tokens.
df["stemmed"] = df["no_stopwords"].apply(stem_words)

In [None]:
# 3. Lemmatization
lemmatizer = WordNetLemmatizer()

def lemmatize_words(tokens):
    """Return the WordNet lemma of each token, in the original order.

    Only the word itself is passed to ``lemmatize``, so the lemmatizer's
    default part-of-speech setting applies to every token.
    """
    return list(map(lemmatizer.lemmatize, tokens))

# Lemmas are computed from the stopword-filtered tokens, independently of
# the stemmed column.
df["lemmatized"] = df["no_stopwords"].apply(lemmatize_words)

In [None]:
# 4. Optional: Convert lists to JSON strings for saving
def _to_json_string(value):
    """Serialize ``value`` with json.dumps unless it is already a string."""
    return value if isinstance(value, str) else json.dumps(value)


# The columns are overwritten in place, so this cell must be safe to re-run:
# calling json.dumps on an already-encoded string would wrap it in a second
# layer of quotes and escapes. Values that are already strings are left as-is.
for column in ("tokenized", "no_stopwords", "stemmed", "lemmatized"):
    df[column] = df[column].apply(_to_json_string)


In [None]:
# 5. Save final output
# Keep the original poem text plus the four processed columns and write them
# to a CSV in the working directory; index=False omits the row index column.
final_df = df[["Poem", "tokenized", "no_stopwords", "stemmed", "lemmatized"]]
final_df.to_csv("poem_proc_final.csv", index=False)


In [None]:
# Show a nicely formatted table in Colab
# Step 4 has already replaced the list columns with JSON strings, so this
# preview shows JSON text (double-quoted words) rather than Python lists.
df[["Poem", "no_stopwords", "stemmed", "lemmatized"]].head(10)


Unnamed: 0,Poem,no_stopwords,stemmed,lemmatized
0,A woman walks by the bench I’m sitting onwith ...,"[""woman"", ""walks"", ""bench"", ""sitting"", ""onwith...","[""woman"", ""walk"", ""bench"", ""sit"", ""onwith"", ""d...","[""woman"", ""walk"", ""bench"", ""sitting"", ""onwith""..."
1,"Because I am a boy, the untouchability of beau...","[""boy"", ""untouchability"", ""beautyis"", ""subject...","[""boy"", ""untouch"", ""beautyi"", ""subject"", ""alre...","[""boy"", ""untouchability"", ""beautyis"", ""subject..."
2,"Because today we did not leave this world,We n...","[""today"", ""leave"", ""world"", ""embody"", ""promine...","[""today"", ""leav"", ""world"", ""embodi"", ""promin"",...","[""today"", ""leave"", ""world"", ""embody"", ""promine..."
3,"Big Bend has been here, been here. Shouldn’t i...","[""big"", ""bend"", ""say"", ""call"", ""mountains"", ""w...","[""big"", ""bend"", ""say"", ""call"", ""mountain"", ""wa...","[""big"", ""bend"", ""say"", ""call"", ""mountain"", ""wa..."
4,"I put shells there, along the lip of the road....","[""put"", ""shells"", ""along"", ""lip"", ""last"", ""sum...","[""put"", ""shell"", ""along"", ""lip"", ""last"", ""summ...","[""put"", ""shell"", ""along"", ""lip"", ""last"", ""summ..."
5,I thought I would write a novelabout the windo...,"[""thought"", ""would"", ""write"", ""novelabout"", ""w...","[""thought"", ""would"", ""write"", ""novelabout"", ""w...","[""thought"", ""would"", ""write"", ""novelabout"", ""w..."
6,"I was afraid the past would catch up with me,w...","[""afraid"", ""past"", ""would"", ""catch"", ""would"", ...","[""afraid"", ""past"", ""would"", ""catch"", ""would"", ...","[""afraid"", ""past"", ""would"", ""catch"", ""would"", ..."
7,Let us enter this again. In the context of thi...,"[""let"", ""us"", ""enter"", ""context"", ""paragraph"",...","[""let"", ""us"", ""enter"", ""context"", ""paragraph"",...","[""let"", ""u"", ""enter"", ""context"", ""paragraph"", ..."
8,More than the fuchsia funnels breaking outof t...,"[""fuchsia"", ""funnels"", ""breaking"", ""outof"", ""c...","[""fuchsia"", ""funnel"", ""break"", ""outof"", ""craba...","[""fuchsia"", ""funnel"", ""breaking"", ""outof"", ""cr..."
9,"No strawberry moon for me, tonight. No strawbe...","[""strawberry"", ""moon"", ""tonight"", ""strawberry""...","[""strawberri"", ""moon"", ""tonight"", ""strawberri""...","[""strawberry"", ""moon"", ""tonight"", ""strawberry""..."
