In [None]:
# Path of the novel's plain-text file; change it to the name shown in the sidebar
filename = "[Anita_Desai]_Fasting,_Feasting(BookFi).txt"  # e.g., "fasting_feasting.txt"

# Step 1: load the entire file into memory as one UTF-8 decoded string
with open(filename, encoding="utf-8") as book_file:
    text = book_file.read()

Title: A comparative study of two novels (Anita Desai’s “Fasting, Feasting” and Amitav Ghosh’s “The Calcutta Chromosome”) using digital tools such as word clustering, NER, and topic modeling

Research Question

1. "How are gendered experiences and emotional expressions constructed and contrasted in Anita Desai’s Fasting, Feasting and Amitav Ghosh’s The Calcutta Chromosome, and what do these constructions reveal about cultural narratives in Indian English literature?"

2. "In what ways are gendered identities thematically linked to space, emotion, and agency in Fasting, Feasting and The Calcutta Chromosome, and how can digital methods reveal these links?"



# spaCy

In [None]:
# Step 1: Install and load spaCy
!pip install -U spacy
!python -m spacy download en_core_web_sm

import spacy
nlp = spacy.load("en_core_web_sm")

# Step 2: Load the uploaded novel (the file listed in the left panel) as one string
filename = "[Anita_Desai]_Fasting,_Feasting(BookFi).txt"  # replace with actual file name
with open(filename, encoding="utf-8") as novel_file:
    text = novel_file.read()

# Step 3: Preprocessing with spaCy
def preprocess_spacy(text):
    """Lower-case ``text``, run it through ``nlp`` and return the kept lemmas.

    A token is kept only when it is neither a stop word nor punctuation, is
    purely alphabetic, and is longer than two characters. The lemmas of the
    kept tokens are returned as a single space-separated string.
    """
    kept_lemmas = []
    for tok in nlp(text.lower()):
        # Discard function words and punctuation first ...
        if tok.is_stop or tok.is_punct:
            continue
        # ... then anything non-alphabetic or too short to be informative.
        if not tok.is_alpha or len(tok) <= 2:
            continue
        kept_lemmas.append(tok.lemma_)
    return " ".join(kept_lemmas)

# Clean the whole novel in one pass; the result is one space-joined string of lemmas
cleaned_text = preprocess_spacy(text)

Collecting en-core-web-sm==3.8.0
  Using cached https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

# Turn the cleaned text into a document-term matrix.
#
# LDA learns topics from how words co-occur ACROSS documents. Fitting it on a
# single document (the whole novel as one string) gives it nothing to contrast,
# so most topics collapse into identical word lists. The cleaned token stream
# is therefore cut into fixed-size pseudo-documents before vectorising.
SPACY_CHUNK_TOKENS = 200  # tokens per pseudo-document
spacy_tokens = cleaned_text.split()
spacy_documents = [
    " ".join(spacy_tokens[start:start + SPACY_CHUNK_TOKENS])
    for start in range(0, len(spacy_tokens), SPACY_CHUNK_TOKENS)
]

vectorizer = CountVectorizer(
    max_df=0.95,       # float = proportion: drop words found in more than 95% of chunks
    min_df=3,          # int = absolute count: word must appear in at least 3 chunks
    max_features=1000  # keep only the 1000 most frequent remaining words
)
doc_term_matrix = vectorizer.fit_transform(spacy_documents)

# Run LDA (fixed random_state so the topics are repeatable between runs)
lda_model = LatentDirichletAllocation(n_components=5, random_state=42)
lda_model.fit(doc_term_matrix)


# Step 5: Display topics
# Vocabulary terms kept by the vectorizer; display_topics looks them up by index
feature_names = vectorizer.get_feature_names_out()

def display_topics(model, feature_names, no_top_words=10):
    """Print the highest-weighted terms of every topic in ``model``.

    For each row of ``model.components_`` the ``no_top_words`` largest entries
    are located, mapped to their names through ``feature_names`` and printed,
    strongest first, joined by " | " under a 1-based "Topic N:" heading.
    """
    topic_number = 0
    for weights in model.components_:
        topic_number += 1
        ranked = weights.argsort()[::-1]  # indices from largest to smallest weight
        top_terms = []
        for term_idx in ranked[:no_top_words]:
            top_terms.append(feature_names[term_idx])
        print(f"\nTopic {topic_number}: ", " | ".join(top_terms))

# Print the ten highest-weighted terms of every fitted topic
display_topics(lda_model, feature_names, 10)


Topic 1:  abandon | wide | board | wind | silver | situation | skirt | slam | wooden | ahead

Topic 2:  abandon | wide | board | wind | silver | situation | skirt | slam | wooden | ahead

Topic 3:  abandon | wide | board | wind | silver | situation | skirt | slam | wooden | ahead

Topic 4:  uma | mama | arun | look | papa | come | like | mrs | say | go

Topic 5:  abandon | wide | board | wind | silver | situation | skirt | slam | wooden | ahead


# NLTK

In [None]:
import nltk
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

# Download necessary NLTK data
nltk.download("punkt")
# Recent NLTK releases load the tokenizer models used by word_tokenize from the
# separate "punkt_tab" package, so fetch it in addition to the legacy "punkt".
nltk.download("punkt_tab")
# English stop-word list read by stopwords.words('english')
nltk.download("stopwords")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
import nltk
# NOTE(review): 'all' fetches every NLTK corpus and model. The code visible in
# this section only calls word_tokenize and stopwords, so a much smaller set of
# packages may be enough — TODO confirm what later cells rely on before narrowing.
nltk.download('all')

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/abc.zip.
[nltk_data]    | Downloading package alpino to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/alpino.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger_eng to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping
[nltk_data]    |       taggers/averaged_perceptron_tagger_eng.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping
[nltk_data]    |       taggers/averaged_perceptron_tagger_ru.zip.
[nltk_data]    | Downloading package averaged_perceptron_tagger_rus to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |  

True

In [None]:
# Step 1: Preprocess function (using NLTK)
_ENGLISH_STOPWORDS = None  # filled on first use by _english_stopwords()


def _english_stopwords():
    """Return NLTK's English stop words as a set, loading the list only once."""
    global _ENGLISH_STOPWORDS
    if _ENGLISH_STOPWORDS is None:
        _ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))  # Default NLTK stopwords
    return _ENGLISH_STOPWORDS


def preprocess_nltk(text):
    """Normalise ``text`` and return its content words as one string.

    The text is lower-cased, every run of non-word characters is replaced by a
    single space, and the result is tokenised with ``word_tokenize``. Tokens
    that are English stop words or have two characters or fewer are dropped.

    The stop-word set is loaded once and reused, because this function is
    called once per paragraph and the list never changes between calls.

    Args:
        text: Raw text of one paragraph (or any other string).

    Returns:
        The surviving tokens joined by single spaces ("" if none survive).
    """
    text = text.lower()
    text = re.sub(r'\W+', ' ', text)  # Remove non-word characters
    stop_words = _english_stopwords()
    tokens = [
        word for word in word_tokenize(text)
        if word not in stop_words and len(word) > 2
    ]
    return " ".join(tokens)

# Load the uploaded novel (swap in the actual file name if it differs)
filename = "[Anita_Desai]_Fasting,_Feasting(BookFi).txt"  # Replace with actual filename
with open(filename, encoding="utf-8") as novel_file:
    text = novel_file.read()

# Treat every newline-separated line as a paragraph, trimmed of surrounding
# whitespace; keep only those longer than 30 characters to skip tiny fragments
paragraphs = [
    line
    for line in map(str.strip, text.split("\n"))
    if len(line) > 30
]

# Run the NLTK preprocessing over each kept paragraph
processed_paragraphs = list(map(preprocess_nltk, paragraphs))

In [None]:
# Step 2: Build the paragraph-by-term count matrix.
# Terms found in more than 95% of the paragraphs or in fewer than two are
# discarded, along with scikit-learn's built-in English stop words.
vectorizer = CountVectorizer(
    stop_words='english',
    min_df=2,
    max_df=0.95,
)
doc_term_matrix = vectorizer.fit_transform(processed_paragraphs)

In [None]:
# Step 3: LDA Topic Modeling
num_topics = 5  # number of topics to extract; lower it (e.g. 2 or 3) for a coarser model
lda_model = LatentDirichletAllocation(n_components=num_topics, random_state=42)  # fixed seed for repeatable topics
lda_model.fit(doc_term_matrix)

In [None]:
# Step 4: Display Topics
# Vocabulary terms kept by the vectorizer; display_topics looks them up by index
feature_names = vectorizer.get_feature_names_out()

def display_topics(model, feature_names, no_top_words=10):
    """Print the strongest terms of each topic held in ``model.components_``.

    Every topic row is sorted by weight, its ``no_top_words`` largest entries
    are translated to terms via ``feature_names``, and the terms are printed in
    descending order of weight, separated by " | ", after a 1-based
    "Topic N:" label.
    """
    for position, weights in enumerate(model.components_, start=1):
        descending = weights.argsort()[::-1]  # term indices, heaviest first
        chosen_terms = [feature_names[idx] for idx in descending[:no_top_words]]
        joined_terms = " | ".join(chosen_terms)
        print(f"\nTopic {position}: ", joined_terms)

# Show the ten highest-weighted terms of each topic
display_topics(lda_model, feature_names, 10)


Topic 1:  mrs | patton | arun | uma | mira | masi | mama | room | away | mother

Topic 2:  uma | mama | papa | house | son | eyes | look | come | told | aruna

Topic 3:  uma | arun | away | day | time | papa | like | making | way | old

Topic 4:  uma | know | aruna | like | mama | come | mother | say | said | face

Topic 5:  arun | anamika | mama | masi | like | mira | veranda | eyes | went | sit



The topic modeling result of Fasting, Feasting by Anita Desai reveals a strong thematic focus on family dynamics, gender roles, and emotional dislocation within domestic spaces. Across the five identified topics, central characters such as Uma, Arun, Mama, Papa, Mira-Masi, and Mrs. Patton recur, signaling their thematic significance in exploring interpersonal relationships and identity construction. Topic 1 captures a confined domestic environment with references to room, mother, and away, highlighting Uma’s emotional isolation and spatial limitation. Topic 2 centers around eyes, look, and come, indicating a visual and relational dimension of family life, especially from Uma’s and Aruna’s perspectives, where appearances and expectations carry emotional weight. Topics 3 and 4 delve deeper into time, making, say, and face, suggesting tensions around growing up, decision-making, and emotional expression, especially in relation to paternal authority (Papa) and the daughters’ restrained agency. Topic 5’s inclusion of veranda, went, and sit conveys an almost static, observational existence—particularly of female characters like Uma and Mira-Masi—who dwell in the margins of action. Altogether, the modeling illustrates how Desai’s novel navigates the gendered construction of space, emotional suppression, and fragmented identity within a traditionally structured Indian family, offering a poignant critique of cultural expectations imposed on women.


