# Load the book

In [1]:
# Read the whole book into memory as one UTF-8 decoded string.
# Text read mode is open()'s default, so it is not spelled out.
with open("miracle_in_the_andes.txt", encoding="utf8") as file:
    book = file.read()

# The most used words (non-article)

In [2]:
import re

# A "word" is any maximal run of ASCII letters; the text is lower-cased first
# so that differently capitalised forms are counted together.
pattern = re.compile("[a-zA-Z]+")
findings = pattern.findall(book.lower())
findings[:5]

['chapter', 'before', 'it', 'was', 'friday']

In [3]:
# Tally how many times each word occurs: word -> count.
d = {}
for word in findings:
    # dict.get supplies 0 the first time a word is seen.
    d[word] = d.get(word, 0) + 1

In [4]:
# Build (count, word) pairs so that tuple ordering ranks by count first,
# then sort descending to put the most frequent words at the front.
d_list = sorted(
    ((count, word) for word, count in d.items()),
    reverse=True,
)
d_list[:5]

[(5346, 'the'), (2795, 'and'), (2729, 'i'), (2400, 'to'), (2060, 'of')]

# Install and import the Natural Language Toolkit (NLTK)

In [5]:
# Install NLTK from inside the notebook.
# NOTE(review): the version is unpinned — consider pinning it for reproducibility.
%pip install nltk

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: C:\Users\ASUS\AppData\Local\Programs\Python\Python313\python.exe -m pip install --upgrade pip


In [6]:
import nltk

# Fetch the NLTK "stopwords" corpus used below to filter common words.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

# Removing stop words from our list

In [7]:
from nltk.corpus import stopwords
# English stopword list from the NLTK stopwords corpus.
# Entries containing an apostrophe (e.g. "aren't") can never match the tokens
# produced earlier, because the [a-zA-Z]+ pattern only yields letter runs.
english_stopwords = stopwords.words("english")
# Display the list for inspection.
english_stopwords

['a',
 'about',
 'above',
 'after',
 'again',
 'against',
 'ain',
 'all',
 'am',
 'an',
 'and',
 'any',
 'are',
 'aren',
 "aren't",
 'as',
 'at',
 'be',
 'because',
 'been',
 'before',
 'being',
 'below',
 'between',
 'both',
 'but',
 'by',
 'can',
 'couldn',
 "couldn't",
 'd',
 'did',
 'didn',
 "didn't",
 'do',
 'does',
 'doesn',
 "doesn't",
 'doing',
 'don',
 "don't",
 'down',
 'during',
 'each',
 'few',
 'for',
 'from',
 'further',
 'had',
 'hadn',
 "hadn't",
 'has',
 'hasn',
 "hasn't",
 'have',
 'haven',
 "haven't",
 'having',
 'he',
 "he'd",
 "he'll",
 'her',
 'here',
 'hers',
 'herself',
 "he's",
 'him',
 'himself',
 'his',
 'how',
 'i',
 "i'd",
 'if',
 "i'll",
 "i'm",
 'in',
 'into',
 'is',
 'isn',
 "isn't",
 'it',
 "it'd",
 "it'll",
 "it's",
 'its',
 'itself',
 "i've",
 'just',
 'll',
 'm',
 'ma',
 'me',
 'mightn',
 "mightn't",
 'more',
 'most',
 'mustn',
 "mustn't",
 'my',
 'myself',
 'needn',
 "needn't",
 'no',
 'nor',
 'not',
 'now',
 'o',
 'of',
 'off',
 'on',
 'once',
 'on

In [8]:
# Drop stopwords from the frequency ranking.
# d_list holds (count, word) pairs sorted by descending count; the result is
# a list of (word, count) pairs in that same order.
# A set gives constant-time membership tests instead of scanning the whole
# stopword list once for every distinct word in the book.
stopword_lookup = set(english_stopwords)
filtered_words = [
    (word, count)
    for count, word in d_list
    if word not in stopword_lookup
]

In [9]:
# Preview the first ten (word, count) pairs that survived stopword filtering.
filtered_words[:10]

[('would', 575),
 ('us', 519),
 ('said', 292),
 ('roberto', 284),
 ('could', 252),
 ('one', 249),
 ('snow', 227),
 ('mountain', 183),
 ('time', 182),
 ('like', 165)]

# Sentiment Analysis: Which chapters are the most positive and the most negative?

In [36]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Fetch the "vader_lexicon" resource; presumably required by SentimentIntensityAnalyzer — confirm.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...


True

In [39]:
# Single analyzer instance reused for every polarity_scores call below.
analyzer = SentimentIntensityAnalyzer()

In [43]:
# Sanity check on a short, clearly positive sentence; the result is a dict
# with 'neg', 'neu', 'pos' and 'compound' keys.
# NOTE(review): "beautifu" looks like a typo for "beautiful"; correcting it
# would probably change the scores, so the cell would need re-running.
analyzer.polarity_scores(text="Hey look how beautifu you are. I love you")

{'neg': 0.0, 'neu': 0.625, 'pos': 0.375, 'compound': 0.6369}

In [44]:
# Score the entire book as one text.
# NOTE(review): the recorded 'compound' value is 1.0 although 'pos' and 'neg'
# are nearly equal — presumably the compound score saturates on very long
# inputs; confirm against the VADER documentation before interpreting it.
analyzer.polarity_scores(text=book)

{'neg': 0.116, 'neu': 0.76, 'pos': 0.125, 'compound': 1.0}

### Chapter sentiment analysis

In [45]:
import re

# Cut the book at every "Chapter <number>" heading. The heading text itself
# is discarded, and whatever precedes the first heading becomes element 0.
pattern = re.compile("Chapter [0-9]+")
chapters = pattern.split(book)

In [47]:
# Keep only the chapter bodies: element 0 of the split is the text that
# precedes the first "Chapter N" heading.
# Recomputed from `book` rather than slicing `chapters` in place, so that
# re-running this cell cannot silently drop one more chapter each time.
chapters = re.split(pattern, book)[1:]

In [55]:
 scores_chapter = []
for nr, chapter in enumerate(chapters):
    scores = analyzer.polarity_scores(text=chapter)
    scores_chapter.append((nr+1, scores))
scores_chapter

[(1, {'neg': 0.061, 'neu': 0.779, 'pos': 0.16, 'compound': 1.0}),
 (2, {'neg': 0.12, 'neu': 0.726, 'pos': 0.154, 'compound': 0.9991}),
 (3, {'neg': 0.145, 'neu': 0.751, 'pos': 0.105, 'compound': -0.9999}),
 (4, {'neg': 0.141, 'neu': 0.721, 'pos': 0.138, 'compound': -0.9963}),
 (5, {'neg': 0.118, 'neu': 0.742, 'pos': 0.141, 'compound': 0.9997}),
 (6, {'neg': 0.124, 'neu': 0.761, 'pos': 0.115, 'compound': -0.9979}),
 (7, {'neg': 0.136, 'neu': 0.761, 'pos': 0.103, 'compound': -0.9999}),
 (8, {'neg': 0.12, 'neu': 0.786, 'pos': 0.094, 'compound': -0.9998}),
 (9, {'neg': 0.097, 'neu': 0.824, 'pos': 0.079, 'compound': -0.9996}),
 (10, {'neg': 0.086, 'neu': 0.733, 'pos': 0.181, 'compound': 1.0})]

In [75]:
# Pick the chapters with the largest 'pos' and 'neg' scores.
# Each entry of scores_chapter is (chapter_number, scores_dict).
# max() returns the first maximal entry, so ties resolve to the earliest
# chapter, exactly as a strict ">" scan starting from the first entry would.
most_positive = max(scores_chapter, key=lambda entry: entry[1]["pos"])
most_negative = max(scores_chapter, key=lambda entry: entry[1]["neg"])

print(f"Most positive chapter is {most_positive[0]}")

Most positivve chapter is 10


In [76]:
# Report the chapter with the highest 'neg' score. The message previously
# labelled this value as the most positive chapter.
print(f"Most negative chapter is {most_negative[0]}")

Most positivve chapter is 3
