In [None]:
# Preprocessing of Textual Input

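# Step 1: Create a text file (InputText.txt) and read the text from it
# Step 2: Convert the text to lowercase
# Step 3: Remove punctuation such as . , ! ?
# Step 4: Split the cleaned text into individual words (tokenization)
# Step 5: Remove stop words from the list of words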

In [None]:
# Read the entire input file into memory as UTF-8 text.
# The `with` block closes the file handle as soon as the read completes,
# rather than leaving the open handle for the garbage collector to reclaim.
with open('InputText.txt', encoding='utf-8') as input_file:
    text = input_file.read()
print("Original Text:\n", text)

In [None]:
# Fold the text to lowercase; the stop-word list used later in this notebook
# is written entirely in lowercase, so the words must match that casing.
lower_case = str.lower(text)
print("\nLowercase Text:\n", lower_case)

In [None]:
# The standard-library `string` module provides `punctuation`, a constant
# string of ASCII punctuation characters. Show it for reference.
import string
punctuation_marks = string.punctuation
print("\nPunctuation Characters:", punctuation_marks)

In [None]:
# Strip punctuation from the lowercase text.
# str.maketrans(x, y, z) builds a translation table for str.translate:
#   x: characters to be replaced
#   y: their replacements (same length as x)
#   z: characters to delete outright
# Here x and y are empty, so the table only deletes the punctuation characters.
deletion_table = str.maketrans('', '', string.punctuation)
cleaned_text = lower_case.translate(deletion_table)
print("\nCleaned Text:\n", cleaned_text)

In [None]:
# Break the cleaned text into word tokens.
# With sep=None, split() treats any run of whitespace as a single separator
# and never yields empty strings.
tokenized_words = cleaned_text.split(sep=None)
print("\nTokenized Words:\n", tokenized_words)

In [None]:
# Stop words: Common words that carry little useful information for NLP tasks
stop_words = [
    "i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours",
    "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers", "herself",
    "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what", "which",
    "who", "whom", "this", "that", "these", "those", "am", "is", "are", "was", "were", "be",
    "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an",
    "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for",
    "with", "about", "against", "between", "into", "through", "during", "before", "after",
    "above", "below", "to", "from", "up", "down", "in", "out", "on", "off", "over", "under",
    "again", "further", "then", "once", "here", "there", "when", "where", "why", "how", "all",
    "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor", "not",
    "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don",
    "should", "now"
]

# Removing stop words.
# Membership is tested against a set built once from the list: a set lookup
# is a hash probe, whereas `word not in stop_words` would scan the whole list
# for every token. `stop_words` itself stays a list, and the filtered result
# (contents and order) is the same as with the list scan.
stop_word_lookup = set(stop_words)
final_words = [word for word in tokenized_words if word not in stop_word_lookup]
print("\nFiltered Words (Stop Words Removed):\n", final_words)

In [None]:
# NLP Emotion Algorithm
# Step 1: For each word listed in Emotions.txt, check whether it appears in the final (filtered) word list
# Step 2: Extract the emotion associated with each matching word
# Step 3: Count the occurrences of each emotion

In [None]:
# Reading and processing the Emotions.txt file.
# Each line is cleaned of single quotes and commas, then split at the first
# colon into a word and its emotion label.
# NOTE(review): the cleaning steps suggest lines shaped like  'word': 'emotion',
# -- confirm against the actual file.
emotion_list = []

# Set built once so each lexicon entry is checked with a hash lookup instead
# of a scan over the whole final_words list.
final_word_lookup = set(final_words)

# Explicit UTF-8 makes decoding independent of the platform default encoding
# and matches how InputText.txt is read.
with open('Emotions.txt', 'r', encoding='utf-8') as file:
    for line in file:
        clean_line = line.strip().replace("'", '').replace(",", '')
        word, separator, emotion = clean_line.partition(':')
        if not separator:
            # Blank line or line without a colon: skip it rather than fail
            # with a ValueError while unpacking.
            continue
        # Trim whitespace around the colon so 'word: emotion' yields the label
        # 'emotion' rather than ' emotion', and the word compares cleanly.
        word = word.strip()
        emotion = emotion.strip()
        # An emotion is recorded once per matching lexicon line, regardless of
        # how many times the word occurs in the text.
        if word in final_word_lookup:
            emotion_list.append(emotion)

print("\nEmotion List:\n", emotion_list)

In [None]:
# Tally how many times each emotion label appears in emotion_list.
# Counter is a dict subclass mapping each distinct element to its count.
from collections import Counter
emotion_counts = Counter()
emotion_counts.update(emotion_list)
print("\nEmotion Counts:\n", emotion_counts)

In [None]:
# Bar chart of the emotion tallies, drawn through Matplotlib's explicit
# Figure/Axes interface.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(emotion_counts.keys(), emotion_counts.values(), color='skyblue')
ax.set_title("Emotion Distribution")
ax.set_xlabel("Emotions")
ax.set_ylabel("Frequency")
# Tilt the category labels on the x-axis by 45 degrees.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
fig.tight_layout()
# Write the image to disk first, then display the figure.
fig.savefig("emotion_graph.png")
plt.show()