<a href="https://colab.research.google.com/github/ilansarbac95/DI158/blob/main/WEEK_3_Python_File_I_O%2C_JSON_and_API_%E2%80%93_Daily_Challenge_Text_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ============================================
# Python File I/O, JSON and API – Daily Challenge: Text Analysis
# ============================================
# Name: Ilan Sarbac
# Date: October 2025
# Description: Classes for analyzing and modifying text (string or file)
# ============================================

import string
import re

# ---------- PART 1: Analyzing a Simple String ----------

class Text:
    """Analyze word usage in a piece of text.

    Words are obtained by lower-casing the text, splitting it on whitespace
    and stripping ASCII punctuation from both ends of every token, so
    "Working." and "working?" count as the same word as "working".
    Punctuation inside a word (e.g. the apostrophe in "don't") is kept.
    """

    def __init__(self, text):
        """Store the raw text to analyze."""
        self.text = text

    def _words(self):
        """Return the normalized words of the text, in order of appearance."""
        stripped = (
            token.strip(string.punctuation)
            for token in self.text.lower().split()
        )
        # A token made only of punctuation (e.g. "-") strips down to an
        # empty string; it is not a word, so drop it.
        return [word for word in stripped if word]

    def word_frequency(self, word):
        """Return how often ``word`` occurs in the text, or None if never.

        The comparison is case-insensitive and ignores punctuation around
        both the searched word and the words of the text.
        """
        target = word.lower().strip(string.punctuation)
        count = self._words().count(target)
        if count == 0:
            return None
        return count

    def most_common_word(self):
        """Return the most frequent word, or None when the text has no words.

        When several words share the highest count, the one that appears
        first in the text is returned.
        """
        freq = {}
        for w in self._words():
            freq[w] = freq.get(w, 0) + 1
        if not freq:
            return None
        # dicts iterate in insertion order and max() keeps the first maximal
        # item, which gives the "first seen wins" tie-break.
        return max(freq, key=freq.get)

    def unique_words(self):
        """Return each distinct word once, in order of first appearance."""
        # dict.fromkeys de-duplicates while preserving order, unlike set().
        return list(dict.fromkeys(self._words()))

    @classmethod
    def from_file(cls, file_path):
        """Build an instance from the contents of the file at ``file_path``.

        The file is read as UTF-8. If it does not exist, "File not found."
        is printed and an instance holding an empty text is returned.
        """
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
        except FileNotFoundError:
            print("File not found.")
            return cls("")
        return cls(content)


# ---------- BONUS PART: Text Modification ----------

class TextModification(Text):
    """Text variant that produces cleaned-up copies of the stored text.

    Every method returns a new string; ``self.text`` is never modified.
    """

    # Words dropped by remove_stop_words() when no custom collection is given.
    DEFAULT_STOP_WORDS = frozenset(
        {"a", "the", "is", "in", "and", "to", "with", "of", "for", "on"}
    )

    def remove_punctuation(self):
        """Return the text with every ASCII punctuation character removed."""
        return self.text.translate(str.maketrans('', '', string.punctuation))

    def remove_stop_words(self, stop_words=None):
        """Return the text with stop words removed.

        Args:
            stop_words: Optional iterable of words to drop. Defaults to
                DEFAULT_STOP_WORDS. Matching is case-insensitive.

        Returns:
            The remaining whitespace-separated tokens joined by single
            spaces, so runs of whitespace in the original are collapsed.
            A stop word is removed together with any punctuation attached
            to it (e.g. "the," is dropped as a whole).
        """
        if stop_words is None:
            blocked = self.DEFAULT_STOP_WORDS
        else:
            blocked = {word.lower() for word in stop_words}
        # Compare the token without its surrounding punctuation so that
        # "is." or "(the" are still recognized as stop words.
        kept = [
            token
            for token in self.text.split()
            if token.strip(string.punctuation).lower() not in blocked
        ]
        return " ".join(kept)

    def remove_special_characters(self):
        """Return the text keeping only ASCII letters, digits and whitespace.

        Note that the pattern is ASCII-only: accented or other non-ASCII
        letters are removed as well.
        """
        return re.sub(r'[^A-Za-z0-9\s]', '', self.text)


# ---------- Simple Test Zone ----------

if __name__ == "__main__":
    # Manual demo: run the analysis helpers and then the cleaning helpers on
    # one punctuation-heavy sentence, printing every result with a label.
    print("=== Text Analysis ===")

    sample = "Hello, hello! Is this working? Yes, it is working. Working well."

    txt = Text(sample)
    analysis_steps = (
        ("Frequency of 'working':", lambda: txt.word_frequency("working")),
        ("Most common word:", txt.most_common_word),
        ("Unique words:", txt.unique_words),
    )
    # Each value is computed right before its line is printed.
    for label, compute in analysis_steps:
        print(label, compute())

    mod = TextModification(sample)
    print("\n=== Text Modification ===")
    modification_steps = (
        ("Without punctuation:", mod.remove_punctuation),
        ("Without stop words:", mod.remove_stop_words),
        ("Without special characters:", mod.remove_special_characters),
    )
    for label, compute in modification_steps:
        print(label, compute())
