In [4]:
"""
Routine to load master dicitonary

Bill McDonald
Date: 201510 Updated: 202201 / 202308 / 202402

----------------------------

Retrieved by: Michael Fryer
Retrieved from: https://sraf.nd.edu/loughranmcdonald-master-dictionary/
Retrieved on: 2-12-2024
"""

import datetime as dt
import sys


def load_masterdictionary(file_path, print_flag=False, f_log=None, get_other=False):
    start_local = dt.datetime.now()
    # Setup dictionaries
    _master_dictionary = {}
    _sentiment_categories = [
        "negative",
        "positive",
        "uncertainty",
        "litigious",
        "strong_modal",
        "weak_modal",
        "constraining",
        "complexity",
    ]
    _sentiment_dictionaries = dict()
    for sentiment in _sentiment_categories:
        _sentiment_dictionaries[sentiment] = dict()

    # Load slightly modified common stopwords.
    # Dropped from traditional: A, I, S, T, DON, WILL, AGAINST
    # Added: AMONG
    _stopwords = [
        "ME",
        "MY",
        "MYSELF",
        "WE",
        "OUR",
        "OURS",
        "OURSELVES",
        "YOU",
        "YOUR",
        "YOURS",
        "YOURSELF",
        "YOURSELVES",
        "HE",
        "HIM",
        "HIS",
        "HIMSELF",
        "SHE",
        "HER",
        "HERS",
        "HERSELF",
        "IT",
        "ITS",
        "ITSELF",
        "THEY",
        "THEM",
        "THEIR",
        "THEIRS",
        "THEMSELVES",
        "WHAT",
        "WHICH",
        "WHO",
        "WHOM",
        "THIS",
        "THAT",
        "THESE",
        "THOSE",
        "AM",
        "IS",
        "ARE",
        "WAS",
        "WERE",
        "BE",
        "BEEN",
        "BEING",
        "HAVE",
        "HAS",
        "HAD",
        "HAVING",
        "DO",
        "DOES",
        "DID",
        "DOING",
        "AN",
        "THE",
        "AND",
        "BUT",
        "IF",
        "OR",
        "BECAUSE",
        "AS",
        "UNTIL",
        "WHILE",
        "OF",
        "AT",
        "BY",
        "FOR",
        "WITH",
        "ABOUT",
        "BETWEEN",
        "INTO",
        "THROUGH",
        "DURING",
        "BEFORE",
        "AFTER",
        "ABOVE",
        "BELOW",
        "TO",
        "FROM",
        "UP",
        "DOWN",
        "IN",
        "OUT",
        "ON",
        "OFF",
        "OVER",
        "UNDER",
        "AGAIN",
        "FURTHER",
        "THEN",
        "ONCE",
        "HERE",
        "THERE",
        "WHEN",
        "WHERE",
        "WHY",
        "HOW",
        "ALL",
        "ANY",
        "BOTH",
        "EACH",
        "FEW",
        "MORE",
        "MOST",
        "OTHER",
        "SOME",
        "SUCH",
        "NO",
        "NOR",
        "NOT",
        "ONLY",
        "OWN",
        "SAME",
        "SO",
        "THAN",
        "TOO",
        "VERY",
        "CAN",
        "JUST",
        "SHOULD",
        "NOW",
        "AMONG",
    ]

    # Loop thru words and load dictionaries
    with open(file_path) as f:
        _total_documents = 0
        _md_header = f.readline()  # Consume header line

        for line in f:
            cols = line.rstrip("\n").split(",")
            word = cols[0]
            _master_dictionary[word] = MasterDictionary(cols, _stopwords)
            for sentiment in _sentiment_categories:
                if getattr(_master_dictionary[word], sentiment):
                    _sentiment_dictionaries[sentiment][word] = 0
            _total_documents += _master_dictionary[cols[0]].doc_count
            if len(_master_dictionary) % 5000 == 0 and print_flag:
                print(
                    f"\r ...Loading Master Dictionary {len(_master_dictionary):,}",
                    end="",
                    flush=True,
                )

    if print_flag:
        print("\r", end="")  # clear line
        print(f"\nMaster Dictionary loaded from file:\n  {file_path}\n")
        print(f"  master_dictionary has {len(_master_dictionary):,} words.\n")

    if f_log:
        try:
            f_log.write(
                "\n\n  FUNCTION: load_masterdictionary"
                + "(file_path, print_flag, f_log, get_other)\n"
            )
            f_log.write(f"\n    file_path  = {file_path}")
            f_log.write(f"\n    print_flag = {print_flag}")
            f_log.write(f"\n    f_log      = {f_log.name}")
            f_log.write(f"\n    get_other  = {get_other}")
            f_log.write(
                f"\n\n    {len(_master_dictionary):,} words loaded in master_dictionary.\n"
            )
            f_log.write("\n    Sentiment:")
            for sentiment in _sentiment_categories:
                f_log.write(
                    f"\n      {sentiment:13}: {len(_sentiment_dictionaries[sentiment]):8,}"
                )
            f_log.write(
                f"\n\n  END FUNCTION: load_masterdictionary: {(dt.datetime.now()-start_local)}"
            )
        except Exception as e:
            print("Log file in load_masterdictionary is not available for writing")
            print(f"Error = {e}")

    if get_other:
        return (
            _master_dictionary,
            _md_header,
            _sentiment_categories,
            _sentiment_dictionaries,
            _stopwords,
            _total_documents,
        )
    else:
        return _master_dictionary


class MasterDictionary:
    def __init__(self, cols, _stopwords):
        for ptr, col in enumerate(cols):
            if col == "":
                cols[ptr] = "0"
        try:
            self.word = cols[0].upper()
            self.sequence_number = int(cols[1])
            self.word_count = int(cols[2])
            self.word_proportion = float(cols[3])
            self.average_proportion = float(cols[4])
            self.std_dev_prop = float(cols[5])
            self.doc_count = int(cols[6])
            self.negative = int(cols[7])
            self.positive = int(cols[8])
            self.uncertainty = int(cols[9])
            self.litigious = int(cols[10])
            self.strong_modal = int(cols[11])
            self.weak_modal = int(cols[12])
            self.constraining = int(cols[13])
            self.complexity = int(cols[14])
            self.syllables = int(cols[15])
            self.source = cols[16]
            if self.word in _stopwords:
                self.stopword = True
            else:
                self.stopword = False
        except Exception as e:
            print("ERROR in class MasterDictionary")
            print(f"word = {cols[0]} : seqnum = {cols[1]}")
            print(f"Exception: {e}")
            quit()
        return


if __name__ == "__main__":
    from pathlib import Path

    base_path = Path(__file__).parent / "loughran_mcdonald_dictionary"
    LOG_FILE = base_path / "Load_MD_Logfile.txt"
    start = dt.datetime.now()
    print(f'\n\n{start.strftime("%c")}\nPROGRAM NAME: {sys.argv[0]}\n')
    with open(LOG_FILE, "w", encoding="utf-8") as f_log:
        md = base_path / "Loughran-McDonald_MasterDictionary_1993-2024.csv"
        (
            master_dictionary,
            md_header,
            sentiment_categories,
            sentiment_dictionaries,
            stopwords,
            total_documents,
        ) = load_masterdictionary(md, True, f_log, True)
        print(f"\n\nRuntime: {(dt.datetime.now()-start)}")
        print(f'\nNormal termination.\n{dt.datetime.now().strftime("%c")}\n')


NameError: name '__file__' is not defined

In [3]:
import csv
import re
import os
from pathlib import Path
import pandas as pd

In [5]:
BASE_DIR = Path.cwd()
PROCESSED_DIR = BASE_DIR / "processed"
RESULTS_FILE = BASE_DIR / "sentiment_results.csv"

In [6]:
MASTER_DICT_FILE = BASE_DIR / "Loughran-McDonald_MasterDictionary_1993-2021.csv"

In [8]:
"""
Routine to load master dicitonary

Bill McDonald
Date: 201510 Updated: 202201 / 202308 / 202402

----------------------------

Retrieved by: Michael Fryer
Retrieved from: https://sraf.nd.edu/loughranmcdonald-master-dictionary/
Retrieved on: 2-12-2024
"""

import datetime as dt
import sys


def load_masterdictionary(file_path, print_flag=False, f_log=None, get_other=False):
    start_local = dt.datetime.now()
    # Setup dictionaries
    _master_dictionary = {}
    _sentiment_categories = [
        "negative",
        "positive",
        "uncertainty",
        "litigious",
        "strong_modal",
        "weak_modal",
        "constraining",
        "complexity",
    ]
    _sentiment_dictionaries = dict()
    for sentiment in _sentiment_categories:
        _sentiment_dictionaries[sentiment] = dict()

    # Load slightly modified common stopwords.
    # Dropped from traditional: A, I, S, T, DON, WILL, AGAINST
    # Added: AMONG
    _stopwords = [
        "ME",
        "MY",
        "MYSELF",
        "WE",
        "OUR",
        "OURS",
        "OURSELVES",
        "YOU",
        "YOUR",
        "YOURS",
        "YOURSELF",
        "YOURSELVES",
        "HE",
        "HIM",
        "HIS",
        "HIMSELF",
        "SHE",
        "HER",
        "HERS",
        "HERSELF",
        "IT",
        "ITS",
        "ITSELF",
        "THEY",
        "THEM",
        "THEIR",
        "THEIRS",
        "THEMSELVES",
        "WHAT",
        "WHICH",
        "WHO",
        "WHOM",
        "THIS",
        "THAT",
        "THESE",
        "THOSE",
        "AM",
        "IS",
        "ARE",
        "WAS",
        "WERE",
        "BE",
        "BEEN",
        "BEING",
        "HAVE",
        "HAS",
        "HAD",
        "HAVING",
        "DO",
        "DOES",
        "DID",
        "DOING",
        "AN",
        "THE",
        "AND",
        "BUT",
        "IF",
        "OR",
        "BECAUSE",
        "AS",
        "UNTIL",
        "WHILE",
        "OF",
        "AT",
        "BY",
        "FOR",
        "WITH",
        "ABOUT",
        "BETWEEN",
        "INTO",
        "THROUGH",
        "DURING",
        "BEFORE",
        "AFTER",
        "ABOVE",
        "BELOW",
        "TO",
        "FROM",
        "UP",
        "DOWN",
        "IN",
        "OUT",
        "ON",
        "OFF",
        "OVER",
        "UNDER",
        "AGAIN",
        "FURTHER",
        "THEN",
        "ONCE",
        "HERE",
        "THERE",
        "WHEN",
        "WHERE",
        "WHY",
        "HOW",
        "ALL",
        "ANY",
        "BOTH",
        "EACH",
        "FEW",
        "MORE",
        "MOST",
        "OTHER",
        "SOME",
        "SUCH",
        "NO",
        "NOR",
        "NOT",
        "ONLY",
        "OWN",
        "SAME",
        "SO",
        "THAN",
        "TOO",
        "VERY",
        "CAN",
        "JUST",
        "SHOULD",
        "NOW",
        "AMONG",
    ]

    # Loop thru words and load dictionaries
    with open(file_path) as f:
        _total_documents = 0
        _md_header = f.readline()  # Consume header line

        for line in f:
            cols = line.rstrip("\n").split(",")
            word = cols[0]
            _master_dictionary[word] = MasterDictionary(cols, _stopwords)
            for sentiment in _sentiment_categories:
                if getattr(_master_dictionary[word], sentiment):
                    _sentiment_dictionaries[sentiment][word] = 0
            _total_documents += _master_dictionary[cols[0]].doc_count
            if len(_master_dictionary) % 5000 == 0 and print_flag:
                print(
                    f"\r ...Loading Master Dictionary {len(_master_dictionary):,}",
                    end="",
                    flush=True,
                )

    if print_flag:
        print("\r", end="")  # clear line
        print(f"\nMaster Dictionary loaded from file:\n  {file_path}\n")
        print(f"  master_dictionary has {len(_master_dictionary):,} words.\n")

    if f_log:
        try:
            f_log.write(
                "\n\n  FUNCTION: load_masterdictionary"
                + "(file_path, print_flag, f_log, get_other)\n"
            )
            f_log.write(f"\n    file_path  = {file_path}")
            f_log.write(f"\n    print_flag = {print_flag}")
            f_log.write(f"\n    f_log      = {f_log.name}")
            f_log.write(f"\n    get_other  = {get_other}")
            f_log.write(
                f"\n\n    {len(_master_dictionary):,} words loaded in master_dictionary.\n"
            )
            f_log.write("\n    Sentiment:")
            for sentiment in _sentiment_categories:
                f_log.write(
                    f"\n      {sentiment:13}: {len(_sentiment_dictionaries[sentiment]):8,}"
                )
            f_log.write(
                f"\n\n  END FUNCTION: load_masterdictionary: {(dt.datetime.now()-start_local)}"
            )
        except Exception as e:
            print("Log file in load_masterdictionary is not available for writing")
            print(f"Error = {e}")

    if get_other:
        return (
            _master_dictionary,
            _md_header,
            _sentiment_categories,
            _sentiment_dictionaries,
            _stopwords,
            _total_documents,
        )
    else:
        return _master_dictionary


class MasterDictionary:
    def __init__(self, cols, _stopwords):
        for ptr, col in enumerate(cols):
            if col == "":
                cols[ptr] = "0"
        try:
            self.word = cols[0].upper()
            self.sequence_number = int(cols[1])
            self.word_count = int(cols[2])
            self.word_proportion = float(cols[3])
            self.average_proportion = float(cols[4])
            self.std_dev_prop = float(cols[5])
            self.doc_count = int(cols[6])
            self.negative = int(cols[7])
            self.positive = int(cols[8])
            self.uncertainty = int(cols[9])
            self.litigious = int(cols[10])
            self.strong_modal = int(cols[11])
            self.weak_modal = int(cols[12])
            self.constraining = int(cols[13])
            self.complexity = int(cols[14])
            self.syllables = int(cols[15])
            self.source = cols[16]
            if self.word in _stopwords:
                self.stopword = True
            else:
                self.stopword = False
        except Exception as e:
            print("ERROR in class MasterDictionary")
            print(f"word = {cols[0]} : seqnum = {cols[1]}")
            print(f"Exception: {e}")
            quit()
        return


if __name__ == "__main__":
    from pathlib import Path
    import datetime as dt

    # Use Path.cwd() instead of Path(__file__).parent for Jupyter Notebook compatibility
    base_path = Path.cwd() / "loughran_mcdonald_dictionary"
    LOG_FILE = base_path / "Load_MD_Logfile.txt"
    start = dt.datetime.now()
    
    print(f'\n\n{start.strftime("%c")}\nPROGRAM NAME: {sys.argv[0]}\n')
    with open(LOG_FILE, "w", encoding="utf-8") as f_log:
        md = base_path / "Loughran-McDonald_MasterDictionary_1993-2024.csv"
        (
            master_dictionary,
            md_header,
            sentiment_categories,
            sentiment_dictionaries,
            stopwords,
            total_documents,
        ) = load_masterdictionary(md, True, f_log, True)
        print(f"\n\nRuntime: {(dt.datetime.now()-start)}")
        print(f'\nNormal termination.\n{dt.datetime.now().strftime("%c")}\n')




Thu Dec  4 16:22:35 2025
PROGRAM NAME: /opt/anaconda3/lib/python3.11/site-packages/ipykernel_launcher.py

 ...Loading Master Dictionary 85,000
Master Dictionary loaded from file:
  /Users/min/loughran_mcdonald_dictionary/Loughran-McDonald_MasterDictionary_1993-2024.csv

  master_dictionary has 86,553 words.



Runtime: 0:00:00.856693

Normal termination.
Thu Dec  4 16:22:36 2025



**Read year 2016, 2017, 2018**

In [10]:
import csv
import re
from pathlib import Path

# ================= CONFIGURATION =================
BASE_DIR = Path.cwd()
PROCESSED_DIR = BASE_DIR / "processed"
OUTPUT_FILE = BASE_DIR / "sentiment_analysis_results_189.csv"
DICT_PATH = Path("/Users/min/loughran_mcdonald_dictionary/Loughran-McDonald_MasterDictionary_1993-2024.csv")

# Month mapping for filename conversion
MONTH_MAP = {
    "Jan": "01", "Feb": "02", "Mar": "03", "Apr": "04", "May": "05", "Jun": "06",
    "Jul": "07", "Aug": "08", "Sep": "09", "Oct": "10", "Nov": "11", "Dec": "12"
}

# The list of 189 specific files to analyze
TARGET_RAW_FILES = [
    # Tesco
    "2016-Apr-14-TSCO.L-139810984905-Transcript.txt", "2016-Apr-13-TSCO.L-137046574372-Transcript.txt",
    "2016-Oct-05-TSCO.L-139932963486-Transcript.txt", "2016-Oct-05-TSCO.L-138488445184-Transcript.txt",
    "2017-Apr-12-TSCO.L-140153460189-Transcript.txt", "2017-Oct-04-TSCO.L-138456938442-Transcript.txt",
    "2017-Oct-04-TSCO.L-139408526789-Transcript.txt", "2018-Apr-11-TSCO.L-138743165366-Transcript.txt",
    "2018-Apr-11-TSCO.L-139959883222-Transcript.txt", "2018-Oct-03-TSCO.L-139272056987-Transcript.txt",
    # Barclays
    "2016-Mar-01-BARC.L-139824873187-Transcript.txt", "2016-Mar-01-BARC.L-137548274670-Transcript.txt",
    "2016-Jul-29-BARC.L-138523614655-Transcript.txt", "2016-Jul-29-BARC.L-139004361518-Transcript.txt",
    "2017-Feb-23-BARC.L-139508609743-Transcript.txt", "2017-Feb-23-BARC.L-138555219749-Transcript.txt",
    "2017-Jul-28-BARC.L-141010579615-Transcript.txt", "2017-Jul-28-BARC.L-138477306147-Transcript.txt",
    "2017-Oct-26-BARC.L-137754650048-Transcript.txt", "2018-Feb-22-BARC.L-136963783161-Transcript.txt",
    "2018-Feb-22-BARC.L-137901359180-Transcript.txt", "2018-Apr-26-BARC.L-139732542527-Transcript.txt",
    "2018-Aug-02-BARC.L-139224686385-Transcript.txt", "2018-Aug-02-BARC.L-137366822025-Transcript.txt",
    "2018-Oct-24-BARC.L-140944839383-Transcript.txt",
    # AstraZeneca
    "2016-Feb-04-AZN.L-137149101673-Transcript.txt", "2016-Apr-29-AZN.L-138460076277-Transcript.txt",
    "2016-Jul-28-AZN.L-140055454105-Transcript.txt", "2016-Nov-10-AZN.L-140407419257-Transcript.txt",
    "2017-Feb-02-AZN.L-140607754798-Transcript.txt", "2017-Apr-27-AZN.L-139861839552-Transcript.txt",
    "2017-Jul-27-AZN.L-137003893218-Transcript.txt", "2017-Nov-09-AZN.L-138610867690-Transcript.txt",
    "2018-Feb-02-AZN.L-137054448061-Transcript.txt", "2018-May-18-AZN.L-138645788490-Transcript.txt",
    "2018-Jul-26-AZN.L-140818664492-Transcript.txt", "2018-Nov-08-AZN.L-140470213539-Transcript.txt",
    # Kroger
    "2016-Mar-03-KR.N-137071463801-Transcript.txt", "2016-Jun-16-KR.N-137382711624-Transcript.txt",
    "2016-Sep-09-KR.N-139724544695-Transcript.txt", "2016-Dec-01-KR.N-138964739188-Transcript.txt",
    "2017-Mar-02-KR.N-137268985869-Transcript.txt", "2017-Jun-15-KR.N-137028455557-Transcript.txt",
    "2017-Sep-08-KR.N-139332442878-Transcript.txt", "2017-Nov-30-KR.N-139233726377-Transcript.txt",
    "2018-Mar-08-KR.N-138743414845-Transcript.txt", "2018-Jun-21-KR.N-139399065775-Transcript.txt",
    "2018-Sep-13-KR.N-140662712213-Transcript.txt", "2018-Dec-06-KR.N-137368561785-Transcript.txt",
    # Ford
    "2016-Jan-28-F.N-140702560456-Transcript.txt", "2016-Jan-28-F.N-140929213537-Transcript.txt",
    "2016-Apr-28-F.N-139397969691-Transcript.txt", "2016-Apr-28-F.N-139061241116-Transcript.txt",
    "2016-Jul-28-F.N-140686794229-Transcript.txt", "2016-Jul-28-F.N-138385069851-Transcript.txt",
    "2016-Oct-27-F.N-139351278755-Transcript.txt", "2016-Oct-27-F.N-138379355375-Transcript.txt",
    "2017-Jan-26-F.N-139159806156-Transcript.txt", "2017-Jan-26-F.N-141181459722-Transcript.txt",
    "2017-Apr-27-F.N-140902625025-Transcript.txt", "2017-Jul-26-F.N-138852696142-Transcript.txt",
    "2017-Oct-26-F.N-137075183639-Transcript.txt", "2018-Jan-24-F.N-138389904781-Transcript.txt",
    "2018-Apr-25-F.N-137616469531-Transcript.txt", "2018-Jul-25-F.N-140890935247-Transcript.txt",
    "2018-Oct-24-F.N-137531908921-Transcript.txt",
    # Johnson & Johnson
    "2016-Jan-26-JNJ.N-138676405818-Transcript.txt", "2016-Apr-19-JNJ.N-137441522591-Transcript.txt",
    "2016-Jul-19-JNJ.N-140074050620-Transcript.txt", "2016-Oct-18-JNJ.N-139180104058-Transcript.txt",
    "2017-Jan-24-JNJ.N-137993682362-Transcript.txt", "2017-Apr-18-JNJ.N-139863288812-Transcript.txt",
    "2017-Jul-18-JNJ.N-137992811209-Transcript.txt", "2017-Oct-17-JNJ.N-137387649725-Transcript.txt",
    "2018-Jan-23-JNJ.N-137596207567-Transcript.txt", "2018-Apr-17-JNJ.N-137267325560-Transcript.txt",
    "2018-Jul-17-JNJ.N-141010222562-Transcript.txt", "2018-Oct-16-JNJ.N-138373365491-Transcript.txt",
    # Kraft Heinz
    "2016-Feb-25-KHC.OQ-138842966670-Transcript.txt", "2016-May-04-KHC.OQ-139041781128-Transcript.txt",
    "2016-Aug-04-KHC.OQ-138495811214-Transcript.txt", "2016-Nov-03-KHC.OQ-137203242866-Transcript.txt",
    "2017-Feb-15-KHC.OQ-139150056386-Transcript.txt", "2017-May-03-KHC.OQ-139224781261-Transcript.txt",
    "2017-Aug-03-KHC.OQ-140749085059-Transcript.txt", "2017-Nov-01-KHC.OQ-138744889212-Transcript.txt",
    "2018-Feb-16-KHC.OQ-137796859188-Transcript.txt", "2018-May-02-KHC.OQ-140336666357-Transcript.txt",
    "2018-Aug-03-KHC.OQ-139625297076-Transcript.txt", "2018-Nov-01-KHC.OQ-139957170311-Transcript.txt",
    # DHL
    "2016-Mar-09-DPWGn.DE-137982971470-Transcript.txt", "2016-May-11-DPWGn.DE-141134862749-Transcript.txt",
    "2016-Aug-03-DPWGn.DE-140062875928-Transcript.txt", "2016-Nov-08-DPWGn.DE-138136081245-Transcript.txt",
    "2017-Mar-08-DPWGn.DE-139183886140-Transcript.txt", "2017-May-11-DPWGn.DE-139196110732-Transcript.txt",
    "2017-Aug-08-DPWGn.DE-139817232520-Transcript.txt", "2017-Nov-09-DPWGn.DE-136901510295-Transcript.txt",
    "2018-Mar-07-DPWGn.DE-140776301443-Transcript.txt", "2018-Aug-07-DPWGn.DE-137436527491-Transcript.txt",
    "2018-Nov-06-DPWGn.DE-138067543106-Transcript.txt", "2018-Nov-06-DPWGn.DE-139056129466-Transcript.txt",
    # Toyota
    "2016-Feb-05-7203.T-139386834162-Transcript.txt", "2016-May-11-7203.T-139232969728-Transcript.txt",
    "2016-Aug-04-7203.T-140552523227-Transcript.txt", "2016-Nov-08-7203.T-140750745571-Transcript.txt",
    "2017-Feb-06-7203.T-139401254994-Transcript.txt", "2017-May-10-7203.T-141025822521-Transcript.txt",
    "2017-May-10-7203.T-139668947849-Transcript.txt", "2017-Aug-04-7203.T-140487691474-Transcript.txt",
    "2017-Nov-07-7203.T-139563770726-Transcript.txt", "2018-Feb-06-7203.T-138825723624-Transcript.txt",
    "2018-May-09-7203.T-137864330980-Transcript.txt", "2018-May-09-7203.T-139256922882-Transcript.txt",
    "2018-Aug-03-7203.T-137294661410-Transcript.txt", "2018-Nov-05-7203.T-136945998670-Transcript.txt",
    # Eli Lilly
    "2016-Jan-28-LLY.N-139753177768-Transcript.txt", "2016-Apr-26-LLY.N-138440853820-Transcript.txt",
    "2016-Jul-26-LLY.N-138043614001-Transcript.txt", "2016-Oct-25-LLY.N-137654586127-Transcript.txt",
    "2017-Jan-31-LLY.N-137441172831-Transcript.txt", "2017-Apr-25-LLY.N-138399030694-Transcript.txt",
    "2017-Jul-25-LLY.N-137807180604-Transcript.txt", "2017-Oct-24-LLY.N-139538448754-Transcript.txt",
    "2018-Jan-31-LLY.N-138912850488-Transcript.txt", "2018-Apr-24-LLY.N-139809901587-Transcript.txt",
    "2018-Jul-24-LLY.N-139253294349-Transcript.txt", "2018-Nov-06-LLY.N-138855989064-Transcript.txt",
    # Unilever
    "2016-Jan-19-ULVR.L-139914041816-Transcript.txt", "2016-Jul-21-ULVR.L-136975403569-Transcript.txt",
    "2017-Jan-26-ULVR.L-137807207448-Transcript.txt", "2017-Jul-20-ULVR.L-140135791033-Transcript.txt",
    "2018-Feb-01-ULVR.L-137010525657-Transcript.txt", "2018-Jul-19-ULVR.L-137927763788-Transcript.txt",
    # Land Rover (Tata)
    "2016-Feb-03-TATA.NS-137676422472-Transcript.txt", "2016-May-30-TATA.NS-138515578162-Transcript.txt",
    "2016-Aug-02-TATA.NS-137290025993-Transcript.txt", "2016-Nov-09-TATA.NS-138685415110-Transcript.txt",
    "2017-Jan-24-TATA.NS-139471832639-Transcript.txt", "2017-May-04-TATA.NS-140768671254-Transcript.txt",
    "2017-Jul-25-TATA.NS-137873824680-Transcript.txt", "2017-Oct-26-TATA.NS-139209110121-Transcript.txt",
    "2018-Feb-09-TATA.NS-139173494325-Transcript.txt", "2018-May-11-TATA.NS-137190394224-Transcript.txt",
    "2018-Aug-10-TATA.NS-138219347027-Transcript.txt", "2018-Nov-02-TATA.NS-137854802714-Transcript.txt"
]

# ================= CORE LOGIC =================

def load_master_dictionary(file_path):
    master_dict = {}
    sentiment_categories = ['Negative', 'Positive', 'Uncertainty', 'Litigious', 'StrongModal', 'WeakModal', 'Constraining']
    
    if not file_path.exists():
        print(f"Error: Dictionary not found at {file_path}")
        return None

    print(f"Loading dictionary from: {file_path}")
    with open(file_path, 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        for row in reader:
            word = row['Word'].upper()
            master_dict[word] = {}
            for category in sentiment_categories:
                try:
                    if int(row[category]) > 0:
                        master_dict[word][category] = True
                except (ValueError, KeyError):
                    continue
    return master_dict

def analyze_file(file_path, master_dict):
    with open(file_path, 'r', encoding='utf-8') as f:
        text = f.read().upper()
    
    # Tokenize: Keep only uppercase words with length > 1
    tokens = re.findall(r'\b[A-Z]{2,}\b', text)
    
    counts = {
        'Negative': 0, 'Positive': 0, 'Uncertainty': 0, 
        'Litigious': 0, 'StrongModal': 0, 'WeakModal': 0, 'Constraining': 0
    }
    
    for token in tokens:
        if token in master_dict:
            for cat in counts:
                if cat in master_dict[token]:
                    counts[cat] += 1
                    
    return len(tokens), counts

if __name__ == "__main__":
    master_dictionary = load_master_dictionary(DICT_PATH)
    
    if master_dictionary:
        print(f"Dictionary loaded. Starting analysis on {len(TARGET_RAW_FILES)} specified files...")
        
        results = []
        
        for raw_name in TARGET_RAW_FILES:
            # Parse raw filename to construct the path in the 'processed' folder
            # Raw: 2016-Apr-14-TSCO.L-139810984905-Transcript.txt
            # Processed target: processed/2016/TSCO.L-2016-04-14.txt
            
            parts = raw_name.split("-")
            
            if len(parts) >= 4:
                year = parts[0]
                month_str = parts[1]
                day = parts[2]
                ticker = parts[3]
                
                # Convert month string to number (e.g., Apr -> 04)
                month_num = MONTH_MAP.get(month_str, "00")
                
                # Construct new filename and path
                new_fname = f"{ticker}-{year}-{month_num}-{day}.txt"
                file_path = PROCESSED_DIR / year / new_fname
                
                # Check existence and analyze
                if file_path.exists():
                    total_words, scores = analyze_file(file_path, master_dictionary)
                    
                    # Calculate Net Sentiment (Pos - Neg) / Total
                    if total_words > 0:
                        net_sentiment = (scores['Positive'] - scores['Negative']) / total_words
                    else:
                        net_sentiment = 0
                    
                    row = {
                        'Filename': raw_name,
                        'Ticker': ticker,
                        'Date': f"{year}-{month_num}-{day}",
                        'Total_Words': total_words,
                        'Net_Sentiment': net_sentiment,
                        **scores 
                    }
                    results.append(row)
                else:
                    print(f"Warning: File not found on disk: {file_path}")
            else:
                 print(f"Warning: Invalid filename format: {raw_name}")

        # Save to CSV
        if results:
            keys = results[0].keys()
            with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames=keys)
                writer.writeheader()
                writer.writerows(results)
            
            print("-" * 50)
            print("Analysis Complete.")
            print(f"Processed: {len(results)} files.")
            print(f"Results saved to: {OUTPUT_FILE}")

Loading dictionary from: /Users/min/loughran_mcdonald_dictionary/Loughran-McDonald_MasterDictionary_1993-2024.csv
Dictionary loaded. Starting analysis on 146 specified files...
--------------------------------------------------
Analysis Complete.
Processed: 146 files.
Results saved to: /Users/min/sentiment_analysis_results_189.csv
