In [1]:
import csv
import spacy
nlp = spacy.load("en_core_web_sm")
import en_core_web_sm
import re

In [2]:
def create_word_patterns(base_words):
    """Build whole-word regex fragments for a list of keywords.

    Each keyword is lowercased and turned into a regex fragment. A few
    keywords expand to cover their common inflections (for example "gaza"
    also covers "gazan" and "gazans"); every other keyword is matched
    literally instead of being silently dropped.

    Each fragment is wrapped in a non-capturing group guarded by
    ``(?<!\\w)`` / ``(?!\\w)`` so that it only matches whole words. Without
    the guards "hamas" matches inside "bahamas". The group also keeps the
    internal ``|`` of a fragment from leaking when fragments are joined.

    Args:
        base_words: Iterable of keyword strings (any letter case).

    Returns:
        list[str]: One lowercase regex fragment per non-empty keyword, in
        input order. Fragments can be combined with ``"|".join(...)``.
    """
    # Keywords whose plural / demonym forms should match as well.
    inflected_forms = {
        "gaza": r"gazan?s?",
        "israel": r"israeli?s?",
        "palestine": r"palestine|palestinians?",
        "muslim": r"muslims?",
    }

    patterns = []
    for word in base_words:
        word = word.strip().lower()
        if not word:
            # An empty fragment would match at every position.
            continue
        core = inflected_forms.get(word, re.escape(word))
        # Lookarounds instead of \b so keywords that start or end with a
        # non-word character are still anchored sensibly.
        patterns.append(rf"(?<!\w)(?:{core})(?!\w)")
    return patterns

def check_strings(text, relevant_words):
    """Return True if ``text`` matches any pattern built from ``relevant_words``.

    The patterns come from ``create_word_patterns`` and are OR-ed together
    into one regex, which is searched case-insensitively in the lowercased
    string form of ``text``.

    Args:
        text: Value to search; it is converted with ``str()`` first.
        relevant_words: Keywords passed to ``create_word_patterns``.

    Returns:
        bool: True when at least one pattern is found, False otherwise.
        Also False when no patterns could be built.
    """
    patterns = create_word_patterns(relevant_words)
    if not patterns:
        # Joining zero patterns gives "", and re.search("", ...) succeeds on
        # every string, which would flag every text as relevant.
        return False
    combined_pattern = '|'.join(patterns)
    text = str(text).lower()
    return bool(re.search(combined_pattern, text, re.IGNORECASE))

def add_unique_items(source_list, target_list):
    """Append stripped, non-empty items from ``source_list`` to ``target_list``.

    ``target_list`` is modified in place. An item is appended only if its
    stripped form is not already present, so first-seen order is kept and
    duplicates inside ``source_list`` are added once. Blank items (what an
    empty ``;``-separated field produces) are skipped, matching how
    ``update_counts`` ignores empty strings.

    Args:
        source_list: Iterable of strings to add; ``None`` or empty is a no-op.
        target_list: List of strings that receives the new items.

    Returns:
        None
    """
    if not source_list:
        return

    # A set gives O(1) membership checks and makes de-duplication explicit,
    # rather than depending on how list.extend consumes a generator.
    seen = set(target_list)
    for raw_item in source_list:
        item = raw_item.strip()
        if item and item not in seen:
            seen.add(item)
            target_list.append(item)


In [7]:
# Keywords a headline must mention for its article to be included
RELEVANT_WORDS = [
    "Israel", "Palestine", "Gaza", "IDF",
    "Hamas", "Jewish", "Muslim", "Islamic",
]

# Suffixes (apparently YYYY_MM) of the NYT_Articles_<suffix>.csv files to scan
TIMELINE = [
    "2023_10", "2023_11", "2023_12",
    "2024_01", "2024_02", "2024_03", "2024_04", "2024_05",
]

# De-duplicated names collected from matching articles, in first-seen order
all_people, all_places, all_orgs = [], [], []

# Name -> number of occurrences across matching articles
people_count, places_count, orgs_count = {}, {}, {}

def update_counts(items_list, count_dict):
    """
    Tally every non-blank entry of items_list into count_dict (in place).

    Entries are stripped of surrounding whitespace before counting; entries
    that are empty after stripping are ignored.
    """
    for raw_entry in items_list:
        key = raw_entry.strip()
        if not key:
            continue  # blank entries are not counted
        if key in count_dict:
            count_dict[key] += 1
        else:
            count_dict[key] = 1

# Scan every monthly file and aggregate the tagged people / places /
# organizations of each article whose headline mentions a relevant keyword.
for date in TIMELINE:
    # newline='' is what the csv module expects, so quoted fields containing
    # line breaks are parsed correctly.
    # NOTE(review): no encoding is passed, so the platform default is used —
    # confirm how the NYT_Articles_*.csv files were written.
    with open(f"NYT_Articles_{date}.csv", 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # Skip header (tolerates an empty file)
        for line in reader:
            # Blank lines come back as [] and truncated rows lack the
            # columns read below; skip them instead of raising IndexError.
            if len(line) < 6:
                continue

            headline = line[0]
            if not check_strings(headline, RELEVANT_WORDS):
                continue

            # Columns 2, 3 and 5 are the ';'-separated fields this notebook
            # treats as people, places and organizations.
            people = line[2].split(";")
            places = line[3].split(";")
            orgs = line[5].split(";")

            # Add to unique lists
            add_unique_items(people, all_people)
            add_unique_items(places, all_places)
            add_unique_items(orgs, all_orgs)

            # Update counts
            update_counts(people, people_count)
            update_counts(places, places_count)
            update_counts(orgs, orgs_count)

# Print some example results
for label, tallies in (
    ("People", people_count),
    ("Places", places_count),
    ("Organizations", orgs_count),
):
    print(f"\nTop 10 Most Mentioned {label}:")
    # sorted() is stable, so ties keep their first-seen order.
    for name, count in sorted(tallies.items(), key=lambda x: x[1], reverse=True)[:10]:
        print(f"{name}: {count} times")



Top 10 Most Mentioned People:
Netanyahu, Benjamin: 369 times
Biden, Joseph R Jr: 317 times
Blinken, Antony J: 91 times
Trump, Donald J: 45 times
Gallant, Yoav: 30 times
Sinwar, Yehya: 25 times
ABBAS, MAHMOUD: 22 times
Austin, Lloyd J III: 19 times
Gantz, Benny: 19 times
Ben-Gvir, Itamar: 17 times

Top 10 Most Mentioned Places:
Israel: 1521 times
Gaza Strip: 1403 times
Rafah (Gaza Strip): 167 times
Iran: 100 times
West Bank: 96 times
United States: 80 times
Middle East: 78 times
Khan Younis (Gaza Strip): 72 times
Egypt: 72 times
Lebanon: 56 times

Top 10 Most Mentioned Organizations:
Hamas: 1007 times
United Nations: 115 times
Hezbollah: 98 times
Palestinian Authority: 82 times
Democratic Party: 74 times
Al Shifa Hospital (Gaza Strip): 55 times
Republican Party: 51 times
United Nations Relief and Works Agency: 49 times
International Court of Justice (UN): 49 times
House of Representatives: 46 times


In [8]:
def save_counts_to_csv(counts_dict, filename):
    """Write a name -> count mapping to a CSV file, highest count first.

    The file gets an ``Item,Count`` header followed by one row per entry.
    Entries with equal counts keep their order from ``counts_dict`` because
    the sort is stable.

    Args:
        counts_dict: Mapping of item name to its count.
        filename: Path of the CSV file to create or overwrite.

    Returns:
        None
    """
    # Explicit UTF-8 keeps the output identical across platforms and avoids
    # UnicodeEncodeError for non-ASCII names on legacy default code pages.
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Item', 'Count'])  # Header
        ranked = sorted(counts_dict.items(), key=lambda x: x[1], reverse=True)
        writer.writerows(ranked)

# Save all counts to separate CSV files
# Write each tally to its own CSV file: people, then places, then organizations
save_counts_to_csv(counts_dict=people_count, filename='people_counts.csv')
save_counts_to_csv(counts_dict=places_count, filename='places_counts.csv')
save_counts_to_csv(counts_dict=orgs_count, filename='orgs_counts.csv')