In [24]:
import pandas as pd

# Load the dataset
file_path = 'articles_israel1.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the dataset to understand its structure
data.head()

Unnamed: 0,url,url_mobile,title,seendate,socialimage,domain,language,sourcecountry
0,https://www.republika.co.id/berita/rnshzt370/w...,https://m.republika.co.id/amp/rnshzt370,Warga Gaza Rayakan HUT Partai Fatah yang Langka,20230101T060000Z,https://static.republika.co.id/uploads/images/...,republika.co.id,Indonesian,Indonesia
1,https://www.voaindonesia.com/a/warga-palestina...,https://www.voaindonesia.com/amp/warga-palesti...,Warga Palestina Rayakan Hari Jadi Fatah di Gaza,20230101T114500Z,https://gdb.voanews.com/09410000-0a00-0242-a55...,voaindonesia.com,Indonesian,Indonesia
2,https://www.clevelandjewishnews.com/jns/hamas-...,,Hamas event in Jerusalem : Wage jihad to destr...,20230101T130000Z,https://bloximages.chicago2.vip.townnews.com/c...,clevelandjewishnews.com,English,Israel
3,https://www.republika.co.id/berita/rnssk5370/p...,https://m.republika.co.id/amp/rnssk5370,Palestina Sambut Keputusan PBB Minta ICJ Beri ...,20230101T090000Z,https://static.republika.co.id/uploads/images/...,republika.co.id,Indonesian,Indonesia
4,https://www.trtworld.com/middle-east/in-hamas-...,https://www.trtworld.com/middle-east/in-hamas-...,"In Hamas - governed Gaza , Fatah rallies again...",20230101T064500Z,https://cdni0.trtworld.com/w480/h270/q75/14709...,trtworld.com,English,Turkey


Classification with DeBERTa v3

In [25]:
from transformers import pipeline

# Filter the dataset for English language articles
english_articles = data[data['language'] == 'English']

# Extract the titles
titles = english_articles['title'].tolist()

# Initialize the zero-shot-classification pipeline
classifier = pipeline("zero-shot-classification", model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli", use_fast=False)

# Define candidate labels for sentiment analysis
candidate_labels = ["pro-Israel", "anti-Israel"]
titles

['Hamas event in Jerusalem : Wage jihad to destroy Israel',
 'In Hamas - governed Gaza , Fatah rallies against  racist  Netanyahu government',
 'Universities Are Seeing An Enormous Spike In Antisemitism . Here Why',
 'Palestinian Authority facing year of critical challenges',
 'Culture of hope : 2022 and the margins of victory in Palestine',
 'Universities Are Seeing An Enormous Spike In Antisemitism . Here Why',
 'Foreign Ministry : Israel killing of young Palestinians  organized state terrorism  ',
 '  Unprecedented : Enormous spike in antisemitism at U . S . colleges',
 'A challenge for the UN Security Council',
 'Bnai Brith denounces UN referral of Israel to the International Court of Justice | JNS',
 'Palestinian prime minister raps Israel killing of 2 young Palestinians',
 'Marc Lamont Hill  From the River to the Sea  Double Standard',
 '  The Bottom Line  - Adam H . Klein | Letters | caledonianrecord . com',
 'Under the radar negotiations on Hashemite Kingdom of Palestine – J - 

In [26]:
# Filter out any rows where the title is NaN or not a string
english_articles = english_articles.dropna(subset=['title'])
english_articles = english_articles[english_articles['title'].apply(lambda x: isinstance(x, str))]

# Now extract the titles again
titles = english_articles['title'].tolist()

# Optionally, inspect the first few titles
print(titles[:5])

# Then proceed with your classification as before

['Hamas event in Jerusalem : Wage jihad to destroy Israel', 'In Hamas - governed Gaza , Fatah rallies against  racist  Netanyahu government', 'Universities Are Seeing An Enormous Spike In Antisemitism . Here Why', 'Palestinian Authority facing year of critical challenges', 'Culture of hope : 2022 and the margins of victory in Palestine']


In [27]:
# Apply the classifier to the titles
sentiment_results = []
for title in titles:
    result = classifier(title, candidate_labels, multi_label=False)
    sentiment_results.append(result)

In [28]:
print(len(sentiment_results))

# Displaying the first few results for review
sentiment_results[:20]

2653


[{'sequence': 'Hamas event in Jerusalem : Wage jihad to destroy Israel',
  'labels': ['anti-Israel', 'pro-Israel'],
  'scores': [0.9986788630485535, 0.0013211950426921248]},
 {'sequence': 'In Hamas - governed Gaza , Fatah rallies against  racist  Netanyahu government',
  'labels': ['anti-Israel', 'pro-Israel'],
  'scores': [0.9928072690963745, 0.007192753255367279]},
 {'sequence': 'Universities Are Seeing An Enormous Spike In Antisemitism . Here Why',
  'labels': ['anti-Israel', 'pro-Israel'],
  'scores': [0.7671117186546326, 0.23288823664188385]},
 {'sequence': 'Palestinian Authority facing year of critical challenges',
  'labels': ['pro-Israel', 'anti-Israel'],
  'scores': [0.5899437665939331, 0.4100562036037445]},
 {'sequence': 'Culture of hope : 2022 and the margins of victory in Palestine',
  'labels': ['anti-Israel', 'pro-Israel'],
  'scores': [0.6406125426292419, 0.35938748717308044]},
 {'sequence': 'Universities Are Seeing An Enormous Spike In Antisemitism . Here Why',
  'label

In [29]:
# Write it into a file:
import csv

# Determine unique keys
keys = set()
for item in sentiment_results:
    keys.update(item.keys())

# Convert the set to a list to keep the order consistent
keys = list(keys)

# Write to CSV
with open('stance_classification.csv', 'w', newline='', encoding='utf-8') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(sentiment_results)

Sentiment Analysis with VADER

In [30]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk

# Download the VADER lexicon
nltk.download('vader_lexicon')

# Initialize the Sentiment Intensity Analyzer
analyzer = SentimentIntensityAnalyzer()

# Extract the titles
titles = english_articles['title'].tolist()

# Initialize a list to store sentiment results
sentiment_results = []

# Analyze the sentiment of each title and store the results
for title in titles:
    scores = analyzer.polarity_scores(title)
    sentiment_results.append({"Title": title, "Sentiment Scores": scores})

# Print the first 5 results
for result in sentiment_results[:5]:
    print(result)


{'Title': 'Hamas event in Jerusalem : Wage jihad to destroy Israel', 'Sentiment Scores': {'neg': 0.304, 'neu': 0.696, 'pos': 0.0, 'compound': -0.5423}}
{'Title': 'In Hamas - governed Gaza , Fatah rallies against  racist  Netanyahu government', 'Sentiment Scores': {'neg': 0.308, 'neu': 0.692, 'pos': 0.0, 'compound': -0.6124}}
{'Title': 'Universities Are Seeing An Enormous Spike In Antisemitism . Here Why', 'Sentiment Scores': {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}}
{'Title': 'Palestinian Authority facing year of critical challenges', 'Sentiment Scores': {'neg': 0.258, 'neu': 0.449, 'pos': 0.292, 'compound': -0.1779}}
{'Title': 'Culture of hope : 2022 and the margins of victory in Palestine', 'Sentiment Scores': {'neg': 0.0, 'neu': 0.775, 'pos': 0.225, 'compound': 0.4404}}


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/busraoguzoglu/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [31]:
# Write it into a file:
import csv

# Determine unique keys
keys = set()
for item in sentiment_results:
    keys.update(item.keys())

# Convert the set to a list to keep the order consistent
keys = list(keys)

# Write to CSV
with open('sentiment.csv', 'w', newline='', encoding='utf-8') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(sentiment_results)