Installing Prerequisites & Dependencies

In [None]:
pip install GoogleNews

In [None]:
from GoogleNews import GoogleNews
googlenews = GoogleNews()
googlenews.enableException(True)
googlenews = GoogleNews(lang='en')
googlenews = GoogleNews(period='100d')
googlenews = GoogleNews(encode='utf-8')
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from collections import Counter
import matplotlib.pyplot as plt
from textblob import TextBlob
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
from nltk.probability import FreqDist
nltk.download('maxent_ne_chunker')
nltk.download('words')
from nltk import ne_chunk
from nltk.tag import pos_tag
import spacy
nlp = spacy.load("en_core_web_sm")
!python -m textblob.download_corpora

Scraping Data

In [78]:
googlenews.clear()

In [79]:
googlenews.get_news('Congress NYAY Manifesto')
news_data = googlenews.get_texts()
df = pd.DataFrame(news_data, columns=['news'])

Classes of each use:

In [80]:
def preprocess_text(text):
    # Tokenize text into words
    words = word_tokenize(text.lower())
    # Filter out stopwords and punctuation
    stop_words = set(stopwords.words('english'))
    filtered_words = [word for word in words if word.isalnum() and word not in stop_words]
    return filtered_words

# Function to extract key political issues
def extract_political_issues(news_data):
    # Preprocess news text
    preprocessed_news = news_data.apply(preprocess_text)
    # Flatten list of words
    flattened_words = [word for sublist in preprocessed_news for word in sublist]
    # Calculate word frequencies
    word_freq = FreqDist(flattened_words)
    # Get most common words
    most_common_words = word_freq.most_common(10)  # Adjust the number as needed
    return most_common_words

def perform_sentiment_analysis(news_data):
    sentiments = []
    for news_article in news_data:
        analysis = TextBlob(news_article)
        sentiment_score = analysis.sentiment.polarity
        if sentiment_score > 0:
            sentiment = 'Positive'
        elif sentiment_score < 0:
            sentiment = 'Negative'
        else:
            sentiment = 'Neutral'
        sentiments.append(sentiment)
    return sentiments

# Perform sentiment analysis on news data
sentiments = perform_sentiment_analysis(df['news'])

# Add sentiment information to the DataFrame
df['Sentiment'] = sentiments

def extract_framing_techniques(news_data):
    framing_techniques = []
    for news_article in news_data:
        analysis = TextBlob(news_article)
        noun_phrases = analysis.noun_phrases
        framing_techniques.extend(noun_phrases)
    return framing_techniques

def extract_named_entity_relationships(text):
    doc = nlp(text)
    relationships = []
    for ent in doc.ents:
        if ent.label_ == "ORG" or ent.label_ == "LOC" or ent.label_ == "PERSON":
            relationships.append((ent.text, ent.label_))
    return relationships

Results:

1. Key Political issues with no of mentions.

In [81]:
# Extract key political issues from news data
key_political_issues = extract_political_issues(df['news'])
print("Key Political Issues:")
for issue, frequency in key_political_issues:
    print(f"{issue}: {frequency} mentions")

Key Political Issues:
congress: 96 mentions
manifesto: 74 mentions
nyay: 36 mentions
patra: 34 mentions
lok: 31 mentions
sabha: 31 mentions
2024: 27 mentions
modi: 21 mentions
elections: 15 mentions
bjp: 15 mentions


2. Segregating df into positive, negative and neutral dfs

In [82]:
positive_df = df[df['Sentiment'] == 'Positive']
negative_df = df[df['Sentiment'] == 'Negative']
neutral_df = df[df['Sentiment'] == 'Neutral']

3. Framing the text

In [83]:
# Extract framing techniques from news data
framing_techniques = extract_framing_techniques(df['news'])

# Count the frequency of each framing technique
framing_technique_counts = Counter(framing_techniques)

# Display the dominant framing techniques
print("Dominant Framing Techniques in the News Articles:")
for technique, count in framing_technique_counts.most_common():
    print(f"{technique}: {count} mentions")

Dominant Framing Techniques in the News Articles:
congress: 53 mentions
lok sabha: 21 mentions
nyay patra: 21 mentions
bjp: 15 mentions
pm modi: 13 mentions
mint: 11 mentions
patra: 9 mentions
lok sabha elections: 9 mentions
nyay: 8 mentions
modi: 7 mentions
’ s: 7 mentions
kharge: 7 mentions
social justice: 4 mentions
release manifesto: 4 mentions
rahul gandhi: 4 mentions
congress manifesto: 4 mentions
manifesto: 4 mentions
telangana: 3 mentions
guarantee: 3 mentions
sankalp patra: 3 mentions
caa: 3 mentions
poll manifesto: 3 mentions
party 's: 2 mentions
muslims: 2 mentions
cong: 2 mentions
releases manifesto: 2 mentions
's ‘: 2 mentions
party manifesto: 2 mentions
express: 2 mentions
manifesto creates: 2 mentions
manifesto |: 2 mentions
‘ nyay ’: 2 mentions
opinion: 2 mentions
lgbtqia: 2 mentions
promises: 2 mentions
pmla: 2 mentions
pakistan: 2 mentions
mallikarjun kharge: 2 mentions
april: 2 mentions
key: 2 mentions
expensive wishlist: 1 mentions
rs: 1 mentions
lakh crore: 1 menti

4. Creating CSV of each df

In [84]:
df.to_csv("output.csv", index=False)
positive_df.to_csv("positive_output.csv", index=False)
negative_df.to_csv("negative_output.csv", index=False)
neutral_df.to_csv("neutral_output.csv", index=False)