In [1]:
!pip install vaderSentiment



In [2]:
# NOTE(review): the next cell imports SentimentIntensityAnalyzer from the
# vaderSentiment package under the same name, which shadows this NLTK import;
# the NLTK implementation is therefore never the one that gets instantiated.
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [3]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd

In [4]:
# Location of the labelled headlines CSV.
# NOTE(review): this is a hard-coded absolute path; adjust it when running
# outside the environment the notebook was written in.
NEWS_CSV_PATH = '/content/News_sentiment_Jan2017_to_Apr2021.csv'
data = pd.read_csv(NEWS_CSV_PATH)

In [5]:
# Preview the first rows of the freshly loaded CSV to see which columns exist.
data.head()

Unnamed: 0,Date,Title,URL,sentiment,confidence,Unnamed: 5
0,05/01/17,Eliminating shadow economy to have positive im...,http://economictimes.indiatimes.com/news/econo...,POSITIVE,0.996185,
1,05/01/17,Two Chinese companies hit roadblock with India...,http://economictimes.indiatimes.com/news/econo...,NEGATIVE,-0.955493,
2,05/01/17,SoftBank India Vision gets new $100,http://economictimes.indiatimes.com/small-biz/...,POSITIVE,0.595612,
3,05/01/17,Nissan halts joint development of luxury cars ...,http://economictimes.indiatimes.com/news/inter...,NEGATIVE,-0.996672,
4,05/01/17,Despite challenges Rajasthan continues to prog...,http://economictimes.indiatimes.com/news/polit...,POSITIVE,0.997388,


In [6]:
# Keep only the columns used in the rest of the notebook. The explicit .copy()
# makes `data` an independent frame rather than a selection of the loaded one,
# so the column assignments in later cells do not raise pandas'
# SettingWithCopyWarning.
data = data[['Date', 'Title', 'sentiment']].copy()

In [7]:
# Show full cell contents (no truncation with "...") when frames are displayed.
pd.options.display.max_colwidth = None

In [8]:
# Preview again after narrowing to Date/Title/sentiment; with max_colwidth
# unset, the titles are printed in full.
data.head()

Unnamed: 0,Date,Title,sentiment
0,05/01/17,Eliminating shadow economy to have positive impact on GDP : Arun Jaitley,POSITIVE
1,05/01/17,Two Chinese companies hit roadblock with Indian investments,NEGATIVE
2,05/01/17,SoftBank India Vision gets new $100,POSITIVE
3,05/01/17,Nissan halts joint development of luxury cars with Daimler : Sources,NEGATIVE
4,05/01/17,Despite challenges Rajasthan continues to progress : Vasundhara Raje,POSITIVE


In [9]:
# (row count, column count) of the trimmed frame.
data.shape

(200500, 3)

In [10]:
# Count missing headlines before scoring them.
data['Title'].isna().sum()

0

In [11]:
# Build the analyzer once at module level; get_sentiment_scores() below reuses
# this single instance for every headline.
analyzer = SentimentIntensityAnalyzer()

In [12]:
def get_sentiment_scores(text):
    """Return the compound polarity score of ``text``.

    Args:
        text: The text to score; it is passed unchanged to
            ``analyzer.polarity_scores``.

    Returns:
        The value stored under the ``'compound'`` key of the dict returned by
        the module-level ``analyzer``.
    """
    return analyzer.polarity_scores(text)['compound']


In [13]:
# Score every headline individually and store the result as a new column.
data['SentimentScore'] = data['Title'].map(get_sentiment_scores)

In [14]:
# Inspect the new SentimentScore column next to the dataset's own labels.
data.head()

Unnamed: 0,Date,Title,sentiment,SentimentScore
0,05/01/17,Eliminating shadow economy to have positive impact on GDP : Arun Jaitley,POSITIVE,0.5574
1,05/01/17,Two Chinese companies hit roadblock with Indian investments,NEGATIVE,0.0
2,05/01/17,SoftBank India Vision gets new $100,POSITIVE,0.25
3,05/01/17,Nissan halts joint development of luxury cars with Daimler : Sources,NEGATIVE,0.0
4,05/01/17,Despite challenges Rajasthan continues to progress : Vasundhara Raje,POSITIVE,0.3773


In [15]:
# Baseline labelling rule: a strictly positive score is 'POSITIVE'; a score of
# zero or below is 'NEGATIVE'.
is_positive = data['SentimentScore'].gt(0)
data['predicted_sentiment'] = is_positive.map({True: 'POSITIVE', False: 'NEGATIVE'})

In [16]:
# Compare predicted_sentiment with the dataset's sentiment column on the first rows.
data.head()

Unnamed: 0,Date,Title,sentiment,SentimentScore,predicted_sentiment
0,05/01/17,Eliminating shadow economy to have positive impact on GDP : Arun Jaitley,POSITIVE,0.5574,POSITIVE
1,05/01/17,Two Chinese companies hit roadblock with Indian investments,NEGATIVE,0.0,NEGATIVE
2,05/01/17,SoftBank India Vision gets new $100,POSITIVE,0.25,POSITIVE
3,05/01/17,Nissan halts joint development of luxury cars with Daimler : Sources,NEGATIVE,0.0,NEGATIVE
4,05/01/17,Despite challenges Rajasthan continues to progress : Vasundhara Raje,POSITIVE,0.3773,POSITIVE


In [17]:
# Check the dtype of each column, including the two derived ones.
data.dtypes

Date                    object
Title                   object
sentiment               object
SentimentScore         float64
predicted_sentiment     object
dtype: object

In [18]:
from sklearn.metrics import accuracy_score

In [19]:
# Class balance of the labels derived from the sentiment scores.
data['predicted_sentiment'].value_counts()

NEGATIVE    124687
POSITIVE     75813
Name: predicted_sentiment, dtype: int64

In [20]:
# Class balance of the labels that ship with the dataset, for comparison.
data['sentiment'].value_counts()

NEGATIVE    108118
POSITIVE     92382
Name: sentiment, dtype: int64

In [21]:
# Share of headlines whose derived label matches the dataset's label.
accuracy = accuracy_score(
    y_true=data['sentiment'],
    y_pred=data['predicted_sentiment'],
)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 60.93%


In [22]:
# Same preview as the earlier head() call: none of the visible cells in
# between modify `data`.
data.head()

Unnamed: 0,Date,Title,sentiment,SentimentScore,predicted_sentiment
0,05/01/17,Eliminating shadow economy to have positive impact on GDP : Arun Jaitley,POSITIVE,0.5574,POSITIVE
1,05/01/17,Two Chinese companies hit roadblock with Indian investments,NEGATIVE,0.0,NEGATIVE
2,05/01/17,SoftBank India Vision gets new $100,POSITIVE,0.25,POSITIVE
3,05/01/17,Nissan halts joint development of luxury cars with Daimler : Sources,NEGATIVE,0.0,NEGATIVE
4,05/01/17,Despite challenges Rajasthan continues to progress : Vasundhara Raje,POSITIVE,0.3773,POSITIVE


In [26]:
# Sweep a few cut-offs on the sentiment score and keep the one with the highest
# accuracy against the dataset's labels. A headline is labelled 'POSITIVE' when
# its score is >= the cut-off and 'NEGATIVE' otherwise.
#
# NOTE(review): the cut-off is selected and evaluated on the same rows, so the
# reported accuracy is an optimistic estimate; a held-out split would be needed
# for an unbiased figure.
def _labels_for_threshold(scores, threshold):
    """Map a Series of scores to 'POSITIVE'/'NEGATIVE' labels for one cut-off."""
    return scores.ge(threshold).map({True: 'POSITIVE', False: 'NEGATIVE'})


threshold_values = [0.1, 0.2, 0.4, 0.5, 0.6]  # Experiment with different threshold values
best_threshold = None
best_accuracy = 0.0

for threshold in threshold_values:
    # Score each candidate in a scratch Series. Writing into
    # data['predicted_sentiment'] here would leave the column holding the labels
    # of the LAST threshold tried rather than the best one.
    candidate_labels = _labels_for_threshold(data['SentimentScore'], threshold)
    accuracy = accuracy_score(data['sentiment'], candidate_labels)

    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_threshold = threshold

# Store the labels of the winning cut-off so the column agrees with the
# accuracy printed below. If no candidate beat 0.0 the column is left untouched.
if best_threshold is not None:
    data['predicted_sentiment'] = _labels_for_threshold(data['SentimentScore'], best_threshold)

print(f"Best Threshold: {best_threshold}, Best Accuracy: {best_accuracy * 100:.2f}%")

Best Threshold: 0.2, Best Accuracy: 61.71%
