In [1]:
import string
from collections import Counter
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

import GetOldTweets3  as got # library used to scrape data directly from twitter without any tool
import warnings
warnings.filterwarnings('ignore')

In [2]:
text = open('National_news.txt' , encoding="utf-8").read()

print(text)


Prime Minister Narendra Modi said there are three COVID vaccines under development within the country.
Indian companies have sought technical details on the trials of Russia’s COVID-19 vaccine from the Russian Direct Investment Fund (RDIF) for possible production in India, an Indian Embassy source in Moscow said.

Separately, in an interview to Russian state media Sputnik, Indian Envoy in Moscow D.B. Venkatesh Varma said, “I had a productive discussion with CEO of RDIF, Mr. Kirill Dmitriev, and we are hopeful of positive outcome.”

Also read: Mass production of vaccine in 2021: Russia

Last Tuesday, Russia became the first country to officially register a coronavirus vaccine and Russian President Vladimir Putin announced that one of his daughters had also been inoculated.

“Indian companies are in touch with RDIF regarding vaccines and have asked for technical details about Phase I and Phase II trials of the vaccine and also for production of vaccines in India for third country exports

In [3]:
Lower_text = text.lower()

In [4]:
Lower_text

'prime minister narendra modi said there are three covid vaccines under development within the country.\nindian companies have sought technical details on the trials of russia’s covid-19 vaccine from the russian direct investment fund (rdif) for possible production in india, an indian embassy source in moscow said.\n\nseparately, in an interview to russian state media sputnik, indian envoy in moscow d.b. venkatesh varma said, “i had a productive discussion with ceo of rdif, mr. kirill dmitriev, and we are hopeful of positive outcome.”\n\nalso read: mass production of vaccine in 2021: russia\n\nlast tuesday, russia became the first country to officially register a coronavirus vaccine and russian president vladimir putin announced that one of his daughters had also been inoculated.\n\n“indian companies are in touch with rdif regarding vaccines and have asked for technical details about phase i and phase ii trials of the vaccine and also for production of vaccines in india for third count

In [5]:
length = len(Lower_text)


In [6]:
length

3167

In [7]:
# removing all punctuation
cleaned_text = Lower_text.translate(str.maketrans('','', string.punctuation))

In [8]:
cleaned_text

'prime minister narendra modi said there are three covid vaccines under development within the country\nindian companies have sought technical details on the trials of russia’s covid19 vaccine from the russian direct investment fund rdif for possible production in india an indian embassy source in moscow said\n\nseparately in an interview to russian state media sputnik indian envoy in moscow db venkatesh varma said “i had a productive discussion with ceo of rdif mr kirill dmitriev and we are hopeful of positive outcome”\n\nalso read mass production of vaccine in 2021 russia\n\nlast tuesday russia became the first country to officially register a coronavirus vaccine and russian president vladimir putin announced that one of his daughters had also been inoculated\n\n“indian companies are in touch with rdif regarding vaccines and have asked for technical details about phase i and phase ii trials of the vaccine and also for production of vaccines in india for third country exports and prod

In [9]:
# let's splitting text into words

tokenization_words = word_tokenize(cleaned_text, 'english')

In [10]:
tokenization_words

['prime',
 'minister',
 'narendra',
 'modi',
 'said',
 'there',
 'are',
 'three',
 'covid',
 'vaccines',
 'under',
 'development',
 'within',
 'the',
 'country',
 'indian',
 'companies',
 'have',
 'sought',
 'technical',
 'details',
 'on',
 'the',
 'trials',
 'of',
 'russia',
 '’',
 's',
 'covid19',
 'vaccine',
 'from',
 'the',
 'russian',
 'direct',
 'investment',
 'fund',
 'rdif',
 'for',
 'possible',
 'production',
 'in',
 'india',
 'an',
 'indian',
 'embassy',
 'source',
 'in',
 'moscow',
 'said',
 'separately',
 'in',
 'an',
 'interview',
 'to',
 'russian',
 'state',
 'media',
 'sputnik',
 'indian',
 'envoy',
 'in',
 'moscow',
 'db',
 'venkatesh',
 'varma',
 'said',
 '“',
 'i',
 'had',
 'a',
 'productive',
 'discussion',
 'with',
 'ceo',
 'of',
 'rdif',
 'mr',
 'kirill',
 'dmitriev',
 'and',
 'we',
 'are',
 'hopeful',
 'of',
 'positive',
 'outcome',
 '”',
 'also',
 'read',
 'mass',
 'production',
 'of',
 'vaccine',
 'in',
 '2021',
 'russia',
 'last',
 'tuesday',
 'russia',
 'becam

In [11]:
# removing word from the tockenization words list
final_words =[]
for word in tokenization_words:
    if word not in stopwords.words('english'):
        final_words.append(word)
        

In [12]:
final_words

['prime',
 'minister',
 'narendra',
 'modi',
 'said',
 'three',
 'covid',
 'vaccines',
 'development',
 'within',
 'country',
 'indian',
 'companies',
 'sought',
 'technical',
 'details',
 'trials',
 'russia',
 '’',
 'covid19',
 'vaccine',
 'russian',
 'direct',
 'investment',
 'fund',
 'rdif',
 'possible',
 'production',
 'india',
 'indian',
 'embassy',
 'source',
 'moscow',
 'said',
 'separately',
 'interview',
 'russian',
 'state',
 'media',
 'sputnik',
 'indian',
 'envoy',
 'moscow',
 'db',
 'venkatesh',
 'varma',
 'said',
 '“',
 'productive',
 'discussion',
 'ceo',
 'rdif',
 'mr',
 'kirill',
 'dmitriev',
 'hopeful',
 'positive',
 'outcome',
 '”',
 'also',
 'read',
 'mass',
 'production',
 'vaccine',
 '2021',
 'russia',
 'last',
 'tuesday',
 'russia',
 'became',
 'first',
 'country',
 'officially',
 'register',
 'coronavirus',
 'vaccine',
 'russian',
 'president',
 'vladimir',
 'putin',
 'announced',
 'one',
 'daughters',
 'also',
 'inoculated',
 '“',
 'indian',
 'companies',
 'tou

In [18]:
 # get emotion Text
# let's create empty emotion_list
# let's check before adding another emotion in existing emotion  file

emotion_list = []
with open('emotion.txt','r') as file:
    for line in file: # it print all line which is available in file
        clear_line= line.replace('\n', '').replace(',', '').replace("'", '').strip() # first we replace new line by \n and also replace quote with space
        #print(clear_line)
        word,emotion = clear_line.split(':')
       # print('word :' + word +"  " + 'Emotion :'+emotion)
        if word in final_words: # let's check word prsent in final word or not
            emotion_list.append(emotion)
print(emotion_list)
len(emotion_list)

[' free', ' attracted', ' sad', ' sad', 'wish', 'fair', 'happiness', 'tough', 'enjoyment', ' confront']


10

In [20]:
# let's check again how many emotion text present after adding  new emotions in 'emotion.txt' file
# we add {'hope':'wish','affordable':'fair','fight':'confront','wellbeing':'happiness','difficult':'tough,'interest':'enjoyment'
emotion_list

# after adding new emotion list

[' free',
 ' attracted',
 ' sad',
 ' sad',
 'wish',
 'fair',
 'happiness',
 'tough',
 'enjoyment',
 ' confront']