In [1]:
#Basic Libraries

import pandas as pd

import numpy as np

#Plotting Libraries
import matplotlib.pyplot as plt
import seaborn as sns

#Text Mining
from wordcloud import WordCloud
from nltk.sentiment import SentimentIntensityAnalyzer
from textblob import TextBlob
import nltk

#Warnings
import warnings

#Topic Modelling
from gensim.models.coherencemodel import CoherenceModel
from gensim.models import LdaMulticore
import gensim

#Metrics
from sklearn.metrics import accuracy_score

#Others
from datetime import datetime



ModuleNotFoundError: No module named 'textblob'

In [None]:
# Load the six per-platform/topic CSV files and the main news table.

fb_obama, gp_obama, li_obama = (
    pd.read_csv(csv_name)
    for csv_name in ('Facebook_Obama.csv', 'GooglePlus_Obama.csv', 'LinkedIn_Obama.csv')
)

fb_economy, gp_economy, li_economy = (
    pd.read_csv(csv_name)
    for csv_name in ('Facebook_Economy.csv', 'GooglePlus_Economy.csv', 'LinkedIn_Economy.csv')
)

final = pd.read_csv('News_Final.csv')

### Removing Palestine and Microsoft

In [None]:
# Locate the rows of the two topics that are removed in the next cell.
is_palestine = final['Topic'] == 'palestine'
is_microsoft = final['Topic'] == 'microsoft'
temp1 = np.where(is_palestine)[0]   # positions of the 'palestine' rows
temp2 = np.where(is_microsoft)[0]   # positions of the 'microsoft' rows
print(temp1.size + temp2.size)
final.shape  # shape of `final` before anything is dropped

In [None]:
# Drop the 'palestine' and 'microsoft' rows by index label.
# The mask is rebuilt here instead of reusing the positional indices from
# np.where: positions only equal labels while the index is an untouched
# RangeIndex, and re-running the old cell raised KeyError because the labels
# were already gone. With the mask, a second run simply drops nothing.
unwanted_topic = final['Topic'].isin(['palestine', 'microsoft'])
final.drop(index=final.index[unwanted_topic], inplace=True)
final.shape  # shape of `final` after both topics are removed

In [None]:
final.dtypes

In [None]:
final.head(1)

In [None]:
# Order the rows by the PublishDate column and peek at the first one.
final = final.sort_values('PublishDate')
final.head(1)

In [None]:
# Parse PublishDate into pandas datetime values.
# NOTE(review): the old comment called the source dtype "category"; read_csv is
# called without dtype options, so it is presumably plain text — confirm.
final['PublishDate']=pd.to_datetime(final['PublishDate'])

### Removing records that never entered Social Media

In [None]:
# Row positions (not index labels) that carry the -1 sentinel on each platform.
# NOTE(review): temp1/temp2/temp3 are not read again in the visible notebook —
# confirm before deleting this cell.
temp1 = set(np.where(final['Facebook']==-1)[0])   # positions where Facebook == -1
temp2 = set(np.where(final['LinkedIn']==-1)[0])     # positions where LinkedIn == -1
temp3 = set(np.where(final['GooglePlus']==-1)[0])   # positions where GooglePlus == -1

In [None]:
# Temporarily turn the -1 sentinel into NaN so dropna() can act on it.
# Assign the result back instead of calling replace(..., inplace=True) on
# `final.<column>`: that chained in-place call operates on a temporary Series
# and does not update `final` when pandas Copy-on-Write is active.
social_cols = ['Facebook', 'LinkedIn', 'GooglePlus']
final[social_cols] = final[social_cols].replace(-1, np.nan)

In [None]:
# Drop a row only when all three social-media columns are NaN (formerly -1).
# Naming the columns replaces thresh=9, which depended on the total column
# count and also removed rows where a missing Headline/Source happened to
# coincide with two missing platforms.
final.dropna(subset=['Facebook', 'LinkedIn', 'GooglePlus'], how='all', inplace=True)

In [None]:
# Put the -1 sentinel back into the remaining NaN cells of the platform columns.
# Assigned back explicitly: a chained `final.<column>.replace(..., inplace=True)`
# does not modify `final` when pandas Copy-on-Write is active.
social_cols = ['Facebook', 'LinkedIn', 'GooglePlus']
final[social_cols] = final[social_cols].fillna(-1)

### Null Values

In [None]:
# Number of missing values in every column.

final.isna().sum()

In [None]:
# Index labels of the rows with a missing Headline (l1/s1) or Source (l2/s2),
# followed by how many distinct rows are affected in total.

l1 = final.index[final['Headline'].isnull().to_numpy()].tolist()
l2 = final.index[final['Source'].isnull().to_numpy()].tolist()
s1, s2 = set(l1), set(l2)
len(s1 | s2)

In [None]:
# Share of rows (as a fraction of all rows) with a missing Headline or Source.

len(s1 | s2) / len(final)

## Categorizing Source

In [None]:
# Read the 'Full title' column of the journal workbook and keep a
# stripped, lower-cased copy of every title in the list `k`.
journal = pd.read_excel('ESIMasterJournalList-122018.xlsx', sheet_name='Sheet1')['Full title']
k = [str(title).strip().lower() for title in journal]

In [None]:
# Substrings that mark a source as an online outlet ('.co' also covers '.com').
ONLINE_MARKERS = ('yahoo', '.fr', '.co', 'blog', '.ca', '.tv')

# Substrings that mark a source as a daily / journal.
PRINT_MARKERS = (
    'today', 'times', 'daily',
    'journal', 'post', 'magazine', 'forbes', 'tass',
    'nasdaq', 'reuters', 'sputnik', 'mid-day', 'quartz',
    'herald', 'market', 'business',
)

# Substrings that mark a source as a TV news channel.
# 'bbc' replaces the earlier 'bcc' typo, and 'cbs' now also matches names that
# start with it (the old `find('cbs') > 0` skipped a match at position 0).
TV_MARKERS = ('cnn', 'fxstreet', 'bbc', 'cnbc', 'news', 'ndtv', 'abc', 'press', 'cbs')


def categorize_source(source, journal_titles):
    """Map one raw Source value to 'Online', 'Dailies/Journals' or 'Television'.

    Parameters
    ----------
    source : object
        Raw value from the Source column. It is converted with str(), so a
        missing value becomes the text 'nan' and falls through to the default.
    journal_titles : collection of str
        Stripped, lower-cased journal titles; an exact match counts as a
        daily/journal.

    Returns
    -------
    str
        The category label. Rules are checked in the order online, print, TV;
        anything unmatched defaults to 'Dailies/Journals'.
    """
    name = str(source).strip().lower()

    def contains_any(markers):
        return any(marker in name for marker in markers)

    # '.gov' only counts when it is not at the very start of the name.
    if contains_any(ONLINE_MARKERS) or name.find('.gov') > 0 or name == 'bloomberg':
        return 'Online'

    if name in journal_titles or name.startswith('the') or contains_any(PRINT_MARKERS):
        return 'Dailies/Journals'

    # 'live' keeps its "not at position 0" rule from the original code.
    # NOTE(review): confirm that names starting with 'live' should stay out of Television.
    if name in ('time', 'msnbc') or name.find('live') > 0 or contains_any(TV_MARKERS):
        return 'Television'

    return 'Dailies/Journals'


# A set makes the per-row journal lookup constant-time instead of a list scan.
journal_titles = set(k)
final['S_Source'] = [categorize_source(source, journal_titles) for source in final['Source']]

In [None]:
#count of news items each category has
final['S_Source'].value_counts()

In [None]:
# Derive the month column: the first day of each article's publish month.
# PublishDate itself is left untouched; the earlier version cast it back to
# str here, silently undoing the datetime conversion done above.
publish_dates = pd.to_datetime(final['PublishDate'])
final['month'] = publish_dates.dt.to_period('M').dt.to_timestamp()
final.head(2)

### Creating a Master DataFrame

In [None]:
# Print the (rows, columns) shape of each of the six input tables.
for table in (fb_obama, fb_economy, li_obama, li_economy, gp_obama, gp_economy):
    print(table.shape)

In [None]:
# Tag every table with a '<platform>_<topic>' label in a new sm_title column.
for label, table in (
    ('fb_obama', fb_obama),
    ('fb_economy', fb_economy),
    ('li_obama', li_obama),
    ('li_economy', li_economy),
    ('gp_obama', gp_obama),
    ('gp_economy', gp_economy),
):
    table['sm_title'] = label

In [None]:
# Stack the six per-platform/topic tables into a single frame.
frame = [fb_obama,fb_economy,li_obama,li_economy,gp_obama,gp_economy]  # the six tables to concatenate
result = pd.concat(frame)
result.head()

In [None]:
result.shape

In [None]:
# Turn every -1 in `result` into NaN (applies to all columns, not only the TS ones).
result.replace(-1,np.nan,inplace=True)

In [None]:
# thresh=3 keeps only the rows that have at least three non-NaN values.
# NOTE(review): presumably IDLink and sm_title are always populated, so this
# removes exactly the rows whose TS columns are all NaN (formerly -1) — confirm.
result.dropna(thresh=3,inplace=True)

In [None]:
result.shape

In [None]:
# NOTE(review): hard-coded ratio; presumably (rows dropped above) / (rows before
# the drop) — confirm and derive both numbers from the data instead.
2455/177395

In [None]:
# Put the -1 sentinel back into every remaining NaN cell of `result`.
result.replace(np.nan,-1,inplace=True)

In [None]:
# Join the news table with the stacked time-series table on their shared IDLink key.
data = final.merge(result, on='IDLink')
data.shape

In [None]:
data.isnull().sum()

In [None]:
# Drop every row that still has a missing value in any column.
# NOTE(review): presumably only Headline and Source contain nulls at this point — confirm.
data.dropna(inplace=True)

In [None]:
r=pd.concat(frame)

In [None]:
# Rows whose TS144 value is -1, grouped by TS144 and summed column-wise.
# NOTE(review): presumably only the number of such rows was wanted — confirm.
r[r['TS144']==-1].groupby(['TS144']).sum() 

In [None]:
len(r)

In [None]:
# NOTE(review): same hard-coded ratio as earlier; the denominator presumably
# matches len(r) shown above — confirm and compute it from the data.
2455/177395

## Data Visualization

In [None]:
#count of all the news of economy and obama in the final file
final['Topic'].value_counts()

In [None]:
#count of news of economy and obama in the data file
data['Topic'].value_counts()

In [None]:
# The first two characters of sm_title give the platform code, stored in `sm`.
data['sm'] = data['sm_title'].str.slice(0, 2)

In [None]:
# Number of merged rows per topic, split by social-media platform.
# `x` is passed by keyword: seaborn >= 0.12 accepts only `data` positionally,
# so the positional form fails there.
sns.countplot(x='Topic', hue='sm', data=data)
plt.legend(loc=0)
plt.show()

### Popularity based on weekday and weekend

In [None]:
# Day of the week each article was published (Monday=0 ... Sunday=6).
# Taken from PublishDate: the `month` column is truncated to the first day of
# the month, so its weekday says nothing about the actual publication day.
final['weekday'] = pd.to_datetime(final['PublishDate']).dt.weekday

In [None]:
# weekday() numbers Monday..Sunday as 0..6, so 5 (Saturday) and 6 (Sunday) are
# the weekend; the earlier `<= 5` test counted Saturday as a weekday.
final['weekday/weekend'] = np.where(final['weekday'] < 5, 'weekday', 'weekend')

In [None]:
final.tail()

In [None]:
# Title and headline sentiment over time.
sentiment_title = final.SentimentTitle
sentiment_headline = final.SentimentHeadline

# Plot against the publish timestamp. plt.plot(series) alone uses the row
# index as x, and after sorting by date that index is no longer in order,
# so it is not the time axis the x label promises.
publish_time = pd.to_datetime(final['PublishDate'])

for scores, y_label, plot_title in (
    (sentiment_title, 'Sentiment Title', 'Timeline Vs SentiTitle'),
    (sentiment_headline, 'Sentiment Headline', 'Timeline Vs SentiHeadline'),
):
    plt.figure(figsize=(8, 4))
    plt.plot(publish_time, scores)
    plt.xlabel('Time')
    plt.ylabel(y_label)
    plt.title(plot_title)
    plt.show()

In [None]:
# Number of articles per month, one line per topic.
print('\n')
for topic in final.Topic.unique():
    topic_rows = final.loc[final['Topic'] == topic]
    monthly_counts = topic_rows.groupby('month')['IDLink'].count()
    plt.plot(monthly_counts, label=topic)
plt.xticks(rotation=60)
plt.legend()
plt.show()

In [None]:
final[final['Topic']=='obama'].groupby(['month']).month.value_counts()

In [None]:
final[final['Topic']=='economy'].groupby(['month']).month.value_counts()

In [None]:
# Monthly mean sentiment per topic: one figure for titles, one for headlines.
for banner, score_col, plot_title in (
    ('Sentiment Title\n', 'SentimentTitle', 'Sentiment Title'),
    ('Sentiment HeadLine\n', 'SentimentHeadline', 'Sentiment Headline'),
):
    print(banner)
    for topic in final.Topic.unique():
        topic_rows = final.loc[final['Topic'] == topic]
        plt.plot(topic_rows.groupby('month')[score_col].mean(), label=topic)
    plt.xticks(rotation=60)
    plt.title(plot_title)
    plt.legend()
    plt.show()

In [None]:
# Monthly mean of each platform's popularity column, one line per topic and
# one figure per platform (Facebook, LinkedIn, Google Plus).
for platform_col, plot_title in (
    ('Facebook', 'Facebook'),
    ('LinkedIn', 'LinkedIn'),
    ('GooglePlus', 'Google Plus'),
):
    for topic in data.Topic.unique():
        topic_rows = data.loc[data['Topic'] == topic]
        plt.plot(topic_rows.groupby('month')[platform_col].mean(), label=topic)
    plt.xticks(rotation=60)
    plt.title(plot_title)
    plt.legend()
    plt.show()

In [None]:
# One stacked frame per platform (Obama rows followed by Economy rows).
facebook = pd.concat([fb_obama, fb_economy])

linkedin = pd.concat([li_obama, li_economy])

gplus = [gp_obama, gp_economy]
googleplus = pd.concat(gplus)

In [None]:
# For every platform: how the mean of each time-slice column compares with the
# mean of the TS144 column, drawn separately for the two topics.

def plot_relative_popularity(obama_ts, economy_ts, title):
    """Plot mean(column) / mean(TS144) for every time-slice column.

    Parameters
    ----------
    obama_ts, economy_ts : pandas.DataFrame
        Per-topic tables for one platform. Every column except 'IDLink' and
        'sm_title' is treated as a time slice; both tables must share them.
    title : str
        Figure title.
    """
    slice_cols = [col for col in obama_ts.columns if col not in ('IDLink', 'sm_title')]
    # x runs 1..number of slices, derived from the data rather than hard-coded.
    steps = range(1, len(slice_cols) + 1)
    for label, table in (('obama', obama_ts), ('economy', economy_ts)):
        # NOTE(review): the means include any -1 sentinel values — confirm that is intended.
        ratios = table[slice_cols].mean() / table['TS144'].mean()
        plt.plot(steps, ratios.to_numpy(), label=label)
    plt.title(title)
    plt.legend()
    plt.show()


print('Facebook:\n\n')
plot_relative_popularity(fb_obama, fb_economy, 'Facebook')

print('LinkedIN:\n\n')
plot_relative_popularity(li_obama, li_economy, 'LinkedIn')

print('Google Plus:\n\n')
plot_relative_popularity(gp_obama, gp_economy, 'Google Plus')

In [None]:
# Histograms of the monthly mean title sentiment, split into non-negative and
# negative articles, for every topic / platform combination.

for topic in data.Topic.unique():
    topic_rows = data[data['Topic'] == topic]

    print(topic.upper(), ':')
    for platform in topic_rows.sm.unique():
        print(platform.upper(), ':')
        platform_rows = topic_rows[topic_rows['sm'] == platform]
        temp_pos = platform_rows[platform_rows['SentimentTitle'] >= 0]
        temp_neg = platform_rows[platform_rows['SentimentTitle'] < 0]

        f, ax_arr = plt.subplots(1, 2, figsize=(8, 4), sharex=True)

        for ax, subset, panel_title, hist_kwargs in (
            (ax_arr[0], temp_pos, 'Positive', {'align': 'mid', 'histtype': 'step'}),
            (ax_arr[1], temp_neg, 'Negative', {}),
        ):
            ax.hist(subset.groupby('month')['SentimentTitle'].mean(), **hist_kwargs)
            ax.set_title(panel_title)
            # 'Frequency' belongs on the y axis; it used to be set as an x label
            # and was immediately overwritten by the next call.
            ax.set_ylabel('Frequency')
            ax.set_xlabel('Sentiment Title')

        plt.show()

## Sentiment Score Calculation

In [None]:
final.head(1)

In [None]:
# Work on a copy: the cells below drop rows and add columns on `df`, and a
# plain alias would silently apply all of that to `final` as well.
df = final.copy()

In [None]:
# Reference label for titles: 0 when the dataset's score is negative, else 1.
# The score is compared directly; rounding first turned every value in
# (-0.5, 0) into 0 and labelled those negative titles as positive, which
# disagrees with the `compound < 0` rule used for the VADER/TextBlob labels.
df['Positive'] = np.where(df['SentimentTitle'] < 0, 0, 1)

In [None]:
sentiment = SentimentIntensityAnalyzer()
df.dropna(inplace=True)


def get_sentiment(review):
    """Return 0 when VADER's compound score for `review` is negative, else 1."""
    return 0 if sentiment.polarity_scores(review)['compound'] < 0 else 1


df['vaderTitle'] = df['Title'].apply(get_sentiment)

# Agreement between the dataset-derived label and the VADER label for titles.
accuracy_score(df['Positive'], df['vaderTitle'])

In [None]:
def get_sentiment(review):
    """Return 0 when the first TextBlob sentiment component of `review` is negative, else 1."""
    polarity = TextBlob(review).sentiment[0]
    return 0 if polarity < 0 else 1


df['textblobTitle'] = df['Title'].apply(get_sentiment)
# Agreement between the dataset-derived label and the TextBlob label for titles.
accuracy_score(df['Positive'], df['textblobTitle'])

In [None]:
# Reference label for headlines: 0 when the dataset's score is negative, else 1.
# Compared without rounding; round() mapped every value in (-0.5, 0) to 0 and
# so labelled those negative headlines as positive.
df['Positive_HL'] = np.where(df['SentimentHeadline'] < 0, 0, 1)

In [None]:
df.dropna(inplace=True)


def get_sentiment(review):
    """Return 0 when VADER's compound score for `review` is negative, else 1."""
    return 0 if sentiment.polarity_scores(review)['compound'] < 0 else 1


df['vaderHL'] = df['Headline'].apply(get_sentiment)

# Agreement between the dataset-derived label and the VADER label for headlines.
accuracy_score(df['Positive_HL'], df['vaderHL'])

In [None]:
def get_sentiment(review):
    """Return 0 when the first TextBlob sentiment component of `review` is negative, else 1."""
    polarity = TextBlob(review).sentiment[0]
    return 0 if polarity < 0 else 1


df['textblobHL'] = df['Headline'].apply(get_sentiment)

# Agreement between the dataset-derived label and the TextBlob label for headlines.
accuracy_score(df['Positive_HL'], df['textblobHL'])

In [None]:
df.dropna(inplace=True)


def get_sentiment(review):
    """Return VADER's raw compound score for `review`."""
    return sentiment.polarity_scores(review)['compound']


# Overwrite the earlier 0/1 VADER labels with the raw compound scores.
for text_col, score_col in (('Headline', 'vaderHL'), ('Title', 'vaderTitle')):
    df[score_col] = df[text_col].apply(get_sentiment)

In [None]:
def get_sentiment(review):
    """Return the first component of TextBlob's sentiment for `review`."""
    return TextBlob(review).sentiment[0]


# Overwrite the earlier 0/1 TextBlob labels with the raw scores.
for text_col, score_col in (('Title', 'textblobTitle'), ('Headline', 'textblobHL')):
    df[score_col] = df[text_col].apply(get_sentiment)

## Topic Modelling

In [None]:
# One sub-frame per topic.
df_obama = df.loc[df['Topic'] == 'obama']
df_economy = df.loc[df['Topic'] == 'economy']

In [None]:
# Corpus: lower-cased Obama titles with everything except a-z, apostrophes and
# spaces removed. regex=True is required because pandas >= 2.0 treats the
# pattern as a literal string by default, which leaves the text unchanged.
docs = df_obama.Title.str.lower().str.replace('[^a-z\' ]', '', regex=True)

# The NLTK corpus file id is lower-case; 'English' only resolves on
# case-insensitive file systems.
stopwords = nltk.corpus.stopwords.words('english')
stopwords.extend(['obama','barack','say','call','new'])
stopword_set = set(stopwords)   # constant-time membership checks


def clean_sen(text):
    """Return `text` with stop words removed, split and re-joined on single spaces."""
    words = (word.strip() for word in str(text).split(' '))
    return ' '.join(word for word in words if word not in stopword_set)


docs_clean = docs.apply(clean_sen)

# Merge all cleaned titles into a single string
text = ' '.join(docs_clean)

# Create and show the word cloud
plt.figure(figsize=(10,10))
wc = WordCloud().generate(text)
plt.imshow(wc)

In [None]:
# Corpus: lower-cased Economy titles with everything except a-z, apostrophes
# and spaces removed. regex=True is required because pandas >= 2.0 treats the
# pattern as a literal string by default, which leaves the text unchanged.
docs = df_economy.Title.str.lower().str.replace('[^a-z\' ]', '', regex=True)

# The NLTK corpus file id is lower-case; 'English' only resolves on
# case-insensitive file systems.
stopwords = nltk.corpus.stopwords.words('english')
stopwords.extend(['economy'])
stopword_set = set(stopwords)   # constant-time membership checks


def clean_sen(text):
    """Return `text` with stop words removed, split and re-joined on single spaces."""
    return ' '.join(word for word in str(text).split(' ') if word not in stopword_set)


docs_clean = docs.apply(clean_sen)

# Merge all cleaned titles into a single string
text = ' '.join(docs_clean)

# Create and show the word cloud
plt.figure(figsize=(10,10))
wc = WordCloud().generate(text)
plt.imshow(wc)

In [None]:
# Lower-case titles and headlines and keep only a-z and spaces.
# regex=True is required: pandas >= 2.0 treats the pattern as a literal
# string by default, so nothing would be removed.
docs_title = df['Title'].fillna('').str.lower().str.replace('[^a-z ]', '', regex=True)
docs_headline = df['Headline'].fillna('').str.lower().str.replace('[^a-z ]', '', regex=True)

In [None]:
# Tokenise, drop stop words and stem every title.
# The NLTK corpus file id is lower-case; 'English' only resolves on
# case-insensitive file systems.
stopwords = nltk.corpus.stopwords.words('english')
stemmer = nltk.stem.PorterStemmer()
stopword_set = set(stopwords)   # constant-time membership checks

docs_title_clean = []
for doc in docs_title:
    # split() without an argument skips the empty tokens that repeated spaces
    # produced with split(' ') and that ended up in the dictionary.
    stems = (stemmer.stem(word) for word in doc.split() if word not in stopword_set)
    # Filter again after stemming: a stem can itself be a stop word.
    docs_title_clean.append([stem for stem in stems if stem not in stopword_set])

In [None]:
# Tokenise, drop stop words and stem every headline.
# The NLTK corpus file id is lower-case; 'English' only resolves on
# case-insensitive file systems.
stopwords = nltk.corpus.stopwords.words('english')
stemmer = nltk.stem.PorterStemmer()
stopword_set = set(stopwords)   # constant-time membership checks

docs_headline_clean = []
for doc in docs_headline:
    # split() without an argument skips the empty tokens that repeated spaces
    # produced with split(' ') and that ended up in the dictionary.
    stems = (stemmer.stem(word) for word in doc.split() if word not in stopword_set)
    # Filter again after stemming: a stem can itself be a stop word.
    docs_headline_clean.append([stem for stem in stems if stem not in stopword_set])

In [None]:
# NOTE(review): gensim is already imported in the first cell; this repeat is redundant.
import gensim

# Token-to-id vocabularies, built separately from the cleaned titles and headlines.
dictionary_t = gensim.corpora.Dictionary(docs_title_clean)
dictionary_hl = gensim.corpora.Dictionary(docs_headline_clean)

In [None]:
# Bag-of-words encoding of every cleaned title and headline.
docs_bow_title = [dictionary_t.doc2bow(doc) for doc in docs_title_clean]

# Headlines are encoded with their own dictionary; with the title dictionary
# every headline word that never occurs in a title was silently discarded.
docs_bow_headline = [dictionary_hl.doc2bow(doc) for doc in docs_headline_clean]

In [None]:
# Fit one LDA model per candidate topic count (4..19) on the title corpus and
# record its u_mass coherence.
c_scores = []
for n_topics in range(4, 20):
    lda_model = LdaMulticore(docs_bow_title, id2word=dictionary_t,
                             num_topics=n_topics, random_state=100)
    score = CoherenceModel(lda_model, corpus=docs_bow_title,
                           coherence='u_mass').get_coherence()
    c_scores.append(score)

In [None]:
# Coherence score for each candidate topic count.
l1 = list(range(4, 20))
d = dict(zip(l1, c_scores))   # topic count -> coherence score
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.lineplot(x=l1, y=c_scores)

In [None]:
# Fit the LDA model on all titles with five topics.
# NOTE(review): presumably 5 was read off the coherence plot above — confirm.
lda_model = LdaMulticore(docs_bow_title,id2word=dictionary_t,num_topics=5,random_state=100)

In [None]:
lda_model.get_document_topics(docs_bow_title[0])

In [None]:
# Topic ids of the second title, ordered by ascending probability.
doc_topics = lda_model.get_document_topics(docs_bow_title[1])
topics_df = pd.DataFrame(doc_topics, columns=['Topic', 'Probablity'])
topics_df.sort_values('Probablity')['Topic']

In [None]:
lda_model.print_topics()

In [None]:
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()

In [None]:
nltk.download('wordnet')

In [None]:
# Obama articles only: normalise the text and lemmatise the titles.
temp = df[df['Topic']=='obama']

# regex=True is required: pandas >= 2.0 treats the pattern as a literal
# string by default, so nothing would be removed.
docs_title = temp['Title'].fillna('').str.lower().str.replace('[^a-z ]', '', regex=True)
docs_headline = temp['Headline'].fillna('').str.lower().str.replace('[^a-z ]', '', regex=True)

# The NLTK corpus file id is lower-case; 'English' only resolves on
# case-insensitive file systems. The '' entry removes the empty tokens that
# split(' ') produces for repeated spaces.
stopwords = nltk.corpus.stopwords.words('english')
stopwords.extend(['obama','','barack','say','call','new','obamas','u'])
stopword_set = set(stopwords)   # constant-time membership checks

docs_title_clean = []
for doc in docs_title:
    lemmas = (lemmatizer.lemmatize(word) for word in doc.split(' ') if word not in stopword_set)
    # Filter again after lemmatising: a lemma can itself be a stop word.
    docs_title_clean.append([lemma for lemma in lemmas if lemma not in stopword_set])

In [None]:
# Lemmatise the Obama headlines with the same stop-word list as the titles.
# The NLTK corpus file id is lower-case; 'English' only resolves on
# case-insensitive file systems.
stopwords = nltk.corpus.stopwords.words('english')
stopwords.extend(['obama','','barack','say','call','new','obamas','u'])
stopword_set = set(stopwords)   # constant-time membership checks

docs_headline_clean = []
for doc in docs_headline:
    lemmas = (lemmatizer.lemmatize(word) for word in doc.split(' ') if word not in stopword_set)
    # Filter again after lemmatising: a lemma can itself be a stop word.
    docs_headline_clean.append([lemma for lemma in lemmas if lemma not in stopword_set])

In [None]:
# Vocabularies and bag-of-words encodings for the Obama titles and headlines.
dictionary_t = gensim.corpora.Dictionary(docs_title_clean)
dictionary_hl = gensim.corpora.Dictionary(docs_headline_clean)

docs_bow_title = [dictionary_t.doc2bow(doc) for doc in docs_title_clean]

# Headlines are encoded with their own dictionary; with the title dictionary
# every headline word that never occurs in a title was silently discarded.
docs_bow_headline = [dictionary_hl.doc2bow(doc) for doc in docs_headline_clean]

In [None]:
# Fit one LDA model per candidate topic count (1..9) on the title corpus and
# record its u_mass coherence.
c_scores = []
for n_topics in range(1, 10):
    lda_model = LdaMulticore(docs_bow_title, id2word=dictionary_t,
                             num_topics=n_topics, random_state=100)
    score = CoherenceModel(lda_model, corpus=docs_bow_title,
                           coherence='u_mass').get_coherence()
    c_scores.append(score)

In [None]:
# Coherence score for each candidate topic count.
l1 = list(range(1, 10))
d = dict(zip(l1, c_scores))   # topic count -> coherence score
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.lineplot(x=l1, y=c_scores)

In [None]:
# Fit the LDA model on the current title corpus with two topics.
# NOTE(review): presumably 2 was read off the coherence plot above — confirm.
lda_model = LdaMulticore(docs_bow_title,id2word=dictionary_t,num_topics=2,random_state=100)

In [None]:
lda_model.get_document_topics(docs_bow_title[0])

In [None]:
# Topic ids of the second title, ordered by ascending probability.
doc_topics = lda_model.get_document_topics(docs_bow_title[1])
topics_df = pd.DataFrame(doc_topics, columns=['Topic', 'Probablity'])
topics_df.sort_values('Probablity')['Topic']

In [None]:
lda_model.print_topics()

### Topic modeling for Economy

In [None]:
# Economy articles only: normalise the text and lemmatise the titles.
temp_eco = df[df['Topic']=='economy']

# regex=True is required: pandas >= 2.0 treats the pattern as a literal
# string by default, so nothing would be removed.
docs_title = temp_eco['Title'].fillna('').str.lower().str.replace('[^a-z ]', '', regex=True)
docs_headline = temp_eco['Headline'].fillna('').str.lower().str.replace('[^a-z ]', '', regex=True)

# The NLTK corpus file id is lower-case; 'English' only resolves on
# case-insensitive file systems. The '' entry removes the empty tokens that
# split(' ') produces for repeated spaces.
stopwords = nltk.corpus.stopwords.words('english')
stopwords.extend(['economy','us','say','would','could','new',''])
stopword_set = set(stopwords)   # constant-time membership checks

docs_title_clean = []
for doc in docs_title:
    lemmas = (lemmatizer.lemmatize(word) for word in doc.split(' ') if word not in stopword_set)
    # Filter again after lemmatising: a lemma can itself be a stop word.
    docs_title_clean.append([lemma for lemma in lemmas if lemma not in stopword_set])

In [None]:
# Lemmatise the Economy headlines with the same stop-word list as the titles.
# The NLTK corpus file id is lower-case; 'English' only resolves on
# case-insensitive file systems.
stopwords = nltk.corpus.stopwords.words('english')
stopwords.extend(['economy','us','say','would','could','new',''])
stopword_set = set(stopwords)   # constant-time membership checks

docs_headline_clean = []
for doc in docs_headline:
    lemmas = (lemmatizer.lemmatize(word) for word in doc.split(' ') if word not in stopword_set)
    # Filter again after lemmatising: a lemma can itself be a stop word.
    docs_headline_clean.append([lemma for lemma in lemmas if lemma not in stopword_set])

In [None]:
# Vocabularies and bag-of-words encodings for the Economy titles and headlines.
dictionary_e = gensim.corpora.Dictionary(docs_title_clean)
dictionary_eco = gensim.corpora.Dictionary(docs_headline_clean)

docs_bow_title = [dictionary_e.doc2bow(doc) for doc in docs_title_clean]

# Headlines are encoded with their own dictionary (dictionary_eco); with the
# title dictionary every headline word that never occurs in a title was
# silently discarded.
docs_bow_headline = [dictionary_eco.doc2bow(doc) for doc in docs_headline_clean]

In [None]:
# Fit one LDA model per candidate topic count (1..9) on the title corpus and
# record its u_mass coherence.
c_scores = []
for n_topics in range(1, 10):
    lda_model = LdaMulticore(docs_bow_title, id2word=dictionary_e,
                             num_topics=n_topics, random_state=100)
    score = CoherenceModel(lda_model, corpus=docs_bow_title,
                           coherence='u_mass').get_coherence()
    c_scores.append(score)

In [None]:
# Coherence score for each candidate topic count.
l1 = list(range(1, 10))
d = dict(zip(l1, c_scores))   # topic count -> coherence score
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.lineplot(x=l1, y=c_scores)

In [None]:
# Fit the LDA model on the current title corpus with five topics.
# NOTE(review): presumably 5 was read off the coherence plot above — confirm.
lda_model = LdaMulticore(docs_bow_title,id2word=dictionary_e,num_topics=5,random_state=100)

In [None]:
lda_model.get_document_topics(docs_bow_title[0])

In [None]:
# Topic distribution of the second title, ordered by ascending probability.
doc_topics = lda_model.get_document_topics(docs_bow_title[1])
topics_df = pd.DataFrame(doc_topics, columns=['Topic', 'Probablity'])
topics_df.sort_values('Probablity')

In [None]:
lda_model.print_topics()

In [None]:
 df.head(2)

In [None]:
# Dataset headline sentiment against the TextBlob headline score.
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.scatterplot(x=df.SentimentHeadline, y=df.textblobHL)

In [None]:
# Dataset headline sentiment against the VADER headline score.
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.scatterplot(x=df.SentimentHeadline, y=df.vaderHL)

In [None]:
# Headline sentiment against each platform's popularity column, one figure per platform.
for platform_col, plot_title in (
    ('Facebook', 'Facebook'),
    ('LinkedIn', 'LinkedIn'),
    ('GooglePlus', 'Google Plus'),
):
    plt.scatter(df.SentimentHeadline, df[platform_col])
    plt.title(plot_title)
    plt.xlabel('Sentiment Headline')
    plt.ylabel('Popularity')
    plt.show()