In [60]:
#Handles the web scrapping
import requests
from bs4 import BeautifulSoup
import re
import time
import os
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
chromedriver = "/Applications/chromedriver"
os.environ["webdriver.chrome.driver"] = chromedriver
import pickle

#Handles stats and data management
import numpy as np
import pandas as pd
import sklearn
import sys
import random
import matplotlib.pyplot as plt
import math
import scipy
import json
%matplotlib inline
plt.style.use('seaborn')
#NLP
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize
from nltk.stem import porter

## Scrapping articles from the Huffington Post

In [None]:
#Used to request first page of Huffington Post politics section
url = 'https://www.huffingtonpost.com/section/politics'
response = requests.get(url)
response.status_code

In [None]:
#Beautiful Soups the url
page = response.text
soup = BeautifulSoup(page, "lxml")

In [None]:
#Creates a list of articles
article_link = []
for article in soup.find_all('a', class_="card__link yr-card-headline"):
    link = 'https://www.huffingtonpost.com' + article['href']
    article_link.append(link)
    #article_link = article_link[5:]
    print(article_link)

In [None]:
#Function to cycle through page with links
current_url = 'https://www.huffingtonpost.com/section/politics'
next_url    = 'https://www.huffingtonpost.com/section/politics?page=2'

def url_generator(page_num):
    return 'https://www.huffingtonpost.com/section/politics?page={}'.format(page_num)

In [None]:
def get_article(url):
    '''Function that will get each url'''
    html = requests.get(url).text
    return html

def parse_article(html):
    '''Will find the title, sub title, and main text of Huffington Post articles'''
    soup = BeautifulSoup(html, 'lxml')
    title = soup.find('h1', class_='headline__title').text
    sub_title = soup.find('div', class_='headline__subtitle').text
    body = soup.find('div', class_='entry__text js-entry-text yr-entry-text').text

    article = {
        'title': title,
        'sub_title': sub_title,
        'body': body,
    }
    
    return article

def get_parsed_article_from_link(url):
    '''Runs the parse article function on each url'''
    return parse_article(get_article(url))

In [None]:
#Selenium will start at the current url and iterate through the following pages
driver = webdriver.Chrome(chromedriver)
driver.get(current_url)
time.sleep(1)
pages = [2, 3, 4, 5]
more_articles = []
for x in pages:
    driver.get(url_generator(x))
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source, 'lxml')
    for article in soup.find_all('a', class_="card__link yr-card-headline"):
        link = 'https://www.huffingtonpost.com' + article['href']
        more_articles.append(link)

In [None]:
#First pass tries to use requests
list_o_articles = []
problem_articles = []
for text in article_link[5:-12]:
    #print(text)
    try:
        art = get_parsed_article_from_link(text.encode())
        print(art)
        list_o_articles.append(art)
    
    except:
        print("Problem processing url " + text)
        problem = text
        problem_articles.append(problem)
    time.sleep(3)

In [None]:
#Selenium gathers articles that were passed into the problem articles list
driver = webdriver.Chrome(chromedriver)
for x in problem_articles:
    driver.get(x)
    time.sleep(3)
    soupy = BeautifulSoup(driver.page_source, 'lxml')
    title = soupy.find('h1', class_='headline__title').text
    sub_title = soupy.find('div', class_='headline__subtitle').text
    body = soupy.find('div', class_='entry__text js-entry-text yr-entry-text').text

    articley = {
        'title': title,
        'sub_title': sub_title,
        'body': body,
    }

    problem_articles.append(articley)

In [None]:
#First pass at expanded list of articles
more_list_o_articles = []
more_problem_articles = []
for text in more_articles:
    #print(text)
    try:
        art = get_parsed_article_from_link(text.encode())
        print(art)
        more_list_o_articles.append(art)
    
    except:
        print("Problem processing url " + text)
        problem = text
        more_problem_articles.append(problem)
    time.sleep(3)

In [None]:
#Selenium takes a second pass at rejected request articles
driver = webdriver.Chrome(chromedriver)
for x in more_problem_articles:
    driver.get(x)
    time.sleep(3)
    soupy = BeautifulSoup(driver.page_source, 'lxml')
    title = soupy.find('h1', class_='headline__title').text
    sub_title = soupy.find('div', class_='headline__subtitle').text
    body = soupy.find('div', class_='entry__text js-entry-text yr-entry-text').text

    articley = {
        'title': title,
        'sub_title': sub_title,
        'body': body,
    }

    more_list_o_articles.append(articley)

In [None]:
#Save lists as pickle files
with open('list_o_articles.pkl', 'wb') as picklefile:
        pickle.dump(list_o_articles, picklefile)
with open('more_list_o_articles.pkl', 'wb') as picklefile:
        pickle.dump(more_list_o_articles, picklefile)

## Scrapping articles from Reuters

In [None]:
#Requests Reuters politics section
url_rueters = 'https://www.reuters.com/politics'
response = requests.get(url_rueters)
response.status_code

In [None]:
def get_case_links_from_rueters(html):
    """
    Function scrolls through Reuters politics section for more links
    """
    driver.get(html)
    time.sleep(3)

    SCROLL_PAUSE_TIME = 0.5
    # Get scroll height
    last_height = driver.execute_script("return document.body.scrollHeight")
    i = 0
    while i < 11:
    # Scroll down to bottom
        soup = BeautifulSoup(driver.page_source, 'lxml')
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    # Wait to load page
        time.sleep(SCROLL_PAUSE_TIME)
        i += 1
    return list(set(article['href'] for article in soup.find_all('a')))

In [None]:
#Gets links from Reuters politics section
driver = webdriver.Chrome(chromedriver)
rueters_politics = get_case_links_from_rueters(url_rueters)

In [None]:
#Cleans the list of articles from Reuters politics section
links = []
regex = re.compile("https:\/\/www.reuters.com\/article\/[^']+")
linkstr = re.findall(regex, str(rueters_politics))
links.append(linkstr)
print(linkstr)

In [None]:
#Gets rid of the list
links_ = []
for linklist in links:
    for link in linklist:
        links_.append(link)
for ln in links_:
    print(ln)

In [None]:
def get_rueters_article(url):
    '''Makes requests for Reuters articles url'''
    page = requests.get(url)
    html = page.text
    return html

def parse_rueters_article(html):
    '''Takes the title and text from the url'''
    soup = BeautifulSoup(html, 'lxml')
    title = soup.find('h1', class_='headline_2zdFM').text
    body = soup.find('div', class_='body_1gnLA').text

    article = {
        'title': title,
        'sub_title': sub_title,
        'body': body,
    }
    
    return article

def get_parsed_article_from_link(url):
    '''Works the parse article function on each url'''
    return parse_rueters_article(get_rueters_article(url))

In [None]:
#First pass on Reuters risk of articles
rueters_list_o_articles = []
rueters_problem_articles = []
for text in links_:
    try:
        art = get_parsed_article_from_link(text.encode())
        print(art)
        rueters_list_o_articles.append(art)
    
    except:
        print("Problem processing url " + text)
        problem = text
        rueters_problem_articles.append(problem)
    time.sleep(3)

In [None]:
#Selenium takes a second pass at Reuters articles
driver = webdriver.Chrome(chromedriver)
for x in links_:
    driver.get(x)
    time.sleep(3)
    soupy = BeautifulSoup(driver.page_source, 'lxml')
    title = soupy.find('h1', class_='headline_2zdFM').text
    sub_title = 'That percentage has barely budged since last y.'
    body = soupy.find('div', class_='body_1gnLA').text

    articley = {
        'title': title,
        'sub_title': sub_title,
        'body': body,
    }
    print(articley)
    rueters_list_o_articles.append(articley)

In [None]:
#Saves list as a pickle file
with open('rueters_list_o_articles.pkl', 'wb') as picklefile:
        pickle.dump(rueters_list_o_articles, picklefile)

## Scrapping articles from Breitbart

In [None]:
#Gets the politics section of Breitbart
url_breitbart = 'http://www.breitbart.com/big-government/'
response = requests.get(url_breitbart)
response.status_code

In [None]:
def get_case_links_from_html(html):
    """
    Beautiful Soup function for getting links from page
    """
    soup = BeautifulSoup(html, 'lxml')
    return list(set(article['href'] for article in soup.find_all('a', class_="tumbnail-url")))

def get_case_links_from_page(page_num, driver):
    '''Used to iterate through Breitbart pages'''
    url = url_generator(page_num)
    driver.get(url)
    time.sleep(1)
    html = driver.page_source
    return get_case_links_from_html(html)

In [None]:
#Starts at the first politics page and creates function to continue on
current_url = 'http://www.breitbart.com/big-government/'
next_url    = 'http://www.breitbart.com/big-government/page/2/'

def url_generator(page_num):
    return 'http://www.breitbart.com/big-government/page/{}/'.format(page_num)

In [None]:
#Gets list of links from Breitbart politics page
driver = webdriver.Chrome(chromedriver)
driver.get(current_url)
time.sleep(1)
pages = [2, 3, 4, 5]
b_article = []
soup = BeautifulSoup(driver.page_source, 'lxml')
really_b_articles = []
for article in soup.find_all('a', class_="thumbnail-url"):
    link = article['href']
    really_b_articles.append(link)
    #article_link = article_link[5:]
for x in pages:
    driver.get(url_generator(x))
    time.sleep(3)
    soup = BeautifulSoup(driver.page_source, 'lxml')
    for article in soup.find_all('a', class_="thumbnail-url"):
        link = article['href']
        b_article.append(link)

In [None]:
def get_b_article(url):
    '''Gets text from url'''
    page = requests.get(url)
    html = page.text
    return html

def parse_b_article(html):
    '''Gets title and main text from Breitbart page'''
    soup = BeautifulSoup(html, 'lxml')
    title = soup.find('h1').text
    body = soup.find('div', class_='entry-content').text

    article = {
        'title': title,
        'sub_title': sub_title,
        'body': body,
    }
    
    return article

def get_parsed_article_from_link(url):
    '''Runs function on each url'''
    return parse_b_article(get_b_article(url))

In [None]:
#First pass at Breitbart articles
h_list_o_articles = []
h_problem_articles = []
for text in b_article:
    try:
        art = get_parsed_article_from_link(text.encode())
        print(art)
        h_list_o_articles.append(art)
    
    except:
        print("Problem processing url " + text)
        problem = text
        h_problem_articles.append(problem)
    time.sleep(3)

In [None]:
#Fixes url issues
for x in really_b_articles[:15]:
    y = 'http://www.breitbart.com' + x
    really_b_articles.append(y)

In [None]:
with open('h_list_o_articles.pkl', 'wb') as picklefile:
        pickle.dump(h_list_o_articles, picklefile)

In [None]:
#First pass at other pages of Breitbart articles
really_b_list_o_articles = []
really_b_problem_articles = []
for text in really_b_articles:
    try:
        art = get_parsed_article_from_link(text.encode())
        print(art)
        really_b_list_o_articles.append(art)
    
    except:
        print("Problem processing url " + text)
        problem = text
        really_b_problem_articles.append(problem)
    time.sleep(3)

In [None]:
#Saves the pickle file
with open('really_b_list_o_articles.pkl', 'wb') as picklefile:
        pickle.dump(really_b_list_o_articles, picklefile)

## Loading into MongoDB

In [13]:
#Creates a data frame for Huffington Post articles
huffpo1 = pd.DataFrame.from_dict(list_o_articles)
huffpo2 = pd.DataFrame.from_dict(more_list_o_articles)
huffpo_df = pd.concat([huffpo1,huffpo2])

In [14]:
#As a numerical category and string source
huffpo_df['topic'] = 1
huffpo_df['source'] = 'Huffington Post'

In [15]:
#Creates a data frame for Reuters articles
rueters = pd.DataFrame.from_dict(rueters_list_o_articles)
rueters.head()

Unnamed: 0,body,sub_title,title
0,SEOUL (Reuters) - South Korea’s presidential B...,That percentage has barely budged since last y.,South Korea says release of U.S. prisoners 'po...
1,WASHINGTON (Reuters) - President Donald Trump’...,That percentage has barely budged since last y.,Trump pick for State Department energy job app...
2,MOSCOW (Reuters) - Russian President Vladimir ...,That percentage has barely budged since last y.,Russia's Putin deeply concerned at USA leaving...
3,WASHINGTON (Reuters) - U.S. President Donald T...,That percentage has barely budged since last y.,Trump says he will propose new tax cuts prior ...
4,"SINGAPORE (Reuters) - Sinopec, Asia’s largest ...",That percentage has barely budged since last y.,China's Sinopec to boost U.S. crude imports to...


In [16]:
#As a numerical category and string source
rueters['topic'] = 2
rueters['source'] = 'Rueters'

In [18]:
#Creates a data frame for Breitbart articles
breitbart1 = pd.DataFrame.from_dict(really_b_list_o_articles)
breitbart2 = pd.DataFrame.from_dict(h_list_o_articles)
breitbart_df = pd.concat([breitbart1,breitbart2])

In [19]:
#As a numerical category and string source
breitbart_df['topic'] = 3
breitbart_df['source'] = 'Breitbart'

In [20]:
#Combines all parts into one data frame
news_bias_df= pd.concat([huffpo_df, rueters, breitbart_df])
news_bias_df = news_bias_df.reset_index(drop=True)

In [22]:
#Launches Mongo Client
from pymongo import MongoClient

client = MongoClient()
news_bias = client.project4.news_bias

In [23]:
#See where in mongo to place the articles
client.database_names()

['admin', 'catalog', 'companies', 'config', 'events', 'local']

In [24]:
#Create news bias events
db = client.events
news_bias = db.news_bias

In [None]:
#Loads data frame into Mongo
news_articles = json.loads(news_bias_df.T.to_json()).values()
news_bias.insert(news_articles)

In [None]:
#Sanity check
news_bias.count()

In [37]:
#Lists articles that are duplicates
list(news_bias.aggregate([{'$group' : {'_id': '$title', 'count': {'$sum': 1}}},
    {'$match': {'count': {'$gte': 2}}},
    ]))

[{'_id': 'Unions Give $1.3 Billion in Member Dues to Left-Wing Groups',
  'count': 2},
 {'_id': 'Report: Rank-and-File FBI Agents Eager to Blow Whistle on Comey, Holder, Lynch',
  'count': 2},
 {'_id': 'WV Senate Watch: Joe Manchin Internal Poll Claims Lead over Morrisey',
  'count': 2},
 {'_id': 'Pompeo Says Trump Will ‘Walk Away’ if North Korea Deal Fails: ‘A Bad Deal Is Not an Option’',
  'count': 2},
 {'_id': 'Pentagon Disinvites China from Naval Exercise for Militarizing the South China Sea',
  'count': 2},
 {'_id': 'Judge Rules President Donald Trump Cannot Block Twitter Trolls',
  'count': 2},
 {'_id': 'Muslim Republican Candidate Omar Qudrat: ‘I Am Against Sharia Law’',
  'count': 2},
 {'_id': 'Dave Grohl Wants To Apologize To The World For ‘Massive Jerk’ Trump',
  'count': 4},
 {'_id': 'Delingpole: Global Warming Has Rotted the Brains of the Political Class',
  'count': 2},
 {'_id': 'NFL Bans Kneeling During The National Anthem', 'count': 4},
 {'_id': 'Philip Roth Once Torched

In [32]:
#Create cursor
cursor = news_bias.find()

In [33]:
#Loads from Mongo
news_bias_df = pd.DataFrame(list(cursor))

In [35]:
#check the data frame
news_bias_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 605 entries, 0 to 604
Data columns (total 6 columns):
_id          605 non-null object
body         605 non-null object
source       605 non-null object
sub_title    605 non-null object
title        605 non-null object
topic        605 non-null int64
dtypes: int64(1), object(5)
memory usage: 28.4+ KB


## Dropping redundant articles

In [36]:
#Get a list of duplicate titles
news_bias_df.title.value_counts()

Dave Grohl Wants To Apologize To The World For ‘Massive Jerk’ Trump                                                 4
When You Should Replace Your Bras, According To Lingerie Experts                                                    4
Emilia Clarke Introduced Herself To Prince William In A Pretty Embarrassing Way                                     4
Bad Lip Reading Reveals What Was Really Said At The Royal Wedding                                                   4
Michelle Obama Reflects On ‘Scary’ Time At Princeton With Sweet Throwback Snap                                      4
NFL Bans Kneeling During The National Anthem                                                                        4
Philip Roth Once Torched Donald Trump In The Most Literary Way                                                      4
Trevor Noah Reveals The Question He’d Ask Donald Trump That No Else Has Dared To                                    4
Robert De Niro Bans Trump From Every Nobu Restaurant    

In [None]:
#See what cases the article appears in
huh = news_bias_df['title'] == "Delingpole: Global Warming Has Rotted the Brains of the Political Class"
wha = news_bias_dff[huh]
wha

In [None]:
#Drop the redundant cases and repeate step 1
label = [597]
news_bias_df = news_bias_df.drop(labels=label,axis=0)

In [None]:
#Save the pickle file
with open('news_bias_df.pkl', 'wb') as picklefile:
        pickle.dump(news_bias_df, picklefile)

## Fitting a model

In [None]:
#Readies for stop words and stem words
stemmer = porter.PorterStemmer()
stopwords = stopwords.words()

In [None]:
def clean_text(text):
    '''Removes stop words and changes word to stem words'''
    cleaned_text = []
    for post in text:
        cleaned_words = []
        for word in post.split():
            low_word = stemmer.stem(word.lower())
            if low_word not in stopwords:
                cleaned_words.append(low_word)
        cleaned_text.append(' '.join(cleaned_words))
    return cleaned_text

In [None]:
main_body = news_bias_df.body

In [None]:
#Cleans the text
cleaned_text = clean_text(main_body)

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
#Create a matrix of word counts
count_vectorizer2 = CountVectorizer(ngram_range=(1, 4),  
                                   stop_words='english', 
                                   token_pattern="\\b[a-z][a-z]+\\b",
                                   lowercase=True,
                                   max_df = 0.6)

X = count_vectorizer2.fit_transform(cleaned_text)

In [None]:
from sklearn.decomposition import LatentDirichletAllocation
#Fit an LDA model
n_topics = 3
n_iter = 10
lda = LatentDirichletAllocation(n_topics=n_topics,
                                max_iter=n_iter,
                                random_state=42,
                               learning_method='online')
X_centered_projected = lda.fit_transform(X)


In [None]:
def display_topics(model, feature_names, no_top_words):
    '''Creates a list of words in each topics'''
    for ix, topic in enumerate(model.components_):
        print("Topic ", ix+1)
        print(" ".join([feature_names[i]
                        for i in topic.argsort()[:-no_top_words - 1:-1]]))
        
display_topics(lda,count_vectorizer2.get_feature_names(),120)

In [None]:
from sklearn.metrics import silhouette_score
from sklearn.cluster import KMeans

#Creates a list of silhouette scores for KMeans
SSEs = []
Sil_coefs = []
for k in range(2,15):
    km = KMeans(n_clusters=k, random_state=1)
    km.fit(X_centered_projected)
    labels = km.labels_
    Sil_coefs.append(silhouette_score(X_centered_projected, labels, metric='euclidean'))
    SSEs.append(km.inertia_) 

In [None]:
#Graphs to show the silhouette scores
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(15,5), sharex=True, dpi=200)
k_clusters = range(2,15)
ax1.plot(k_clusters, Sil_coefs)
ax1.set_xlabel('number of clusters')
ax1.set_ylabel('silhouette coefficient')

ax2.plot(k_clusters, SSEs)
ax2.set_xlabel('number of clusters')
ax2.set_ylabel('SSE');

In [None]:
#Fit KMeans
km = KMeans(n_clusters=3)
clusts = km.fit_predict(X_centered_projected)

In [None]:
#Plot two of the dimenisons
plt.scatter(X_centered_projected[:, 0], X_centered_projected[:, 1], c=clusts, cmap=plt.cm.rainbow, alpha=0.25)


In [None]:
from sklearn.manifold import TSNE
#Fits t-SNE for visualization
tsne = TSNE(n_components=2)
X_transformed = tsne.fit_transform(X_centered_projected)

In [None]:
#Graphs t-SNE
plt.figure(dpi=100)

cmap = plt.cm.get_cmap('rainbow', 10)
plt.scatter(X_transformed[:, 0], X_transformed[:,1], c=clusts, cmap=cmap, alpha = 0.15)
#plt.xlim(-20,30)
#plt.ylim(-30,-10)
plt.colorbar()
plt.clim()

In [None]:
#Adds a column for all of the cluster
news_bias_df['topic_model'] = clusts

In [None]:
#Check the number of cases in each cluster
news_bias_df.topic_model.value_counts()

In [None]:
#Creates a data frame for quick reference
reference = news_bias_df[['source','topic_model']].pivot_table(index='source', columns='topic_model', aggfunc=len)

## Calculating the scores

In [39]:
reference

topic_model,0,1,2
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Breitbart,48,79,53
Huffington Post,33,56,62
Rueters,120,89,10


In [45]:
#Creates a list of each publications articles in each topic
Breitbart_0 = 48
Breitbart_1 = 79
Breitbart_2 = 53
Huffington_Post_0 = 33
Huffington_Post_1 = 56
Huffington_Post_2 = 62
Rueters_0 = 120
Rueters_1 = 89
Rueters_2 = 10

In [47]:
#Creates the totals
Breitbart = (Breitbart_0 + Breitbart_1 + Breitbart_2)
Huffington_Post = (Huffington_Post_0 + Huffington_Post_1 + Huffington_Post_2)
Rueters = (Rueters_0 + Rueters_1 + Rueters_2)
topic_0 = (Breitbart_0 + Huffington_Post_0 + Rueters_0)
topic_1 = (Breitbart_1 + Huffington_Post_1 + Rueters_1)
topic_2 = (Breitbart_2 + Huffington_Post_2 + Rueters_2)
print("Total Breitbart articles:", Breitbart)
print("Total Huffington Post articles:", Huffington_Post)
print("Total Rueters articles:", Rueters)
print("Total topic 0 articles:", topic_0)
print("Total topic 1 articles:", topic_1)
print("Total topic 2 articles:", topic_2)

Total Breitbart articles: 180
Total Huffington Post articles: 151
Total Rueters articles: 219
Total topic 0 articles: 201
Total topic 1 articles: 224
Total topic 2 articles: 125


In [48]:
#Creates the sum of scores to convert all scores to equal 1
Breitbart_sum = (((Breitbart_0 / Breitbart) * (Breitbart_0 / topic_0)) + 
                 ((Breitbart_1 / Breitbart) * (Breitbart_1 / topic_1)) + ((Breitbart_2 / Breitbart) * (Breitbart_2 / topic_2)))
Huffington_Post_sum =(((Huffington_Post_0 / Huffington_Post) * (Huffington_Post_0 / topic_0)) + 
                      ((Huffington_Post_1 / Huffington_Post) * (Huffington_Post_1 / topic_1)) + 
                      ((Huffington_Post_2 / Huffington_Post) * (Huffington_Post_2 / topic_2)))
Rueters_sum = (((Rueters_0 / Rueters) * (Rueters_0 / topic_0)) + 
                ((Rueters_1 / Rueters) * (Rueters_1 / topic_1)) + 
                ((Rueters_2 / Rueters) * (Rueters_2 / topic_2)))
print("Breitbart sum of scores", Breitbart_sum)
print("Huffington Post sum of scores", Huffington_Post_sum)
print("Rueters sum of scores", Rueters_sum)


Breitbart sum of scores 0.34331274283345176
Huffington Post sum of scores 0.3322510625679549
Rueters sum of scores 0.49225295976088246


In [55]:
print("Huffpo left wing score:", round((Huffington_Post_2 / Huffington_Post) *  (Huffington_Post_2 / topic_2) / Huffington_Post_sum, 2))
print("Huffpo right wing score:", round((Huffington_Post_1 / Huffington_Post)  * (Huffington_Post_1 / topic_1) / Huffington_Post_sum, 2))
print("Huffpo center score:", round((Huffington_Post_0 / Huffington_Post) * (Huffington_Post_0 / topic_0) / Huffington_Post_sum, 2))

Huffpo left wing score: 0.61
Huffpo center score: 0.28
Huffpo right wing score: 0.11


In [57]:
print("Breitbart left wing score:", round((Breitbart_2 / Breitbart) *  (Breitbart_2 / topic_2) / Breitbart_sum, 2))
print("Breitbart right wing score:", round((Breitbart_1 / Breitbart) *  (Breitbart_1 / topic_1) / Breitbart_sum, 2))
print("Breitbart center score:", round((Breitbart_0 / Breitbart) *  (Breitbart_0 / topic_0) / Breitbart_sum, 2))

Breitbart left wing score: 0.36
Breitbart right wing score: 0.45
Breitbart center score: 0.19


In [58]:
print("Rueters left wing score:", round((Rueters_2 / Rueters) *  (Rueters_2 / topic_2) / Rueters_sum, 2))
print("Rueters right wing score:", round((Rueters_1 / Rueters) *  (Rueters_1 / topic_1) / Rueters_sum, 2))
print("Rueters center score:", round((Rueters_0 / Rueters) *  (Rueters_0 / topic_0) / Rueters_sum, 2))

Rueters left wing score: 0.01
Rueters right wing score: 0.33
Rueters center score: 0.66
