# Organizational Bias Exploration 
#### Pulling data from MediaBiasFactCheck.com and AllSides.com

Matthew Fishman, Practicum Milestone 1

### Get/Clean Data

In [1]:
import json, pandas as pd
from pandas.io.json import json_normalize

# AllSides data: one row per rated source, indexed by source name.
dfAllSides = (
    pd.read_json('AllSidesALL.json')
    .set_index('name')
    # Vectorized column sum; same result as a row-wise apply without a Python call per row.
    .assign(total_votes=lambda frame: frame['agree'] + frame['disagree'])
    [['bias', 'confidence', 'agree_ratio', 'total_votes', 'agreeance_text']]  # website_url omitted
    .rename(columns={'agreeance_text': 'community_response'})
)


# MediaBiasFactCheck data: transposed so that 'name', 'bias', ... become columns.
dfMBFC = pd.read_json('MBFCsourcesALL.json').T.set_index('name')
# Keep only sources that actually carry a bias label.
dfMBFC = dfMBFC[dfMBFC.bias != '']
dfMBFC = (
    dfMBFC[['bias', 'homepage', 'facebook_url']]
    .rename(columns={'homepage': 'website_url'})
)

### Normalize Data

In [None]:
# Figure out possible bias labels in each source.
# Both sets are collected into one dict because a cell only displays its last expression.
bias_labels = {
    # recorded: {'allsides', 'center', 'lean left', 'lean right', 'left', 'mixed', 'right'}
    'AllSides': set(dfAllSides.bias),
    # recorded: {'center', 'conspiracy', 'fake-news', 'left', 'left-center', 'pro-science',
    #            'right', 'right-center', 'satire'}
    'MBFC': set(dfMBFC.bias),
}
bias_labels


In [4]:
# Drop 'mixed'/'allsides' sites that give perspectives from both sides of the political spectrum
dfAllSides = dfAllSides[~dfAllSides.bias.isin(['mixed', 'allsides'])]

# Put the AllSides labels on the same scale as MBFC ('left-center' / 'right-center').
# NOTE(review): the recorded output of this cell already shows these labels for AllSides,
# so this mapping was presumably applied in a cell that is no longer in the notebook — confirm.
# It is a no-op when the labels are already normalized.
dfAllSides = dfAllSides.assign(
    bias=dfAllSides.bias.replace({'lean left': 'left-center', 'lean right': 'right-center'})
)


# Drop non-political scale sources (about 900/2500)
dfMBFC = dfMBFC[~dfMBFC.bias.isin(['conspiracy', 'fake-news', 'pro-science', 'satire'])]

# Remove the four duplicates (the first entry for each source name is kept)
dfMBFC = dfMBFC[~dfMBFC.index.duplicated(keep='first')]

# Preview only; avoids rendering the whole frame in the notebook
dfAllSides.head(10)

Unnamed: 0_level_0,bias,confidence,agree_ratio,total_votes,community_response
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ABC News,left-center,High,1.347916,18248,somewhat agrees
Al Cardenas,right,Not Available,1.606635,550,agrees
Al Jazeera,center,Low or Initial Rating,0.675543,7173,somewhat disagrees
Allysia Finley (Wall Street Journal),right,Not Available,0.927007,792,somewhat disagrees
AlterNet,left,High,2.693215,2504,strongly agrees
American Spectator,right,Medium,2.364276,7647,strongly agrees
American Thinker,right,Medium,1.818841,1945,agrees
Andrew Napolitano,right,Not Available,1.398104,1012,somewhat agrees
Andrew Sullivan,right-center,Not Available,1.750000,33,agrees
Ann Coulter,right,Medium,3.933852,1268,absolutely agrees


In [None]:
# Get total ratings for AllSides: shape is (rows, columns), one row per rated source
dfAllSides.shape

In [None]:
# Get total ratings for MediaBiasFactCheck: shape is (rows, columns), one row per rated source
dfMBFC.shape

## Data Visualizations

In [None]:
# Get the agreed-upon ratings: sources that both sites list under the same name
# AND with the same bias label.
# 'name' is the index of both frames, so it is turned back into a column to act as a join
# key. Merging on the shared 'bias' column alone would pair every source with every other
# source of the same bias and lose the names.
dfMerged = pd.merge(
    dfAllSides.reset_index(),
    dfMBFC.reset_index(),
    on=['name', 'bias'],
    how='inner',
).drop_duplicates()

# (rows, columns); the row count is the number of agreed-upon ratings
dfMerged.shape

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white")

# Get most popular agreed-upon news sources.
# NOTE(review): 93 was presumably tuned so that about 25 sources remain (see the plot
# title) — confirm, or select the top rows explicitly.
MIN_TOTAL_VOTES = 93
# .copy() makes this an independent frame, so adding columns to it later does not
# trigger SettingWithCopyWarning.
dfMergedPop = dfMerged.loc[dfMerged['total_votes'] > MIN_TOTAL_VOTES].copy()

# Graph them.
# NOTE(review): no visible cell creates a 'Popularity' column on dfMerged — confirm where
# it is supposed to come from before relying on this plot.
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
BIAS_ORDER = ['left', 'left-center', 'center', 'right-center', 'right']
ax = sns.stripplot(x='bias', y='Popularity', data=dfMergedPop,
                   order=BIAS_ORDER,
                   palette=sns.diverging_palette(258, 12, n=5), edgecolor='black', linewidth=.5)
ax.set_title('Top 25 Media Sources with Confirmed Bias')
ax.set_xlabel('Political Bias')
ax.set_ylabel('Popularity')

#Label them
def label_point(category, y, val, ax):
    """Write a text label next to every plotted point.

    The three inputs are combined column-wise with ``pd.concat`` (so they are
    aligned on their index) and one label is drawn per resulting row.

    Parameters
    ----------
    category : x coordinate of each point.
    y : y coordinate of each point.
    val : value whose ``str()`` form is used as the label text.
    ax : object with a ``text(x, y, s)`` method, e.g. a matplotlib Axes.

    Each label is placed 0.02 to the right of its point. The vertical offset
    alternates between +0.1 and -0.1 from row to row, starting with +0.1.
    Returns None.
    """
    points = pd.concat({'x': category, 'y': y, 'val': val}, axis=1)
    for row_number, (_, row) in enumerate(points.iterrows()):
        # even rows go above the point, odd rows below
        vertical_offset = .1 if row_number % 2 == 0 else -.1
        ax.text(row['x'] + .02, row['y'] + vertical_offset, str(row['val']))


# x position of each label: the category's slot on the x axis (0-4, in plotting order)
# plus 0.1 so the text starts to the right of its marker.
LABEL_X_BY_BIAS = {
    "left": 0.1,
    "left-center": 1.1,
    "center": 2.1,
    "right-center": 3.1,
    "right": 4.1,
}
# assign() returns a new frame, so no value is written into a filtered slice;
# bias labels missing from the mapping get NaN.
dfMergedPop = dfMergedPop.assign(pos=dfMergedPop.bias.map(LABEL_X_BY_BIAS))
label_point(dfMergedPop['pos'], dfMergedPop['Popularity'], dfMergedPop['name'], plt.gca())

In [108]:
# Export cleaned data, keyed by source name (orient="index").
# Relative paths are used, like for the input files, so the notebook runs on any machine.
# NOTE(review): this writes to the kernel's working directory, presumably the notebook's
# own folder — confirm that is where downstream code expects the files.
dfAllSides.to_json('CleanedAllSides.json', orient="index")
dfMBFC.to_json('CleanedMBFC.json', orient="index")