# Trust Models and Social Networks: How to Slow the Spread of Online Misinformation

Author: Casey Bates

Repo: [https://github.com/cbates8/COEN-353-Term-Project](https://github.com/cbates8/COEN-353-Term-Project)


This project was completed using sample data from [https://github.com/BuzzFeedNews/2016-10-facebook-fact-check](https://github.com/BuzzFeedNews/2016-10-facebook-fact-check)

#### Access data

In [1]:
import pandas as pd
import numpy as np
from tabulate import tabulate

# Location of the fact-check CSV in the BuzzFeedNews/2016-10-facebook-fact-check repository.
url = 'https://raw.githubusercontent.com/BuzzFeedNews/2016-10-facebook-fact-check/master/data/facebook-fact-check.csv'

# The first row of the CSV supplies the column names.
data = pd.read_csv(url, header=0)

# Ratings that make a claim about accuracy. The position of a rating in this
# list (0, 1, 2) is what the trust model uses as its numeric value.
FACTUAL_RATINGS = ["mostly false", "mixture of true and false", "mostly true"]
# Every rating shown in the per-page tables: the factual ones plus the "no factual content" bucket.
RATINGS = [*FACTUAL_RATINGS, "no factual content"]


## Control: No Trust Model

#### Number of posts accepted by the model

In [2]:
# Count posts per (page, rating) and lay the ratings out as columns.
cntrl_page_grp = data.groupby(["Page"])
cntrl_rating_by_page = (
	cntrl_page_grp["Rating"]
	.value_counts()
	.unstack()
	# reindex instead of [RATINGS]: a rating that never occurs becomes a
	# column of zeros rather than raising KeyError.
	.reindex(columns=RATINGS)
	.fillna(0)
)
# Row-wise total across the rating columns.
cntrl_rating_by_page["total"] = cntrl_rating_by_page.sum(axis=1)

cntrl_rating_by_page

Rating,mostly false,mixture of true and false,mostly true,no factual content,total
Page,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ABC News Politics,0.0,2.0,172.0,26.0,200.0
Addicting Info,8.0,25.0,96.0,11.0,140.0
CNN Politics,0.0,4.0,385.0,20.0,409.0
Eagle Rising,30.0,54.0,121.0,81.0,286.0
Freedom Daily,26.0,26.0,56.0,4.0,112.0
Occupy Democrats,9.0,33.0,102.0,65.0,209.0
Politico,0.0,2.0,528.0,6.0,536.0
Right Wing News,26.0,89.0,142.0,11.0,268.0
The Other 98%,5.0,10.0,67.0,40.0,122.0


#### Number of shares of each post type

In [3]:
# Total share count for each rating, summed over every page.
cntrl_engage_grp = data.groupby(by=["Rating"])
cntrl_shares_by_rating = cntrl_engage_grp[["share_count"]].sum()

cntrl_shares_by_rating

Unnamed: 0_level_0,share_count
Rating,Unnamed: 1_level_1
mixture of true and false,1225220.0
mostly false,367741.0
mostly true,2736827.0
no factual content,4617344.0


#### Number of shares of each post type, grouped by page

In [4]:
# Total share count for every (page, rating) pair.
# Note: this rebinds cntrl_engage_grp / cntrl_shares_by_rating from the per-rating cell.
page_rating_keys = ["Page", "Rating"]
cntrl_engage_grp = data.groupby(by=page_rating_keys)
cntrl_shares_by_rating = cntrl_engage_grp[["share_count"]].sum()

cntrl_shares_by_rating

Unnamed: 0_level_0,Unnamed: 1_level_0,share_count
Page,Rating,Unnamed: 2_level_1
ABC News Politics,mixture of true and false,152.0
ABC News Politics,mostly true,6738.0
ABC News Politics,no factual content,938.0
Addicting Info,mixture of true and false,34874.0
Addicting Info,mostly false,13239.0
Addicting Info,mostly true,94458.0
Addicting Info,no factual content,27657.0
CNN Politics,mixture of true and false,956.0
CNN Politics,mostly true,69193.0
CNN Politics,no factual content,4301.0


In [5]:
# Shares of "mostly false" posts: per-page subtotals first, then the overall total.
cntrl_false_grp = data.loc[data["Rating"] == "mostly false"].groupby([ "Page", "Rating" ])
cntrl_mostly_false_shares = cntrl_false_grp[["share_count"]].sum()
# Sum the column with pandas to get a true scalar. The previous
# int(sum(frame.values)) converted a 1-element ndarray to int, which
# NumPy >= 1.25 deprecates.
cntrl_false_info_shares = int(cntrl_mostly_false_shares["share_count"].sum())

# Same computation for posts rated "mixture of true and false".
cntrl_mixed_grp = data.loc[data["Rating"] == "mixture of true and false"].groupby([ "Page", "Rating" ])
cntrl_mixture_shares = cntrl_mixed_grp[["share_count"]].sum()
cntrl_mixed_info_shares = int(cntrl_mixture_shares["share_count"].sum())

print(f"Shares containing mostly false information: {cntrl_false_info_shares}")
print(f"Shares containing mixture of true and false information: {cntrl_mixed_info_shares}")
print(f"Shares not containing mostly true information: {cntrl_false_info_shares + cntrl_mixed_info_shares}")

Shares containing mostly false information: 367741
Shares containing mixture of true and false information: 1225220
Shares not containing mostly true information: 1592961


# Beta Reputation Model

In [6]:
def trust_value(all_ratings, min_rating=0, max_rating=2):
	"""Compute a trust score from a sequence of numeric ratings.

	Each rating is normalised to ``r = (rating - min_rating) / (max_rating - min_rating)``
	and contributes ``r`` positive evidence and ``1 - r`` negative evidence.
	With ``R`` and ``S`` the summed positive and negative evidence, the score is
	``(3R + S + 4) / (R + S + 2)``, which is algebraically ``1 + 2 * (R + 1) / (R + S + 2)``.
	For ratings inside ``[min_rating, max_rating]`` the result therefore lies
	between 1 and 3, and an empty history yields 2.0.

	Args:
		all_ratings: Iterable of numeric ratings; may be empty.
		min_rating: Lowest value on the rating scale (default 0).
		max_rating: Highest value on the rating scale (default 2).

	Returns:
		The trust score as a float.

	Raises:
		ValueError: If ``min_rating`` equals ``max_rating`` (zero-width scale).
	"""
	span = max_rating - min_rating
	if span == 0:
		raise ValueError("min_rating and max_rating must differ")

	positive = 0  # R: accumulated positive evidence
	negative = 0  # S: accumulated negative evidence
	for rating in all_ratings:
		r = (rating - min_rating) / span
		positive += r
		negative += 1 - r

	return (3 * positive + negative + 4) / (positive + negative + 2)

# Work on a copy: assigning the trust column to an alias of `data` would also
# add it to the raw frame that the control cells read.
model_data = data.copy()

user_trust_values = {}  # first-column id -> trust value after each of its rows, in row order
factual_ratings = {}    # first-column id -> numeric factual ratings seen so far
vals = []               # one trust value per row, aligned with model_data's row order

for p in model_data.values:
	# Columns are read by position. NOTE(review): column 0 is presumably the
	# account/page id and column 7 the "Rating" column — confirm against the CSV header.
	account_id = p[0]
	rating = p[7]

	account_ratings = factual_ratings.setdefault(account_id, [])
	account_history = user_trust_values.setdefault(account_id, [])

	# Only factual ratings count as evidence; the list position is the numeric rating.
	if rating in FACTUAL_RATINGS:
		account_ratings.append(FACTUAL_RATINGS.index(rating))

	# The current row's rating is appended before scoring, so each row's trust
	# value already reflects that row's own rating.
	trust = trust_value(account_ratings)
	account_history.append(trust)
	vals.append(trust)

model_data["trust_value"] = vals

### Model 1: Only allow trust values greater than 50%

#### Number of shares of each post type

In [7]:
# Model 1 keeps only rows whose trust value is strictly above 1.5.
# NOTE(review): trust_value returns 1 + 2 * (R + 1) / (R + S + 2), i.e. a 1-3 scale,
# so 1.5 corresponds to a Beta expectation of 0.25 — confirm the "50%" in the heading.
model_1_keep = model_data["trust_value"].gt(1.5)
model_1_df = model_data.loc[model_1_keep]

# Total share count per rating among the rows Model 1 accepts.
model_1_engage_grp = model_1_df.groupby(by=["Rating"])
model_1_shares_by_rating = model_1_engage_grp[["share_count"]].sum()

model_1_shares_by_rating

Unnamed: 0_level_0,share_count
Rating,Unnamed: 1_level_1
mixture of true and false,1225220.0
mostly false,360164.0
mostly true,2736827.0
no factual content,4616151.0


#### Number of shares of each post type, grouped by page

In [8]:
# Rows accepted by Model 1 (trust value strictly above 1.5).
model_1_keep = model_data["trust_value"].gt(1.5)
model_1_df = model_data.loc[model_1_keep]

# Total share count for every (page, rating) pair among the accepted rows.
# Note: this rebinds model_1_engage_grp / model_1_shares_by_rating from the per-rating cell.
model_1_engage_grp = model_1_df.groupby(by=["Page", "Rating"])
model_1_shares_by_rating = model_1_engage_grp[["share_count"]].sum()

model_1_shares_by_rating

Unnamed: 0_level_0,Unnamed: 1_level_0,share_count
Page,Rating,Unnamed: 2_level_1
ABC News Politics,mixture of true and false,152.0
ABC News Politics,mostly true,6738.0
ABC News Politics,no factual content,938.0
Addicting Info,mixture of true and false,34874.0
Addicting Info,mostly false,13239.0
Addicting Info,mostly true,94458.0
Addicting Info,no factual content,27657.0
CNN Politics,mixture of true and false,956.0
CNN Politics,mostly true,69193.0
CNN Politics,no factual content,4301.0


In [9]:
# Shares of "mostly false" posts accepted by Model 1: per-page subtotals, then the overall total.
model_1_false_grp = model_1_df.loc[model_1_df["Rating"] == "mostly false"].groupby([ "Page", "Rating" ])
model_1_mostly_false_shares = model_1_false_grp[["share_count"]].sum()
# Sum the column with pandas to get a true scalar. The previous
# int(sum(frame.values)) converted a 1-element ndarray to int, which
# NumPy >= 1.25 deprecates.
model_1_false_info_shares = int(model_1_mostly_false_shares["share_count"].sum())

# Same computation for posts rated "mixture of true and false".
model_1_mixed_grp = model_1_df.loc[model_1_df["Rating"] == "mixture of true and false"].groupby([ "Page", "Rating" ])
model_1_mixture_shares = model_1_mixed_grp[["share_count"]].sum()
model_1_mixed_info_shares = int(model_1_mixture_shares["share_count"].sum())

print(f"Shares containing mostly false information: {model_1_false_info_shares}")
print(f"Shares containing mixture of true and false information: {model_1_mixed_info_shares}")
print(f"Shares not containing mostly true information: {model_1_false_info_shares + model_1_mixed_info_shares}")

Shares containing mostly false information: 360164
Shares containing mixture of true and false information: 1225220
Shares not containing mostly true information: 1585384


#### Number of posts accepted by the model

In [10]:
# Count the posts Model 1 accepts per (page, rating), with ratings as columns.
model_1_page_grp = model_1_df.groupby(["Page"])

model_1_rating_by_page = (
	model_1_page_grp["Rating"]
	.value_counts()
	.unstack()
	# reindex instead of [RATINGS]: if the filter removes every post with
	# some rating, that column becomes zeros rather than raising KeyError.
	.reindex(columns=RATINGS)
	.fillna(0)
)
# Row-wise total across the rating columns.
model_1_rating_by_page["total"] = model_1_rating_by_page.sum(axis=1)

model_1_rating_by_page

Rating,mostly false,mixture of true and false,mostly true,no factual content,total
Page,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ABC News Politics,0.0,2.0,172.0,26.0,200.0
Addicting Info,8.0,25.0,96.0,11.0,140.0
CNN Politics,0.0,4.0,385.0,20.0,409.0
Eagle Rising,29.0,54.0,121.0,80.0,284.0
Freedom Daily,26.0,26.0,56.0,4.0,112.0
Occupy Democrats,9.0,33.0,102.0,65.0,209.0
Politico,0.0,2.0,528.0,6.0,536.0
Right Wing News,26.0,89.0,142.0,11.0,268.0
The Other 98%,5.0,10.0,67.0,40.0,122.0


### Model 2: Only allow trust values greater than 75%

#### Number of shares of each post type

In [11]:
# Model 2 keeps only rows whose trust value is strictly above 2.5
# (on trust_value's 1-3 scale this is a Beta expectation above 0.75).
model_2_keep = model_data["trust_value"].gt(2.5)
model_2_df = model_data.loc[model_2_keep]

# Total share count per rating among the rows Model 2 accepts.
model_2_engage_grp = model_2_df.groupby(by=["Rating"])
model_2_shares_by_rating = model_2_engage_grp[["share_count"]].sum()

model_2_shares_by_rating

Unnamed: 0_level_0,share_count
Rating,Unnamed: 1_level_1
mixture of true and false,994470.0
mostly false,177070.0
mostly true,2406388.0
no factual content,4362622.0


#### Number of shares of each post type, grouped by page

In [12]:
# Rows accepted by Model 2 (trust value strictly above 2.5).
model_2_keep = model_data["trust_value"].gt(2.5)
model_2_df = model_data.loc[model_2_keep]

# Total share count for every (page, rating) pair among the accepted rows.
# Note: this rebinds model_2_engage_grp / model_2_shares_by_rating from the per-rating cell.
model_2_engage_grp = model_2_df.groupby(by=["Page", "Rating"])
model_2_shares_by_rating = model_2_engage_grp[["share_count"]].sum()

model_2_shares_by_rating

Unnamed: 0_level_0,Unnamed: 1_level_0,share_count
Page,Rating,Unnamed: 2_level_1
ABC News Politics,mixture of true and false,152.0
ABC News Politics,mostly true,6703.0
ABC News Politics,no factual content,938.0
Addicting Info,mixture of true and false,33642.0
Addicting Info,mostly false,12954.0
Addicting Info,mostly true,91745.0
Addicting Info,no factual content,27657.0
CNN Politics,mixture of true and false,956.0
CNN Politics,mostly true,69179.0
CNN Politics,no factual content,4301.0


In [13]:
# Shares of "mostly false" posts accepted by Model 2: per-page subtotals, then the overall total.
model_2_false_grp = model_2_df.loc[model_2_df["Rating"] == "mostly false"].groupby([ "Page", "Rating" ])
model_2_mostly_false_shares = model_2_false_grp[["share_count"]].sum()
# Sum the column with pandas to get a true scalar. The previous
# int(sum(frame.values)) converted a 1-element ndarray to int, which
# NumPy >= 1.25 deprecates.
model_2_false_info_shares = int(model_2_mostly_false_shares["share_count"].sum())

# Same computation for posts rated "mixture of true and false".
model_2_mixed_grp = model_2_df.loc[model_2_df["Rating"] == "mixture of true and false"].groupby([ "Page", "Rating" ])
model_2_mixture_shares = model_2_mixed_grp[["share_count"]].sum()
model_2_mixed_info_shares = int(model_2_mixture_shares["share_count"].sum())

print(f"Shares containing mostly false information: {model_2_false_info_shares}")
print(f"Shares containing mixture of true and false information: {model_2_mixed_info_shares}")
print(f"Shares not containing mostly true information: {model_2_false_info_shares + model_2_mixed_info_shares}")

Shares containing mostly false information: 177070
Shares containing mixture of true and false information: 994470
Shares not containing mostly true information: 1171540


#### Number of posts accepted by the model

In [14]:
# Count the posts Model 2 accepts per (page, rating), with ratings as columns.
model_2_page_grp = model_2_df.groupby(["Page"])

model_2_rating_by_page = (
	model_2_page_grp["Rating"]
	.value_counts()
	.unstack()
	# reindex instead of [RATINGS]: if the filter removes every post with
	# some rating, that column becomes zeros rather than raising KeyError.
	.reindex(columns=RATINGS)
	.fillna(0)
)
# Row-wise total across the rating columns.
model_2_rating_by_page["total"] = model_2_rating_by_page.sum(axis=1)

model_2_rating_by_page

Rating,mostly false,mixture of true and false,mostly true,no factual content,total
Page,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ABC News Politics,0.0,2.0,170.0,25.0,197.0
Addicting Info,7.0,23.0,92.0,11.0,133.0
CNN Politics,0.0,4.0,383.0,20.0,407.0
Occupy Democrats,9.0,33.0,100.0,64.0,206.0
Politico,0.0,2.0,526.0,6.0,534.0
Right Wing News,0.0,0.0,1.0,0.0,1.0
The Other 98%,5.0,10.0,65.0,39.0,119.0


# Comparing Models

### Number of shares on different types of posts

In [15]:

def _comparison_row(label, control, model_1, model_2):
	"""Return one table row: the three share counts plus each model's reduction versus control."""
	return [label, control, model_1, model_2, control - model_1, control - model_2]

# Combined "mostly false" + "mixture" totals for the control and each model.
cntrl_combined_shares = cntrl_false_info_shares + cntrl_mixed_info_shares
model_1_combined_shares = model_1_false_info_shares + model_1_mixed_info_shares
model_2_combined_shares = model_2_false_info_shares + model_2_mixed_info_shares

table = [
	["Types of post shared", "Control", "Model 1", "Model 2", "Control - M1", "Control - M2"],
	_comparison_row("mostly false", cntrl_false_info_shares, model_1_false_info_shares, model_2_false_info_shares),
	_comparison_row("mixture of true and false", cntrl_mixed_info_shares, model_1_mixed_info_shares, model_2_mixed_info_shares),
	_comparison_row("false and mixed", cntrl_combined_shares, model_1_combined_shares, model_2_combined_shares),
]

# The first row of `table` is used as the header row.
print(tabulate(table, headers="firstrow", tablefmt="fancy_grid"))

╒═══════════════════════════╤═══════════╤═══════════╤═══════════╤════════════════╤════════════════╕
│ Types of post shared      │   Control │   Model 1 │   Model 2 │   Control - M1 │   Control - M2 │
╞═══════════════════════════╪═══════════╪═══════════╪═══════════╪════════════════╪════════════════╡
│ mostly false              │    367741 │    360164 │    177070 │           7577 │         190671 │
├───────────────────────────┼───────────┼───────────┼───────────┼────────────────┼────────────────┤
│ mixture of true and false │   1225220 │   1225220 │    994470 │              0 │         230750 │
├───────────────────────────┼───────────┼───────────┼───────────┼────────────────┼────────────────┤
│ false and mixed           │   1592961 │   1585384 │   1171540 │           7577 │         421421 │
╘═══════════════════════════╧═══════════╧═══════════╧═══════════╧════════════════╧════════════════╛


### Number of posts removed

#### Posts removed by Model 1

In [16]:
# Posts removed by Model 1 = control counts minus Model 1 counts, per page and rating.
cols = [*RATINGS, "total"]
pages = list(cntrl_rating_by_page.index)

# Align the model table to the control table by label. A page that Model 1
# filtered out entirely is filled with zeros, so its difference equals the
# full control count.
model_1_aligned = model_1_rating_by_page.reindex(index=pages, columns=cols, fill_value=0)

m_1_differences = (
	(cntrl_rating_by_page[cols] - model_1_aligned)
	# Drop the "Page"/"Rating" axis names so the table has plain, unnamed labels.
	.rename_axis(None, axis=0)
	.rename_axis(None, axis=1)
)

m_1_differences

Unnamed: 0,mostly false,mixture of true and false,mostly true,no factual content,total
ABC News Politics,0.0,0.0,0.0,0.0,0.0
Addicting Info,0.0,0.0,0.0,0.0,0.0
CNN Politics,0.0,0.0,0.0,0.0,0.0
Eagle Rising,1.0,0.0,0.0,1.0,2.0
Freedom Daily,0.0,0.0,0.0,0.0,0.0
Occupy Democrats,0.0,0.0,0.0,0.0,0.0
Politico,0.0,0.0,0.0,0.0,0.0
Right Wing News,0.0,0.0,0.0,0.0,0.0
The Other 98%,0.0,0.0,0.0,0.0,0.0


#### Posts removed by Model 2

In [17]:
# Posts removed by Model 2 = control counts minus Model 2 counts, per page and rating.
cols = [*RATINGS, "total"]
pages = list(cntrl_rating_by_page.index)

# Align the model table to the control table by label. A page that Model 2
# filtered out entirely is filled with zeros, so its difference equals the
# full control count.
model_2_aligned = model_2_rating_by_page.reindex(index=pages, columns=cols, fill_value=0)

m_2_differences = (
	(cntrl_rating_by_page[cols] - model_2_aligned)
	# Drop the "Page"/"Rating" axis names so the table has plain, unnamed labels.
	.rename_axis(None, axis=0)
	.rename_axis(None, axis=1)
)

m_2_differences

Unnamed: 0,mostly false,mixture of true and false,mostly true,no factual content,total
ABC News Politics,0.0,0.0,2.0,1.0,3.0
Addicting Info,1.0,2.0,4.0,0.0,7.0
CNN Politics,0.0,0.0,2.0,0.0,2.0
Eagle Rising,30.0,54.0,121.0,81.0,286.0
Freedom Daily,26.0,26.0,56.0,4.0,112.0
Occupy Democrats,0.0,0.0,2.0,1.0,3.0
Politico,0.0,0.0,2.0,0.0,2.0
Right Wing News,26.0,89.0,141.0,11.0,267.0
The Other 98%,0.0,0.0,2.0,1.0,3.0
