In [1]:
import pandas as pd
import pickle
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from IPython import display as dis
import csv
import itertools as iter
%matplotlib inline

#### Season numbering (temporary, until wiki_scrape is fixed)

In [2]:
# Season numbering
# Read (season name, season number) pairs from the CSV. The `with` block makes
# sure the file handle is closed once the rows have been consumed.
# NOTE(review): mode 'rb' is what the Python 2 csv module expects; under
# Python 3 this would need mode 'r' with newline='' instead.
with open('season_nums.csv', 'rb') as season_file:
    reader = csv.reader(season_file)
    season_nums = [{r[0] : int(r[1])} for r in reader]

# One row per season, built in a single pass instead of concatenating
# one-row frames.
snums = pd.DataFrame(
    [(name, num) for entry in season_nums for name, num in entry.items()],
    columns=['season', 'season_num'])

#### TODO: move the shared functions into a module (`%%writefile`) and import them

In [3]:
# votes = pickle.load( open("process_votes.p", "rb" ) )
# graphs = pickle.load( open("make_graphs.p", "rb" ) )

# NOTE(security): pickle.load can execute arbitrary code contained in the
# file, so only load pickles produced by a trusted run of wiki_scrape.
# The `with` block closes the file handle after loading.
with open("wiki_scrape.p", "rb") as scrape_file:
    seasons = pickle.load(scrape_file)

In [4]:
def compare_votes(votes_all):
    """Count, for every pair of contestants, how often they cast the same vote.

    Parameters
    ----------
    votes_all : pandas.DataFrame
        One row per contestant and one column per vote. Columns whose value
        in the first row is "Jury Vote" are excluded from the comparison.

    Returns
    -------
    pandas.DataFrame
        Square frame whose index and columns are both ``votes_all.index``.
        Cell (i, j) with row position i < column position j holds the number
        of columns in which rows i and j contain the same value; the diagonal
        and the lower triangle are left as None.
    """
    # Drop columns corresponding to the final council (these votes are a different dynamic).
    # `axis` is passed by keyword: recent pandas no longer accepts it positionally.
    jury_columns = votes_all.columns[votes_all.iloc[0, :] == "Jury Vote"]
    votes = votes_all.drop(jury_columns, axis=1)

    n_contestants = votes.shape[0]
    like_votes = np.full((n_contestants, n_contestants), None, dtype=object)

    for (i, j) in iter.combinations(range(n_contestants), 2):
        row_i = votes.iloc[i, :]
        row_j = votes.iloc[j, :]
        like_votes[i, j] = ((row_i == row_j) &          # same vote
                            (row_i != u' \u2014') &     # ignore '-' (didn't vote)
                            row_i.notnull() &           # ignore None/NaN (didn't vote)
                            (row_i != '')).sum()        # ignore blanks (already eliminated)

    return pd.DataFrame(like_votes, index=votes.index, columns=votes.index)

In [5]:
def make_graphs(votes):
    """Turn a like-votes matrix into a networkx graph with named nodes.

    Parameters
    ----------
    votes : pandas.DataFrame
        Square contestant-by-contestant frame (as returned by
        ``compare_votes``); missing cells are treated as 0.

    Returns
    -------
    The graph produced by ``nx.from_numpy_matrix`` from the zero-filled
    matrix, with its integer node ids replaced by the column labels of
    ``votes``.
    """
    # Create adjacency matrix: missing cells become zeroes.
    # `.values` replaces `as_matrix()`, which was removed in pandas 1.0 and
    # returned the same array.
    filled = votes.fillna(0)
    votes_matrix = np.matrix(filled.values)

    # Make graph
    # NOTE(review): from_numpy_matrix was removed in networkx 3.0; newer
    # versions need from_numpy_array on a numeric ndarray instead.
    G = nx.from_numpy_matrix(votes_matrix)

    # Rename nodes: position k in the matrix -> k-th column label
    rename = dict(enumerate(filled.columns.values))
    return nx.relabel_nodes(G, rename)

In [6]:
def centrality_scores(vote_matrix, season_graph):
    """Build a table of centrality measures for every node of a season graph.

    Parameters
    ----------
    vote_matrix : pandas.DataFrame
        Vote matrix whose index order defines placement: the first index
        entry gets place 1, the second place 2, and so on.
    season_graph : networkx graph
        Graph whose nodes are the entries of ``vote_matrix.index``.

    Returns
    -------
    pandas.DataFrame
        Columns ``name, deg, close, btw, eig, page, place``, sorted by
        ``page``, then ``eig``, then ``deg``, all descending.

    Raises
    ------
    KeyError
        If a graph node is not present in ``vote_matrix.index``.
    """
    def _rounded(scores, ndigits):
        # dict() accepts both a plain dict (networkx 1.x) and a view of
        # (node, value) pairs (networkx 2+); .items() works on Python 2 and 3.
        return {node: round(value, ndigits)
                for node, value in dict(scores).items()}

    deg = _rounded(nx.degree(season_graph), 1)
    close = _rounded(nx.closeness_centrality(season_graph), 3)
    btw = _rounded(nx.betweenness_centrality(season_graph), 3)
    eig = _rounded(nx.eigenvector_centrality_numpy(season_graph), 3)
    page = _rounded(nx.pagerank(season_graph), 3)

    # Add contestant placement (rank): 1-based position in the index
    place = {name: rank for rank, name in enumerate(vote_matrix.index, 1)}

    # Build a table with centralities
    table = [[name, deg[name], close[name], btw[name], eig[name], page[name], place[name]]
             for name in season_graph.nodes()]

    # Convert table to pandas df
    headers = ['name', 'deg', 'close', 'btw', 'eig', 'page', 'place']
    df = pd.DataFrame(table, columns=headers)

    sort_keys = ['page', 'eig', 'deg']
    descending = [False] * len(sort_keys)
    if hasattr(df, 'sort_values'):
        df = df.sort_values(sort_keys, ascending=descending)
    else:
        # pandas < 0.17 only offers DataFrame.sort
        df = df.sort(sort_keys, ascending=descending)

    return df

### Episode Scores

In [7]:
def episode_scores(season, vote_matrix):
    """Score every contestant of a vote matrix and flag the winner.

    ``season`` is accepted but not used by this function. The result is the
    table from ``centrality_scores`` with ``place`` reduced to 1 for the
    contestant in first place and 0 for everyone else.
    """
    # Pairwise like-vote counts, turned into a graph
    season_graph = make_graphs(compare_votes(vote_matrix))

    # One row of centrality measures per contestant
    scores = centrality_scores(vote_matrix, season_graph)

    # Binary label: winner (1) versus everyone else (0)
    is_winner = scores['place'] == 1
    scores['place'] = np.where(is_winner, 1, 0)

    return scores

### Test Full Season

In [8]:
# Score one complete season; the season key is named once and reused.
example = 'Borneo'
ev = seasons[example]['votes']

episode_scores(example, ev)

Unnamed: 0,name,deg,close,btw,eig,page,place
4,Rudy,6,0.6,0.026,0.507,0.108,0
6,Susan,6,0.6,0.026,0.493,0.105,0
0,Richard,6,0.6,0.026,0.469,0.095,1
10,Kelly,8,0.652,0.271,0.354,0.093,0
14,Colleen,7,0.625,0.095,0.034,0.09,0
15,Jenna,11,0.789,0.405,0.093,0.087,0
13,Sean,6,0.6,0.026,0.359,0.074,0
1,Gervase,6,0.556,0.01,0.015,0.067,0
3,Gretchen,6,0.556,0.01,0.017,0.061,0
7,Joel,5,0.517,0.0,0.012,0.051,0


### Test Part Season

In [9]:
# Jury-vote columns are removed up front. drop() returns a new frame, so `ev`
# itself is left untouched; `axis` is passed by keyword because recent pandas
# no longer accepts it positionally.
ev2 = ev.drop(ev.columns[ev.iloc[0, :] == "Jury Vote"], axis=1)
players = len(ev2.columns)  # number of remaining vote columns (one loop step each)
episodes = []
active = list(ev2.index)

for i in range(players):
    episodes.append(i)
    # Each step removes the last remaining row from the active list
    # (presumably rows are ordered by final placement -- confirm against wiki_scrape).
    del active[-1]
    # Select the first i+1 columns by position: selecting by label would pull
    # in every column sharing a label when labels repeat.
    episodes_cumulative = ev2.iloc[:, episodes]
    current_scores = episode_scores(example, episodes_cumulative)
    current_scores = current_scores[current_scores['name'].isin(active)]
    dis.display(current_scores)
    

Unnamed: 0,name,deg,close,btw,eig,page,place
4,Rudy,3,0.2,0,0.5,0.12,0
6,Susan,3,0.2,0,0.5,0.12,0
8,Dirk,3,0.2,0,0.5,0.12,0
13,Sean,3,0.2,0,0.5,0.12,0
10,Kelly,2,0.133,0,-0.0,0.12,0
11,Stacey,2,0.133,0,-0.0,0.12,0
0,Richard,0,0.0,0,-0.0,0.018,1
1,Gervase,0,0.0,0,-0.0,0.018,0
2,Ramona,0,0.0,0,0.0,0.018,0
3,Gretchen,0,0.0,0,0.0,0.018,0


Unnamed: 0,name,deg,close,btw,eig,page,place
1,Gervase,5,0.333,0,0.408,0.066,0
2,Ramona,5,0.333,0,0.408,0.066,0
3,Gretchen,5,0.333,0,0.408,0.066,0
7,Joel,5,0.333,0,0.408,0.066,0
14,Colleen,5,0.333,0,0.408,0.066,0
15,Jenna,5,0.333,0,0.408,0.066,0
4,Rudy,3,0.2,0,0.0,0.066,0
6,Susan,3,0.2,0,0.0,0.066,0
8,Dirk,3,0.2,0,0.0,0.066,0
13,Sean,3,0.2,0,0.0,0.066,0


Unnamed: 0,name,deg,close,btw,eig,page,place
10,Kelly,2,0.133,0,-0.0,0.069,0
4,Rudy,4,0.267,0,0.479,0.068,0
6,Susan,4,0.267,0,0.479,0.068,0
8,Dirk,4,0.267,0,0.479,0.068,0
13,Sean,4,0.267,0,0.479,0.068,0
1,Gervase,5,0.333,0,-0.0,0.063,0
2,Ramona,5,0.333,0,-0.0,0.063,0
3,Gretchen,5,0.333,0,-0.0,0.063,0
7,Joel,5,0.333,0,-0.0,0.063,0
14,Colleen,5,0.333,0,-0.0,0.063,0


Unnamed: 0,name,deg,close,btw,eig,page,place
10,Kelly,2,0.133,0,-0.0,0.069,0
4,Rudy,4,0.267,0,-0.0,0.068,0
6,Susan,4,0.267,0,-0.0,0.068,0
8,Dirk,4,0.267,0,-0.0,0.068,0
13,Sean,4,0.267,0,-0.0,0.068,0
3,Gretchen,5,0.333,0,0.444,0.067,0
7,Joel,5,0.333,0,0.444,0.067,0
14,Colleen,5,0.333,0,0.444,0.067,0
15,Jenna,5,0.333,0,0.444,0.067,0
9,Greg,1,0.067,0,0.0,0.063,0


Unnamed: 0,name,deg,close,btw,eig,page,place
4,Rudy,5,0.363,0.019,0.503,0.086,0
6,Susan,5,0.363,0.019,0.503,0.086,0
10,Kelly,5,0.363,0.095,0.189,0.069,0
3,Gretchen,5,0.333,0.0,-0.0,0.067,0
7,Joel,5,0.333,0.0,-0.0,0.067,0
14,Colleen,5,0.333,0.0,-0.0,0.067,0
15,Jenna,5,0.333,0.0,-0.0,0.067,0
0,Richard,5,0.363,0.019,0.374,0.063,1
9,Greg,1,0.067,0.0,-0.0,0.063,0
13,Sean,4,0.272,0.0,0.397,0.061,0


Unnamed: 0,name,deg,close,btw,eig,page,place
3,Gretchen,6,0.408,0.019,0.47,0.086,0
14,Colleen,6,0.408,0.019,0.47,0.086,0
15,Jenna,6,0.408,0.019,0.47,0.086,0
4,Rudy,5,0.363,0.019,0.0,0.086,0
6,Susan,5,0.363,0.019,0.0,0.086,0
10,Kelly,5,0.363,0.095,0.0,0.069,0
0,Richard,5,0.363,0.019,0.0,0.063,1
13,Sean,4,0.272,0.0,0.0,0.061,0
1,Gervase,5,0.327,0.0,0.296,0.057,0
9,Greg,4,0.327,0.057,0.155,0.046,0


Unnamed: 0,name,deg,close,btw,eig,page,place
4,Rudy,5,0.363,0.019,0.514,0.092,0
6,Susan,5,0.363,0.019,0.514,0.092,0
14,Colleen,6,0.408,0.019,0.0,0.086,0
15,Jenna,6,0.408,0.019,0.0,0.086,0
10,Kelly,5,0.363,0.095,0.305,0.077,0
0,Richard,5,0.363,0.019,0.428,0.073,1
1,Gervase,5,0.327,0.0,0.0,0.057,0
13,Sean,4,0.272,0.0,0.308,0.052,0
9,Greg,4,0.327,0.057,0.0,0.046,0


Unnamed: 0,name,deg,close,btw,eig,page,place
15,Jenna,11,0.789,0.543,0.199,0.096,0
4,Rudy,6,0.6,0.026,0.486,0.095,0
6,Susan,6,0.6,0.026,0.486,0.095,0
10,Kelly,7,0.625,0.248,0.346,0.086,0
14,Colleen,6,0.556,0.01,0.082,0.085,0
0,Richard,6,0.6,0.026,0.428,0.08,1
13,Sean,6,0.6,0.026,0.338,0.066,0
1,Gervase,6,0.556,0.01,0.048,0.062,0


Unnamed: 0,name,deg,close,btw,eig,page,place
4,Rudy,6,0.6,0.026,0.495,0.097,0
6,Susan,6,0.6,0.026,0.495,0.097,0
14,Colleen,6,0.556,0.01,0.069,0.089,0
0,Richard,6,0.6,0.026,0.446,0.085,1
10,Kelly,7,0.625,0.248,0.301,0.079,0
13,Sean,6,0.6,0.026,0.374,0.071,0
1,Gervase,6,0.556,0.01,0.046,0.068,0


Unnamed: 0,name,deg,close,btw,eig,page,place
4,Rudy,6,0.6,0.026,0.492,0.099,0
6,Susan,6,0.6,0.026,0.492,0.099,0
14,Colleen,6,0.556,0.01,0.039,0.09,0
0,Richard,6,0.6,0.026,0.452,0.088,1
10,Kelly,7,0.625,0.248,0.335,0.085,0
13,Sean,6,0.6,0.026,0.39,0.077,0


Unnamed: 0,name,deg,close,btw,eig,page,place
4,Rudy,6,0.6,0.026,0.496,0.102,0
6,Susan,6,0.6,0.026,0.496,0.102,0
0,Richard,6,0.6,0.026,0.461,0.091,1
10,Kelly,8,0.652,0.271,0.302,0.085,0
13,Sean,6,0.6,0.026,0.408,0.081,0


Unnamed: 0,name,deg,close,btw,eig,page,place
4,Rudy,6,0.6,0.026,0.5,0.105,0
6,Susan,6,0.6,0.026,0.5,0.105,0
0,Richard,6,0.6,0.026,0.47,0.095,1
10,Kelly,8,0.652,0.271,0.332,0.089,0


Unnamed: 0,name,deg,close,btw,eig,page,place
4,Rudy,6,0.6,0.026,0.507,0.108,0
0,Richard,6,0.6,0.026,0.469,0.095,1
10,Kelly,8,0.652,0.271,0.354,0.093,0


Unnamed: 0,name,deg,close,btw,eig,page,place
10,Kelly,8,0.652,0.271,0.373,0.096,0
0,Richard,6,0.6,0.026,0.468,0.095,1


Unnamed: 0,name,deg,close,btw,eig,page,place
0,Richard,6,0.6,0.026,0.468,0.095,1


### Add Votes For (correct votes) and Votes Against

In [10]:
def votes_for_against(vote_matrix):
    """Tally correct votes cast and votes received for every contestant.

    Parameters
    ----------
    vote_matrix : pandas.DataFrame
        One row per contestant and one column per vote. Each cell is the
        string that row's contestant voted for; surrounding whitespace is
        stripped before comparing.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``vote_matrix`` with two integer columns:
        ``votes_for``     -- number of columns in which the contestant's
                             vote equals the column label;
        ``votes_against`` -- number of times the contestant's index label
                             appears as a vote in any counted column.

    Notes
    -----
    Columns whose label occurs more than once are skipped entirely
    (TODO: split / remove vote-overs). Values that are not index labels
    (blanks, dashes, ...) are ignored in the votes-against count.
    """
    v = vote_matrix
    labels = list(v.columns)

    # Use the index itself: wrapping it in a list builds a one-level
    # MultiIndex on current pandas, which never matches plain labels.
    tally = pd.DataFrame(0, index=v.index, columns=['votes_for', 'votes_against'])

    # Walk the columns by position so repeated labels cannot shift which
    # column belongs to which step.
    for position, eliminated in enumerate(labels):
        # TODO: Split / remove vote-overs
        if labels.count(eliminated) > 1:
            continue

        # Remove whitespace in scraped values
        current = v.iloc[:, position].str.strip()

        # Votes for: the voter named the column label
        correct_vote = (current == eliminated).values
        tally.loc[correct_vote, 'votes_for'] += 1

        # Votes against: count this column's votes per name, keeping only
        # names that are contestants in the tally (missing -> 0)
        received = current.value_counts().reindex(tally.index).fillna(0).astype(int)
        tally['votes_against'] += received

    return tally

In [11]:
# Test (full season): tally votes for / against over every column of ev2,
# the vote matrix from which the "Jury Vote" columns were dropped earlier.
votes_for_against(ev2)

Unnamed: 0,votes_for,votes_against
Richard,8,4
Kelly,5,0
Rudy,9,7
Susan,9,3
Sean,6,9
Colleen,3,7
Gervase,1,6
Jenna,4,11
Greg,1,6
Gretchen,3,4


### Update Episode Scores to include new parameters

In [12]:
def episode_scores(season, vote_matrix):
    """Score every contestant of a vote matrix, including vote tallies.

    ``season`` is accepted but not used by this function. The result combines
    the centrality table with the ``votes_for`` / ``votes_against`` tallies
    and reduces ``place`` to 1 for the first-place contestant, 0 otherwise.
    """
    # Pairwise like-vote counts, turned into a graph
    season_graph = make_graphs(compare_votes(vote_matrix))

    # One row of centrality measures per contestant
    scores = centrality_scores(vote_matrix, season_graph)

    # Attach the vote tallies, matching tally index labels to the 'name' column
    tally = votes_for_against(vote_matrix)
    scores = scores.join(tally, on='name')

    # Binary label: winner (1) versus everyone else (0)
    is_winner = scores['place'] == 1
    scores['place'] = np.where(is_winner, 1, 0)

    # Features first, label last
    column_order = ['name','deg','close','btw','eig','page','votes_for','votes_against','place']
    return scores[column_order]

In [13]:
def current_score(vote_matrix, episode_num, season=None):
    """Scores of the contestants still active after a given episode.

    Parameters
    ----------
    vote_matrix : pandas.DataFrame
        Vote matrix with one row per contestant and one column per vote.
    episode_num : int
        Zero-based position of the last vote column to include; columns
        ``0 .. episode_num`` are scored together.
    season : optional
        Forwarded to ``episode_scores`` as its first argument. It replaces
        the previous reliance on a notebook-level ``example`` variable.

    Returns
    -------
    pandas.DataFrame
        The ``episode_scores`` table for the cumulative columns, restricted
        to the contestants that are still active.

    Raises
    ------
    ValueError
        If ``episode_num`` is negative.
    """
    if episode_num < 0:
        raise ValueError("episode_num must be >= 0")

    n_episodes = episode_num + 1

    # One trailing row leaves the active list per included episode
    # (presumably rows are ordered by final placement -- confirm against wiki_scrape).
    active = list(vote_matrix.index)[:-n_episodes]

    # Only the last cumulative table is returned, so it is computed once.
    # Columns are taken by position so repeated labels cannot add extra columns.
    episodes_cumulative = vote_matrix.iloc[:, :n_episodes]
    current = episode_scores(season, episodes_cumulative)

    return current[current['name'].isin(active)]

In [14]:
# Test on Borneo; the season key is named once and reused for the lookup
example = 'Borneo'
ev = seasons[example]['votes']

# current_score takes the original wiki_scrape vote matrix and the
# (zero-based) episode number to score up to
current_score(ev, 5)

Unnamed: 0,name,deg,close,btw,eig,page,votes_for,votes_against,place
3,Gretchen,6,0.408,0.019,0.47,0.086,3,0,0
14,Colleen,6,0.408,0.019,0.47,0.086,3,2,0
15,Jenna,6,0.408,0.019,0.47,0.086,3,3,0
4,Rudy,5,0.363,0.019,-0.0,0.086,3,6,0
6,Susan,5,0.363,0.019,-0.0,0.086,3,1,0
10,Kelly,5,0.363,0.095,-0.0,0.069,1,0,0
0,Richard,5,0.363,0.019,-0.0,0.063,2,0,1
13,Sean,4,0.272,0.0,-0.0,0.061,2,0,0
1,Gervase,5,0.327,0.0,0.296,0.057,1,0,0
9,Greg,4,0.327,0.057,0.155,0.046,1,0,0


In [15]:
# All Borneo episodes
# Drop the jury-vote columns first so only regular votes are stepped through.
# `axis` is passed by keyword because recent pandas no longer accepts it positionally.
ev = ev.drop(ev.columns[ev.iloc[0, :] == "Jury Vote"], axis=1)
episode_nums = len(ev.columns)

# One table per episode: cumulative scores of the contestants still active
for i in range(episode_nums):
    dis.display(current_score(ev, i))

Unnamed: 0,name,deg,close,btw,eig,page,votes_for,votes_against,place
4,Rudy,3,0.2,0,0.5,0.12,1,3,0
6,Susan,3,0.2,0,0.5,0.12,1,0,0
8,Dirk,3,0.2,0,0.5,0.12,1,0,0
13,Sean,3,0.2,0,0.5,0.12,1,0,0
10,Kelly,2,0.133,0,0.0,0.12,0,0,0
11,Stacey,2,0.133,0,0.0,0.12,0,1,0
0,Richard,0,0.0,0,0.0,0.018,0,0,1
1,Gervase,0,0.0,0,-0.0,0.018,0,0,0
2,Ramona,0,0.0,0,0.0,0.018,0,0,0
3,Gretchen,0,0.0,0,0.0,0.018,0,0,0


Unnamed: 0,name,deg,close,btw,eig,page,votes_for,votes_against,place
1,Gervase,5,0.333,0,0.408,0.066,1,0,0
2,Ramona,5,0.333,0,0.408,0.066,1,2,0
3,Gretchen,5,0.333,0,0.408,0.066,1,0,0
7,Joel,5,0.333,0,0.408,0.066,1,0,0
14,Colleen,5,0.333,0,0.408,0.066,1,0,0
15,Jenna,5,0.333,0,0.408,0.066,1,0,0
4,Rudy,3,0.2,0,0.0,0.066,1,3,0
6,Susan,3,0.2,0,0.0,0.066,1,0,0
8,Dirk,3,0.2,0,0.0,0.066,1,0,0
13,Sean,3,0.2,0,0.0,0.066,1,0,0


Unnamed: 0,name,deg,close,btw,eig,page,votes_for,votes_against,place
10,Kelly,2,0.133,0,-0.0,0.069,0,0,0
4,Rudy,4,0.267,0,0.479,0.068,2,5,0
6,Susan,4,0.267,0,0.479,0.068,2,0,0
8,Dirk,4,0.267,0,0.479,0.068,2,0,0
13,Sean,4,0.267,0,0.479,0.068,2,0,0
1,Gervase,5,0.333,0,0.0,0.063,1,0,0
2,Ramona,5,0.333,0,0.0,0.063,1,2,0
3,Gretchen,5,0.333,0,0.0,0.063,1,0,0
7,Joel,5,0.333,0,0.0,0.063,1,0,0
14,Colleen,5,0.333,0,0.0,0.063,1,0,0


Unnamed: 0,name,deg,close,btw,eig,page,votes_for,votes_against,place
10,Kelly,2,0.133,0,0.0,0.069,0,0,0
4,Rudy,4,0.267,0,-0.0,0.068,2,5,0
6,Susan,4,0.267,0,-0.0,0.068,2,0,0
8,Dirk,4,0.267,0,-0.0,0.068,2,0,0
13,Sean,4,0.267,0,-0.0,0.068,2,0,0
3,Gretchen,5,0.333,0,0.444,0.067,2,0,0
7,Joel,5,0.333,0,0.444,0.067,2,0,0
14,Colleen,5,0.333,0,0.444,0.067,2,2,0
15,Jenna,5,0.333,0,0.444,0.067,2,1,0
9,Greg,1,0.067,0,-0.0,0.063,0,0,0


Unnamed: 0,name,deg,close,btw,eig,page,votes_for,votes_against,place
4,Rudy,5,0.363,0.019,0.503,0.086,3,6,0
6,Susan,5,0.363,0.019,0.503,0.086,3,1,0
10,Kelly,5,0.363,0.095,0.189,0.069,1,0,0
3,Gretchen,5,0.333,0.0,-0.0,0.067,2,0,0
7,Joel,5,0.333,0.0,-0.0,0.067,2,0,0
14,Colleen,5,0.333,0.0,-0.0,0.067,2,2,0
15,Jenna,5,0.333,0.0,-0.0,0.067,2,1,0
0,Richard,5,0.363,0.019,0.374,0.063,2,0,1
9,Greg,1,0.067,0.0,0.0,0.063,0,0,0
13,Sean,4,0.272,0.0,0.397,0.061,2,0,0


Unnamed: 0,name,deg,close,btw,eig,page,votes_for,votes_against,place
3,Gretchen,6,0.408,0.019,0.47,0.086,3,0,0
14,Colleen,6,0.408,0.019,0.47,0.086,3,2,0
15,Jenna,6,0.408,0.019,0.47,0.086,3,3,0
4,Rudy,5,0.363,0.019,0.0,0.086,3,6,0
6,Susan,5,0.363,0.019,0.0,0.086,3,1,0
10,Kelly,5,0.363,0.095,0.0,0.069,1,0,0
0,Richard,5,0.363,0.019,0.0,0.063,2,0,1
13,Sean,4,0.272,0.0,0.0,0.061,2,0,0
1,Gervase,5,0.327,0.0,0.296,0.057,1,0,0
9,Greg,4,0.327,0.057,0.155,0.046,1,0,0


Unnamed: 0,name,deg,close,btw,eig,page,votes_for,votes_against,place
4,Rudy,5,0.363,0.019,0.514,0.092,4,7,0
6,Susan,5,0.363,0.019,0.514,0.092,4,2,0
14,Colleen,6,0.408,0.019,0.0,0.086,3,3,0
15,Jenna,6,0.408,0.019,0.0,0.086,3,4,0
10,Kelly,5,0.363,0.095,0.305,0.077,2,0,0
0,Richard,5,0.363,0.019,0.428,0.073,3,1,1
1,Gervase,5,0.327,0.0,0.0,0.057,1,1,0
13,Sean,4,0.272,0.0,0.308,0.052,2,0,0
9,Greg,4,0.327,0.057,0.0,0.046,1,0,0


Unnamed: 0,name,deg,close,btw,eig,page,votes_for,votes_against,place
15,Jenna,11,0.789,0.543,0.199,0.096,4,7,0
4,Rudy,6,0.6,0.026,0.486,0.095,5,7,0
6,Susan,6,0.6,0.026,0.486,0.095,5,2,0
10,Kelly,7,0.625,0.248,0.346,0.086,3,0,0
14,Colleen,6,0.556,0.01,0.082,0.085,3,3,0
0,Richard,6,0.6,0.026,0.428,0.08,4,1,1
13,Sean,6,0.6,0.026,0.338,0.066,3,0,0
1,Gervase,6,0.556,0.01,0.048,0.062,1,1,0


Unnamed: 0,name,deg,close,btw,eig,page,votes_for,votes_against,place
4,Rudy,6,0.6,0.026,0.495,0.097,6,7,0
6,Susan,6,0.6,0.026,0.495,0.097,6,2,0
14,Colleen,6,0.556,0.01,0.069,0.089,3,3,0
0,Richard,6,0.6,0.026,0.446,0.085,5,4,1
10,Kelly,7,0.625,0.248,0.301,0.079,3,0,0
13,Sean,6,0.6,0.026,0.374,0.071,4,1,0
1,Gervase,6,0.556,0.01,0.046,0.068,1,1,0


Unnamed: 0,name,deg,close,btw,eig,page,votes_for,votes_against,place
4,Rudy,6,0.6,0.026,0.492,0.099,7,7,0
6,Susan,6,0.6,0.026,0.492,0.099,7,2,0
14,Colleen,6,0.556,0.01,0.039,0.09,3,3,0
0,Richard,6,0.6,0.026,0.452,0.088,6,4,1
10,Kelly,7,0.625,0.248,0.335,0.085,4,0,0
13,Sean,6,0.6,0.026,0.39,0.077,5,3,0


Unnamed: 0,name,deg,close,btw,eig,page,votes_for,votes_against,place
4,Rudy,6,0.6,0.026,0.496,0.102,8,7,0
6,Susan,6,0.6,0.026,0.496,0.102,8,2,0
0,Richard,6,0.6,0.026,0.461,0.091,7,4,1
10,Kelly,8,0.652,0.271,0.302,0.085,4,0,0
13,Sean,6,0.6,0.026,0.408,0.081,6,5,0


Unnamed: 0,name,deg,close,btw,eig,page,votes_for,votes_against,place
4,Rudy,6,0.6,0.026,0.5,0.105,9,7,0
6,Susan,6,0.6,0.026,0.5,0.105,9,3,0
0,Richard,6,0.6,0.026,0.47,0.095,8,4,1
10,Kelly,8,0.652,0.271,0.332,0.089,5,0,0


Unnamed: 0,name,deg,close,btw,eig,page,votes_for,votes_against,place
4,Rudy,6,0.6,0.026,0.507,0.108,9,7,0
0,Richard,6,0.6,0.026,0.469,0.095,8,4,1
10,Kelly,8,0.652,0.271,0.354,0.093,5,0,0


Unnamed: 0,name,deg,close,btw,eig,page,votes_for,votes_against,place
10,Kelly,8,0.652,0.271,0.373,0.096,5,0,0
0,Richard,6,0.6,0.026,0.468,0.095,8,4,1


Unnamed: 0,name,deg,close,btw,eig,page,votes_for,votes_against,place
0,Richard,6,0.6,0.026,0.468,0.095,8,4,1


### TODO:

* Wrap episode_scores for all seasons - maybe put in dict with percentage as key?
* Train Naive Bayes on per episode basis
* Prediction tests
* Fix wiki_scrape for season_nums, re-integrate here
* Workaround for re-votes (votes for / against)
* Use writefile / import instead of copying function