In [1]:
import pandas as pd
import plotly.graph_objects as go
from _clustergram import Clustergram
import networkx as nx

In [2]:
# Parse every results/*.txt dump into one record per (submission, voter) pair.
import os

all_results = []

for filename in os.listdir('results'):
    if filename.endswith('.txt'):
        with open(f'results/{filename}', 'r') as results_file:
            text = results_file.read()
            # The week number is read from the path: the text between 'week' and the next '.'.
            week = int(results_file.name.split('week')[1].split('.')[0])
            # Every submission begins at an 'Album art' marker; whatever precedes the first marker is dropped.
            entries = text.split('Album art')[1:]
            week_results = []
            for entry in entries:
                entry_results = []
                # offset becomes 1 when the title wraps onto a second line, which shifts the fields after it.
                offset = 0
                elements = [e.strip() for e in entry.split('\n')]
                # Fields are read from fixed line positions relative to the marker.
                title = elements[4]
                if elements[5] != '':
                    title = title + ' ' + elements[5]
                    offset = 1
                artist = elements[6 + offset]
                album = elements[8 + offset]
                total_votes = int(elements[11 + offset])
                vote_tally = 0
                # The first 'User avatar' line belongs to the submitter.
                user_index = elements.index('User avatar')
                submitter = elements[user_index + offset + 3]
                # Every later 'User avatar' line starts one voter's block.
                voter_ids = [i for i, e in enumerate(elements) if e == 'User avatar']
                for voter_id in voter_ids[1:]:
                    voter = elements[voter_id + offset + 1].replace('*', '')
                    # NOTE(review): 'Send' is presumably a UI button label captured in the dump,
                    # not a real voter - confirm against a raw results file.
                    if voter != 'Send':
                        try:
                            comment = elements[voter_id + 3]
                            vote_offset = 0
                            if elements[voter_id + 4] != '':
                                # The comment wraps onto a second line: append that continuation line
                                # (the original appended line +3 again, duplicating the first line).
                                comment = comment + ' ' + elements[voter_id + 4]
                                vote_offset = 1
                            try:
                                vote = int(elements[voter_id + vote_offset + 6])
                            except (ValueError, IndexError):
                                # A missing or non-numeric vote line counts as zero votes.
                                vote = 0
                            vote_tally += vote
                            result = {
                                'week': week,
                                'title': title,
                                'artist': artist,
                                'album': album,
                                'total_votes': total_votes,
                                'submitter': submitter,
                                'voter': voter,
                                'vote': vote,
                                'comment': comment
                            }
                            entry_results.append(result)
                        except Exception:
                            # Best effort: report the voter block that could not be parsed and carry on.
                            print('ENTRY')
                            print(f'Error in week {week} submission {title} processing vote for {voter}')
                            print(elements[(voter_id):])
                # Sanity check: the parsed votes should add up to the total stated for the submission.
                if vote_tally != total_votes:
                    print(f'Error in week {week} submission {title} - vote tally is {vote_tally} but total votes is {total_votes}')
                    for r in entry_results:
                        print(f"{r['voter']}: {r['vote']}")
                    print()
                week_results.extend(entry_results)
            all_results.extend(week_results)
# Naming the columns keeps the frame well-formed even when no result files were found.
df = pd.DataFrame(
    all_results,
    columns=['week', 'title', 'artist', 'album', 'total_votes', 'submitter', 'voter', 'vote', 'comment'],
)
# Drop records whose submitter name came out blank.
df = df[df['submitter'] != '']
df

Unnamed: 0,week,title,artist,album,total_votes,submitter,voter,vote,comment
0,2,No Better,Lorde,No Better,26,skpurdue,gardenfractals,6,"* This song slaps, hadn't heard it before"
1,2,No Better,Lorde,No Better,26,skpurdue,Laura South,6,* love this song!!! good pick
2,2,No Better,Lorde,No Better,26,skpurdue,Dan Kerrigan,5,*
3,2,No Better,Lorde,No Better,26,skpurdue,Mac Creamer,3,*
4,2,No Better,Lorde,No Better,26,skpurdue,Connnnnor,3,*
...,...,...,...,...,...,...,...,...,...
437,6,Don't Judge Me,Janelle Monáe,Dirty Computer,17,skpurdue,Mac Creamer,2,* I had a hard time pulling the sample out of ...
438,6,Don't Judge Me,Janelle Monáe,Dirty Computer,17,skpurdue,Dan Kerrigan,2,*
439,6,Don't Judge Me,Janelle Monáe,Dirty Computer,17,skpurdue,Racquel Levia,1,*
440,6,Don't Judge Me,Janelle Monáe,Dirty Computer,17,skpurdue,gardenfractals,1,*


In [13]:
def get_weekly(person_type='submitter'):
    """Build a per-person table of weekly vote totals from the global ``df``.

    Args:
        person_type: Name of the ``df`` column that identifies a person
            (defaults to ``'submitter'``).

    Returns:
        A DataFrame with one row per distinct value of ``person_type``, one
        column per week number found in ``df['week']``, and a ``'total'``
        column holding the sum across weeks. Each week cell is the
        ``total_votes`` of the first ``df`` row matching that person and week,
        or 0 when there is none. Rows are sorted by ``'total'`` ascending and
        zeros are then replaced with ``None``.
    """
    week_numbers = sorted(int(w) for w in df['week'].unique())
    rows = []
    for person in df[person_type].unique():
        is_person = df[person_type] == person
        row = {person_type: person}
        for week in week_numbers:
            matches = df[(df['week'] == week) & is_person]
            if len(matches) > 0:
                # Only the first matching row's total is used.
                row[week] = matches['total_votes'].values[0]
            else:
                row[week] = 0
        row['total'] = sum(row[week] for week in week_numbers)
        rows.append(row)
    ranked = pd.DataFrame(rows).sort_values('total', ascending=True)
    return ranked.replace(0, None)

In [4]:
def lookup(person_type, person, week, category):
    """Return a short display label for one person's entry in a given week.

    Looks up the first row of the global ``df`` where ``df['week'] == week``
    and ``df[person_type] == person`` and reads its ``category`` column.

    Args:
        person_type: Name of the ``df`` column that identifies the person.
        person: Value to match in that column.
        week: Week number to match.
        category: Name of the ``df`` column whose value is returned.

    Returns:
        ``''`` when no row matches. A non-string value is returned unchanged.
        For strings, anything from the first ``'-'`` onward is dropped; if the
        remainder is longer than 15 characters and has more than one word, it
        is split into two lines joined by ``'<br>'`` at the middle word.
    """
    subset = df[(df['week'] == week) & (df[person_type] == person)]
    if len(subset) == 0:
        return ''
    value = subset[category].values[0]
    if not isinstance(value, str):
        # The trimming and wrapping below only make sense for text.
        return value
    if '-' in value:
        value = value.split('-')[0].strip()
    if len(value) <= 15:
        return value
    words = value.split(' ')
    if len(words) < 2:
        # A single long word cannot be wrapped; splitting at midpoint 0 would
        # only produce a leading '<br>'.
        return value
    midpoint = len(words) // 2
    return ' '.join(words[:midpoint]) + '<br>' + ' '.join(words[midpoint:])

In [5]:
def plot_weekly(person_type):
    """Draw a heatmap of weekly vote totals with one row per person.

    Args:
        person_type: Name of the ``df`` column that identifies a person;
            passed through to ``get_weekly`` and ``lookup``.

    Returns:
        A plotly ``Figure`` whose cells are coloured by the weekly totals from
        ``get_weekly``, labelled with the entry's title, and show the artist
        on hover.
    """
    weekly_df = get_weekly(person_type)
    # get_weekly() returns the person column, one column per week, and 'total';
    # the week columns are whatever is left.
    weeks = [col for col in weekly_df.columns if col not in (person_type, 'total')]
    people = weekly_df[person_type]
    # Plot weekly data in a heatmap:
    fig = go.Figure(data=go.Heatmap(
            z=weekly_df[weeks].values,
            x=weeks,
            y=people,
            # look up title for text:
            text=[[lookup(person_type, person, week, 'title') for week in weeks] for person in people],
            customdata=[[lookup(person_type, person, week, 'artist') for week in weeks] for person in people],
            texttemplate="%{text}",
            hovertemplate="<b>%{text}</b><br>%{customdata}<br>%{z} votes<br>(%{y})<extra></extra>",
            hoverinfo='text',
            colorscale='Blues_r',
            hoverongaps = False))
    fig.update_layout(
        template="plotly_dark",
        title='Music League Votes by Week',
        xaxis_title='Week',
        yaxis_title=person_type,
        yaxis_nticks=20,
        yaxis_tickangle=-45,
        width=1000,
        height=600
    )
    return fig

In [6]:
# Edge list of votes: one row per (voter, submitter) pair with the summed 'vote' column.
adjacency = (
    df.groupby(['voter', 'submitter'])['vote']
    .sum()
    .reset_index()
    # Keep only rows that have a non-blank voter name.
    .loc[lambda edges: edges['voter'] != '']
)

In [7]:
# Reshape the edge list into a voter x submitter table; pairs with no votes become 0.
vote_pivot = adjacency.pivot_table(index='voter', columns='submitter', values='vote').fillna(0)
# Keep only the rows and columns that contain at least one non-zero cell.
has_votes = vote_pivot != 0
matrix = vote_pivot.loc[has_votes.any(axis=1), has_votes.any()]


In [8]:
# Export the voter -> submitter edge list. Its index is only a row counter, so it is left out.
adjacency.to_csv('edgelist.csv', index=False)
# Keep the index here: it holds the voter name for each row, and without it
# the rows of the exported matrix cannot be identified.
matrix.to_csv('matrix.csv')

In [9]:
# Relabel every submitter column as "<name> (<total votes received>)".
# NOTE(review): this renames matrix's columns in place, so running this cell a
# second time without rebuilding matrix appends the total again.
new_names = [
    f'{submitter} ({int(matrix[submitter].sum())})'
    for submitter in matrix.columns
]
matrix.columns = new_names

# Two-stop colour scale for the heatmap cells.
heatmap_colors = [
    [0, "#222"],
    [1, "aqua"],
]
fig = Clustergram(
    data=matrix,
    column_labels=list(matrix.columns.values),
    row_labels=list(matrix.index),
    height=400,
    width=800,
    color_map=heatmap_colors,
    center_values=False,
)
fig.update_layout(template='plotly_dark')
# Title only the axes selected by these anchors rather than every axis in the figure.
fig.update_yaxes(title_text='Voters', selector={'anchor': 'x11'})
fig.update_xaxes(title_text='Submitters', selector={'anchor': 'y11'})

In [10]:
# Write the clustergram figure to 'clustergram.html' in the working directory.
fig.write_html('clustergram.html')

In [11]:
def get(voter, submitter):
    """Print how many votes ``voter`` gave ``submitter`` and to which titles.

    Reads the global ``df``. Prints one line with the summed votes and one
    line listing each matching title with its vote, highest vote first.

    Args:
        voter: Value to match in ``df['voter']``.
        submitter: Value to match in ``df['submitter']``.
    """
    matches = df[(df['voter'] == voter) & (df['submitter'] == submitter)]
    # Sort into a new frame: an in-place sort on a filtered slice of df
    # triggers pandas' SettingWithCopyWarning.
    subset = matches.sort_values(by='vote', ascending=False)
    print(f'{voter} gave {subset["vote"].sum()} votes to {submitter}')
    titles = subset['title'].values
    votes = subset['vote'].values
    print(f'Votes: {[f"{title} ({vote})" for title, vote in zip(titles, votes)]}')

In [12]:
# Example: print the votes that voter 'Laura South' gave to submitter 'skpurdue'.
get('Laura South', 'skpurdue')

Laura South gave 28 votes to skpurdue
Votes: ['No Better (6)', 'Rest in Peace (6)', 'American Pie (5)', "Don't Judge Me (5)", 'Surfing on a Sine Wave (3)', 'Little Black Submarines (3)']
