In [1]:
# Packages
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Constants
# Relative paths to the two CSV inputs; each one is passed to
# import_csv_as_df() in the Main section of this cell.
VOTES = "../data/mk8dx-votes.csv"
COURSES = "../data/mk8dx-courses.csv"

# Functions
def import_csv_as_df(file_path):
    """Read a CSV file into a pandas DataFrame.

    Parameters
    ----------
    file_path : str, path-like or file-like
        Location of the CSV; forwarded unchanged to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table, produced with ``pd.read_csv``'s default options.
    """
    return pd.read_csv(file_path)

# Main
# Load the votes table, then print a labelled preview of its first rows.
# sep='\n' puts each piece on its own line, matching three separate print() calls.
votes = import_csv_as_df(VOTES)
print('Votes dataframe:', votes.head(), '========\n', sep='\n')

# Same again for the courses table.
courses = import_csv_as_df(COURSES)
print('Courses dataframe:', courses.head(), '========\n', sep='\n')


Votes dataframe:
   race_id     selected_course race_speed  num_racers          voted_course  \
0        1        Rainbow Road        150          12  Squeaky Clean Sprint   
1        1        Rainbow Road        150          12       DS Shroom Ridge   
2        1        Rainbow Road        150          12                Random   
3        1        Rainbow Road        150          12          Rainbow Road   
4        2  Sweet Sweet Canyon        150          12        Hyrule Circuit   

   num_votes pct_votes  
0          4    33.33%  
1          3    25.00%  
2          4    33.33%  
3          1     8.33%  
4          3    25.00%  

Courses dataframe:
   course_id              course       cup  nth_race
0          1  Mario Kart Stadium  Mushroom         1
1          2          Water Park  Mushroom         2
2          3  Sweet Sweet Canyon  Mushroom         3
3          4        Thwomp Ruins  Mushroom         4
4          5       Mario Circuit    Flower         1



In [2]:
def group_and_sum(df, groupby_column, sum_column):
    """Total one column within each group of another column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to aggregate.
    groupby_column : label or list of labels
        Column(s) whose distinct values define the groups.
    sum_column : label
        Column whose values are summed inside each group.

    Returns
    -------
    pandas.Series
        Per-group sums of ``sum_column``, indexed by the group keys.
    """
    per_group = df.groupby(groupby_column)
    return per_group[sum_column].sum()

# Sum num_votes over every row of `votes`, one total per voted_course value.
votes_grouped = group_and_sum(
    df=votes,
    groupby_column='voted_course',
    sum_column='num_votes',
)

In [3]:
def join_dataframes(df1, df2, df1_join, df2_join, how='inner'):
    """Join two DataFrames on a pair of (possibly differently named) key columns.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Left-hand table.
    df2 : pandas.DataFrame
        Right-hand table.
    df1_join : label or list of labels
        Key column(s) in ``df1``.
    df2_join : label or list of labels
        Key column(s) in ``df2``.
    how : {'inner', 'left', 'right', 'outer'}, default 'inner'
        Join type, forwarded to ``pd.merge``. The default keeps the
        function's original behaviour.

    Returns
    -------
    pandas.DataFrame
        The merged table; both key columns are kept because they are
        supplied through ``left_on`` / ``right_on``.

    Notes
    -----
    With the default inner join, a key value that appears in only one of the
    two tables is dropped without warning. Pass ``how='left'``, ``'right'``
    or ``'outer'`` when those unmatched rows need to be kept.
    """
    return pd.merge(df1, df2, how=how, left_on=df1_join, right_on=df2_join)

# Pair each row of `courses` with the rows of `votes` whose voted_course
# equals that course's name, then preview the result.
courses_votes = join_dataframes(
    df1=courses,
    df2=votes,
    df1_join='course',
    df2_join='voted_course',
)
courses_votes.head()


Unnamed: 0,course_id,course,cup,nth_race,race_id,selected_course,race_speed,num_racers,voted_course,num_votes,pct_votes
0,1,Mario Kart Stadium,Mushroom,1,39,Ninja Hideaway,150,12,Mario Kart Stadium,4,33.33%
1,1,Mario Kart Stadium,Mushroom,1,42,3DS Neo Bowser City,150,12,Mario Kart Stadium,6,50.00%
2,1,Mario Kart Stadium,Mushroom,1,66,Wii Daisy Circuit,150,9,Mario Kart Stadium,4,44.44%
3,1,Mario Kart Stadium,Mushroom,1,97,Twisted Mansion,150,12,Mario Kart Stadium,2,16.67%
4,2,Water Park,Mushroom,2,10,Toad Harbor,150,8,Water Park,1,12.50%


In [4]:
def find_selected_course(df):
    """Flag the rows whose voted course equals the selected course.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing ``selected_course`` and ``voted_course`` columns.

    Returns
    -------
    pandas.Series
        Boolean mask sharing ``df``'s index; True where the two columns hold
        the same value on that row.
    """
    selected = df['selected_course']
    voted = df['voted_course']
    return selected == voted

# Boolean mask over courses_votes: True where voted_course equals selected_course.
selected_courses = find_selected_course(courses_votes)

# Spot-check a few mask values. The fixed random_state makes the preview
# identical on every Restart & Run All, and min() avoids the ValueError that
# sample() raises when asked for more rows than the Series contains.
print(selected_courses.sample(min(10, len(selected_courses)), random_state=0))

98     False
85     False
77      True
127    False
297    False
72     False
15      True
270    False
63     False
190    False
dtype: bool


In [5]:
def filter_courses_votes(df, filter_series):
    """Select from ``df`` using ``filter_series`` as the indexing key.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to filter.
    filter_series : pandas.Series
        Key handed to ``df[...]``. In this notebook it is a boolean mask, in
        which case the rows where the mask is True are kept.

    Returns
    -------
    pandas.DataFrame
        The selection, with the original index labels preserved.
    """
    return df[filter_series]

# Keep only the rows flagged True in selected_courses, then preview them.
filtered_courses_votes = filter_courses_votes(
    df=courses_votes,
    filter_series=selected_courses,
)
filtered_courses_votes.head()


Unnamed: 0,course_id,course,cup,nth_race,race_id,selected_course,race_speed,num_racers,voted_course,num_votes,pct_votes
5,2,Water Park,Mushroom,2,14,Water Park,150,11,Water Park,1,9.09%
6,2,Water Park,Mushroom,2,52,Water Park,150,11,Water Park,2,18.18%
7,3,Sweet Sweet Canyon,Mushroom,3,2,Sweet Sweet Canyon,150,12,Sweet Sweet Canyon,7,58.33%
9,3,Sweet Sweet Canyon,Mushroom,3,15,Sweet Sweet Canyon,150,11,Sweet Sweet Canyon,5,45.45%
15,5,Mario Circuit,Flower,1,20,Mario Circuit,150,10,Mario Circuit,1,10.00%


In [30]:
import altair as alt

def plot_course_votes(df):
    """Build an Altair bar chart of ``num_votes`` against ``course``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data for the chart; the encoding refers to its ``course`` and
        ``num_votes`` columns.

    Returns
    -------
    alt.Chart
        Bar chart with ``course`` on the x axis and ``num_votes`` on the
        y axis. The chart is only constructed here; the caller displays it.
    """
    # NOTE(review): no aggregate is specified, so rows sharing a course are
    # presumably stacked by Altair's default bar behaviour; confirm.
    bars = alt.Chart(df).mark_bar()
    return bars.encode(x='course', y='num_votes')

# Last expression of the cell, so the returned chart is rendered as its output.
plot_course_votes(df=filtered_courses_votes)