In [49]:
import numpy as np
import pandas as pd

In [62]:
# Initialize a DataFrame with some metrics and some randomly generated data for them.

metrics = ['metric_{}'.format(c) for c in 'ABCD']

data = pd.DataFrame(
    np.random.randint(0, 10, size=(1000, len(metrics))),
    columns=metrics,
)

In [63]:
data.head()

Unnamed: 0,metric_A,metric_B,metric_C,metric_D
0,8,6,7,4
1,5,9,2,4
2,0,2,7,2
3,7,8,2,7
4,2,0,3,4


In [64]:
# Add a "user" column and promote it to index.

users = ['Alvin', 'Simon', 'Theodore', 'Larry', 'Curly', 'Moe', 'Tom', 'Jerry']

data['user'] = np.random.choice(users, size=(len(data)))

data = data.set_index('user')

In [65]:
data.head()

Unnamed: 0_level_0,metric_A,metric_B,metric_C,metric_D
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alvin,8,6,7,4
Jerry,5,9,2,4
Alvin,0,2,7,2
Simon,7,8,2,7
Moe,2,0,3,4


In [69]:
# Sum up all the metrics simultaneously, grouped by user.

sums = data.groupby(level='user').sum()

sums

Unnamed: 0_level_0,metric_A,metric_B,metric_C,metric_D
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alvin,635,609,578,554
Curly,602,638,647,661
Jerry,528,507,543,540
Larry,551,618,569,570
Moe,549,489,559,540
Simon,521,520,520,544
Theodore,527,496,484,481
Tom,684,609,580,595


In [68]:
# For each metric, show the one user with the highest count.

sums.apply(lambda c: c.argmax())

metric_A      Tom
metric_B    Curly
metric_C    Curly
metric_D    Curly
dtype: object

In [75]:
# For each metric simultaneously, rank the users by count for the metric.

ranking = sums.apply(lambda c: c.sort_values().index)

ranking

Unnamed: 0_level_0,metric_A,metric_B,metric_C,metric_D
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alvin,Simon,Moe,Theodore,Theodore
Curly,Theodore,Theodore,Simon,Jerry
Jerry,Jerry,Jerry,Jerry,Moe
Larry,Moe,Simon,Moe,Simon
Moe,Larry,Alvin,Larry,Alvin
Simon,Curly,Tom,Alvin,Larry
Theodore,Alvin,Larry,Tom,Tom
Tom,Tom,Curly,Curly,Curly


In [76]:
# The "user" index left over is useless now. Let's reset it to a more useful integer index.

ranking = ranking.reset_index(drop=True)

ranking

Unnamed: 0,metric_A,metric_B,metric_C,metric_D
0,Simon,Moe,Theodore,Theodore
1,Theodore,Theodore,Simon,Jerry
2,Jerry,Jerry,Jerry,Moe
3,Moe,Simon,Moe,Simon
4,Larry,Alvin,Larry,Alvin
5,Curly,Tom,Alvin,Larry
6,Alvin,Larry,Tom,Tom
7,Tom,Curly,Curly,Curly


In [77]:
# Of course you can just focus on the top however-many.

ranking.head(3).T

Unnamed: 0,0,1,2
metric_A,Simon,Theodore,Jerry
metric_B,Moe,Theodore,Jerry
metric_C,Theodore,Simon,Jerry
metric_D,Theodore,Jerry,Moe
