In [1]:
import numpy as np
from datascience import *

In [2]:
# Load the raw CEO compensation data from CSV into a Table. The bare name on
# the last line makes the notebook render the table as the cell's output.
raw_compensation = Table.read_table('../data/raw_compensation.csv')
raw_compensation

Rank,Name,Company (Headquarters),Total Pay,% Change,Cash Pay,Equity Pay,Other Pay,Ratio of CEO pay to average industry worker pay
1,Mark V. Hurd*,Oracle (Redwood City),$53.25,(No previous year),$0.95,$52.27,$0.02,362
2,Safra A. Catz*,Oracle (Redwood City),$53.24,(No previous year),$0.95,$52.27,$0.02,362
3,Robert A. Iger,Walt Disney (Burbank),$44.91,-3%,$24.89,$17.28,$2.74,477
4,Marissa A. Mayer,Yahoo! (Sunnyvale),$35.98,-15%,$1.00,$34.43,$0.55,342
5,Marc Benioff,salesforce.com (San Francisco),$33.36,-16%,$4.65,$27.26,$1.45,338
6,John H. Hammergren,McKesson (San Francisco),$24.84,-4%,$12.10,$12.37,$0.37,222
7,John S. Watson,Chevron (San Ramon),$22.04,-15%,$4.31,$14.68,$3.05,183
8,Jeffrey Weiner,LinkedIn (Mountain View),$19.86,27%,$2.47,$17.26,$0.13,182
9,John T. Chambers**,Cisco Systems (San Jose),$19.62,19%,$5.10,$14.51,$0.01,170
10,John G. Stumpf,Wells Fargo (San Francisco),$19.32,-10%,$6.80,$12.50,$0.02,256


In [3]:
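# The "Total Pay" column holds strings such as '$53.25 ', not numbers, so
# averaging it directly fails. The call is left commented out so the notebook
# still runs top to bottom:
# np.average(raw_compensation.column("Total Pay"))
# -> TypeError: cannot perform reduce with flexible type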

In [4]:
# Check the type of the first "Total Pay" entry to see how the column was read.
total_pay_type = type(raw_compensation.column('Total Pay')[0])
total_pay_type

numpy.str_

In [5]:
# Take the first "Total Pay" entry unchanged (still a string) so its exact
# formatting can be inspected before converting it.
mark_hurd_pay_string = raw_compensation.column('Total Pay')[0]
mark_hurd_pay_string

'$53.25 '

In [6]:
# Remove the '$' sign, parse the remainder as a float, and scale by 1e6
# because the table quotes pay in millions of dollars.
mark_hurd_pay = float(mark_hurd_pay_string.strip('$')) * 1e6
mark_hurd_pay

53250000.0

In [7]:
def convert_pay_string_to_number(pay_string):
    """Convert a pay string such as '$100' (in millions) to a number of dollars.

    Leading/trailing whitespace and '$' signs are ignored, and US-style
    thousands separators (',') are removed, so '$53.25 ', ' $53.25' and
    '$1,000' are all accepted.

    Args:
        pay_string: A string (or numpy.str_) holding a dollar amount
            expressed in millions, e.g. '$53.25 '.

    Returns:
        The amount in dollars as a float, e.g. 53250000.0.

    Raises:
        ValueError: If the cleaned text cannot be parsed as a number.
    """
    # Strip whitespace and '$' in one pass: stripping '$' alone leaves the
    # sign in place whenever a space precedes it.
    cleaned = pay_string.strip(' \t\r\n$').replace(',', '')
    return float(cleaned) * 1e6

In [8]:
# Load the per-year movie data, then look up the year whose "Total Gross"
# ranks fifth from the top.
movies_by_year = Table.read_table("../data/movies_by_year.csv")
rank = 5  # 1-based rank; converted to a 0-based index with rank-1 below
# Sort years by total gross (largest first) and take the "Year" at that rank.
fifth_from_top_movie_year = movies_by_year.sort("Total Gross", descending=True).column("Year").item(rank-1)
print("Year number", rank, "for total gross movie sales was:", fifth_from_top_movie_year)

Year number 5 for total gross movie sales was: 2010


In [9]:
def print_kth_top_movie_year(k):
    """Print the year ranked k-th by total gross movie sales.

    Args:
        k: 1-based rank; 1 is the year with the highest "Total Gross".

    Raises:
        ValueError: If k is less than 1. Without this check, k - 1 would be
            a negative index and silently report a year from the bottom of
            the ranking.
    """
    if k < 1:
        raise ValueError("k must be a positive rank (1 = top year), got {!r}".format(k))
    # Sort years from highest to lowest total gross, then pick the k-th one
    # (k - 1 converts the 1-based rank to a 0-based index).
    k_top_movie_year = movies_by_year.sort("Total Gross", descending=True).column("Year").item(k-1)
    print("Year number", k, "for total gross movie sales was:", k_top_movie_year)

In [10]:
# Print the year ranked 5th by total gross, this time through the function.
print_kth_top_movie_year(5)

Year number 5 for total gross movie sales was: 2010


In [11]:
# Build a new table with a numeric "Total Pay ($)" column, obtained by applying
# convert_pay_string_to_number to every entry of the string "Total Pay" column.
# raw_compensation itself is left unchanged.
compensation = raw_compensation.with_column(
    "Total Pay ($)", raw_compensation.apply(convert_pay_string_to_number, "Total Pay")
)

# Show the first and last rows to spot-check the conversion.
compensation.row(0), compensation.row(-1)

(Row(Rank=1, Name='Mark V. Hurd*', Company (Headquarters)='Oracle (Redwood City)', Total Pay='$53.25 ', % Change='(No previous year)', Cash Pay='$0.95 ', Equity Pay='$52.27 ', Other Pay='$0.02 ', Ratio of CEO pay to average industry worker pay=362.0, Total Pay ($)=53250000.0),
 Row(Rank=102, Name='Lawrence Page***', Company (Headquarters)='Alphabet (Mountain View)', Total Pay='$0.00 ', % Change='0%', Cash Pay='$0.00 ', Equity Pay='$0.00 ', Other Pay='$0.00 ', Ratio of CEO pay to average industry worker pay=1e-05, Total Pay ($)=0.0))

In [12]:
# Average total pay in dollars. The column is selected by label rather than by
# position so the cell keeps working if columns are added or reordered.
average_total_pay = np.average(compensation.column("Total Pay ($)"))
average_total_pay

11445294.11764706

In [13]:
# Convert each pay component from a '$x.xx' string (in millions) to dollars.
cash = raw_compensation.apply(convert_pay_string_to_number, "Cash Pay")
equity = raw_compensation.apply(convert_pay_string_to_number, "Equity Pay")
other = raw_compensation.apply(convert_pay_string_to_number, "Other Pay")
total = cash + equity + other

# Share of each person's pay that was cash. Rows whose total is zero get 0.0
# instead of a division-by-zero result. Dividing by the true total (not
# total + 1) leaves every other proportion unbiased.
cash_proportion = np.divide(
    cash, total, out=np.zeros_like(total, dtype=float), where=total != 0
)
# Show the first and last proportions as a spot check.
cash_proportion[0], cash_proportion[-1]

(0.01784372618625608, 0.0)

In [14]:
# Keep only rows that report a year-over-year change. The "% Change" column is
# referenced by label rather than by position so the cell survives column
# reordering.
stripped = compensation.where('% Change', are.not_equal_to('(No previous year)'))
# Parse strings such as '-3%' into percentage values such as -3.0.
changes = stripped.apply(lambda pct: float(pct.strip('%')), '% Change')

# Back out the previous year's pay: current = previous * (1 + change/100),
# so previous = current / (1 + change/100).
with_previous_compensation = stripped.with_column(
    '2014 Total Pay ($)', stripped.column('Total Pay ($)') / ((changes/100) + 1)
)
# Show the first and last rows as a spot check.
with_previous_compensation.row(0), with_previous_compensation.row(-1)

(Row(Rank=3, Name='Robert A. Iger', Company (Headquarters)='Walt Disney (Burbank)', Total Pay='$44.91 ', % Change='-3%', Cash Pay='$24.89 ', Equity Pay='$17.28 ', Other Pay='$2.74 ', Ratio of CEO pay to average industry worker pay=477.0, Total Pay ($)=44910000.0, 2014 Total Pay ($)=46298969.07216495),
 Row(Rank=102, Name='Lawrence Page***', Company (Headquarters)='Alphabet (Mountain View)', Total Pay='$0.00 ', % Change='0%', Cash Pay='$0.00 ', Equity Pay='$0.00 ', Other Pay='$0.00 ', Ratio of CEO pay to average industry worker pay=1e-05, Total Pay ($)=0.0, 2014 Total Pay ($)=0.0))

In [15]:
# Average of the back-computed "2014 Total Pay ($)" column; only rows that
# reported a previous-year change are present in with_previous_compensation.
average_pay_2014 = np.average(with_previous_compensation.column('2014 Total Pay ($)'))
average_pay_2014

11649176.115603436

In [16]:
# Count the rows whose total pay is strictly above 3e7 dollars ($30 million).
compensation.where('Total Pay ($)', are.above(3e7)).num_rows

5