# SELECT statements with Aggregate functions

In [62]:
import pandas as pd
import numpy as np

Create a DataFrame

In [63]:
# Load the player roster from a CSV in the working directory; every later cell
# queries this `players` frame.
# NOTE(review): the displayed frame has an "Unnamed: 0" column, which usually
# means the CSV was written with its index; if so, index_col=0 would avoid the
# extra column -- confirm against the file.
players = pd.read_csv('hockey_players.csv')
players

Unnamed: 0,first_name,last_name,team,position,jersey_number,salary,birthdate
0,Joe,Pavelski,SJ,C,8,6000000.0,1984-07-11
1,Connor,McDavid,EDM,C,97,925000.0,1997-01-13
2,Sidney,Crosby,PIT,C,87,8700000.0,1987-08-07
3,Carey,Price,MTL,G,31,10500000.0,1987-08-16
4,Daniel,Sedin,VAN,LW,22,,1980-09-26
5,Henrik,Sedin,VAN,C,33,,1980-09-26


## SELECT COUNT(*) FROM players

In [64]:
# COUNT(*): one per row of the frame, counted via the row index, so rows with
# missing values are included.
len(players.index)

6

In [65]:
# DataFrame.shape is a (rows, columns) tuple, so index 0 is the number of rows.
players.shape[0]

6

## SELECT COUNT(salary) FROM players

In [66]:
# COUNT(column): Series.count() tallies only the non-missing entries, so the
# players without a salary are left out.
players.salary.count()

4

In [67]:
# Same count via a boolean mask: notnull() flags the salaries that are present
# and the vectorized Series.sum() adds up the True values. (The Python builtin
# sum() would instead loop over the Series one element at a time.)
players['salary'].notnull().sum()

4

In [68]:
# Same count through the generic aggregation entry point, naming the reducer
# by its string alias.
players['salary'].aggregate('count')

4

## SELECT COUNT(DISTINCT(team)) FROM players

In [69]:
# COUNT(DISTINCT team): number of different team codes. nunique() ignores
# missing values by default.
players.team.nunique()

5

## SELECT SUM(salary) FROM players

In [70]:
# SUM(salary): missing salaries are skipped rather than propagated, so the
# total covers only the players that have one.
players.salary.sum()

26125000.0

In [71]:
# Same total via the generic aggregation call with a string reducer name.
players['salary'].aggregate('sum')

26125000.0

## SELECT MAX(salary) FROM players

In [72]:
# MAX(salary): the largest salary among the players that have one.
players.salary.max()

10500000.0

In [73]:
# Same maximum via the generic aggregation call with a string reducer name.
players['salary'].aggregate('max')

10500000.0

## SELECT SUM(salary) FROM players GROUP BY team

In [74]:
# One salary total per team, indexed by team code. A team whose salaries are
# all missing comes out as 0.0 here, whereas SQL's SUM would give NULL; pass
# min_count=1 to sum() to get NaN for such groups instead.
players.groupby(by=['team']).salary.sum()

team
EDM      925000.0
MTL    10500000.0
PIT     8700000.0
SJ      6000000.0
VAN           0.0
Name: salary, dtype: float64

In [75]:
# Same per-team totals through the generic aggregation call with a string
# reducer name.
players.groupby(by=['team'])['salary'].aggregate('sum')

team
EDM      925000.0
MTL    10500000.0
PIT     8700000.0
SJ      6000000.0
VAN           0.0
Name: salary, dtype: float64

## SELECT SUM(salary) FROM players WHERE team IN ('VAN','PIT','SJ') GROUP BY team

In [76]:
# Row filter first (the WHERE part), then total the salaries per remaining team.
(
    players
    .loc[players['team'].isin(['VAN','PIT','SJ'])]
    .groupby(['team'])['salary']
    .sum()
)

team
PIT    8700000.0
SJ     6000000.0
VAN          0.0
Name: salary, dtype: float64

## SELECT SUM(salary) FROM players GROUP BY team HAVING SUM(salary) > 7000000

In [77]:
# HAVING filters the aggregated groups, so compute the per-team totals first...
totals = players.groupby(['team'])['salary'].sum()
# ...then keep only the totals above the threshold with a boolean mask. This
# avoids the where()/dropna() round trip through NaN, which would turn an
# integer column into floats.
totals[totals > 7000000]

team
MTL    10500000.0
PIT     8700000.0
Name: salary, dtype: float64

In [78]:
# Single-expression HAVING: aggregate per team, then select the totals above
# the threshold with a callable passed to .loc. The callable receives the
# aggregated Series and returns a boolean mask, so no NaN placeholders are
# created and no dropna() is needed.
(
    players
    .groupby(['team'])['salary']
    .sum()
    .loc[lambda team_totals: team_totals > 7000000]
)

team
MTL    10500000.0
PIT     8700000.0
Name: salary, dtype: float64