## Which state produces the best three-point shooters?



## Step 1

### We'll start by looking at the combined three-point accuracy of all Division I players born in each state.


In [0]:
from pandas.io import gbq
# Placeholder: replace with your own Google Cloud project ID before running.
# This value is passed as `project_id` to each `gbq.read_gbq` call in the cells below.
project_id = '[YOUR_PROJECT_ID]'


In [5]:
# Per-state three-point shooting: total makes, total attempts, and their ratio,
# restricted to Division I rows with at least one attempt and to players whose
# birthplace is one of the 50 states or DC. Best-shooting states come first.
states_3pt_pct_q = """
SELECT
  birthplace_state AS state,
  SUM(three_points_made) AS threes_made,
  SUM(three_points_att) AS threes_att,
  SUM(three_points_made)/SUM(three_points_att) AS three_point_pct
FROM
  `bigquery-public-data.ncaa_basketball.mbb_players_games_sr`
WHERE
  (three_points_att > 0)
  AND division_alias = "D1"
  AND birthplace_state IN ( 
    'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA', 'HI',
    'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MT', 'NE', 'NV', 'NH',
    'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'MD', 'MA', 'MI', 'MN',
    'MS', 'MO', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA',
    'WV', 'WI', 'WY')
GROUP BY
  state
ORDER BY
  three_point_pct DESC
"""

# Execute with the standard-SQL dialect (the query uses backtick-quoted table
# names) and preview the ten most accurate states.
states_3pt_pct = gbq.read_gbq(
    query=states_3pt_pct_q,
    project_id=project_id,
    dialect='standard'
)
states_3pt_pct.head(10)

Requesting query... ok.
Job ID: job_CHB0qVdrD1ggK7KKcl_CPgiTnGjn
Query running...
Query done.
Cache hit.

Retrieving results...
Got 51 rows.

Total time taken 0.56 s.
Finished at 2018-03-16 08:02:17.


Unnamed: 0,state,threes_made,threes_att,three_point_pct
0,ND,286,741,0.385965
1,WY,323,856,0.377336
2,ID,787,2086,0.377277
3,OR,2999,7969,0.376333
4,CO,3805,10257,0.370966
5,IA,3329,9062,0.367358
6,NE,1422,3888,0.365741
7,UT,2679,7347,0.364639
8,AZ,4910,13477,0.364324
9,WI,6180,16975,0.364065


## Step 2

### North Dakota comes out on top. But let's dig into what that number means. Let's look at how many players come from each state, and compare them to the overall state population.

In [6]:
# Count distinct Division I players per birthplace state and divide by the
# state's 2017 population to get players per capita.
#
# The inner query collapses the per-game rows to one row per
# (player_id, birthplace_state); the outer query counts those rows per state
# and joins the population table on the two-letter abbreviation.
# The inner query selects only the grouping keys: nothing in the outer query
# reads a per-player shooting total, so none is computed here.
# Results are ordered with the fewest players per person first.
num_players_q = """
SELECT
  stats.state AS state_abbrev,
  pops.state,
  COUNT(player_id) AS num_players,
  pops.population_2017 AS num_people,
  COUNT(player_id) / pops.population_2017 AS players_per_person
FROM (
  SELECT
    player_id,
    birthplace_state AS state
  FROM
    `bigquery-public-data.ncaa_basketball.mbb_players_games_sr`
  WHERE
    division_alias = "D1"
    AND birthplace_state IN (
      'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA', 'HI',
      'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MT', 'NE', 'NV', 'NH',
      'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'MD', 'MA', 'MI', 'MN',
      'MS', 'MO', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA',
      'WV', 'WI', 'WY')
  GROUP BY
    player_id,
    birthplace_state ) AS stats
LEFT JOIN
  `ncaa-basketball-extras.extras.state_population` AS pops
ON
  pops.abbreviation = stats.state
GROUP BY
  stats.state,
  pops.state,
  pops.population_2017
ORDER BY
  players_per_person ASC
"""

# Run with the standard-SQL dialect and preview the ten states with the
# fewest players per capita.
num_players = gbq.read_gbq(query=num_players_q, dialect='standard', project_id=project_id)
num_players.head(10)

Requesting query... ok.
Job ID: job_Yt4IRElvxz-YPgnKhBdXVMhPkowN
Query running...
Query done.
Processed: 38.7 MB
Standard price: $0.00 USD

Retrieving results...
Got 51 rows.

Total time taken 1.98 s.
Finished at 2018-03-16 08:02:19.


Unnamed: 0,state_abbrev,state,num_players,num_people,players_per_person
0,HI,Hawaii,12,1427538,8e-06
1,ME,Maine,15,1335907,1.1e-05
2,ND,North Dakota,10,755393,1.3e-05
3,NM,New Mexico,32,2088070,1.5e-05
4,VT,Vermont,10,623657,1.6e-05
5,NH,New Hampshire,23,1342795,1.7e-05
6,RI,Rhode Island,19,1059639,1.8e-05
7,MA,Massachusetts,131,6859819,1.9e-05
8,SD,South Dakota,17,869666,2e-05
9,AZ,Arizona,150,7016270,2.1e-05


## Step 3

### Looks like Hawaii, Maine, and North Dakota have very few players per capita. Getting back to North Dakota though, how many of those 10 players have attempted a three-point shot?

In [7]:
# Same per-capita player count as the previous step, but only counting
# players whose summed three-point attempts are greater than zero.
#
# The inner query totals attempts per (player_id, birthplace_state); the outer
# WHERE drops players with no attempts before they are counted per state.
#
# Descriptive names are used instead of the generic `query` / `states` so this
# result stays available after later cells bind those generic names.
players_with_attempts_q = """
SELECT
  stats.state AS state_abbrev,
  pops.state,
  COUNT(player_id) AS num_players,
  pops.population_2017 AS num_people,
  COUNT(player_id) / pops.population_2017 AS players_per_person
FROM (
  SELECT
    player_id,
    birthplace_state AS state,
    SUM(three_points_att) AS threes_att
  FROM
    `bigquery-public-data.ncaa_basketball.mbb_players_games_sr`
  WHERE
    division_alias = "D1"
    AND birthplace_state IN (
      'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA', 'HI',
      'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MT', 'NE', 'NV', 'NH',
      'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'MD', 'MA', 'MI', 'MN',
      'MS', 'MO', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA',
      'WV', 'WI', 'WY')
  GROUP BY
    player_id,
    birthplace_state ) AS stats
LEFT JOIN
  `ncaa-basketball-extras.extras.state_population` AS pops
ON
  pops.abbreviation = stats.state
WHERE
  threes_att > 0
GROUP BY
  stats.state,
  pops.state,
  pops.population_2017
ORDER BY
  players_per_person ASC
"""

# Run with the standard-SQL dialect and preview the ten states with the
# fewest three-point-attempting players per capita.
players_with_attempts = gbq.read_gbq(
    query=players_with_attempts_q,
    dialect='standard',
    project_id=project_id
)
players_with_attempts.head(10)

Requesting query... ok.
Job ID: job_2_4VcClY6tldn8gSMmwrWwVOhDJL
Query running...
Query done.
Processed: 43.5 MB
Standard price: $0.00 USD

Retrieving results...
Got 51 rows.

Total time taken 1.99 s.
Finished at 2018-03-16 08:02:22.


Unnamed: 0,state_abbrev,state,num_players,num_people,players_per_person
0,ND,North Dakota,4,755393,5e-06
1,HI,Hawaii,8,1427538,6e-06
2,ME,Maine,10,1335907,7e-06
3,VT,Vermont,5,623657,8e-06
4,NM,New Mexico,23,2088070,1.1e-05
5,MT,Montana,14,1050493,1.3e-05
6,WY,Wyoming,8,579315,1.4e-05
7,MA,Massachusetts,96,6859819,1.4e-05
8,NH,New Hampshire,20,1342795,1.5e-05
9,CO,Colorado,84,5607154,1.5e-05


## Step 4

### Looks like four players have attempted three-pointers. Out of curiosity, how many of those four have made any three-pointers?

In [8]:
# Per-capita count of players who have made at least one three-pointer.
# The inner query totals makes per (player_id, birthplace_state); the outer
# WHERE keeps only players whose total is above zero, and the survivors are
# counted per state and divided by the state's 2017 population.
query = """
SELECT
  stats.state AS state_abbrev,
  pops.state,
  COUNT(player_id) AS num_players,
  pops.population_2017 AS num_people,
  COUNT(player_id) / pops.population_2017 AS players_per_person
FROM (
  SELECT
    player_id,
    birthplace_state AS state,
    SUM(three_points_made) AS threes_made
  FROM
    `bigquery-public-data.ncaa_basketball.mbb_players_games_sr`
  WHERE
    division_alias = "D1"
    AND birthplace_state IN ( 
      'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA', 'HI',
      'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MT', 'NE', 'NV', 'NH',
      'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'MD', 'MA', 'MI', 'MN',
      'MS', 'MO', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA',
      'WV', 'WI', 'WY')
  GROUP BY
    player_id,
    birthplace_state ) AS stats
LEFT JOIN
  `ncaa-basketball-extras.extras.state_population` AS pops
ON
  pops.abbreviation = stats.state
WHERE
  threes_made > 0
GROUP BY
  stats.state,
  pops.state,
  pops.population_2017
ORDER BY
  players_per_person ASC
"""

# Execute with the standard-SQL dialect; show the ten states with the fewest
# such players per person.
states = gbq.read_gbq(
    query=query,
    project_id=project_id,
    dialect='standard'
)
states.head(10)

Requesting query... ok.
Job ID: job_z3tn_2Vhr9-m4jKBz3l9uYbxsTob
Query running...
Query done.
Processed: 43.5 MB
Standard price: $0.00 USD

Retrieving results...
Got 51 rows.

Total time taken 2.18 s.
Finished at 2018-03-16 08:02:25.


Unnamed: 0,state_abbrev,state,num_players,num_people,players_per_person
0,ND,North Dakota,3,755393,4e-06
1,HI,Hawaii,6,1427538,4e-06
2,ME,Maine,8,1335907,6e-06
3,VT,Vermont,4,623657,6e-06
4,NM,New Mexico,19,2088070,9e-06
5,MT,Montana,12,1050493,1.1e-05
6,WY,Wyoming,7,579315,1.2e-05
7,MA,Massachusetts,84,6859819,1.2e-05
8,SD,South Dakota,11,869666,1.3e-05
9,CO,Colorado,74,5607154,1.3e-05


## Only three out of the 10 players from North Dakota have made a three-pointer! North Dakota produces great three-point shooters, but it produces very, very few of them.