In [2]:
import numpy as np
import pandas as pd
import psycopg2
from sqlalchemy import create_engine
import os
POSTGRES_PASSWORD = os.environ['POSTGRES_PASSWORD']

In [4]:
engine = create_engine("postgresql+psycopg2://{user}:{pw}@{service}:{port}/{db}".format(
    user = 'postgres',
    pw = POSTGRES_PASSWORD,
    service = 'postgres', 
    port = '5432',
    db = 'nba'
))

In [11]:
myquery = '''
SELECT 
    game_date,
    season,
    ot, 
    2*ot AS super_ot
FROM games 
WHERE season=2022 AND ot>=2
ORDER BY ot DESC
LIMIT 20
'''
pd.read_sql_query(myquery, con = engine)
#order by sorts 

Unnamed: 0,game_date,season,ot,super_ot
0,2021-11-26,2022,3,6
1,2022-01-29,2022,3,6
2,2022-02-17,2022,2,4
3,2021-10-20,2022,2,4
4,2021-10-30,2022,2,4
5,2021-11-27,2022,2,4


From player_game table, make a table with all players who scored more than 30 points in the game. Keep only the columns with the game and player_id, points (pts), and 3-point shots made and attempted (fg3, fg3a). Sort by points in descending order and display just the top 15

In [13]:
myquery = '''
SELECT 
    game_id, 
    player_id, 
    fg3, 
    fg3a,
    pts
FROM player_game
WHERE pts>30
ORDER BY pts DESC
LIMIT 15
'''
pd.read_sql_query(myquery, con = engine)

Unnamed: 0,game_id,player_id,fg3,fg3a,pts
0,202101030GSW,curryst01,8,16,62
1,202001200POR,lillada01,11,20,61
2,202008110DAL,lillada01,9,17,61
3,202001200POR,lillada01,11,20,61
4,202008110DAL,lillada01,9,17,61
5,201911300HOU,hardeja01,8,14,60
6,202101060PHI,bealbr01,7,10,60
7,202104300BOS,tatumja01,5,7,60
8,201911080POR,lillada01,7,16,60
9,201911080POR,lillada01,7,16,60


# Joins
- inner - keeps what matches in both tables - deletes what doesn't match
- full - keeps everything whether or not it matches
- left - keeps everything in first table and only what matches in second
- right - keeps everything in second table and only what matches in first

In [20]:
myquery = '''
SELECT 
    g.game_date, 
    p.player,
    pg.fg3, 
    pg.fg3a,
    pg.pts
FROM player_game pg
INNER JOIN players p
    ON pg.player_id = p.player_id
INNER JOIN games g
    on pg.game_id = g.game_id
WHERE pg.pts>30
ORDER BY pg.pts DESC
LIMIT 15
'''
pd.read_sql_query(myquery, con = engine)

Unnamed: 0,game_date,player,fg3,fg3a,pts
0,2021-01-03,Stephen Curry,8,16,62
1,2020-01-20,Damian Lillard,11,20,61
2,2020-08-11,Damian Lillard,9,17,61
3,2020-01-20,Damian Lillard,11,20,61
4,2020-08-11,Damian Lillard,9,17,61
5,2019-11-30,James Harden,8,14,60
6,2021-01-06,Bradley Beal,7,10,60
7,2021-04-30,Jayson Tatum,5,7,60
8,2019-11-08,Damian Lillard,7,16,60
9,2019-11-08,Damian Lillard,7,16,60


In the team_game table, who won each game?
That will require you to join the team_game to itself (game_id to game_id and team_abbrev to opponent_abbrev)
Keep the game_id, the team and opponent abbreviation, the team score and the other team's score renamed to 'opponent score'

In [27]:
myquery = '''
SELECT 
    tg1.game_id,
    tg1.team_abbrev,
    tg1.opponent_abbrev,
    tg1.team_score,
    tg2.team_score AS opponent_score,
    (tg1.team_score > tg2.team_score) as win
FROM team_game tg1
INNER JOIN team_game tg2
    ON tg1.opponent_abbrev = tg2.team_abbrev 
    AND tg2.game_id = tg1.game_id
'''
pd.read_sql_query(myquery, con = engine)

Unnamed: 0,game_id,team_abbrev,opponent_abbrev,team_score,opponent_score,win
0,202202170BRK,WAS,BRK,117,103,True
1,202202170BRK,BRK,WAS,103,117,False
2,202202170CHO,MIA,CHO,111,107,True
3,202202170CHO,CHO,MIA,107,111,False
4,202202170LAC,HOU,LAC,111,142,False
...,...,...,...,...,...,...
6389,202002270GSW,GSW,LAL,86,116,False
6390,202002290PHO,GSW,PHO,115,99,True
6391,202003010GSW,GSW,WAS,110,124,False
6392,202003030DEN,GSW,DEN,116,100,True
