**Notes for querying postgreSQL in the terminal**

Enter the database:
<br>
`psql baseball -h localhost -U lacar`

pw: 

Find the tables of the database:
<br>
`\dt`

Get all columns of a table
<br>
`\d+ my_table`

Selecting based on column value (note single quote marks)
<br>
`SELECT * FROM player_id
WHERE name_last='machado';`

Selecting based on two column values
<br>
`SELECT * FROM player_id
WHERE name_last='machado'
AND name_first='manny';`

Count the number of games that Machado has played 3B in 2019
<br>
Machado's MLB key: 592518

Tried several things

Gives all pitches:
`SELECT COUNT("game_date") FROM statcast
WHERE "fielder_5"=592518 AND "game_date"
BETWEEN '2019-01-01' AND '2019-12-31' LIMIT 5;`

To get unique game dates:
`SELECT DISTINCT "game_date" FROM statcast WHERE "fielder_5"=592518 AND "game_date" BETWEEN '2019-01-01' AND '2019-12-31' LIMIT 5;`

To get number of unique game dates (need the table AS):
<br>
`SELECT COUNT(*) FROM (SELECT DISTINCT "game_date" FROM statcast WHERE "fielder_5"=592518 AND "game_date" BETWEEN '2019-01-01' AND '2019-12-31' LIMIT 5) AS machado_3b_games;`

... and without limit
<br>
`SELECT COUNT(*) FROM (SELECT DISTINCT "game_date" FROM statcast WHERE "fielder_5"=592518 AND "game_date" BETWEEN '2019-01-01' AND '2019-12-31') AS machado_3b_games;`
 
 `count 
   119
(1 row)`

Games at SS:
<br>
`SELECT COUNT(*) FROM (SELECT DISTINCT "game_date" FROM statcast WHERE "fielder_6"=592518 AND "game_date" BETWEEN '2019-01-01' AND '2019-12-31') AS machado_3b_games;`
<br>
`count 
    37
(1 row)`




Try advanced query

# Example of how to format a long query

In [None]:
def return_df_metric_rate_batter(metric, batter_id):

    sql_query = """
    SELECT "game_date", "batter", "events" FROM statcast
    WHERE "batter"= 
    """ + str(batter_id) + """
    AND "game_date" BETWEEN '2019-03-28' AND '2019-04-30'
    AND "events" IS NOT NULL
    """

    df_events = pd.read_sql_query(sql_query,con)
    
    df_summary = df_events.groupby('events').count()['game_date'] / df_events.count()[0]
    return df_summary[metric]

# For practice

In [6]:
import pandas as pd

import sqlalchemy
import sqlalchemy_utils
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import psycopg2

In [7]:
# Define a database name
# Set your postgres username
dbname = "baseball"
username = "lacar"  # change this to your username

In [9]:
# Working with PostgreSQL in Python
# Connect to make queries using psycopg2
con = None
con = psycopg2.connect(database=dbname, user=username)

In [10]:
## Get Padres stats for 2019


sql_query = """
SELECT "game_date", "batter", "events", "home_team" FROM statcast
WHERE "game_date" BETWEEN '2019-03-28' AND '2019-04-30'
AND "events" IS NOT NULL
AND "home_team"='SD'
LIMIT 10
"""

df_query = pd.read_sql_query(sql_query,con)
    
print(df_query)



   game_date    batter                     events home_team
0 2019-04-24  570267.0  grounded_into_double_play        SD
1 2019-04-24  596129.0                     single        SD
2 2019-04-24  571745.0                  field_out        SD
3 2019-04-24  571976.0                  strikeout        SD
4 2019-04-24  665487.0                  force_out        SD
5 2019-04-24  594824.0                     single        SD
6 2019-04-24  642336.0                  field_out        SD
7 2019-04-24  429665.0                  strikeout        SD
8 2019-04-24  605480.0                  field_out        SD
9 2019-04-24  592387.0                  field_out        SD


In [36]:
## Get batters of average WAR (Renfroe like) 2019

sql_query = """
SELECT "Name", "WAR" FROM batting_stats
WHERE "WAR" BETWEEN 1.5 and 2.5
"""

df_query = pd.read_sql_query(sql_query,con)
    
print(df_query)

                  Name  WAR
0            Josh Bell  2.5
1         Kole Calhoun  2.5
2    Edwin Encarnacion  2.5
3          Khris Davis  2.5
4          Nelson Cruz  2.5
..                 ...  ...
231      Freddy Galvis  1.5
232       Amed Rosario  1.5
233    Kevin Kiermaier  1.5
234        Kyle Seager  1.5
235       Juan Lagares  1.5

[236 rows x 2 columns]


In [47]:
## Get batters of average WAR (Renfroe like) 2019


sql_query = """
SELECT "Name", "WAR", "Season" FROM batting_stats
WHERE "WAR" BETWEEN 1.9 and 2
AND "Season"=2019

"""

df_query = pd.read_sql_query(sql_query,con)
#print(df_query)
df_query

Unnamed: 0,Name,WAR,Season
0,Evan Longoria,2.0,2019.0
1,Charlie Blackmon,2.0,2019.0
2,Tommy La Stella,2.0,2019.0
3,Andrew Benintendi,2.0,2019.0
4,Danny Santana,1.9,2019.0
5,Eric Thames,1.9,2019.0
6,Ryan Braun,1.9,2019.0
7,Jose Abreu,1.9,2019.0
8,Eloy Jimenez,1.9,2019.0
9,Carson Kelly,1.9,2019.0


In [54]:
## Get Pitchers of average WAR (1.9-2) 2019

sql_query = """
SELECT "Name", "WAR", "Season" FROM pitching_stats
WHERE "Name"='Masahiro Tanaka'"
AND "Season"=2019
"""

df_query = pd.read_sql_query(sql_query,con)
#print(df_query)
df_query

DatabaseError: Execution failed on sql '
SELECT "Name", "WAR", "Season" FROM pitching_stats
WHERE "Name"='Masahiro Tanaka'"
AND "Season"=2019
': syntax error at or near ""
AND ""
LINE 3: WHERE "Name"='Masahiro Tanaka'"
                                      ^


In [26]:
sql_query = """
SELECT "Name", "WAR" FROM batting_stats
WHERE "Name" LIKE '%Renfroe'
LIMIT 10
"""

df_query = pd.read_sql_query(sql_query,con)    
print(df_query)


             Name  WAR
0  Hunter Renfroe  1.9
1  Hunter Renfroe  1.6
2  Hunter Renfroe  0.2


In [55]:
## Get Pitchers of average WAR (1.9-2) 2019

sql_query = """
SELECT "Name", "WAR", "Season" FROM pitching_stats
WHERE "WAR" BETWEEN 1.9 AND 2
AND "Season"=2019
"""

df_query = pd.read_sql_query(sql_query,con)
#print(df_query)
df_query



Unnamed: 0,Name,WAR,Season
0,Domingo German,2.0,2019.0
1,Wade Miley,2.0,2019.0
2,Brett Anderson,2.0,2019.0
3,Merrill Kelly,2.0,2019.0
4,Ivan Nova,2.0,2019.0
5,Tanner Roark,2.0,2019.0
6,Aroldis Chapman,2.0,2019.0
7,Martin Perez,1.9,2019.0
8,Trent Thornton,1.9,2019.0
9,Daniel Norris,1.9,2019.0


In [None]:
sql_query = """
SELECT "game_date", "batter", "events" FROM statcast
WHERE "batter"= 
""" + str(batter_id) + """
AND "game_date" BETWEEN '2019-03-28' AND '2019-04-30'
AND "events" IS NOT NULL
"""

df_query = pd.read_sql_query(sql_query,con)
    
print(df_query)
