# Day 39

I'm building on yesterday's query by filtering for number 1 overall draft picks that finished outside the 1st round. I bet most of those finishes are due to injury, so I will also join against the injuries table from the nflverse data in my database.

In [1]:
import pandas as pd
import sqlite3

# Open a connection to the local SQLite database file.
# The path is relative, so it is resolved against the current working directory.
conn = sqlite3.connect('../../data/db/database.db')

## Query the Data

In [9]:
# Builds one row per ADP entry (restricted to round 1-2 picks in 10-team PPR
# leagues), enriched with the player's season fantasy totals, their overall
# season rank, and a count of injury-report rows flagged as missed practice.
query = """
WITH adp_cleaned AS (
    -- ADP rows with names/teams normalised so they can be joined to nflverse data
    SELECT
        scoring,
        season,
        teams AS league_size,
        CASE
            WHEN player_id = 2071 AND name = 'CJ Anderson' THEN 'C.J. Anderson'
            ELSE name
        END AS player_name,
        position AS pos,
        /*
        ADP dataset has the most recent team that a player has played for, regardless of season.
        So I'll need to update teams for certain player-season combinations in order to join
        against the nflverse data which contains the player's fantasy points by season
        */
        CASE
            WHEN player_id = 1347 AND name = 'Chris Johnson' THEN 'TEN'
            WHEN player_id = 1340 AND name = 'Fred Jackson' AND season < 2015 THEN 'BUF'
            WHEN player_id = 1340 AND name = 'Fred Jackson' AND season = 2015 THEN 'SEA'
            -- NOTE(review): season 2014 matches neither C.J. Spiller branch and falls
            -- through to ELSE team -- confirm that gap is intended
            WHEN player_id = 1728 AND name = 'C.J. Spiller' AND season < 2014 THEN 'BUF'
            WHEN player_id = 1728 AND name = 'C.J. Spiller' AND season = 2015 THEN 'NO'
            WHEN player_id = 649 AND name = 'Maurice Jones-Drew' AND season < 2014 THEN 'JAX'
            ELSE team
        END AS team,
        adp_formatted,
        round,
        pick
    FROM adp
),
season_summary AS (
    -- One row per player-season with regular-season fantasy point totals
    SELECT
        player_id,
        -- Data Cleaning for future join against ADP table
        CASE
            WHEN player_display_name = 'Le''Veon Bell' THEN 'LeVeon Bell'
            WHEN player_display_name = 'Odell Beckham' THEN 'Odell Beckham Jr'
            WHEN player_display_name = 'Mike Vick' THEN 'Michael Vick'
            WHEN player_display_name = 'DK Metcalf' THEN 'D.K. Metcalf'
            ELSE player_display_name
        END AS player_name,
        season,
        -- Data Cleaning for future join against ADP table
        CASE
            WHEN player_id = '00-0029675' AND player_display_name = 'Trent Richardson' THEN 'RB'
            ELSE position
        END AS pos,
        -- Data Cleaning for future join against ADP table
        CASE
            WHEN recent_team = 'LA' THEN 'LAR'
            ELSE recent_team
        END AS team,
        /*
        position and recent_team are bare (non-grouped, non-aggregated) columns.
        For players who changed teams mid-season I want the values from their
        latest week. SQLite takes bare columns from the row holding the MAX when
        the query contains exactly one MIN()/MAX() aggregate, so this column
        pins them to the most recent week instead of depending on the scan
        order of a sorted subquery. Keep it the only MIN()/MAX() in this SELECT.
        */
        MAX(week) AS last_week,
        ROUND(SUM(fantasy_points), 2) AS tot_pts,
        ROUND(SUM(fantasy_points_ppr), 2) AS tot_pts_ppr
    FROM weekly
    WHERE season_type = 'REG'
        AND player_display_name IS NOT NULL
    GROUP BY player_id, player_display_name, season
),
rankings AS (
    -- Rank every player within a season by standard and by PPR points
    SELECT
        player_id,
        player_name,
        season,
        pos,
        team,
        tot_pts,
        tot_pts_ppr,
        RANK() OVER (PARTITION BY season ORDER BY tot_pts DESC) AS r_pts,
        RANK() OVER (PARTITION BY season ORDER BY tot_pts_ppr DESC) AS r_pts_ppr
    FROM season_summary
),
injury_counts AS (
    -- Number of injury-report rows per player-season-team where practice was missed
    SELECT
        season,
        full_name,
        team,
        position,
        COUNT(*) AS weeks_injured
    FROM injuries
    WHERE practice_status LIKE 'OUT%'
        OR practice_status LIKE 'Did Not Participate%'
    GROUP BY season, full_name, team, position
),
joined AS (
    -- Explicit, qualified columns: adp_cleaned and rankings share several names
    SELECT
        adp_cleaned.scoring,
        adp_cleaned.season,
        adp_cleaned.league_size,
        rankings.player_id,
        adp_cleaned.player_name,
        adp_cleaned.pos,
        adp_cleaned.team,
        adp_cleaned.adp_formatted,
        adp_cleaned.round,
        rankings.tot_pts_ppr,
        rankings.r_pts,
        rankings.r_pts_ppr,
        injury_counts.weeks_injured
    FROM adp_cleaned
    LEFT JOIN rankings
        ON rankings.player_name = adp_cleaned.player_name
        AND rankings.season = adp_cleaned.season
        -- Additional join keys added in case multiple players have same name
        AND rankings.pos = adp_cleaned.pos
        AND rankings.team = adp_cleaned.team
    LEFT JOIN injury_counts
        ON injury_counts.season = adp_cleaned.season
        AND injury_counts.full_name = adp_cleaned.player_name
        -- Additional join keys added in case multiple players have same name
        AND injury_counts.team = adp_cleaned.team
        AND injury_counts.position = adp_cleaned.pos
),
first_and_second_round_picks AS (
    SELECT
        scoring,
        season,
        league_size,
        player_id,
        player_name,
        pos,
        team,
        adp_formatted,
        tot_pts_ppr,
        r_pts,
        r_pts_ppr,
        weeks_injured
    FROM joined
    WHERE round IN (1, 2)
)
SELECT
    scoring,
    season,
    league_size,
    player_id,
    player_name,
    pos,
    team,
    adp_formatted,
    tot_pts_ppr,
    r_pts,
    r_pts_ppr,
    weeks_injured
FROM first_and_second_round_picks
WHERE scoring = 'ppr'
    AND league_size = 10
"""

# Run the query on the open connection and preview the first rows
df = pd.read_sql(query, conn)
df.head(10)

Unnamed: 0,scoring,season,league_size,player_id,player_name,pos,team,adp_formatted,tot_pts_ppr,r_pts,r_pts_ppr,weeks_injured
0,ppr,2010,10,00-0026164,Chris Johnson,RB,TEN,1.01,272.9,17.0,17.0,
1,ppr,2010,10,00-0025394,Adrian Peterson,RB,MIN,1.02,275.9,11.0,15.0,
2,ppr,2010,10,00-0026195,Ray Rice,RB,BAL,1.03,276.6,23.0,14.0,
3,ppr,2010,10,00-0024275,Maurice Jones-Drew,RB,JAX,1.05,234.1,33.0,35.0,2.0
4,ppr,2010,10,00-0022044,Andre Johnson,WR,HOU,1.05,256.6,44.0,23.0,3.0
5,ppr,2010,10,00-0023500,Frank Gore,RB,SF,1.06,202.5,58.0,56.0,
6,ppr,2010,10,00-0023459,Aaron Rodgers,QB,GB,1.08,304.48,3.0,4.0,1.0
7,ppr,2010,10,00-0011754,Randy Moss,WR,TEN,1.08,97.3,173.0,178.0,
8,ppr,2010,10,00-0022821,Michael Turner,RB,ATL,1.09,225.6,23.0,42.0,
9,ppr,2010,10,00-0020498,Reggie Wayne,WR,IND,2.01,280.5,45.0,12.0,1.0
