# Day 40

For today's work, I think I'll start something new: Weekends in Tableau. I'll begin with some dashboards aggregating player stats (and, of course, fantasy points) for the current season, then build on them every weekend and see where it goes.

To accomplish this, I'll first need to grab all the data required to make an interesting and effective dashboard. I want to bring in not only basic stats like passing yards and touchdowns but also advanced metrics from Pro Football Reference (the `pfr_*` tables) and Next Gen Stats (the `ngs_*` tables).

I'll focus on the query today and clean it up tomorrow if need be.

In [1]:
import pandas as pd
import sqlite3

# Open a connection to the project's SQLite database.
# The path is relative, so it resolves against the kernel's working directory.
DB_PATH = '../../data/db/database.db'
conn = sqlite3.connect(DB_PATH)

## Query the Data

In [2]:
# Season and last week to pull. These are bound into the query as named
# parameters (:season / :max_week) so the filter in every CTE stays in sync
# when the dashboard is refreshed for a later week or another season.
SEASON = 2022
MAX_WEEK = 12

# Player-week rows from `weekly` (QB/WR/RB/TE only), with the pfr_* tables,
# the ngs_* tables, and snap counts LEFT JOINed on (player id, season, week).
# The pfr_* and snap_counts tables are keyed by a PFR id, so they go through
# `ids` to pick up the gsis_id used by `weekly`.
# NOTE: the final SELECT * keeps every column from every CTE, so repeated
# names (season, week, player_name, gsis_id, ...) come back with numeric
# suffixes such as `season:1`.
query = """
WITH weekly_data AS (
    SELECT
        player_id,
        player_display_name AS player_name,
        position,
        recent_team,
        season,
        week,
        season_type,
        completions,
        attempts,
        passing_yards AS pass_yds,
        passing_tds AS pass_tds,
        interceptions as ints,
        sacks,
        passing_air_yards AS pass_air_yds,
        passing_yards_after_catch AS pass_yac,
        passing_epa AS pass_epa,
        passing_2pt_conversions AS pass_2pt_conv,
        pacr,
        carries,
        rushing_yards AS rush_yds,
        rushing_tds AS rush_tds,
        rushing_epa AS rush_epa, 
        receptions, 
        targets,
        receiving_yards AS rec_yds,
        receiving_tds AS rec_tds,
        receiving_air_yards AS rec_air_yds,
        receiving_yards_after_catch AS rec_yac,
        receiving_epa AS rec_epa,
        racr,
        target_share,
        air_yards_share,
        wopr,
        fantasy_points AS fantasy_pts,
        fantasy_points_ppr AS fantasy_pts_ppr
    FROM weekly
    WHERE season = :season
        AND week <= :max_week
        AND position IN ('QB', 'WR', 'RB', 'TE')),
pfr_pass_data AS (
    SELECT
        season,
        week,
        opponent,
        pfr_player_name AS player_name,
        passing_bad_throws,
        passing_bad_throw_pct,
        times_sacked,
        times_blitzed,
        times_hurried,
        times_hit,
        times_pressured,
        times_pressured_pct,
        ids.gsis_id
    FROM pfr_pass
    JOIN ids
        ON ids.pfr_id = pfr_pass.pfr_player_id
    WHERE season = :season
        AND week <= :max_week),
pfr_rec_data AS (
    SELECT
        season,
        week,
        opponent,
        pfr_player_name AS player_name,
        receiving_broken_tackles,
        receiving_drop,
        receiving_drop_pct,
        receiving_int,
        receiving_rat,
        ids.gsis_id
    FROM pfr_rec
    JOIN ids
        ON ids.pfr_id = pfr_rec.pfr_player_id
    WHERE season = :season
        AND week <= :max_week),
pfr_rush_data AS (
    SELECT
        season,
        week,
        opponent,
        pfr_player_name AS player_name,
        carries,
        rushing_yards_before_contact AS rush_yds_before_contact,
        rushing_yards_before_contact_avg AS rush_yds_before_contact_avg,
        rushing_yards_after_contact AS rush_yds_after_contact,
        rushing_yards_after_contact_avg AS rush_yds_after_contact_avg,
        rushing_broken_tackles AS rush_broken_tackles,
        ids.gsis_id
    FROM pfr_rush
    JOIN ids
        ON ids.pfr_id = pfr_rush.pfr_player_id
    WHERE season = :season
        AND week <= :max_week),
ngs_pass_data AS (
    SELECT
        season,
        week,
        player_display_name AS player_name,
        avg_time_to_throw,
        avg_completed_air_yards,
        avg_intended_air_yards,
        avg_air_yards_differential,
        aggressiveness,
        max_completed_air_distance,
        avg_air_yards_to_sticks,
        passer_rating,
        completion_percentage,
        expected_completion_percentage,
        completion_percentage_above_expectation,
        avg_air_distance,
        max_air_distance,
        player_gsis_id
    FROM ngs_pass
    WHERE season = :season
        AND week BETWEEN 1 AND :max_week),
ngs_rec_data AS (
    SELECT
        season,
        week,
        player_display_name AS player_name,
        avg_cushion,
        avg_separation,
        avg_intended_air_yards,
        percent_share_of_intended_air_yards,
        receptions,
        targets,
        catch_percentage,
        yards,
        rec_touchdowns,
        avg_yac,
        avg_expected_yac,
        avg_yac_above_expectation,
        -- use as join_key
        player_gsis_id
    FROM ngs_rec
    WHERE season = :season
        AND week BETWEEN 1 AND :max_week),
ngs_rush_data AS (
    SELECT
        season,
        week,
        player_display_name AS player_name,
        efficiency,
        percent_attempts_gte_eight_defenders,
        avg_time_to_los,
        rush_attempts,
        rush_yards,
        expected_rush_yards,
        rush_yards_over_expected,
        avg_rush_yards,
        rush_yards_over_expected_per_att,
        rush_pct_over_expected,
        rush_touchdowns,
        player_gsis_id
    FROM ngs_rush
    WHERE season = :season
        AND week BETWEEN 1 AND :max_week),
snap_counts_data AS (
    SELECT
        season,
        week,
        player AS player_name,
        --opponent,
        offense_snaps,
        offense_pct,
        defense_snaps,
        defense_pct,
        st_snaps,
        st_pct,
        ids.gsis_id
    FROM snap_counts
    JOIN ids
        ON ids.pfr_id = snap_counts.pfr_player_id
    WHERE season = :season
        AND week <= :max_week),
joined_tables AS (
    SELECT
        *
    FROM weekly_data
    LEFT JOIN pfr_pass_data
        ON pfr_pass_data.gsis_id = weekly_data.player_id
            AND pfr_pass_data.season = weekly_data.season
            AND pfr_pass_data.week = weekly_data.week
    LEFT JOIN pfr_rec_data
        ON pfr_rec_data.gsis_id = weekly_data.player_id
            AND pfr_rec_data.season = weekly_data.season
            AND pfr_rec_data.week = weekly_data.week
    LEFT JOIN pfr_rush_data
        ON pfr_rush_data.gsis_id = weekly_data.player_id
            AND pfr_rush_data.season = weekly_data.season
            AND pfr_rush_data.week = weekly_data.week
    LEFT JOIN ngs_pass_data
        ON ngs_pass_data.player_gsis_id = weekly_data.player_id
            AND ngs_pass_data.season = weekly_data.season
            AND ngs_pass_data.week = weekly_data.week
    LEFT JOIN ngs_rec_data
        ON ngs_rec_data.player_gsis_id = weekly_data.player_id
            AND ngs_rec_data.season = weekly_data.season
            AND ngs_rec_data.week = weekly_data.week
    LEFT JOIN ngs_rush_data
        ON ngs_rush_data.player_gsis_id = weekly_data.player_id
            AND ngs_rush_data.season = weekly_data.season
            AND ngs_rush_data.week = weekly_data.week
    LEFT JOIN snap_counts_data
        ON snap_counts_data.gsis_id = weekly_data.player_id
            AND snap_counts_data.season = weekly_data.season
            AND snap_counts_data.week = weekly_data.week
)
SELECT *
FROM joined_tables
"""

# Bind the named placeholders via the driver rather than formatting them
# into the SQL text.
df = pd.read_sql(query, conn, params={"season": SEASON, "max_week": MAX_WEEK})
df.head(10)

Unnamed: 0,player_id,player_name,position,recent_team,season,week,season_type,completions,attempts,pass_yds,...,season:480322260,week:1064012515,player_name:2672220405,offense_snaps,offense_pct,defense_snaps,defense_pct,st_snaps,st_pct,gsis_id:3
0,00-0019596,Tom Brady,QB,TB,2022,1,REG,18,27,212.0,...,2022.0,1.0,Tom Brady,62.0,1.0,0.0,0.0,0.0,0.0,00-0019596
1,00-0019596,Tom Brady,QB,TB,2022,2,REG,18,34,190.0,...,2022.0,2.0,Tom Brady,67.0,1.0,0.0,0.0,1.0,0.04,00-0019596
2,00-0019596,Tom Brady,QB,TB,2022,3,REG,31,42,271.0,...,2022.0,3.0,Tom Brady,65.0,1.0,0.0,0.0,0.0,0.0,00-0019596
3,00-0019596,Tom Brady,QB,TB,2022,4,REG,39,52,385.0,...,2022.0,4.0,Tom Brady,64.0,1.0,0.0,0.0,0.0,0.0,00-0019596
4,00-0019596,Tom Brady,QB,TB,2022,5,REG,35,52,351.0,...,2022.0,5.0,Tom Brady,79.0,1.0,0.0,0.0,0.0,0.0,00-0019596
5,00-0019596,Tom Brady,QB,TB,2022,6,REG,25,40,243.0,...,2022.0,6.0,Tom Brady,72.0,1.0,0.0,0.0,0.0,0.0,00-0019596
6,00-0019596,Tom Brady,QB,TB,2022,7,REG,32,49,290.0,...,2022.0,7.0,Tom Brady,67.0,1.0,0.0,0.0,0.0,0.0,00-0019596
7,00-0019596,Tom Brady,QB,TB,2022,8,REG,26,44,325.0,...,2022.0,8.0,Tom Brady,68.0,1.0,0.0,0.0,0.0,0.0,00-0019596
8,00-0019596,Tom Brady,QB,TB,2022,9,REG,36,58,280.0,...,2022.0,9.0,Tom Brady,84.0,1.0,0.0,0.0,0.0,0.0,00-0019596
9,00-0019596,Tom Brady,QB,TB,2022,10,REG,22,29,258.0,...,2022.0,10.0,Tom Brady,75.0,1.0,0.0,0.0,0.0,0.0,00-0019596


In [7]:
# List every column name in the joined frame, one per line.
# The guard keeps the output empty (no blank line) when there are no columns.
if len(df.columns) > 0:
    print(*df.columns, sep="\n")

player_id
player_name
position
recent_team
season
week
season_type
completions
attempts
pass_yds
pass_tds
ints
sacks
pass_air_yds
pass_yac
pass_epa
pass_2pt_conv
pacr
carries
rush_yds
rush_tds
rush_epa
receptions
targets
rec_yds
rec_tds
rec_air_yds
rec_yac
rec_epa
racr
target_share
air_yards_share
wopr
fantasy_pts
fantasy_pts_ppr
season:1
week:1
opponent
player_name:1
passing_bad_throws
passing_bad_throw_pct
times_sacked
times_blitzed
times_hurried
times_hit
times_pressured
times_pressured_pct
gsis_id
season:2
week:2
opponent:1
player_name:2
receiving_broken_tackles
receiving_drop
receiving_drop_pct
receiving_int
receiving_rat
gsis_id:1
season:3
week:3
opponent:2
player_name:3
carries:1
rush_yds_before_contact
rush_yds_before_contact_avg
rush_yds_after_contact
rush_yds_after_contact_avg
rush_broken_tackles
gsis_id:2
season:4
week:4
player_name:4
avg_time_to_throw
avg_completed_air_yards
avg_intended_air_yards
avg_air_yards_differential
aggressiveness
max_completed_air_distance
avg_air_