# Day 47

I'm continuing my Weekends in Tableau series by updating my dataset to reflect the latest games and updating my "viz" to include other positions and their relevant stats.  

I want to document the daily work in this notebook, but the query itself should be added to `/etl/process_tables.py`, since the skeleton of the query won't change from week to week.

In [3]:
import pandas as pd
import sqlite3

# Open a connection to the project's SQLite database file
db_path = '../../data/db/database.db'
conn = sqlite3.connect(db_path)

## Query the Data

I'll change the week parameter to reflect the latest week of NFL data available.

In [4]:
# Upper bound (inclusive) on the `week` column for every table read by the query below
latest_week = 13

In [7]:
# Bind the season and the week cutoff as named parameters instead of formatting
# them into the SQL text. The SQL string is then a constant skeleton and only
# `query_params` changes from one week to the next.
query_params = {"season": 2022, "latest_week": latest_week}

# Query layout:
#   * one CTE per source table (weekly, pfr_pass, pfr_rec, pfr_rush, ngs_pass,
#     ngs_rec, ngs_rush, snap_counts), each filtered to :season and to weeks up
#     to :latest_week;
#   * the pfr_* and snap_counts CTEs join `ids` on pfr_id to pick up gsis_id;
#   * `joined_tables` LEFT JOINs everything onto weekly_data by
#     (player id, season, week), so every weekly_data row is kept.
# NOTE(review): the ngs_* CTEs filter with `BETWEEN 1 AND :latest_week` while the
# other CTEs use `<= :latest_week` -- presumably to exclude a week-0 row; confirm.
# NOTE(review): the saved tail(10) output shows every snap-count column empty --
# verify that the snap_counts/ids join keys actually match.
query = """
WITH weekly_data AS (
    SELECT
        player_id,
        player_display_name AS player_name,
        position,
        recent_team,
        season,
        week,
        season_type,
        sacks,
        passing_air_yards AS pass_air_yds,
        passing_yards_after_catch AS pass_yac,
        passing_epa AS pass_epa,
        passing_2pt_conversions AS pass_2pt_conv,
        pacr,
        rushing_epa AS rush_epa, 
        receiving_air_yards AS rec_air_yds,
        receiving_yards_after_catch AS rec_yac,
        receiving_epa AS rec_epa,
        racr,
        target_share,
        air_yards_share,
        wopr,
        fantasy_points AS fantasy_pts,
        fantasy_points_ppr AS fantasy_pts_ppr
    FROM weekly
    WHERE season = :season
        AND week <= :latest_week
        AND position IN ('QB', 'WR', 'RB', 'TE')),
pfr_pass_data AS (
    SELECT
        season,
        week,
        opponent,
        pfr_player_name AS player_name,
        passing_bad_throws,
        passing_bad_throw_pct,
        times_sacked,
        times_blitzed,
        times_hurried,
        times_hit,
        times_pressured,
        times_pressured_pct,
        ids.gsis_id
    FROM pfr_pass
    JOIN ids
        ON ids.pfr_id = pfr_pass.pfr_player_id
    WHERE season = :season
        AND week <= :latest_week),
pfr_rec_data AS (
    SELECT
        season,
        week,
        opponent,
        pfr_player_name AS player_name,
        receiving_broken_tackles,
        receiving_drop,
        receiving_drop_pct,
        receiving_int,
        receiving_rat,
        ids.gsis_id
    FROM pfr_rec
    JOIN ids
        ON ids.pfr_id = pfr_rec.pfr_player_id
    WHERE season = :season
        AND week <= :latest_week),
pfr_rush_data AS (
    SELECT
        season,
        week,
        opponent,
        pfr_player_name AS player_name,
        carries,
        rushing_yards_before_contact AS rush_yds_before_contact,
        rushing_yards_before_contact_avg AS rush_yds_before_contact_avg,
        rushing_yards_after_contact AS rush_yds_after_contact,
        rushing_yards_after_contact_avg AS rush_yds_after_contact_avg,
        rushing_broken_tackles AS rush_broken_tackles,
        ids.gsis_id
    FROM pfr_rush
    JOIN ids
        ON ids.pfr_id = pfr_rush.pfr_player_id
    WHERE season = :season
        AND week <= :latest_week),
ngs_pass_data AS (
    SELECT
        season,
        week,
        player_display_name AS player_name,
        attempts,
        pass_yards,
        pass_touchdowns AS pass_tds,
        interceptions,
        avg_time_to_throw,
        avg_completed_air_yards,
        avg_air_yards_differential,
        aggressiveness,
        max_completed_air_distance,
        avg_air_yards_to_sticks,
        passer_rating,
        completions,
        completion_percentage,
        expected_completion_percentage,
        completion_percentage_above_expectation,
        avg_air_distance,
        max_air_distance,
        player_gsis_id
    FROM ngs_pass
    WHERE season = :season
        AND week BETWEEN 1 AND :latest_week),
ngs_rec_data AS (
    SELECT
        season,
        week,
        player_display_name AS player_name,
        avg_cushion,
        avg_separation,
        avg_intended_air_yards,
        percent_share_of_intended_air_yards,
        receptions,
        targets,
        catch_percentage,
        yards,
        rec_touchdowns,
        avg_yac,
        avg_expected_yac,
        avg_yac_above_expectation,
        player_gsis_id
    FROM ngs_rec
    WHERE season = :season
        AND week BETWEEN 1 AND :latest_week),
ngs_rush_data AS (
    SELECT
        season,
        week,
        player_display_name AS player_name,
        efficiency,
        percent_attempts_gte_eight_defenders,
        avg_time_to_los,
        rush_attempts,
        rush_yards,
        expected_rush_yards,
        rush_yards_over_expected,
        avg_rush_yards,
        rush_yards_over_expected_per_att,
        rush_pct_over_expected,
        rush_touchdowns,
        player_gsis_id
    FROM ngs_rush
    WHERE season = :season
        AND week BETWEEN 1 AND :latest_week),
snap_counts_data AS (
    SELECT
        season,
        week,
        player AS player_name,
        --opponent,
        offense_snaps,
        offense_pct AS offense_snaps_pct,
        defense_snaps,
        defense_pct AS defense_snaps_pct,
        st_snaps,
        st_pct AS st_snaps_pct,
        ids.gsis_id
    FROM snap_counts
    JOIN ids
        ON ids.pfr_id = snap_counts.pfr_player_id
    WHERE season = :season
        AND week <= :latest_week),
joined_tables AS (
    SELECT
        player_id,
        weekly_data.player_name,
        weekly_data.position,
        recent_team,
        weekly_data.season,
        weekly_data.week,
        season_type,
        sacks,
        pass_air_yds,
        pass_yac,
        pass_epa,
        pass_2pt_conv,
        pacr,
        rush_epa, 
        rec_air_yds,
        rec_yac,
        rec_epa,
        racr,
        target_share,
        air_yards_share,
        wopr,
        fantasy_pts,
        fantasy_pts_ppr,
        passing_bad_throws,
        passing_bad_throw_pct,
        times_sacked,
        times_blitzed,
        times_hurried,
        times_hit,
        times_pressured,
        times_pressured_pct,
        receiving_broken_tackles,
        receiving_drop,
        receiving_drop_pct,
        receiving_int,
        receiving_rat,
        carries,
        rush_yds_before_contact,
        rush_yds_before_contact_avg,
        rush_yds_after_contact,
        rush_yds_after_contact_avg,
        rush_broken_tackles,
        attempts,
        pass_yards,
        pass_tds,
        interceptions,
        avg_time_to_throw,
        avg_completed_air_yards,
        avg_air_yards_differential,
        aggressiveness,
        max_completed_air_distance,
        avg_air_yards_to_sticks,
        passer_rating,
        completions,
        completion_percentage,
        expected_completion_percentage,
        completion_percentage_above_expectation,
        avg_air_distance,
        max_air_distance,
        avg_cushion,
        avg_separation,
        avg_intended_air_yards,
        percent_share_of_intended_air_yards,
        receptions,
        targets,
        catch_percentage,
        yards,
        rec_touchdowns,
        avg_yac,
        avg_expected_yac,
        avg_yac_above_expectation,
        efficiency,
        percent_attempts_gte_eight_defenders,
        avg_time_to_los,
        rush_attempts,
        rush_yards,
        expected_rush_yards,
        rush_yards_over_expected,
        avg_rush_yards,
        rush_yards_over_expected_per_att,
        rush_pct_over_expected,
        rush_touchdowns,
        offense_snaps,
        offense_snaps_pct,
        defense_snaps,
        defense_snaps_pct,
        st_snaps,
        st_snaps_pct
    FROM weekly_data
    LEFT JOIN pfr_pass_data
        ON pfr_pass_data.gsis_id = weekly_data.player_id
            AND pfr_pass_data.season = weekly_data.season
            AND pfr_pass_data.week = weekly_data.week
    LEFT JOIN pfr_rec_data
        ON pfr_rec_data.gsis_id = weekly_data.player_id
            AND pfr_rec_data.season = weekly_data.season
            AND pfr_rec_data.week = weekly_data.week
    LEFT JOIN pfr_rush_data
        ON pfr_rush_data.gsis_id = weekly_data.player_id
            AND pfr_rush_data.season = weekly_data.season
            AND pfr_rush_data.week = weekly_data.week
    LEFT JOIN ngs_pass_data
        ON ngs_pass_data.player_gsis_id = weekly_data.player_id
            AND ngs_pass_data.season = weekly_data.season
            AND ngs_pass_data.week = weekly_data.week
    LEFT JOIN ngs_rec_data
        ON ngs_rec_data.player_gsis_id = weekly_data.player_id
            AND ngs_rec_data.season = weekly_data.season
            AND ngs_rec_data.week = weekly_data.week
    LEFT JOIN ngs_rush_data
        ON ngs_rush_data.player_gsis_id = weekly_data.player_id
            AND ngs_rush_data.season = weekly_data.season
            AND ngs_rush_data.week = weekly_data.week
    LEFT JOIN snap_counts_data
        ON snap_counts_data.gsis_id = weekly_data.player_id
            AND snap_counts_data.season = weekly_data.season
            AND snap_counts_data.week = weekly_data.week
)
SELECT *
FROM joined_tables
"""

# sqlite3 resolves each `:name` placeholder from the dict passed via `params`
df = pd.read_sql(query, conn, params=query_params)
df.tail(10)

Unnamed: 0,player_id,player_name,position,recent_team,season,week,season_type,sacks,pass_air_yds,pass_yac,...,avg_rush_yards,rush_yards_over_expected_per_att,rush_pct_over_expected,rush_touchdowns,offense_snaps,offense_snaps_pct,defense_snaps,defense_snaps_pct,st_snaps,st_snaps_pct
3766,00-0038134,Kenneth Walker,RB,SEA,2022,3,REG,0.0,0.0,0.0,...,,,,,,,,,,
3767,00-0038134,Kenneth Walker,RB,SEA,2022,4,REG,0.0,0.0,0.0,...,,,,,,,,,,
3768,00-0038134,Kenneth Walker,RB,SEA,2022,5,REG,0.0,0.0,0.0,...,,,,,,,,,,
3769,00-0038134,Kenneth Walker,RB,SEA,2022,6,REG,0.0,0.0,0.0,...,4.619048,1.813149,0.35,1.0,,,,,,
3770,00-0038134,Kenneth Walker,RB,SEA,2022,7,REG,0.0,0.0,0.0,...,7.26087,3.748078,0.478261,2.0,,,,,,
3771,00-0038134,Kenneth Walker,RB,SEA,2022,8,REG,0.0,0.0,0.0,...,2.833333,-1.306878,0.222222,1.0,,,,,,
3772,00-0038134,Kenneth Walker,RB,SEA,2022,9,REG,0.0,0.0,0.0,...,4.192308,-0.212514,0.346154,2.0,,,,,,
3773,00-0038134,Kenneth Walker,RB,SEA,2022,10,REG,0.0,0.0,0.0,...,1.7,-3.612524,0.0,0.0,,,,,,
3774,00-0038134,Kenneth Walker,RB,SEA,2022,12,REG,0.0,0.0,0.0,...,1.857143,-2.561676,0.153846,2.0,,,,,,
3775,00-0038134,Kenneth Walker,RB,SEA,2022,13,REG,0.0,0.0,0.0,...,,,,,,,,,,


In [8]:
# Derive the export file name from `latest_week` so the name always matches the
# weeks actually contained in `df` (previously the week number was hard-coded).
output_path = f'../../data/output/for_tableau_all_data_week_{latest_week}.csv'
df.to_csv(output_path, index=False)