In [None]:
pip install streamlit

In [None]:
pip install nfl_data_py

In [None]:
pip install duckdb

In [None]:
pip install plotly

In [None]:
import nfl_data_py as nfl  # NFL data retrieval and analysis (via https://pypi.org/project/nfl-data-py/)
import pandas as pd  # Data manipulation and analysis
import duckdb # Used to write SQL inside Python script
import plotly.express as px # Used to create Python visualizations
import plotly.graph_objects as go # Used to create Python visualizations

In [None]:
# Pandas display settings: lift the default limits so rendered dataframes are not truncated
pd.options.display.max_rows = None  # No cap on the number of rows shown
pd.options.display.max_columns = None  # No cap on the number of columns shown
pd.options.display.width = None  # No fixed display width (lets wide frames scroll horizontally)

# Seasons to request and the play-by-play fields to keep; both are passed to the
# play-by-play import below to limit how much of the large data set is loaded.
years = list(range(2020, 2024))  # 2020 through 2023 inclusive
columns = [
    # Game / drive identifiers and outcomes
    'season', 'game_id', 'drive', 'series', 'series_result', 'fixed_drive_result', 'desc',
    # Playing conditions
    'weather', 'roof', 'surface', 'temp', 'wind',
    # Kickoff participants, penalties and return details
    'kicker_player_id', 'kickoff_returner_player_id', 'penalty', 'return_team', 'return_yards',
    'penalty_player_id', 'penalty_type', 'penalty_yards', 'end_yard_line',
    # Kickoff outcome flags
    'kickoff_inside_twenty', 'kickoff_in_endzone', 'kickoff_out_of_bounds', 'kickoff_downed',
    'kickoff_fair_catch', 'kick_distance', 'fumble_lost',
    # Fields used to derive starting field position and to filter plays
    'drive_start_yard_line', 'touchdown', 'defteam', 'play_type', 'play_deleted',
]

# Load the source data sets into dataframes
df_game_log = nfl.import_pbp_data(  # Play-by-play data for the requested seasons and columns
    years,
    columns,
    downcast=True,
    cache=False,
    alt_path=None,
)
df_players = nfl.import_seasonal_rosters(years)  # Rosters for each requested season
df_teams = nfl.import_team_desc()  # Team descriptive information (joined on team_abbr below)

# Use SQL to create the main play-level kickoff data set.
# DuckDB reads the df_game_log and df_teams dataframes directly by variable name.
# Rows kept: non-deleted kickoff plays that have a return team, joined to df_teams for the team name.
# Derived columns:
#   team_logo_espn - ESPN logo URL built from the return team's abbreviation
#   injury         - 1 when the play description contains 'injur' (case-insensitive), else 0
#   yardline_100   - starting field position measured from the return team's own goal line
#                    (100 when the kickoff play itself is a touchdown)
# NOTE(review): the logo URL is built from the play-by-play abbreviation; confirm ESPN serves a logo
# for every abbreviation used here (df_teams may already carry a ready-made logo URL - verify).
kickoffs = duckdb.sql("""select
                            a.season,
                            a.game_id,
                            a.drive,
                            a.series,
                            a.series_result,
                            a.fixed_drive_result,
                            a.desc,
                            a.weather,
                            a.roof,
                            a.surface,
                            a.temp,
                            a.wind,
                            a.kicker_player_id,
                            a.kickoff_returner_player_id,
                            a.penalty,
                            a.return_team,
                            b.team_name,
                            'https://a.espncdn.com/combiner/i?img=/i/teamlogos/nfl/500/' || a.return_team || '.png&h=200&w=200' as team_logo_espn,
                            a.return_yards,
                            a.penalty_player_id,
                            a.penalty_type,
                            a.penalty_yards,
                            a.end_yard_line,
                            a.kickoff_inside_twenty,
                            a.kickoff_in_endzone,
                            a.kickoff_out_of_bounds,
                            a.kickoff_downed,
                            a.kickoff_fair_catch,
                            a.kick_distance,
                            case when lower(a.desc) like '%injur%' then 1 else 0 end as injury,
                            a.fumble_lost,
                            a.drive_start_yard_line,
                            a.touchdown,
                            case
                                when a.touchdown=1 then 100
                                -- Drive starts outside the return team's own territory: strip the kicking team's
                                -- prefix and mirror the yard number across midfield.
                                -- The own-territory test is a prefix match on '<team> ' so that an abbreviation
                                -- that is a substring of another (e.g. LA inside LAC) is not mistaken for it.
                                when a.drive_start_yard_line not like a.return_team || ' %' then 50 + (50 - cast(replace(a.drive_start_yard_line,a.defteam||' ','') as float))
                                -- Drive starts in the return team's own territory: the yard number is used as is.
                                else cast(replace(a.drive_start_yard_line,a.return_team||' ','') as float)
                            end as yardline_100

                            from
                            df_game_log a inner join
                            df_teams b on a.return_team = b.team_abbr

                            where
                            a.play_type in ('kickoff') and
                            a.play_deleted=0 and
                            a.return_team is not null
                            """).df()

# Use SQL to summarize/aggregate the main kickoff data set, kickoffs, at the season level.
# Output columns (one row per season, sorted by season so the row order is stable):
#   number_kickoffs       - kickoff plays in the season
#   avg_starting_position - mean of yardline_100
#   touchdown_return_rate - share of kickoffs where touchdown=1
#   touchdown_rate_on_drives_following_kickoffs - share of kickoffs whose drive result is
#       'Field goal' or 'Touchdown' (i.e. any score; the column name is kept for compatibility)
#   injury_rate           - share of kickoffs flagged with injury=1
#   return_rate           - share of kickoffs that have a returner player id
kickoffs_agg = duckdb.sql("""select 
                                season,
                                count(*) as number_kickoffs,
                                avg(yardline_100) as avg_starting_position,
                                sum(case when touchdown=1 then 1 else 0 end)/(count(*)*1.0) as touchdown_return_rate,
                                sum(case when fixed_drive_result in ('Field goal','Touchdown') then 1 else 0 end)/(count(*)*1.0) as touchdown_rate_on_drives_following_kickoffs,
                                sum(injury)/(count(*)*1.0) as injury_rate,
                                sum(case when kickoff_returner_player_id is not null then 1 else 0 end)/(count(*)*1.0) as return_rate
                                
                                from
                                kickoffs
                                
                                group by
                                season

                                order by
                                season""").df()

# Use SQL to summarize/aggregate the main kickoff data set, kickoffs, at the team level.
# Only the most recent season present in kickoffs is used (join against max(season)).
# Output columns (one row per return team, sorted by team name so the row order is stable):
#   url                   - the team's logo URL (team_logo_espn)
#   team_logo_size        - constant 0.05 carried on every row
#   number_kickoffs       - kickoff plays for the team
#   avg_starting_position - mean of yardline_100
#   touchdown_return_rate - share of kickoffs where touchdown=1
#   touchdown_rate_on_drives_following_kickoffs - share of kickoffs whose drive result is
#       'Field goal' or 'Touchdown' (i.e. any score; the column name is kept for compatibility)
#   injury_rate           - share of kickoffs flagged with injury=1
#   return_rate           - share of kickoffs that have a returner player id
kickoffs_team_agg = duckdb.sql("""select 
                                team_name,
                                team_logo_espn as url,
                                0.05 as team_logo_size,
                                count(*) as number_kickoffs,
                                avg(yardline_100) as avg_starting_position,
                                sum(case when touchdown=1 then 1 else 0 end)/(count(*)*1.0) as touchdown_return_rate,
                                sum(case when fixed_drive_result in ('Field goal','Touchdown') then 1 else 0 end)/(count(*)*1.0) as touchdown_rate_on_drives_following_kickoffs,
                                sum(injury)/(count(*)*1.0) as injury_rate,
                                sum(case when kickoff_returner_player_id is not null then 1 else 0 end)/(count(*)*1.0) as return_rate
                                
                                from
                                kickoffs a inner join
                                (select max(season) as max_season
                                   from kickoffs) b on a.season = b.max_season
                                
                                group by
                                team_name,
                                team_logo_espn

                                order by
                                team_name""").df()

# Display the team aggregate data set
kickoffs_team_agg

In [None]:
# Series pulled from the team-level summary: x/y coordinates and the logo drawn at each point
x = kickoffs_team_agg['avg_starting_position']
y = kickoffs_team_agg['touchdown_rate_on_drives_following_kickoffs']
image_urls = kickoffs_team_agg['url']

# Start from an empty figure
fig = go.Figure()

# Plot the points with fully transparent markers; the team logos added below stand in for them
logo_anchor_points = go.Scatter(
    x=x,
    y=y,
    mode='markers',
    marker={'opacity': 0},
)
fig.add_trace(logo_anchor_points)

# Place one logo image per team, centered on that team's (x, y) data point
for logo_url, logo_x, logo_y in zip(image_urls, x, y):
    fig.add_layout_image(
        source=logo_url,  # Image to draw
        xref="x",  # Interpret the image's x position in x-axis data coordinates
        yref="y",  # Interpret the image's y position in y-axis data coordinates
        x=logo_x,
        y=logo_y,
        sizex=0.1,  # Adjust size as needed
        sizey=0.1,  # Adjust size as needed
        xanchor="center",  # Horizontally center the image on its x position
        yanchor="middle",  # Vertically center the image on its y position
    )

# Titles, chart height, and axis formatting
fig.update_layout(
    title="Scoring Rate by Average Starting Field Position",
    height=800,
    xaxis=dict(
        title="Average Starting Field Position",
        showgrid=True,
        tickformat='.1f',  # Numbers rounded to one decimal place
    ),
    yaxis=dict(
        title="Scoring Rate (% of Drives Following Kickoff with a Score)",
        showgrid=True,
        tickformat='.1%',  # Percentages rounded to one decimal place
    ),
)

# Render the figure in the notebook
fig.show()

# Display the plot in Streamlit (future use)
#st.plotly_chart(fig)