In [2]:
!pip install streamlit pandas numpy plotly seaborn pyngrok


Collecting streamlit
  Downloading streamlit-1.42.2-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.42.2-py2.py3-none-any.whl (9.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.6/9.6 MB[0m [31m71.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m121.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_6

In [None]:
import pandas as pd

# Load the dataset
file_path = "premier-player-23-24.csv"
df = pd.read_csv(file_path)

# Display basic info. DataFrame.info() prints its report itself and returns
# None, so it must not be wrapped in print() (that adds a stray "None" line).
df.info()

# Display first few rows (bare last expression -> rich notebook rendering)
df.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 580 entries, 0 to 579
Data columns (total 35 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Player       580 non-null    object 
 1   Nation       580 non-null    object 
 2   Pos          580 non-null    object 
 3   NewPos       580 non-null    object 
 4   Age          580 non-null    int64  
 5   MP           580 non-null    int64  
 6   Starts       580 non-null    int64  
 7   Min          580 non-null    int64  
 8   90s          580 non-null    float64
 9   Gls          580 non-null    int64  
 10  Ast          580 non-null    int64  
 11  G+A          580 non-null    int64  
 12  G-PK         580 non-null    int64  
 13  PK           580 non-null    int64  
 14  PKatt        580 non-null    int64  
 15  CrdY         580 non-null    int64  
 16  CrdR         580 non-null    int64  
 17  xG           580 non-null    float64
 18  npxG         580 non-null    float64
 19  xAG     

In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Cache data loading for performance
@st.cache_data
def load_data(path='premier-player-23-24.csv'):
    """Read the player statistics CSV and coerce its stat columns to numbers.

    Args:
        path: CSV file to read. Defaults to 'premier-player-23-24.csv', the
            file this app has always loaded, so existing ``load_data()`` calls
            behave as before.

    Returns:
        pandas.DataFrame in which every column listed in ``numeric_cols`` has
        been passed through ``pd.to_numeric``.

    Raises:
        KeyError: If one of the columns in ``numeric_cols`` is not in the file.
    """
    df = pd.read_csv(path)
    # Ensure numerical columns are properly typed
    numeric_cols = ['Age', 'MP', 'Starts', 'Min', '90s', 'Gls', 'Ast', 'G+A', 'G-PK', 'PK', 'PKatt',
                    'CrdY', 'CrdR', 'xG', 'npxG', 'xAG', 'npxG+xAG', 'PrgC', 'PrgP', 'PrgR',
                    'Gls_90', 'Ast_90', 'G+A_90', 'G-PK_90', 'G+A-PK_90', 'xG_90', 'xAG_90',
                    'xG+xAG_90', 'npxG_90', 'npxG+xAG_90']
    for col in numeric_cols:
        # errors='coerce' turns unparseable entries into NaN instead of raising
        df[col] = pd.to_numeric(df[col], errors='coerce')
    return df

# Fetch the player table that every section below works from
df = load_data()

# Page heading, followed by the introductory usage notes rendered as Markdown
intro_markdown = """
**Welcome to the Football Player Statistics Analyzer!**
This app allows you to explore player statistics from the English Premier League 2023-2024 season. Use the filters in the sidebar to customize your view, then explore the sections below for insights like top scorers, assist leaders, and more.
- **How to Use**: Adjust the filters on the left (e.g., select teams, positions, or set an age range), then check the visualizations and tables in the main area.
- **Data Source**: Player statistics from the EPL 2023-2024 season.
- **Tip**: Scroll down to 'Stat Definitions' for explanations of each metric!
"""
st.title("Football Player Statistics Analyzer")
st.markdown(intro_markdown)

# Sidebar Filters
st.sidebar.header("Filters")
# dropna() keeps sorted() from failing on a NaN mixed in with the string labels
team_options = sorted(df['Team'].dropna().unique())
position_options = sorted(df['Pos'].dropna().unique())
teams = st.sidebar.multiselect("Select Teams", options=team_options, default=[])
positions = st.sidebar.multiselect("Select Positions", options=position_options, default=[])

# Default the age slider to the full span present in the data so that, like
# the other filters, it excludes nobody until the user moves it. The previous
# hard-coded 18-40 default hid any player younger than 18 and was unrelated to
# the ages actually in the file.
min_age, max_age = int(df['Age'].min()), int(df['Age'].max())
if min_age < max_age:
    age_range = st.sidebar.slider("Age Range", min_value=min_age, max_value=max_age,
                                  value=(min_age, max_age))
else:
    # Only one age present: there is no range to choose, so skip the slider
    age_range = (min_age, max_age)
min_minutes = st.sidebar.number_input("Minimum Minutes Played", min_value=0, value=0, step=100)

# Filter the DataFrame
# Age and minutes always apply; an empty multiselect means "no restriction".
mask = df['Age'].between(age_range[0], age_range[1]) & (df['Min'] >= min_minutes)
if teams:
    mask &= df['Team'].isin(teams)
if positions:
    mask &= df['Pos'].isin(positions)
# .copy() hands the later sections an independent frame, so adding a column to
# it does not make pandas emit SettingWithCopyWarning.
filtered_df = df[mask].copy()

# When the filters leave no rows, show a warning and skip every section below
if filtered_df.empty:
    st.warning("No players match the selected filters. Please adjust your selections.")
else:
    # Summary Statistics
    st.subheader("Summary Statistics")
    player_count = len(filtered_df)
    goal_total = filtered_df['Gls'].sum()
    mean_xg = filtered_df['xG'].mean()
    st.write(f"Total Players: {player_count} | Total Goals: {goal_total} | Average xG: {mean_xg:.2f}")

    # Shared renderer for the three "top 10" sections below, which previously
    # repeated the same chart-plus-table code with only the column names changed.
    def _show_top10(frame, stat, title, hover_cols, table_cols):
        """Draw a bar chart of the ten highest values of `stat`, then a table.

        Args:
            frame: DataFrame of players to rank.
            stat: Column to sort by (descending); also used as bar height and label.
            title: Title of the bar chart.
            hover_cols: Columns handed to Plotly as extra hover data.
            table_cols: Columns shown in the table under the chart.
        """
        leaders = frame.sort_values(stat, ascending=False).head(10)
        fig = px.bar(leaders, x='Player', y=stat, color='Team', title=title,
                     hover_data=hover_cols, text=leaders[stat],  # value printed on each bar
                     color_discrete_sequence=px.colors.qualitative.Bold)
        fig.update_traces(textposition='auto')  # Position text neatly
        fig.update_layout(showlegend=True)  # Ensure legend visibility
        st.plotly_chart(fig)
        st.table(leaders[table_cols])

    # Top Scorers
    st.header("Top Scorers")
    _show_top10(filtered_df, 'Gls', "Top 10 Goal Scorers",
                hover_cols=['Team', 'Min'],
                table_cols=['Player', 'Team', 'Gls', 'xG', 'Min'])

    # Assist Leaders
    st.header("Assist Leaders")
    _show_top10(filtered_df, 'Ast', "Top 10 Assist Providers",
                hover_cols=['Team', 'Min'],
                table_cols=['Player', 'Team', 'Ast', 'xAG', 'Min'])

    # Goals vs. Expected Goals
    st.header("Goals vs. Expected Goals (xG)")
    fig3 = px.scatter(filtered_df, x='xG', y='Gls', text='Player', color='Team', size='Min',
                      title="Goals vs. xG", hover_data=['Team', 'Pos'],
                      color_discrete_sequence=px.colors.qualitative.Set2)
    fig3.update_traces(textposition='top center', marker=dict(opacity=0.7))  # Softer markers
    # Dashed y = x reference line from the origin to the largest xG value:
    # points above it scored more goals than their xG, points below scored fewer.
    fig3.add_shape(type="line", x0=0, y0=0, x1=filtered_df['xG'].max(), y1=filtered_df['xG'].max(),
                   line=dict(color="gray", dash="dash"))
    st.plotly_chart(fig3)

    # Progressive Actions
    st.header("Progressive Actions")
    _show_top10(filtered_df, 'PrgC', "Top 10 Progressive Carries",
                hover_cols=['Team', 'PrgP', 'Min'],
                table_cols=['Player', 'Team', 'PrgC', 'PrgP', 'Min'])

    # NEW VISUALIZATION: Team Contribution Pie Chart
    # NOTE(review): the header mentions assists and 'Ast' is summed below, but
    # only 'Gls' is plotted - confirm whether an assists chart was intended.
    st.header("Team Contribution to Goals and Assists")
    team_contribution = (
        filtered_df
        .groupby('Team')[['Gls', 'Ast']]
        .sum()
        .reset_index()
    )
    fig5 = px.pie(
        team_contribution,
        values='Gls',
        names='Team',
        title="Goal Contribution by Team",
        color_discrete_sequence=px.colors.sequential.Viridis,
    )
    # Offset the slice of the team with the most goals; reset_index() above
    # makes row labels equal to row positions, so idxmax() can be compared
    # directly against range() positions.
    top_team_row = team_contribution['Gls'].idxmax()
    slice_offsets = [0.1 if row == top_team_row else 0 for row in range(len(team_contribution))]
    fig5.update_traces(textinfo='percent+label', pull=slice_offsets)
    st.plotly_chart(fig5)

    # NEW VISUALIZATION: Age Distribution Histogram
    st.header("Player Age Distribution")
    fig6 = px.histogram(
        filtered_df,
        x='Age',
        nbins=20,
        title="Distribution of Player Ages",
        color_discrete_sequence=['#00CC96'],
        marginal="rug",  # marginal rug plot marking the individual values
    )
    fig6.update_layout(bargap=0.2)  # leave a gap between neighbouring bars
    st.plotly_chart(fig6)

    # NEW VISUALIZATION: Position Breakdown Sunburst
    st.header("Position Breakdown")
    # One row per (position, team) pair holding how many filtered players fall in it
    pos_team_counts = (
        filtered_df
        .groupby(['Pos', 'Team'])
        .size()
        .reset_index(name='Count')
    )
    fig7 = px.sunburst(
        pos_team_counts,
        path=['Pos', 'Team'],  # inner ring: position, outer ring: team
        values='Count',
        title="Players by Position and Team",
        color='Count',
        color_continuous_scale='RdYlBu',
    )
    st.plotly_chart(fig7)

    # Overperformers and Underperformers
    st.header("Overperformers and Underperformers")
    # Build the goals-minus-xG column with assign() and rebind the name instead
    # of writing a column into filtered_df in place: the frame comes from
    # boolean indexing, and an in-place column write on it makes pandas emit
    # SettingWithCopyWarning. Rebinding keeps the new column visible to the
    # data table and CSV export further down, exactly as before.
    filtered_df = filtered_df.assign(Gls_minus_xG=filtered_df['Gls'] - filtered_df['xG'])
    # Largest positive margins first = scored more than expected
    overperformers = filtered_df.sort_values('Gls_minus_xG', ascending=False).head(5)
    # Smallest (most negative) margins first = scored less than expected
    underperformers = filtered_df.sort_values('Gls_minus_xG', ascending=True).head(5)
    st.subheader("Top Overperformers (Goals - xG)")
    st.table(overperformers[['Player', 'Team', 'Gls', 'xG', 'Gls_minus_xG']])
    st.subheader("Top Underperformers (Goals - xG)")
    st.table(underperformers[['Player', 'Team', 'Gls', 'xG', 'Gls_minus_xG']])

    # NEW VISUALIZATION: Heatmap of Key Stats
    st.header("Correlation Between Key Stats")
    stat_columns = ['Gls', 'Ast', 'xG', 'xAG', 'PrgC', 'PrgP', 'Min']
    # Pairwise correlation matrix of the selected stats over the filtered players
    key_stats = filtered_df[stat_columns].corr()
    heatmap_trace = go.Heatmap(
        z=key_stats.values,
        x=key_stats.columns,
        y=key_stats.columns,
        colorscale='Viridis',
        text=key_stats.round(2).values,  # coefficient, rounded to 2 decimals, shown in each cell
        texttemplate="%{text}",
        hoverinfo="z",
    )
    fig8 = go.Figure(data=heatmap_trace)
    fig8.update_layout(title="Correlation Heatmap of Key Statistics", width=600, height=600)
    st.plotly_chart(fig8)

    # Filtered Data Table
    st.header("Filtered Data")
    st.dataframe(filtered_df)

    # Download Button
    @st.cache_data
    def convert_df(df):
        """Serialise `df` to CSV (index omitted) and return it as UTF-8 bytes."""
        csv_text = df.to_csv(index=False)
        return csv_text.encode('utf-8')

    csv = convert_df(filtered_df)
    download_options = dict(
        label="Download Filtered Data as CSV",
        data=csv,
        file_name='filtered_player_stats.csv',
        mime='text/csv',
    )
    st.download_button(**download_options)

# Stat Definitions
# Glossary text for the collapsible panel; kept as one Markdown string.
glossary_markdown = """
    - **Gls**: Goals scored
    - **Ast**: Assists
    - **G+A**: Goals plus assists
    - **xG**: Expected goals
    - **npxG**: Non-penalty expected goals
    - **xAG**: Expected assisted goals
    - **PrgC**: Progressive carries (carries moving the ball towards the opponent's goal)
    - **PrgP**: Progressive passes (passes moving the ball towards the opponent's goal)
    - **Min**: Minutes played
    - **90s**: Number of 90-minute games played (Min / 90)
    - **CrdY**: Yellow cards
    - **CrdR**: Red cards
    - **Per 90 Stats** (e.g., Gls_90): Metric per 90 minutes played
    """
# Placed outside the if/else above, so the glossary is shown even when the
# filters match no players.
with st.expander("Stat Definitions"):
    st.markdown(glossary_markdown)

Overwriting app.py


In [3]:
# Tear down leftovers from earlier runs of this notebook. The shell commands
# use pkill -f, which matches the pattern against each process's full command
# line; pyngrok is then asked to shut down what it manages itself.
!pkill -f ngrok  # Kill any ngrok processes
!pkill -f streamlit  # Kill any Streamlit processes
from pyngrok import ngrok
ngrok.kill()  # Ensure pyngrok terminates all tunnels

In [None]:
from pyngrok import ngrok
import time

# Set your ngrok authtoken (replace the token below with your own if needed)
ngrok.set_auth_token("2tpvxNKhhrLQD5BEgr8L8oV9S1M_6JBhLtsWZ19TgAi19aFwr")

# Run Streamlit in the background
!streamlit run app.py &>/dev/null &

# Wait for Streamlit to start
time.sleep(5)

# Get the Streamlit port (default is 8501)
streamlit_port = 8501

# Create a tunnel to the Streamlit port
public_url = ngrok.connect(streamlit_port).public_url

# Display the public URL
print(f"Streamlit app is running at: {public_url}")


Streamlit app is running at: https://c2e8-35-231-145-231.ngrok-free.app
