In [None]:
%%writefile ../app/main.py
import streamlit as st
import sys
import os

# Make the project's src/ directory importable (data, features, models,
# visualization packages used by the pages).
# The directory is located relative to this file rather than the process
# working directory, so the app also starts when launched from outside app/.
# If __file__ is unavailable, fall back to the working directory.
_APP_DIR = os.path.dirname(os.path.abspath(__file__)) if '__file__' in globals() else os.getcwd()
_SRC_DIR = os.path.abspath(os.path.join(_APP_DIR, '..', 'src'))
# This script is re-executed many times by Streamlit within one process;
# only add the entry once so sys.path does not accumulate duplicates.
if _SRC_DIR not in sys.path:
    sys.path.append(_SRC_DIR)

from pages import data_exploration, model_training, shot_prediction
from components.sidebar import sidebar

# Page-wide settings, applied at import time before main() issues any other
# Streamlit call.
st.set_page_config(
    page_title="NBA Shot Predictor",
    page_icon="🏀",
    layout="wide",
)

def main():
    """Render the app title, then hand control to the page chosen in the sidebar.

    The value returned by ``sidebar()`` is matched against the three known
    page labels; an unrecognised value renders nothing beyond the title.
    """
    st.title("NBA Shot Predictor 🏀")

    # Sidebar label -> page module. Each page module exposes a run() entry point.
    pages_by_label = {
        "Data Exploration": data_exploration,
        "Model Training": model_training,
        "Shot Prediction": shot_prediction,
    }

    selected_page = pages_by_label.get(sidebar())
    if selected_page is not None:
        selected_page.run()


if __name__ == "__main__":
    main()


In [None]:
%%writefile ../app/components/sidebar.py
import streamlit as st

def sidebar():
    """Draw the navigation sidebar and return the radio widget's selection.

    Returns:
        The value returned by ``st.sidebar.radio`` for the options
        "Data Exploration", "Model Training" and "Shot Prediction".
    """
    nav = st.sidebar
    nav.title("Navigation")

    page_names = ["Data Exploration", "Model Training", "Shot Prediction"]
    selected_page = nav.radio("Go to", page_names)
    return selected_page


In [None]:
%%writefile ../app/pages/data_exploration.py

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from data.data_loader import fetch_shots_data
from visualization.plot_utils import plot_court

def run():
    """Render the Data Exploration page.

    The user picks a team and a season; the matching shots are fetched with
    ``fetch_shots_data`` and shown as:

    * the first rows of the raw data,
    * a scatter of shot locations (``LOC_X``/``LOC_Y``) over the court,
    * the make rate per ``SHOT_DISTANCE`` (distances with more than 10 shots),
    * the ten players with the most points, approximated as 2 per made shot.

    If no shots come back, a warning is shown and nothing else is rendered.
    """
    st.header("Data Exploration")

    team_name = st.selectbox("Select Team", ["Boston Celtics", "Los Angeles Lakers", "Golden State Warriors"])
    season = st.selectbox("Select Season", ["2023-24", "2022-23", "2021-22"])

    # NOTE(review): the meaning of the positional ``True`` is defined by
    # fetch_shots_data, which is not visible here — confirm against data_loader.
    shots = fetch_shots_data(team_name, True, season)

    # Without rows (or without the expected columns) every section below would
    # fail on a column lookup, so stop early with a readable message instead.
    if shots is None or shots.empty:
        st.warning(f"No shot data available for {team_name} ({season}).")
        return

    st.subheader("Raw Data")
    st.dataframe(shots.head())

    st.subheader("Shot Distribution")
    fig, ax = plt.subplots(figsize=(12, 11))
    plot_court(ax)
    ax.scatter(shots['LOC_X'], shots['LOC_Y'], alpha=0.5)
    st.pyplot(fig)
    # pyplot keeps every figure it created alive until it is closed; this page
    # is re-run on each interaction, so release the figure once it is rendered.
    plt.close(fig)

    st.subheader("Shot Success Rate by Distance")
    min_shots_per_distance = 10  # drop distances with too few attempts to be meaningful
    distance_success = (
        shots.groupby('SHOT_DISTANCE')['SHOT_MADE_FLAG']
        .agg(['count', 'mean'])
        .rename(columns={'count': 'Total Shots', 'mean': 'Success Rate'})
    )
    distance_success = distance_success[distance_success['Total Shots'] > min_shots_per_distance]
    st.line_chart(distance_success['Success Rate'])

    st.subheader("Top Scorers")
    top_scorers = (
        shots.groupby('PLAYER_NAME')['SHOT_MADE_FLAG']
        .agg(['count', 'sum'])
        .rename(columns={'count': 'Total Shots', 'sum': 'Made Shots'})
    )
    top_scorers['Points'] = top_scorers['Made Shots'] * 2  # Simplification, not accounting for 3-pointers
    top_scorers = top_scorers.sort_values('Points', ascending=False).head(10)
    st.bar_chart(top_scorers['Points'])


In [None]:
%%writefile ../app/pages/model_training.py
import streamlit as st
import pandas as pd
from data.data_loader import fetch_shots_data
from features.feature_engineering import engineer_features, prepare_features
from models.model_trainer import train_model, save_model

def _first_feature_importances(model, n_features):
    """Return importances from the first ensemble member that exposes them.

    Looks inside ``model.best_estimator_.named_steps['votingclassifier']`` and
    walks its fitted ``estimators_`` in order.

    Args:
        model: The object returned by ``train_model``.
        n_features: Number of feature names the importances must line up with.

    Returns:
        The ``feature_importances_`` of the first estimator that has them with
        exactly ``n_features`` entries, or ``None`` if the model does not have
        the expected structure or no estimator qualifies.
    """
    try:
        estimators = model.best_estimator_.named_steps['votingclassifier'].estimators_
    except (AttributeError, KeyError):
        return None

    for estimator in estimators:
        importances = getattr(estimator, 'feature_importances_', None)
        if importances is not None and len(importances) == n_features:
            return importances
    return None


def run():
    """Render the Model Training page.

    The user picks a team, a season and the models for the ensemble. Pressing
    "Train Model" fetches the shots, builds features, trains via
    ``train_model``, saves the result under ``../models/`` and charts the ten
    largest feature importances when the ensemble provides any.
    """
    st.header("Model Training")

    team_name = st.selectbox("Select Team for Training", ["Boston Celtics", "Los Angeles Lakers", "Golden State Warriors"])
    season = st.selectbox("Select Season for Training", ["2023-24", "2022-23", "2021-22"])

    selected_models = st.multiselect(
        "Select Models to Include in Ensemble",
        options=["XGB", "LGBM", "RF", "MLP"],
        default=["XGB", "LGBM", "RF"]
    )

    if not st.button("Train Model"):
        return

    # The multiselect can be cleared entirely; there is nothing to train then.
    if not selected_models:
        st.warning("Select at least one model to train.")
        return

    with st.spinner("Fetching data and training model..."):
        # NOTE(review): the meaning of the positional ``True`` is defined by
        # fetch_shots_data, which is not visible here — confirm against data_loader.
        shots = fetch_shots_data(team_name, True, season)
        shots_encoded = engineer_features(shots)
        X, y = prepare_features(shots_encoded)
        model, scaler, imputer = train_model(X, y, selected_models, {})

        # Same naming scheme the Shot Prediction page uses to load the model.
        save_model(model, scaler, imputer, f'../models/{team_name.replace(" ", "_")}_{season}_model.joblib')

    st.success(f"Model for {team_name} ({season}) trained and saved successfully!")

    st.subheader("Feature Importance")
    # Not every selectable model exposes feature_importances_, so do not assume
    # the first ensemble member does.
    importances = _first_feature_importances(model, len(X.columns))
    if importances is None:
        st.info("Feature importances are not available for the selected models.")
        return

    feature_importance = pd.DataFrame({
        'feature': X.columns,
        'importance': importances
    }).sort_values('importance', ascending=False).head(10)
    st.bar_chart(feature_importance.set_index('feature'))


In [None]:
%%writefile ../app/pages/shot_prediction.py
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from data.data_loader import fetch_shots_data
from features.feature_engineering import engineer_features, prepare_features
from models.model_trainer import load_model
from models.model_predictor import predict_shot
from visualization.plot_utils import plot_court, plot_shots_with_predictions
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

def _plot_outcomes(ax, shots, flag_column, made_label, missed_label, title):
    """Draw a court on ``ax`` and scatter shots split by a 1/0 flag column.

    Rows where ``flag_column`` equals 1 are drawn in green, rows where it
    equals 0 in red, both at ``LOC_X``/``LOC_Y``.
    """
    plot_court(ax)
    for flag, color, label in ((1, 'green', made_label), (0, 'red', missed_label)):
        subset = shots[shots[flag_column] == flag]
        ax.scatter(subset['LOC_X'], subset['LOC_Y'], color=color, alpha=0.7, label=label)
    ax.set_title(title)
    ax.legend()


def run():
    """Render the Shot Prediction page.

    The user picks a team, a season and one game of that season. Pressing
    "Predict Shots" loads the saved model for the team/season from
    ``../models/``, predicts the selected game's shots with ``predict_shot``
    and shows a prediction plot, a metrics table and a side-by-side
    actual-vs-predicted court.

    A warning is shown when no shots are available, and an error when no
    saved model exists yet for the chosen team and season.

    NOTE(review): the model is loaded as saved by the training page. If it was
    trained on the whole season, the selected game was part of its training
    data — confirm before reading the metrics as held-out performance.
    """
    import os  # local import: this module does not import os at file level

    st.header("Shot Prediction")

    team_name = st.selectbox("Select Team", ["Boston Celtics", "Los Angeles Lakers", "Golden State Warriors"])
    season = st.selectbox("Select Season", ["2023-24", "2022-23", "2021-22"])

    # NOTE(review): the meaning of the positional ``True`` is defined by
    # fetch_shots_data, which is not visible here — confirm against data_loader.
    shots = fetch_shots_data(team_name, True, season)
    if shots is None or shots.empty:
        st.warning(f"No shot data available for {team_name} ({season}).")
        return

    game_ids = shots['GAME_ID'].unique()
    selected_game = st.selectbox("Select Game for Prediction", game_ids)

    if not st.button("Predict Shots"):
        return

    # Same naming scheme the Model Training page uses when saving.
    model_path = f'../models/{team_name.replace(" ", "_")}_{season}_model.joblib'
    if not os.path.isfile(model_path):
        st.error(
            f"No trained model found at {model_path}. "
            f"Train a model for {team_name} ({season}) on the Model Training page first."
        )
        return

    with st.spinner("Loading model and predicting shots..."):
        model, scaler, imputer = load_model(model_path)

        train_shots = shots[shots['GAME_ID'] != selected_game]
        test_shots = shots[shots['GAME_ID'] == selected_game]

        # The other games are only used to obtain the feature column layout
        # that is handed to predict_shot.
        X_train, _ = prepare_features(engineer_features(train_shots))
        _, y_test = prepare_features(engineer_features(test_shots))

        predictions = predict_shot(model, scaler, imputer, test_shots, X_train.columns)

    st.success("Predictions complete!")

    # plot_shots_with_predictions is not given an axes; presumably it draws on
    # the current pyplot figure created here — TODO confirm in plot_utils.
    overview_fig, _ = plt.subplots(figsize=(12, 11))
    plot_shots_with_predictions(predictions)
    st.pyplot(overview_fig)
    # pyplot keeps figures alive until closed and this page re-runs on every
    # interaction, so release each figure once it has been rendered.
    plt.close(overview_fig)

    st.subheader("Model Performance")
    accuracy = accuracy_score(y_test, predictions['PREDICTION'])
    precision = precision_score(y_test, predictions['PREDICTION'])
    recall = recall_score(y_test, predictions['PREDICTION'])
    f1 = f1_score(y_test, predictions['PREDICTION'])
    # ROC AUC is undefined (roc_auc_score raises) when the game contains only
    # makes or only misses; report NaN for that case instead of crashing.
    if pd.Series(y_test).nunique() > 1:
        roc_auc = roc_auc_score(y_test, predictions['PREDICTION_PROB'])
    else:
        roc_auc = float('nan')

    metrics_df = pd.DataFrame({
        'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'ROC AUC'],
        'Value': [accuracy, precision, recall, f1, roc_auc]
    })
    st.table(metrics_df)

    st.subheader("Comparison with Actual Shots")
    comparison_fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
    _plot_outcomes(ax1, predictions, 'SHOT_MADE_FLAG', 'Made', 'Missed', "Actual Shots")
    _plot_outcomes(ax2, predictions, 'PREDICTION', 'Predicted Made', 'Predicted Missed', "Predicted Shots")

    st.pyplot(comparison_fig)
    plt.close(comparison_fig)
