In [None]:
pip install openpyxl

In [None]:
pip install tabulate

In [None]:
import pandas as pd
import numpy as np
import os
import joblib

# Silence the pandas chained-assignment warning, which often fires on
# indexing/slicing done in chained operations.
pd.options.mode.chained_assignment = None

# --- CONFIGURATION: PATHS AND FILE NAME ---
# Project folder holding the input workbook, the saved model and the output CSV.
BRONZE_DIR = "/Users/brockolson/Desktop/STAT766/Stat 766 Final Project"
# NOTE: per the original author's note, the saved estimator is a logistic
# regression even though the file name says "forest".
MODEL_OUTPUT = os.path.join(BRONZE_DIR, "nba_playoff_forest.joblib")
# Bare file name only. Previously this was a full absolute path, which made the
# join below a no-op (os.path.join drops earlier parts when a later part is
# absolute) and duplicated the directory already stored in BRONZE_DIR.
EXCEL_FILE_NAME = "2025stats.xlsx"
EXCEL_FILE_PATH = os.path.join(BRONZE_DIR, EXCEL_FILE_NAME)
# Destination CSV for the exported prediction table.
PREDICTION_OUTPUT_FILE = os.path.join(BRONZE_DIR, "nba_playoff_predictions_output.csv")

# --- FUNCTION 1: EXCEL DATA LOADING (Unchanged) ---
def load_all_features_from_excel(file_path):
    """Read the first sheet of an Excel workbook into a cleaned DataFrame.

    Column names are stripped of surrounding whitespace, a ``TOV_PCT`` column
    is renamed to ``TM_TOV_PCT``, and every column other than ``TEAM_NAME`` is
    converted to numeric (values that cannot be parsed become NaN).

    Args:
        file_path: Path of the Excel workbook to read.

    Returns:
        The cleaned ``pandas.DataFrame``.

    Raises:
        FileNotFoundError: If nothing exists at ``file_path``.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Error: Excel file not found at {file_path}")

    stats = pd.read_excel(file_path, sheet_name=0)

    # Normalise headers before any column is looked up by name.
    stats.columns = stats.columns.str.strip()
    stats = stats.rename(columns={'TOV_PCT': 'TM_TOV_PCT'})

    # Snapshot the header list, then coerce everything except the team label.
    for column in list(stats.columns):
        if column == 'TEAM_NAME':
            continue
        stats[column] = pd.to_numeric(stats[column], errors='coerce')

    return stats

# --- FUNCTION 2: DATA PREPARATION (Unchanged, Generates 13 Ranks) ---
def prepare_current_season_data_ranks(raw_stats_df):
    """Turn raw team statistics into per-feature rank columns.

    Each of the 13 configured statistics is ranked across all rows. For
    features ranked ascending the smallest value receives rank 1; for those
    ranked descending the largest value does. Ties share the lowest rank
    (``method='min'``). The input frame is not modified.

    Args:
        raw_stats_df: DataFrame with a ``TEAM_NAME`` column plus one column
            for every raw statistic listed below.

    Returns:
        A tuple ``(features, rank_columns)`` where ``features`` is a DataFrame
        holding ``TEAM_NAME`` followed by the ``<FEATURE>_RANK`` columns, and
        ``rank_columns`` is the list of those rank column names in order.

    Raises:
        KeyError: If ``TEAM_NAME`` or any required raw statistic is absent.
    """
    # (raw statistic, rank ascending?) in the order the rank columns are emitted.
    rank_spec = (
        ('EFG_PCT', True), ('FTA_RATE', True), ('TM_TOV_PCT', False), ('OREB_PCT', True),
        ('OPP_EFG_PCT', False), ('OPP_FTA_RATE', False),
        ('OPP_TOV_PCT', False), ('OPP_OREB_PCT', False),
        ('AST_TO', True), ('AST_RATIO', True),
        ('E_PACE', True), ('PACE_PER40', True), ('POSS', True),
    )

    stats = raw_stats_df.copy()
    ranked = stats[['TEAM_NAME']].copy()
    rank_columns = []

    for stat_name, ascending in rank_spec:
        if stat_name not in stats.columns:
            raise KeyError(f"Raw feature '{stat_name}' is missing from the Excel file.")
        target = f"{stat_name}_RANK"
        ranks = stats[stat_name].rank(method='min', ascending=ascending)
        ranked[target] = ranks.astype(float)
        rank_columns.append(target)

    return ranked[['TEAM_NAME'] + rank_columns], rank_columns

# ---------------------------------------------------------
# FINAL PREDICTION EXECUTION (REMOVING THE INVERSION HACK)
# ---------------------------------------------------------

if __name__ == "__main__":
    # Script entry point: rank the current-season stats, score them with the
    # saved model pipeline, then export and print the prediction table.
    # Everything stays at module level (not wrapped in a function) so the
    # intermediate frames remain inspectable after the run.
    import traceback

    print("--- STARTING PREDICTION: 13 RANK COLUMNS (No Inversion Hack) ---")

    # Fill value for missing ranks: the midpoint of a 1-30 ranking, i.e. a
    # "league average" placeholder. Presumably matches the rank scale the
    # model was fitted on -- confirm against the training notebook.
    NEUTRAL_RANK = 15.5

    try:
        # 1. Load & Prepare Data
        raw_current_data = load_all_features_from_excel(EXCEL_FILE_PATH)
        X_current_season_features, FEATURE_LIST = prepare_current_season_data_ranks(raw_current_data)

        # 2. Load the Model Pipeline
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code -- only load model files from a trusted source.
        final_model_pipeline = joblib.load(MODEL_OUTPUT)

        # 3. Define expected features (13 ranks)
        model_expected_features = FEATURE_LIST

        # 4. Select and Impute features
        X_for_prediction = X_current_season_features[FEATURE_LIST]
        X_for_prediction = X_for_prediction.fillna(NEUTRAL_RANK)
        # Enforce the column order handed to the model.
        X_for_prediction = X_for_prediction.reindex(columns=model_expected_features)

        # 🛑 INVERSION HACK REMOVED. The model uses the ranks 1-30 as-is.

        # 5. Generate Predictions
        # Column 1 of predict_proba is taken as the positive (playoff) class.
        playoff_probs = final_model_pipeline.predict_proba(X_for_prediction)[:, 1]
        playoff_predictions = final_model_pipeline.predict(X_for_prediction)

        # 6. Assemble, Sort, and Export Results Table
        # The prediction arrays are assigned positionally, so results_df must
        # keep the same row order as X_for_prediction until after this step.
        results_df = X_current_season_features[['TEAM_NAME']].copy()
        results_df['Predicted_Probability'] = playoff_probs
        results_df['Predicted_Playoffs'] = playoff_predictions

        results_df = results_df.sort_values(by='Predicted_Probability', ascending=False)

        results_df.to_csv(PREDICTION_OUTPUT_FILE, index=False, float_format='%.4f')

        # 7. Print Success Message and Table
        print("\n" + "="*70)
        print("🏀 NBA PLAYOFF PREDICTIONS (Prediction Generated)")
        print(f"✅ Prediction table exported to: {PREDICTION_OUTPUT_FILE}")
        print("="*70)

        # DataFrame.to_markdown relies on the optional `tabulate` package.
        print(results_df.to_markdown(index=False, floatfmt=".4f"))

    except Exception as e:
        # Top-level boundary: keep the friendly one-line message, but also emit
        # the traceback so the failing step can be located (str(e) alone is
        # just the bare key for a KeyError, for example).
        print(f"An unexpected error occurred: {e}")
        traceback.print_exc()