# NBA Fantasy Lineup Optimization

This notebook implements the lineup optimization methodology from Papageorgiou et al. (2024), using trained per-player models to optimize daily fantasy sports (DFS) lineups.

## Setup and Data Loading

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yaml
from datetime import datetime
import joblib

from src.models.predictor import Predictor
from src.optimization.lineup import LineupOptimizer, Player

# Set plotting style
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and
# deprecated the bare 'seaborn' name, so prefer the new name when the installed
# matplotlib provides it and fall back to the old one otherwise.
plot_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(plot_style)
%matplotlib inline

In [None]:
# Load configuration
# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default locale encoding.
with open('../config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Load player info and latest features
# player_info supplies the per-player metadata columns read later in this
# notebook; latest_features holds the per-game feature rows.
player_info = pd.read_csv('../data/raw/player_info.csv')
latest_features = pd.read_csv('../data/processed/features.csv')

## Load Models and Make Predictions

In [None]:
def load_player_model(player_id):
    """Load the trained model for a player, if one is available.

    Args:
        player_id: Identifier interpolated into the model filename
            ``../models/player_{player_id}.joblib``.

    Returns:
        The object deserialized by ``joblib.load``, or ``None`` when the file
        does not exist or cannot be loaded.
    """
    import os
    import warnings

    model_path = f'../models/player_{player_id}.joblib'

    # A missing file is the expected "no model for this player" case.
    if not os.path.isfile(model_path):
        return None

    try:
        # NOTE(review): joblib files are pickle-based, so loading one can run
        # arbitrary code; only load model files from a trusted source.
        return joblib.load(model_path)
    except Exception as exc:
        # Stay best-effort (the caller skips players without a model), but
        # surface the failure and let KeyboardInterrupt/SystemExit propagate,
        # which a bare ``except:`` would have swallowed.
        warnings.warn(f"Could not load model for player {player_id}: {exc!r}")
        return None

def get_latest_features(df, player_id):
    """Return the most recent feature row for one player.

    Rows whose ``PLAYER_ID`` equals ``player_id`` are selected, ``GAME_DATE``
    is parsed to datetimes on a copy (the input frame is not modified), and
    the row with the latest date is returned as a Series.

    Raises:
        IndexError: if ``df`` contains no rows for ``player_id``.
    """
    is_player = df['PLAYER_ID'] == player_id
    player_rows = df[is_player].assign(
        GAME_DATE=lambda frame: pd.to_datetime(frame['GAME_DATE'])
    )
    chronological = player_rows.sort_values('GAME_DATE')
    # Last row after the ascending sort is the latest game.
    return chronological.iloc[-1]

# Make predictions for all players
# A player is skipped when no model could be loaded (load_player_model returns
# None) or when the feature table has no rows for them (get_latest_features
# then raises IndexError); previously the latter aborted the whole loop.
predictions = []
players_without_features = []
for _, player in player_info.iterrows():
    player_id = player['PLAYER_ID']
    model = load_player_model(player_id)
    if model is None:
        continue

    try:
        features = get_latest_features(latest_features, player_id)
    except IndexError:
        players_without_features.append(player_id)
        continue

    predictor = Predictor(model)

    predictions.append({
        'player_id': player_id,
        'name': player['PLAYER_NAME'],
        'position': player['POSITION'],
        'team': player['TEAM_ABBREVIATION'],
        # The feature Series is turned into a one-row frame for the predictor;
        # [0] takes the single prediction back out.
        'predicted_fp': predictor.predict(features.to_frame().T)[0]
    })

if players_without_features:
    print(f"Skipped {len(players_without_features)} players with a model but no feature rows")

# Explicit columns keep the frame's schema stable even if nothing was predicted.
predictions_df = pd.DataFrame(
    predictions,
    columns=['player_id', 'name', 'position', 'team', 'predicted_fp']
)
print(f"Generated predictions for {len(predictions_df)} players")

## Load DFS Salary Data

For this example, we'll simulate DraftKings salaries.

In [None]:
def simulate_salaries(predictions_df, base_salary=5000, fp_multiplier=100,
                      min_salary=3000, max_salary=12000):
    """Simulate DraftKings salaries based on predicted performance.

    Each salary is ``base_salary + predicted_fp * fp_multiplier``, rounded to
    the nearest 100 and then limited to ``[min_salary, max_salary]``.

    Args:
        predictions_df: DataFrame with a ``predicted_fp`` column.
        base_salary: Salary assigned to a player projected for 0 points.
        fp_multiplier: Salary added per projected fantasy point.
        min_salary: Lower salary bound (default matches the previous
            hard-coded 3000).
        max_salary: Upper salary bound (default matches the previous
            hard-coded 12000).

    Returns:
        A Series of simulated salaries aligned with ``predictions_df``.

    Raises:
        ValueError: if ``min_salary`` is greater than ``max_salary``.
    """
    if min_salary > max_salary:
        raise ValueError("min_salary must not exceed max_salary")

    salaries = base_salary + (predictions_df['predicted_fp'] * fp_multiplier)
    # round(-2) rounds to the nearest hundred before the bounds are applied.
    return salaries.round(-2).clip(lower=min_salary, upper=max_salary)

# Attach a simulated salary to every predicted player
predictions_df['salary'] = simulate_salaries(predictions_df)

# Histogram of the simulated salaries
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=predictions_df, x='salary', ax=ax)
ax.set_title('Distribution of Player Salaries')
plt.show()

## Value Analysis

Analyze projected fantasy points per \$1,000 of salary.

In [None]:
# Value = projected fantasy points per $1000 of salary
salary_in_thousands = predictions_df['salary'] / 1000
predictions_df['value'] = predictions_df['predicted_fp'] / salary_in_thousands

# Horizontal bar chart of the 15 players with the highest value
top_value = predictions_df.nlargest(15, 'value')
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=top_value, x='value', y='name', ax=ax)
ax.set_title('Top 15 Players by Value (FP/$1000)')
fig.tight_layout()
plt.show()

## Create Player Objects

In [None]:
def create_player_objects(df):
    """Build one ``Player`` per row of ``df`` for the optimizer.

    Reads the ``player_id``, ``name``, ``position``, ``salary``,
    ``predicted_fp`` and ``team`` columns. A ``position`` such as ``"A/B"`` is
    split on ``/`` into a set of positions. Returns the players as a list in
    row order.
    """
    def _row_to_player(record):
        # Split the slash-separated position string into a set of positions.
        eligible = set(record['position'].split('/'))
        return Player(
            id=record['player_id'],
            name=record['name'],
            positions=eligible,
            salary=int(record['salary']),
            predicted_fp=float(record['predicted_fp']),
            team=record['team'],
            opponent='TBD'  # In practice, get from schedule
        )

    return [_row_to_player(record) for _, record in df.iterrows()]

# Turn the prediction table into the Player objects the optimizer consumes
available_players = create_player_objects(predictions_df)
player_count = len(available_players)
print(f"Created {player_count} player objects")

## Optimize Lineups

In [None]:
# Initialize optimizer
optimizer = LineupOptimizer(config)

# Generate multiple lineups
num_lineups = 5
lineups = optimizer.optimize_lineup(available_players, num_lineups=num_lineups)

# The optimizer may return fewer lineups than requested, so report the actual count.
print(f"Generated {len(lineups)} optimal lineups")

# Display lineups
for i, lineup in enumerate(lineups, 1):
    print(f"\nLineup {i}:")
    print(f"Projected FP: {lineup.predicted_fp:.2f}")
    print(f"Total Salary: ${lineup.total_salary:,}")
    print("\nPlayers:")
    for player in lineup.players:
        # positions is a set, whose iteration order can differ between runs;
        # sort it so the printed output is reproducible.
        position_label = ', '.join(sorted(player.positions))
        print(f"- {player.name} ({position_label}) - ${player.salary:,} - {player.predicted_fp:.1f} FP")

## Lineup Analysis

In [None]:
def analyze_lineup_diversity(lineups):
    """Plot how often each player appears across the given lineups.

    Counts appearances by player name over every lineup's ``players`` and
    draws a horizontal bar chart of the 15 most used names.
    """
    appearances = {}
    for lineup in lineups:
        for member in lineup.players:
            appearances[member.name] = appearances.get(member.name, 0) + 1

    usage_df = pd.DataFrame({
        'player': list(appearances.keys()),
        'usage': list(appearances.values())
    })
    # Most frequently used players first
    usage_df = usage_df.sort_values('usage', ascending=False)
    most_used = usage_df.head(15)

    fig, ax = plt.subplots(figsize=(12, 6))
    sns.barplot(data=most_used, x='usage', y='player', ax=ax)
    ax.set_title('Most Common Players Across Lineups')
    fig.tight_layout()
    plt.show()
    
analyze_lineup_diversity(lineups)

In [None]:
def analyze_salary_distribution(lineups):
    """Plot salaries and position counts of the players used in the lineups.

    The left panel is a box plot of every selected player's salary; the right
    panel is a bar chart counting each position over all selected players. A
    player appearing in several lineups, or listing several positions, is
    counted once per occurrence.
    """
    selected = [member for lineup in lineups for member in lineup.players]
    all_salaries = [member.salary for member in selected]
    all_positions = [pos for member in selected for pos in member.positions]

    fig, (salary_ax, position_ax) = plt.subplots(1, 2, figsize=(12, 5))

    sns.boxplot(y=all_salaries, ax=salary_ax)
    salary_ax.set_title('Salary Distribution in Lineups')

    position_counts = pd.Series(all_positions).value_counts()
    sns.barplot(x=position_counts.index, y=position_counts.values, ax=position_ax)
    position_ax.set_title('Position Distribution')

    fig.tight_layout()
    plt.show()
    
analyze_salary_distribution(lineups)

## Lineup Performance Analysis

In [None]:
def analyze_lineup_performance(lineups):
    """Analyze projected performance of lineups.

    Builds one summary row per lineup (1-based lineup number, projected
    fantasy points, total salary, and points per $1000 of salary) and draws
    three side-by-side bar charts comparing the lineups on those metrics.

    Args:
        lineups: Iterable of lineup objects exposing ``predicted_fp`` and
            ``total_salary``.

    Returns:
        None. When ``lineups`` is empty a message is printed and nothing is
        plotted.
    """
    lineups = list(lineups)
    # Without this guard an empty input yields a DataFrame with no columns,
    # and the bar plots below fail on the missing 'lineup' column.
    if not lineups:
        print("No lineups to analyze")
        return

    performance_data = [
        {
            'lineup': i+1,
            'projected_fp': lineup.predicted_fp,
            'total_salary': lineup.total_salary,
            'avg_value': lineup.predicted_fp / (lineup.total_salary / 1000)
        }
        for i, lineup in enumerate(lineups)
    ]

    perf_df = pd.DataFrame(performance_data)

    # Plot comparisons
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    sns.barplot(data=perf_df, x='lineup', y='projected_fp', ax=axes[0])
    axes[0].set_title('Projected Fantasy Points')

    sns.barplot(data=perf_df, x='lineup', y='total_salary', ax=axes[1])
    axes[1].set_title('Total Salary')

    sns.barplot(data=perf_df, x='lineup', y='avg_value', ax=axes[2])
    axes[2].set_title('Value (FP/$1000)')

    plt.tight_layout()
    plt.show()
    
analyze_lineup_performance(lineups)

## Save Optimized Lineups