In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

class FantasyFootballPredictor:
    def __init__(self):
        self.models = {}
        self.scalers = {}
        
    def calculate_fantasy_points(self, position, data):
        """Calculate fantasy points based on actual available stats using PPR scoring"""
        try:
            if position == 'K':
                points = (
                    data['XPM_avg'] * 1 +     # 1 point per extra point
                    data['FGM_avg'] * 3       # 3 points per field goal
                    # Could also factor in FG%_avg and XP%_avg as multipliers for consistency
                )
            elif position == 'QB':
                points = (
                    data['Yds_avg'] * 0.04 +  
                    data['TD_avg'] * 4 +      
                    data['Int_avg'] * -2 +    
                    data['Y/A_avg'] * 0.1 +   
                    data['FL_avg'] * -2       
                )
            elif position == 'RB':
                points = (
                    data['Yds_avg'] * 0.1 +     
                    data['TD_avg'] * 6 +        
                    data['Rec_avg'] * 1 +       
                    data['Y/R_avg'] * 0.1 +     
                    data['FL_avg'] * -2         
                )
            elif position in ['WR', 'TE']:
                points = (
                    data['Rec_avg'] * 1 +     
                    data['Yds_avg'] * 0.1 +   
                    data['TD_avg'] * 6 +      
                    data['FL_avg'] * -2       
                )
                
            return points * self.calculate_sample_weight(data['Games_Played'], data['Experience'])
        except Exception as e:
            print(f"Error calculating points for {position}: {str(e)}")
            return 0

    def calculate_sample_weight(self, games_played, experience):
        """Calculate confidence factor based on games played and experience"""
        if isinstance(experience, str) and experience.lower() == 'rook':
            exp_factor = 0.7
        else:
            exp_factor = min(1.0, 0.7 + (float(experience) * 0.1))
            
        games_factor = 1 / (1 + np.exp(-0.05 * (games_played - 20)))
        return exp_factor * games_factor

    def load_and_prepare_data(self, position):
        """Load and prepare data for a specific position"""
        try:
            df = pd.read_csv(f'Player_Roles/{position}_analysis.csv')
            
            if position == 'K':
                features = ['Games_Played', 'FGM_avg', 'XPM_avg', 'FG%_avg', 'XP%_avg', 'Pts_avg']
            elif position == 'QB':
                features = ['Games_Played', 'Yds_avg', 'TD_avg', 'Int_avg', 'Y/A_avg', 'FL_avg']
            elif position == 'RB':
                features = ['Games_Played', 'Yds_avg', 'TD_avg', 'Rec_avg', 'Y/R_avg', 'FL_avg']
            elif position in ['WR', 'TE']:
                features = ['Games_Played', 'Rec_avg', 'Yds_avg', 'TD_avg', 'FL_avg']
            
            # Drop rows with NaN values only in the features we need
            df = df.dropna(subset=features)
            
            # Calculate fantasy points
            df['fantasy_points'] = df.apply(lambda x: self.calculate_fantasy_points(position, x), axis=1)
            
            return df, features
            
        except Exception as e:
            print(f"Error loading {position} data: {str(e)}")
            print(f"Available columns: {df.columns.tolist()}")
            return None, None

    def train_model(self, position):
        """Train model for a specific position"""
        df, features = self.load_and_prepare_data(position)
        if df is None or len(df) < 10:
            return False
            
        X = df[features]
        y = df['fantasy_points']
        
        # Split data
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        
        # Scale features
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        
        # Train model
        model = GradientBoostingRegressor(
            n_estimators=100,
            learning_rate=0.1,
            max_depth=5,
            random_state=42
        )
        
        model.fit(X_train_scaled, y_train)
        
        # Evaluate model
        y_pred = model.predict(X_test_scaled)
        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        
        print(f"\n{position} Model Performance:")
        print(f"Mean Squared Error: {mse:.2f}")
        print(f"R² Score: {r2:.2f}")
        
        self.models[position] = model
        self.scalers[position] = scaler
        
        return True

    def predict_players(self, position):
        """Predict fantasy points for all players in a position"""
        if position not in self.models:
            print(f"No trained model available for {position}")
            return None
            
        df, features = self.load_and_prepare_data(position)
        if df is None:
            return None
            
        X = df[features]
        X_scaled = self.scalers[position].transform(X)
        
        predictions = self.models[position].predict(X_scaled)
        df['predicted_points'] = predictions
        
        return df.sort_values('predicted_points', ascending=False)[
            ['Player', 'Team', 'Experience', 'Games_Played', 'predicted_points'] + features
        ]

In [5]:
def format_predictions(df, position):
    """Format predictions with additional confidence information"""
    cols = ['Player', 'Team', 'Experience', 'Games_Played', 'predicted_points']
    
    if position == 'K':
        additional_cols = ['FG%_avg', 'XP%_avg', 'Pts_avg']
    elif position == 'QB':
        additional_cols = ['Yds_avg', 'TD_avg', 'Int_avg', 'Y/A_avg', 'FL_avg']
    elif position == 'RB':
        additional_cols = ['Yds_avg', 'TD_avg', 'Rec_avg', 'FL_avg']
    else:  # WR/TE
        additional_cols = ['Rec_avg', 'Yds_avg', 'TD_avg', 'FL_avg']
        
    cols.extend(additional_cols)
    return df[cols].round(2)

In [6]:
def save_predictions(df, position, output_dir='FF_Predictions'):
    """Save predictions to CSV file"""
    import os
    # Create FF_Predictions directory if it doesn't exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    df_sorted = df.sort_values('predicted_points', ascending=False)
    filename = os.path.join(output_dir, f'{position}_predictions.csv')
    df_sorted.to_csv(filename, index=False)
    print(f"\nSaved {position} predictions to: {filename}")

In [7]:
def main():
    predictor = FantasyFootballPredictor()
    
    positions = ['QB', 'RB', 'WR', 'TE', 'K']
    for position in positions:
        print(f"\nTraining {position} model...")
        predictor.train_model(position)
        
        print(f"\nPredictions for {position}s:")
        predictions = predictor.predict_players(position)
        if predictions is not None:
            formatted_predictions = format_predictions(predictions, position)
            print(formatted_predictions.to_string())
            
            # Use the save_predictions function instead of direct to_csv
            save_predictions(formatted_predictions, position)

if __name__ == "__main__":
    main()


Training QB model...

QB Model Performance:
Mean Squared Error: 1.73
R² Score: 0.94

Predictions for QBs:
              Player        Team Experience  Games_Played  Games_Played  predicted_points  Yds_avg  TD_avg  Int_avg  Y/A_avg  FL_avg
2         Derek Carr      Saints         10           167           167             14.21   234.72    1.47     0.64     6.96    0.22
0   Matthew Stafford        Rams         15           217           217             13.97   231.68    1.46     0.74     6.33    0.16
4         Jared Goff       Lions          8           128           128             13.56   233.73    1.44     0.64     6.90    0.23
1       Kirk Cousins     Falcons         12           161           161             12.83   205.23    1.39     0.58     6.11    0.20
5     Baker Mayfield  Buccaneers          6           100           100             12.61   210.28    1.40     0.75     6.67    0.16
13       Brock Purdy       49ers          2            35            35             10.90   183