In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [153]:
# Load the per-game results table. The file was saved with its row index, which
# pandas reads back as a junk 'Unnamed: 0' column; drop it (if present) while
# keeping the default 0..n-1 RangeIndex that later cells rely on.
ftGames = pd.read_csv('gamesUpdated.csv').drop(columns=['Unnamed: 0'], errors='ignore')

In [154]:
# Preview the first five rows to sanity-check the load and the column names.
ftGames.head()

Unnamed: 0,gameId,season,week,gameDate,gameTimeEastern,homeTeamAbbr,visitorTeamAbbr,homeFinalScore,visitorFinalScore,Result
0,2018090600,2018,1,9/6/2018,20:20:00,PHI,ATL,18,12,PHI
1,2018090900,2018,1,9/9/2018,13:00:00,BAL,BUF,47,3,BAL
2,2018090901,2018,1,9/9/2018,13:00:00,CLE,PIT,21,21,Draw
3,2018090902,2018,1,9/9/2018,13:00:00,IND,CIN,23,34,CIN
4,2018090903,2018,1,9/9/2018,13:00:00,MIA,TEN,27,20,MIA


In [155]:
# Index the games by their gameId so a single game can be looked up directly.
# Note: passing columns=ftGames['gameId'] to the DataFrame constructor would
# select columns *named* after the game ids (none exist) and yield an all-NaN
# frame, so set_index is used instead.
# NOTE(review): gamesDf is not used by the later cells shown here — confirm the
# intended use.
gamesDf = ftGames.set_index('gameId')

In [156]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Columns carried through to the final table: both team abbreviations, both
# final scores, and the recorded result.
columns_to_keep = ['homeTeamAbbr', 'visitorTeamAbbr', 'homeFinalScore', 'visitorFinalScore', 'Result']
# The only numeric inputs fed to the scaler and PCA.
score_columns = ['homeFinalScore', 'visitorFinalScore']

# Restrict the games table to the columns of interest.
ftGames_filtered = ftGames[columns_to_keep]
scores_to_scale = ftGames_filtered[score_columns]

# Standardize the scores (zero mean, unit variance) before PCA so neither
# column dominates because of its scale.
scaler = StandardScaler()
scaled_scores = scaler.fit_transform(scores_to_scale)

# Project onto two principal components. With two inputs and two components
# this is a rotation of the standardized scores, not a dimensionality reduction.
pca = PCA(n_components=2)
pca_results = pca.fit_transform(scaled_scores)

# Reuse the source frame's index so the column-wise concat below lines rows up
# one-to-one even when ftGames does not carry a default 0..n-1 index.
pca_df = pd.DataFrame(data=pca_results, columns=['PC1', 'PC2'], index=ftGames_filtered.index)

# Original columns side by side with their principal-component coordinates.
final_df = pd.concat([ftGames_filtered, pca_df], axis=1)

# Display the final DataFrame with PCA results and original scores
print(final_df)

    homeTeamAbbr visitorTeamAbbr  homeFinalScore  visitorFinalScore Result  \
0            PHI             ATL              18                 12    PHI   
1            BAL             BUF              47                  3    BAL   
2            CLE             PIT              21                 21   Draw   
3            IND             CIN              23                 34    CIN   
4            MIA             TEN              27                 20    MIA   
..           ...             ...             ...                ...    ...   
895          WAS             MIN              17                 20    MIN   
896          ARI             SEA              21                 31    SEA   
897           TB              LA              16                 13     TB   
898           KC             TEN              20                 17     KC   
899           NO             BAL              13                 27    BAL   

          PC1       PC2  
0   -0.335660 -1.179725  
1   -3.0460

In [None]:
import tkinter as tk
from tkinter import ttk, messagebox
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import matplotlib
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
matplotlib.use('TkAgg')

class NFLPredictorGUI:
    """Tkinter window for exploring NFL game results and simple predictions.

    The game table is read from 'gamesUpdated.csv' and must provide the columns
    'homeTeamAbbr', 'visitorTeamAbbr', 'homeFinalScore', 'visitorFinalScore'
    and 'Result' (the winning team's abbreviation, or 'Draw' for a tie).

    Four notebook tabs are built: current stats for a selected team, a
    next-season win projection, head-to-head summaries with a scoring bar
    chart, and a random-forest points prediction for every team.
    """

    # Number of consecutive games treated as one "season" chunk.
    # NOTE(review): chunks are consecutive windows of a team's games in file
    # order, not calendar seasons. If the CSV carries a season column, grouping
    # on it would be more faithful — confirm before changing.
    GAMES_PER_SEASON = 17
    # How many season-sized chunks of recent games feed the random forest.
    RF_SEASONS = 3
    # Fixed seed so the train/test split and the forest are reproducible.
    RF_SEED = 42
    # Feature columns (produced by prepare_rf_features) used to fit the forest.
    RF_FEATURES = [
        'wins',
        'win_rate',
        'avg_points_scored',
        'avg_points_allowed',
        'points_trend',
    ]

    def __init__(self, master):
        """Configure the root window, load the game data and build all tabs.

        Args:
            master: the Tk root (or toplevel) that hosts the GUI.

        If the CSV file is missing, an error dialog is shown, the window is
        destroyed and no tabs are created.
        """
        self.master = master
        self.master.title("NFL Game Predictor")
        self.master.geometry("1200x600")  # Increased size to accommodate graph

        try:
            self.ftGames_filtered = pd.read_csv('gamesUpdated.csv')
        except FileNotFoundError:
            messagebox.showerror("Error", "CSV file not found!")
            self.master.destroy()
            return

        self.setup_ui()
        self.setup_rf_tab()

    def setup_ui(self):
        """Create the notebook, its first three tabs and the sorted team list."""
        self.notebook = ttk.Notebook(self.master)
        self.notebook.pack(fill='both', expand=True, padx=30, pady=15)

        self.stats_tab = ttk.Frame(self.notebook)
        self.season_tab = ttk.Frame(self.notebook)
        self.matchup_tab = ttk.Frame(self.notebook)

        self.notebook.add(self.stats_tab, text='Current Stats')
        self.notebook.add(self.season_tab, text='Season Predictions')
        self.notebook.add(self.matchup_tab, text='Head-to-Head')

        # Every abbreviation that appears as a home or visiting team. Missing
        # values are dropped so sorting never compares NaN with strings.
        games = self.ftGames_filtered
        self.teams = sorted(
            set(games['homeTeamAbbr'].dropna()) | set(games['visitorTeamAbbr'].dropna())
        )

        self.setup_stats_tab()
        self.setup_season_tab()
        self.setup_matchup_tab()

    def setup_rf_tab(self):
        """Add the 'Points Prediction' tab: trigger button, text box and figure."""
        self.rf_tab = ttk.Frame(self.notebook)
        self.notebook.add(self.rf_tab, text='Points Prediction')

        # Button that trains the model and fills in the widgets below.
        self.rf_button = ttk.Button(self.rf_tab, text="Generate Points Prediction",
                                    command=self.generate_rf_predictions)
        self.rf_button.pack(pady=10)

        # Text widget for the per-team predictions.
        self.rf_text = tk.Text(self.rf_tab, height=20, width=50)
        self.rf_text.pack(pady=10, padx=10)

        # Figure embedded in the tab for the prediction bar chart.
        self.rf_fig = plt.Figure(figsize=(8, 6))
        self.rf_canvas = FigureCanvasTkAgg(self.rf_fig, self.rf_tab)
        self.rf_canvas.get_tk_widget().pack(pady=10)

    def setup_stats_tab(self):
        """Populate the 'Current Stats' tab: team selector, output box, button."""
        ttk.Label(self.stats_tab, text="Select Team:").pack(pady=5)
        self.team_var = tk.StringVar()
        self.team_dropdown = ttk.Combobox(self.stats_tab,
                                          textvariable=self.team_var,
                                          values=self.teams)
        self.team_dropdown.pack(pady=5)

        self.stats_text = tk.Text(self.stats_tab, height=20, width=70)
        self.stats_text.pack(pady=10)

        ttk.Button(self.stats_tab,
                   text="Show Stats",
                   command=self.show_current_stats).pack(pady=50)

    def setup_season_tab(self):
        """Populate the 'Season Predictions' tab with its output text box."""
        self.season_text = tk.Text(self.season_tab, height=20, width=70)
        self.season_text.pack(pady=10)

    def setup_matchup_tab(self):
        """Populate the 'Head-to-Head' tab: text on the left, chart on the right."""
        text_frame = ttk.Frame(self.matchup_tab)
        text_frame.pack(side=tk.LEFT, pady=10, padx=5, fill='both', expand=True)

        graph_frame = ttk.Frame(self.matchup_tab)
        graph_frame.pack(side=tk.RIGHT, pady=10, padx=5, fill='both', expand=True)

        self.matchup_text = tk.Text(text_frame, height=20, width=40)
        self.matchup_text.pack(pady=10)

        # Build the figure directly (as the points-prediction tab does) rather
        # than through pyplot, so no pyplot-managed figure is created alongside
        # the embedded canvas. The size is kept small enough for the 1200x600
        # window; the canvas then stretches with its frame.
        self.fig = plt.Figure(figsize=(8, 6))
        self.ax = self.fig.add_subplot(111)
        self.canvas = FigureCanvasTkAgg(self.fig, master=graph_frame)
        self.canvas.get_tk_widget().pack(fill='both', expand=True)

    def _team_games(self, team):
        """Return every row in which `team` is the home or the visiting side."""
        games = self.ftGames_filtered
        involved = (games['homeTeamAbbr'] == team) | (games['visitorTeamAbbr'] == team)
        return games[involved]

    def _season_chunks(self, team_games):
        """Yield consecutive GAMES_PER_SEASON-row slices, skipping a short tail."""
        size = self.GAMES_PER_SEASON
        for start in range(0, len(team_games), size):
            chunk = team_games.iloc[start:start + size]
            if len(chunk) == size:
                yield chunk

    @staticmethod
    def _prediction_bounds(tree_predictions, lower_pct=5, upper_pct=95):
        """Return (lower, upper) percentile bounds across individual trees.

        Args:
            tree_predictions: 2-D array-like, one row per tree and one column
                per sample.
            lower_pct: percentile used for the lower bound.
            upper_pct: percentile used for the upper bound.
        """
        stacked = np.asarray(tree_predictions)
        return (np.percentile(stacked, lower_pct, axis=0),
                np.percentile(stacked, upper_pct, axis=0))

    def show_current_stats(self):
        """Show overall stats for the selected team and refresh the other tabs.

        Clears the stats, season and head-to-head text boxes, writes the
        team's totals, then triggers the season projection and the
        head-to-head summaries. Shows an error dialog if no team is selected.
        """
        team = self.team_var.get()
        if not team:
            messagebox.showerror("Error", "Please select a team")
            return

        for widget in (self.stats_text, self.season_text, self.matchup_text):
            widget.delete(1.0, tk.END)

        team_games = self._team_games(team)
        if team_games.empty:
            self.stats_text.insert(tk.END, f"No games found for team {team}")
            return

        total_games = len(team_games)
        wins = int((team_games['Result'] == team).sum())
        win_rate = wins / total_games

        home_games = team_games[team_games['homeTeamAbbr'] == team]
        visitor_games = team_games[team_games['visitorTeamAbbr'] == team]

        # An empty selection would average to NaN; report 0 instead.
        avg_home_score = home_games['homeFinalScore'].mean() if len(home_games) > 0 else 0
        avg_visitor_score = visitor_games['visitorFinalScore'].mean() if len(visitor_games) > 0 else 0

        stats_output = (
            f"Current Statistics for {team}:\n"
            f"{'='*40}\n"
            f"Total Games: {total_games}\n"
            f"Wins: {wins}\n"
            f"Win Rate: {win_rate:.2%}\n"
            f"Average Home Score: {avg_home_score:.2f}\n"
            f"Average Away Score: {avg_visitor_score:.2f}\n"
        )
        self.stats_text.insert(tk.END, stats_output)

        self.calculate_season_predictions(team)
        self.calculate_matchup_predictions(team)

    def calculate_season_predictions(self, team):
        """Project the team's next-season wins with a linear regression.

        Each season chunk's (wins, home average, away average) is used to
        predict the following chunk's wins; the fitted model is then applied
        to the latest chunk. The result is clamped to 0..GAMES_PER_SEASON and
        written to the season text box. With fewer than two chunks a short
        notice is written instead.
        """
        seasons_data = self.get_seasons_data(team)
        if len(seasons_data) < 2:
            self.season_text.insert(
                tk.END,
                "Not enough complete seasons of data to project next season.\n"
            )
            return

        feature_rows = [[s['wins'], s['home_score'], s['away_score']] for s in seasons_data]
        X = np.array(feature_rows[:-1])
        y_wins = np.array([s['wins'] for s in seasons_data[1:]])

        win_model = LinearRegression()
        win_model.fit(X, y_wins)

        latest_season = np.array([feature_rows[-1]])
        raw_prediction = float(win_model.predict(latest_season)[0])
        predicted_wins = int(max(0, min(self.GAMES_PER_SEASON, round(raw_prediction))))

        season_output = (
            f"Next Season Predictions:\n"
            f"{'='*40}\n"
            f"Projected Wins: {predicted_wins}\n"
            f"Projected Win Rate: {(predicted_wins/self.GAMES_PER_SEASON):.2%}\n"
        )
        self.season_text.insert(tk.END, season_output)

    def calculate_matchup_predictions(self, team):
        """Write head-to-head summaries and redraw the team's scoring chart.

        For every other team with at least one game against `team`, a text
        summary is appended to the head-to-head box and the points `team`
        scored in each of those games are added to the bar chart, labelled
        with the opponent.
        """
        # Clear previous graph
        self.ax.clear()

        games = self.ftGames_filtered
        team_scores = []
        game_labels = []  # opponent abbreviation for each bar

        for opponent in (t for t in self.teams if t != team):
            hosted = (games['homeTeamAbbr'] == team) & (games['visitorTeamAbbr'] == opponent)
            visited = (games['homeTeamAbbr'] == opponent) & (games['visitorTeamAbbr'] == team)
            matchups = games[hosted | visited]
            if matchups.empty:
                continue

            self.display_matchup_stats(team, opponent, matchups)

            # Points scored by `team` in each game, whichever side it was on.
            at_home = (matchups['homeTeamAbbr'] == team).to_numpy()
            points = np.where(at_home,
                              matchups['homeFinalScore'].to_numpy(),
                              matchups['visitorFinalScore'].to_numpy())
            team_scores.extend(points.tolist())
            game_labels.extend([opponent] * len(matchups))

        # Create the complete bar chart
        x = list(range(len(team_scores)))
        bars = self.ax.bar(x, team_scores, color='blue')

        # Customize the plot
        self.ax.set_ylabel('Points Scored')
        self.ax.set_xlabel('Games')
        self.ax.set_title(f'{team} Scoring History')
        self.ax.set_xticks(x)
        self.ax.set_xticklabels(game_labels, rotation=45)

        # Add score labels on top of each bar
        for bar in bars:
            height = bar.get_height()
            self.ax.text(bar.get_x() + bar.get_width()/2., height,
                         f'{int(height)}',
                         ha='center', va='bottom')

        # Adjust layout to prevent label cutoff
        self.fig.tight_layout()

        # Refresh canvas
        self.canvas.draw()

    def _format_matchup_stats(self, team, opponent, matchups):
        """Build the head-to-head summary text for `team` against `opponent`.

        Args:
            team: abbreviation of the selected team.
            opponent: abbreviation of the other team.
            matchups: rows of the game table in which the two teams met.

        Returns:
            The formatted summary, or an empty string when `matchups` is empty.
        """
        total = len(matchups)
        if total == 0:
            return ""

        team_wins = int((matchups['Result'] == team).sum())
        # Ties are recorded as 'Draw'; count them separately so they are not
        # reported as losses.
        draws = int((matchups['Result'] == 'Draw').sum())
        losses = total - team_wins - draws
        record = f"{team_wins}-{losses}" + (f"-{draws}" if draws else "")
        historical_win_rate = team_wins / total

        team_home = matchups[matchups['homeTeamAbbr'] == team]
        team_away = matchups[matchups['visitorTeamAbbr'] == team]

        avg_home = team_home['homeFinalScore'].mean() if len(team_home) > 0 else 0
        avg_away = team_away['visitorFinalScore'].mean() if len(team_away) > 0 else 0

        # NOTE(review): both blended terms are the same head-to-head win rate,
        # so this currently equals historical_win_rate; the 0.3 term may have
        # been meant to use a different rate — confirm.
        predicted_win_prob = (historical_win_rate * 0.7 +
                              (team_wins / max(1, total)) * 0.3)

        # Average only over venues that actually have games; otherwise a
        # placeholder 0 for the missing venue would halve the prediction.
        venue_averages = [avg for avg, venue_games in ((avg_home, team_home), (avg_away, team_away))
                          if len(venue_games) > 0]
        predicted_score = sum(venue_averages) / len(venue_averages) if venue_averages else 0

        return (
            f"\nVs {opponent}:\n"
            f"Record: {record}\n"
            f"Win Rate: {historical_win_rate:.2%}\n"
            f"Avg Home Score: {avg_home:.2f}\n"
            f"Avg Away Score: {avg_away:.2f}\n"
            f"Predicted Win Probability: {predicted_win_prob:.2%}\n"
            f"Predicted Score: {predicted_score:.2f}\n"
            f"{'-'*40}\n"
        )

    def display_matchup_stats(self, team, opponent, matchups):
        """Append the head-to-head summary for one opponent to the text box."""
        matchup_output = self._format_matchup_stats(team, opponent, matchups)
        if matchup_output:
            self.matchup_text.insert(tk.END, matchup_output)

    def get_seasons_data(self, team):
        """Summarise the team's games in consecutive season-sized chunks.

        Returns:
            A list with one dict per complete chunk of GAMES_PER_SEASON games,
            holding 'wins' (games whose Result equals `team`), 'home_score'
            (mean points scored at home) and 'away_score' (mean points scored
            away). A trailing chunk with fewer games is ignored.
        """
        seasons_data = []
        for season_games in self._season_chunks(self._team_games(team)):
            home_games = season_games[season_games['homeTeamAbbr'] == team]
            away_games = season_games[season_games['visitorTeamAbbr'] == team]
            seasons_data.append({
                'wins': int((season_games['Result'] == team).sum()),
                'home_score': home_games['homeFinalScore'].mean(),
                'away_score': away_games['visitorFinalScore'].mean(),
            })
        return seasons_data

    def prepare_rf_features(self):
        """Build one row of aggregate features per team for the random forest.

        For each team the last GAMES_PER_SEASON * RF_SEASONS games are split
        into complete season-sized chunks (counted from the oldest of those
        games). Teams without a complete chunk are omitted.

        Returns:
            A DataFrame indexed by team with the columns 'wins', 'win_rate',
            'avg_points_scored', 'avg_points_allowed', 'points_trend' (standard
            deviation of points scored across chunks) and 'total_points'
            (points scored in the last complete chunk).
        """
        window = self.GAMES_PER_SEASON * self.RF_SEASONS
        team_stats = {}

        for team in self.teams:
            recent_games = self._team_games(team).tail(window)

            seasons = []
            for season_games in self._season_chunks(recent_games):
                home_games = season_games[season_games['homeTeamAbbr'] == team]
                away_games = season_games[season_games['visitorTeamAbbr'] == team]
                seasons.append({
                    'wins': int((season_games['Result'] == team).sum()),
                    'points_scored': (home_games['homeFinalScore'].sum() +
                                      away_games['visitorFinalScore'].sum()),
                    'points_allowed': (home_games['visitorFinalScore'].sum() +
                                       away_games['homeFinalScore'].sum()),
                })

            if not seasons:
                continue

            wins = [s['wins'] for s in seasons]
            scored = [s['points_scored'] for s in seasons]
            allowed = [s['points_allowed'] for s in seasons]
            team_stats[team] = {
                'wins': np.mean(wins),
                'win_rate': np.mean([w / self.GAMES_PER_SEASON for w in wins]),
                'avg_points_scored': np.mean(scored),
                'avg_points_allowed': np.mean(allowed),
                'points_trend': np.std(scored),
                'total_points': scored[-1],  # last complete chunk
            }

        return pd.DataFrame.from_dict(team_stats, orient='index')

    def generate_rf_predictions(self):
        """Train a random forest on the team features and show its predictions.

        Fits the forest on an 80% split of the per-team features, predicts
        'total_points' for every team, derives a 5th-95th percentile band from
        the individual trees, lists the predictions in the text box and draws
        the bar chart. Writes a notice instead when fewer than two teams have
        usable features.
        """
        self.rf_text.delete(1.0, tk.END)

        stats_df = self.prepare_rf_features()
        # An empty frame has no feature columns, and a split needs two rows.
        if len(stats_df) < 2:
            self.rf_text.insert(
                tk.END,
                "Not enough complete seasons of data to train the points model.\n"
            )
            return

        X = stats_df[self.RF_FEATURES]
        y = stats_df['total_points']

        # Hold out 20% of the teams; the seed is fixed so reruns agree.
        X_train, _, y_train, _ = train_test_split(
            X, y, test_size=0.2, random_state=self.RF_SEED
        )

        rf_model = RandomForestRegressor(
            n_estimators=200,
            max_depth=None,
            min_samples_split=2,
            random_state=self.RF_SEED
        )
        rf_model.fit(X_train, y_train)

        predictions = rf_model.predict(X)

        # The spread across individual trees gives the band around each
        # prediction. The trees are queried with a plain array of the features.
        feature_matrix = X.to_numpy()
        tree_predictions = np.stack(
            [tree.predict(feature_matrix) for tree in rf_model.estimators_]
        )
        lower, upper = self._prediction_bounds(tree_predictions)

        results_df = pd.DataFrame({
            'Team': stats_df.index,
            'Predicted_Points': predictions,
            'Lower_Bound': lower,
            'Upper_Bound': upper,
            'Actual_Points': y
        }).sort_values('Predicted_Points', ascending=False)

        # Display results
        lines = [
            f"{team}: {points:.0f} points\n"
            for team, points in zip(results_df['Team'], results_df['Predicted_Points'])
        ]
        self.rf_text.insert(tk.END, "Team Points Predictions:\n\n" + "".join(lines))

        # Visualize predictions
        self.plot_rf_predictions(results_df)

    def plot_rf_predictions(self, results_df):
        """Redraw the prediction bar chart from `results_df`.

        Args:
            results_df: frame with at least 'Team' and 'Predicted_Points'
                columns, drawn in the given row order.
        """
        self.rf_fig.clear()
        ax = self.rf_fig.add_subplot(111)

        x = list(range(len(results_df)))
        ax.bar(x, results_df['Predicted_Points'], alpha=0.6, label='Predicted')
        ax.set_xticks(x)
        ax.set_xticklabels(results_df['Team'], rotation=45)
        ax.set_ylabel('Points')
        ax.set_title('Predicted Team Points')
        ax.legend()

        self.rf_fig.tight_layout()
        self.rf_canvas.draw()

def main():
    """Create the Tk root window, attach the predictor GUI and run the event loop."""
    window = tk.Tk()
    # Keep a reference to the GUI object for as long as the event loop runs.
    gui = NFLPredictorGUI(window)
    window.mainloop()

# Launch the GUI when this code is executed directly (script run or notebook
# cell) rather than imported as a module.
if __name__ == "__main__":
    main()

