# Trends DataFrame

In [2]:
import pandas as pd
trends = pd.DataFrame(columns=['team', 'wins_2021', 'wins_2022', 'wins_2023', 'wins_2024', 'wins_diff', 'trending_up'])

## connect to MySQL

In [3]:
import numpy as np
import math
import pandas as pd
#!pip install pymysql
import pymysql as mysql
import os
import seaborn as sns
import matplotlib.pyplot as plt

#!pip install tbats
from tbats import TBATS
import pandas as pd
import matplotlib.pyplot as plt

In [24]:
cnx = mysql.connect(
        host='localhost',
        user='root',
        passwd=os.getenv('MYSQL'),
        database='nfl',
        port=int(3306)
)

In [25]:
cur = cnx.cursor()

In [26]:
def sql(query, params=None):
    cur = cnx.cursor()
    cur.execute(query)
    for x in cur:
        print(x)

In [27]:
def todf(query):
    df = pd.read_sql(query, cnx)
    return df

In [28]:
a = sql('SHOW DATABASES;')

('information_schema',)
('mysql',)
('nfl',)
('performance_schema',)
('sys',)


## create 3yrs moving average DataFrame

In [36]:
query = """
SELECT
    game_team,
    game_year,
    COUNT(CASE WHEN game_result = 'W' THEN 1 END) AS wins
FROM
    games
GROUP BY
    game_team, game_year
ORDER BY
    game_team, game_year;
"""

# pull data from MySQL database
wins = todf(query)

# pivot dataframe
wins_time_series = wins.pivot(index='game_year', columns='game_team', values='wins')

# wieghts for the 3-year moving average
weights = np.array([.2, .3, .5]) 

# empty DataFrame to store the 3-year moving averages
three_year_ma_df = pd.DataFrame()

# loop through each column in the wins_time_series DataFrame
for col in wins_time_series.columns:
    new_col_name = f"{col}_3_year_MA"
    three_year_ma_df[new_col_name] = wins_time_series[col].rolling(window=3).apply(lambda x: np.dot(x, weights), raw=True)

# drop the first two rows
three_year_ma_df = three_year_ma_df.iloc[2:].reset_index(drop=True)

  df = pd.read_sql(query, cnx)


#### list all of the teams

In [40]:
teams = wins['game_team'].unique().tolist()

## train the TBATS Model

In [42]:
for team in teams:   
    seasonal_periods = [24, 6]

    # fit TBATS model
    model = TBATS(seasonal_periods=seasonal_periods)

    # create year +1 forecast
    list = three_year_ma_df[f'{team}_3_year_MA'].fillna(0).tolist()
    model_1 = model.fit(list)
    ma_forecast = model_1.forecast(steps=1)

    print(f'Success! For team {team}, the 2024 forecast is {ma_forecast[0]}.')
    # create a DataFrame
    forecasts_df = pd.DataFrame({
        'team': team,
        '2024': ma_forecast,
    })

    # get the previous wins
    value_2021 = wins_time_series[f'{team}'].loc[2021]
    value_2022 = wins_time_series[f'{team}'].loc[2022]
    value_2023 = wins_time_series[f'{team}'].loc[2023]

    # insert previous wins
    forecasts_df.insert(1, 'wins_2021', value_2021)
    forecasts_df.insert(2, 'wins_2022', value_2022)
    forecasts_df.insert(3, 'wins_2023', value_2023)

    # format the DataFrame
    a = forecasts_df['wins_2021'][0].astype(int)
    b = forecasts_df['wins_2022'][0].astype(int)
    c = forecasts_df['wins_2023'][0].astype(int)

    # calculate the forecasted moving average for 2024
    cast = ma_forecast

    # use moving average to calculate the forecasted wins for 2024
    forecasts_df['wins_2024'] = (cast - (.2*a) - (.3*b))/ .5

    # Create a new column that is the difference between wins_2024 and wins_2023
    forecasts_df['wins_diff'] = forecasts_df['wins_2024'] - forecasts_df['wins_2023']

    # create a new 'trending_up' binary column
    forecasts_df['trending_up'] = forecasts_df.apply(
        lambda row: 1 if row['wins_2024'] > row['wins_2023'] else 0, 
        axis=1
    )

    # drop the 2024 moving avergae column
    forecasts_df = forecasts_df.drop(columns=['2024'])

    # move the 'team' column to the first position
    columns = ['team'] + [col for col in forecasts_df.columns if col != 'team']
    forecasts_df = forecasts_df[columns]


    # add to the 'trends' DataFrame
    trends = pd.concat([trends, forecasts_df.iloc[[0]]], ignore_index=True)

Success! For team ARI, the 2024 forecast is 5.583202718015681.


  trends = pd.concat([trends, forecasts_df.iloc[[0]]], ignore_index=True)


Success! For team ATL, the 2024 forecast is 6.937943151796526.
Success! For team BAL, the 2024 forecast is 10.859741106871988.
Success! For team BUF, the 2024 forecast is 9.731975621934245.
Success! For team CAR, the 2024 forecast is 4.459838984109707.
Success! For team CHI, the 2024 forecast is 6.6210552474946.
Success! For team CIN, the 2024 forecast is 9.972790436571461.
Success! For team CLE, the 2024 forecast is 9.350137743354898.
Success! For team DAL, the 2024 forecast is 12.019185560050015.
Success! For team DEN, the 2024 forecast is 7.13281405665543.
Success! For team DET, the 2024 forecast is 8.74446017685748.
Success! For team GNB, the 2024 forecast is 9.567859100217627.
Success! For team HOU, the 2024 forecast is 5.956877772599885.
Success! For team IND, the 2024 forecast is 7.685009751327173.
Success! For team JAX, the 2024 forecast is 8.024721157450298.
Success! For team KAN, the 2024 forecast is 10.421752394972998.
Success! For team LAC, the 2024 forecast is 7.2949638293

In [7]:
trends['forecast'] = trends.apply(lambda row: math.ceil(row['wins_2024']) if row['trending_up'] == 1 else math.floor(row['wins_2024']), axis=1)

In [10]:
trends = trends.sort_values(by='forecast', ascending=False).reset_index(drop=True)

In [17]:
trends.to_csv('forecasts.csv', index=False)

## Visuals

In [None]:
# Plotting the data
plt.figure(figsize=(12, 6))
plt.plot(three_year_ma_df['Adjusted_Year'], three_year_ma_df['CHI_3_year_MA'], label='CHI_3_year_MA', marker='o')


# Add a point at x = 2024
forecast_value = chi_forecast 
plt.scatter(2024, forecast_value, color='red', label='Forecast (2024)')

# Adding labels and title
plt.xlabel('Adjusted Year')
plt.ylabel('MIN 3-Year MA')
plt.title('CHI 3-Year Moving Average')
plt.legend()
plt.grid(True)
plt.show()

## Polynomial

In [None]:
# Assuming 'game_year' is the index of the DataFrame
years = three_year_ma_df.index  # Access the index 'game_year'
values = three_year_ma_df['BUF_3_year_MA'].values

# Function to calculate SSE for a given polynomial period
def calculate_sse(period, x, y):
    # Fit a polynomial of degree 'period' to the data
    coeffs = np.polyfit(x, y, period)
    
    # Predict values using the polynomial
    y_pred = np.polyval(coeffs, x)
    
    # Calculate SSE (Sum of Squared Errors)
    sse = np.sum((y - y_pred) ** 2)
    return sse, coeffs

# Loop through different period values and track the one with the lowest SSE
best_period = None
lowest_sse = float('inf')
best_coeffs = None

# Example: testing periods (degrees) from 1 to 5
for period in range(6, 22):
    sse, coeffs = calculate_sse(period, years, values)
    print(f"Period: {period}, SSE: {sse}")
    
    # Update the best period if the current one has a lower SSE
    if sse < lowest_sse:
        lowest_sse = sse
        best_period = period
        best_coeffs = coeffs

# Output the best period and the corresponding coefficients
print(f"Best period (degree): {best_period}, Lowest SSE: {lowest_sse}")
print(f"Best polynomial coefficients: {best_coeffs}")

# You can also visualize the fit
import matplotlib.pyplot as plt

# Plot original data
plt.scatter(years, values, label='Original Data', color='blue')

# Plot the best-fit polynomial
best_fit_values = np.polyval(best_coeffs, years)
plt.plot(years, best_fit_values, label=f'Best Fit (Degree {best_period})', color='red')

plt.xlabel('Year')
plt.ylabel('ARI_3_year_MA')
plt.legend()
plt.show()
