In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.api import ExponentialSmoothing

Load the data and keep only the columns that are needed for the analysis.

In [None]:
# Read the raw match results; the file is decoded as latin-1.
results_csv = '/content/results.csv'
data = pd.read_csv(results_csv, encoding='latin1')
data.head()

In [None]:
# Columns used by the rest of the notebook.
req_columns = ['Season', 'DateTime', 'HomeTeam', 'AwayTeam', 'FTR']

# .copy() makes the column subset an independent frame, so the assignment below
# does not write into a slice of the raw data (SettingWithCopyWarning).
data = data[req_columns].copy()
data['DateTime'] = pd.to_datetime(data['DateTime']).dt.date  # Gets rid of the Time as it is just 00:00:00 for every game
data.head()

Plot the results of the best and the worst team of a Premier League season and fit a linear regression to find the slope.

*   A positive slope tells you that a team does better with more time between games
*   A negative slope tells you that a team does worse with more time between games




In [None]:
# Most recent season available in csv file
in_latest_season = data['Season'] == '2021-22'
selected_season = data[in_latest_season]
selected_season.head()

In [None]:
team_1 = 'Man City' # Winners of the Premier League
# Keep every match of the selected season in which team_1 played, at home or away.
played_by_team_1 = selected_season[['HomeTeam', 'AwayTeam']].eq(team_1).any(axis=1)
team_1_df = selected_season[played_by_team_1]

print(f"\n{team_1} DataFrame:")
print(team_1_df)

In [None]:
# team_1_df was produced by filtering selected_season; take an independent copy so
# the column assignments below do not write into a slice (SettingWithCopyWarning).
team_1_df = team_1_df.copy()
team_1_df['DateTime'] = pd.to_datetime(team_1_df['DateTime'])
team_1_df = team_1_df.sort_values(by='DateTime')
# Days elapsed since the previous row (in date order) within the same season.
team_1_df['TimeBetweenGames'] = team_1_df.groupby('Season')['DateTime'].diff().dt.days
# The first game of a season has no previous game, so its gap is NaN and the row is dropped.
team_1_df = team_1_df.dropna()

In [None]:
# Exponential Smoothing
seasonal_periods = 1
# Give the series a plain 0..n-1 index: the index left over from filtering and sorting
# is not one statsmodels can use for forecasting, and it warns that it will be ignored.
team_1_gaps = team_1_df['TimeBetweenGames'].reset_index(drop=True)
# `damped` is the deprecated name of this option; statsmodels now calls it `damped_trend`.
trend_model = ExponentialSmoothing(team_1_gaps, seasonal_periods=seasonal_periods, trend='add', damped_trend=True)
trend_fit_model = trend_model.fit()
# In-sample smoothed values, one per game.
trend_forecast = trend_fit_model.fittedvalues

In [None]:
# Linear Regression
# Feature: the smoothed time between games, as a single-column matrix.
X = trend_forecast.values.reshape(-1, 1)
# FTR is recorded from the home side's point of view (H = home win, A = away win),
# so the sign is flipped for away games to get the result from team_1's point of view:
# 1 = team_1 won, 0 = draw, -1 = team_1 lost.
home_side_result = team_1_df['FTR'].map({'H': 1, 'A': -1, 'D': 0})
played_at_home = team_1_df['HomeTeam'] == team_1
y = np.where(played_at_home, home_side_result, -home_side_result)
model = LinearRegression()
model.fit(X, y)

In [None]:
# Extend the smoothed time-between-games series and run the regression on the extension.
forecast_periods = 9
future_trend_forecast = trend_fit_model.forecast(steps=forecast_periods)
future_X = future_trend_forecast.to_numpy().reshape(-1, 1)
forecast = model.predict(future_X)

In [None]:
# Mean number of days between consecutive team_1 games.
average_time_between_games = team_1_df['TimeBetweenGames'].mean()

In [None]:
# Encoded full-time result of every game against its date, with the regression's
# in-sample predictions overlaid.
fig, ax = plt.subplots(figsize=(10, 6))
encoded_results = team_1_df['FTR'].map({'H': 1, 'A': -1, 'D': 0})
ax.plot(team_1_df['DateTime'], encoded_results, marker='o', linestyle='-', color='b')
ax.plot(team_1_df['DateTime'], model.predict(X), color='red', linestyle='-', linewidth=2, label='Linear Regression')
ax.set_xlabel('Date of the Game')
ax.set_ylabel('Result (H: 1, A: -1, D: 0)')
ax.set_title(f'Result vs. Date of the Game for {team_1}')
ax.grid(True)
ax.legend()
plt.show()
# Report the fitted slope and the average gap between games.
print("Slope of the Linear Regression Line:", model.coef_[0])
print("Average Time Between Games:", average_time_between_games)
# Regression output for the forecast time-between-games values.
forecast_outcome = model.predict(future_X)
print("Forecasted Outcome based on time until next game (H: Home win, A: Away win, D: Draw):")
print(forecast_outcome)

In [None]:
team_2 = 'Norwich' # Bottom team in the Premier League
# Keep every match of the selected season in which team_2 played, at home or away.
played_by_team_2 = selected_season[['HomeTeam', 'AwayTeam']].eq(team_2).any(axis=1)
team_2_df = selected_season[played_by_team_2]

print(f"\n{team_2} DataFrame:")
print(team_2_df)

In [None]:
# team_2_df was produced by filtering selected_season; take an independent copy so
# the column assignments below do not write into a slice (SettingWithCopyWarning).
team_2_df = team_2_df.copy()
team_2_df['DateTime'] = pd.to_datetime(team_2_df['DateTime'])
team_2_df = team_2_df.sort_values(by='DateTime')
# Days elapsed since the previous row (in date order) within the same season.
team_2_df['TimeBetweenGames'] = team_2_df.groupby('Season')['DateTime'].diff().dt.days
# The first game of a season has no previous game, so its gap is NaN and the row is dropped.
team_2_df = team_2_df.dropna()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_2_df['DateTime'] = pd.to_datetime(team_2_df['DateTime'])


In [None]:
# Exponential Smoothing
seasonal_periods = 1
# Give the series a plain 0..n-1 index: the index left over from filtering and sorting
# is not one statsmodels can use for forecasting, and it warns that it will be ignored.
team_2_gaps = team_2_df['TimeBetweenGames'].reset_index(drop=True)
# `damped` is the deprecated name of this option; statsmodels now calls it `damped_trend`.
trend_model = ExponentialSmoothing(team_2_gaps, seasonal_periods=seasonal_periods, trend='add', damped_trend=True)
trend_fit_model = trend_model.fit()
# In-sample smoothed values, one per game.
trend_forecast = trend_fit_model.fittedvalues

  trend_model = ExponentialSmoothing(team_2_df['TimeBetweenGames'], seasonal_periods=seasonal_periods, trend='add', damped=True)
  self._init_dates(dates, freq)


In [None]:
# Linear Regression
# Feature: the smoothed time between games, as a single-column matrix.
X = trend_forecast.values.reshape(-1, 1)
# FTR is recorded from the home side's point of view (H = home win, A = away win),
# so the sign is flipped for away games to get the result from team_2's point of view:
# 1 = team_2 won, 0 = draw, -1 = team_2 lost.
home_side_result = team_2_df['FTR'].map({'H': 1, 'A': -1, 'D': 0})
played_at_home = team_2_df['HomeTeam'] == team_2
y = np.where(played_at_home, home_side_result, -home_side_result)
model = LinearRegression()
model.fit(X, y)

In [None]:
# Extend the smoothed time-between-games series and run the regression on the extension.
forecast_periods = 9
future_trend_forecast = trend_fit_model.forecast(steps=forecast_periods)
future_X = future_trend_forecast.to_numpy().reshape(-1, 1)
forecast = model.predict(future_X)

  return get_prediction_index(
  return get_prediction_index(


In [None]:
# Mean number of days between consecutive team_2 games.
average_time_between_games = team_2_df['TimeBetweenGames'].mean()

In [None]:
# Encoded full-time result of every game against its date, with the regression's
# in-sample predictions overlaid.
fig, ax = plt.subplots(figsize=(10, 6))
encoded_results = team_2_df['FTR'].map({'H': 1, 'A': -1, 'D': 0})
ax.plot(team_2_df['DateTime'], encoded_results, marker='o', linestyle='-', color='b')
ax.plot(team_2_df['DateTime'], model.predict(X), color='red', linestyle='-', linewidth=2, label='Linear Regression')
ax.set_xlabel('Date of the Game')
ax.set_ylabel('Result (H: 1, A: -1, D: 0)')
ax.set_title(f'Result vs. Date of the Game for {team_2}')
ax.grid(True)
ax.legend()
plt.show()
# Report the fitted slope and the average gap between games.
print("Slope of the Linear Regression Line:", model.coef_[0])
print("Average Time Between Games:", average_time_between_games)
# Regression output for the forecast time-between-games values.
forecast_outcome = model.predict(future_X)
print("Forecasted Outcome based on time until next game (H: Home win, A: Away win, D: Draw):")
print(forecast_outcome)

Create a model that takes the two teams and the time between games for each team as input, and returns the probability of each team winning and the probability of a draw.

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder

In [None]:
def time_between_games(data):
    """Return a date-sorted copy of `data` with a 'TimeBetweenGames' column.

    The frame passed in is left untouched; all work happens on a new frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'Season' and 'DateTime' columns. 'DateTime' may hold
        anything `pd.to_datetime` can parse.

    Returns
    -------
    pandas.DataFrame
        Rows sorted by 'DateTime', with 'DateTime' converted to datetimes and
        'TimeBetweenGames' holding the number of days since the previous row
        of the same season. Rows containing any missing value are dropped,
        which includes the first row of every season (it has no previous row).

    Notes
    -----
    The gap is measured between consecutive rows of the frame passed in; the
    function does not group by team, so callers that want per-team gaps must
    pass a frame that only contains that team's games.
    """
    games = data.assign(DateTime=pd.to_datetime(data['DateTime'])).sort_values(by='DateTime')
    # diff() within each season so a gap never spans two seasons.
    games = games.assign(TimeBetweenGames=games.groupby('Season')['DateTime'].diff().dt.days)
    return games.dropna()

In [None]:
def train_model(data):
    """Fit a logistic regression that predicts a game's result from the previous row's gap.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'FTR' (full-time result labels) and 'TimeBetweenGames'.
        The frame is only read, never modified.

    Returns
    -------
    (model, label_encoder)
        `model` is the fitted LogisticRegression; `label_encoder` is the
        LabelEncoder fitted on 'FTR', so integer class codes used by the model
        can be mapped back to the original labels.
    """
    label_encoder = LabelEncoder()
    # Encode locally instead of adding a column to the caller's frame.
    encoded_results = label_encoder.fit_transform(data['FTR'])
    gaps = data['TimeBetweenGames'].to_numpy()
    # Row i's time between games is paired with row i+1's result, so the last gap
    # and the first result are unused.
    # NOTE(review): confirm this one-row offset is intended rather than pairing
    # each gap with the result of the same row.
    X = gaps[:-1].reshape(-1, 1)
    # Integer slicing keeps the labels as ints (shift(-1) would turn them into floats),
    # so model.classes_ stays compatible with label_encoder.inverse_transform.
    y = encoded_results[1:]
    model = LogisticRegression()
    model.fit(X, y)

    return model, label_encoder

In [None]:
def predict_result(model, label_encoder, team1, team2, team1_tbg, team2_tbg):
    """Combine the model's class probabilities for two time-between-games values.

    `model.predict_proba` is called once per team with that team's time between
    games. The two probability vectors are then multiplied position by position
    to score each outcome. `label_encoder` is accepted but not used.

    NOTE(review): positions 0, 1 and 2 of the probability vector are presumably
    the 'A', 'D' and 'H' classes of the fitted encoder; verify against
    `label_encoder.classes_`.

    Returns a dict with the keys "<team1> wins", "<team2> wins" and "Draw".
    The values are products of probabilities and are not normalised to sum to 1.
    """
    probs_for_team1 = model.predict_proba([[team1_tbg]])[0]
    probs_for_team2 = model.predict_proba([[team2_tbg]])[0]

    # team1 "winning" is paired with team2 "losing", and vice versa.
    team1_wins = probs_for_team1[2] * probs_for_team2[0]
    team2_wins = probs_for_team1[0] * probs_for_team2[2]
    draw = probs_for_team1[1] * probs_for_team2[1]

    return {
        f"{team1} wins": team1_wins,
        f"{team2} wins": team2_wins,
        "Draw": draw,
    }

In [None]:
# Collect the prediction inputs. Surrounding whitespace is stripped because the season
# is later matched exactly against the 'Season' column, and a stray space would select
# no rows.
season = input("Enter the Premier League season: ").strip()
team1 = input("Enter the name of team 1: ").strip()
team2 = input("Enter the name of team 2: ").strip()
# The two times between games are kept as text here and converted to int in a later cell.
team1_tbg = input("Enter time between games for team 1: ").strip()
team2_tbg = input("Enter time between games for team 2: ").strip()

Enter the name of team 1: Man City
Enter the name of team 2: Norwich
Enter time between games for team 1: 6
Enter time between games for team 2: 9


In [None]:
# Turn the typed time-between-games values into integers.
team1_tbg, team2_tbg = (int(raw_value) for raw_value in (team1_tbg, team2_tbg))

In [None]:
# Select the matches of the requested season. .copy() gives an independent frame,
# so later column assignments never write into a slice of `data`.
teams_data = data[data['Season'] == season].copy()
if teams_data.empty:
    # Fail early with a readable message instead of an obscure error during model fitting.
    raise ValueError(
        f"No matches found for season {season!r}; "
        f"available seasons: {data['Season'].unique().tolist()}"
    )
teams_data = time_between_games(teams_data)
model, label_encoder = train_model(teams_data)

In [None]:
# Score the three outcomes for the requested fixture and list them with two decimals.
match_result = predict_result(model, label_encoder, team1, team2, team1_tbg, team2_tbg)
print("Predicted Match Result Probabilities:")
print("\n".join(f"{outcome}: {probability:.2f}" for outcome, probability in match_result.items()))

Convert probabilities into betting odds

In [None]:
def odds_calculator(team1_prob, team2_prob, draw_prob):
    """Convert three outcome probabilities into American (moneyline) odds.

    Each probability is converted independently:

    * probability above 0.5 (favourite): negative odds, ``-100 * p / (1 - p)``
    * probability of 0.5 or below (underdog): positive odds, ``100 * (1 - p) / p``

    A result is therefore always returned, including when the draw is the most
    likely outcome or when two outcomes are equally likely.

    Parameters
    ----------
    team1_prob, team2_prob, draw_prob : float
        Probabilities strictly between 0 and 1. They are not required to sum to 1.

    Returns
    -------
    tuple of int
        ``(team1_odds, team2_odds, draw_odds)``, each rounded to the nearest integer.

    Raises
    ------
    ValueError
        If any probability is not strictly between 0 and 1 (the odds are undefined there).
    """
    def _to_american(prob):
        # Probabilities of exactly 0 or 1 would divide by zero below.
        if not 0 < prob < 1:
            raise ValueError(f"probability must be strictly between 0 and 1, got {prob!r}")
        if prob > 0.5:
            # Favourite: amount that must be staked to win 100.
            return int(round(-100 * prob / (1 - prob)))
        # Underdog (or even money): amount won on a stake of 100.
        return int(round(100 * (1 - prob) / prob))

    return _to_american(team1_prob), _to_american(team2_prob), _to_american(draw_prob)

In [None]:
# Pull the three outcome probabilities out of the prediction dict.
team1_prob, team2_prob, draw_prob = (
    match_result[outcome_key]
    for outcome_key in (f"{team1} wins", f"{team2} wins", "Draw")
)

In [None]:
# Convert the three probabilities to betting odds.
match_odds = odds_calculator(team1_prob, team2_prob, draw_prob)
team1_odds, team2_odds, draw_odds = match_odds

In [None]:
# Show each outcome's odds with an explicit sign.
print("Predicted Match Odds")
for outcome_label, outcome_odds in ((team1, team1_odds), (team2, team2_odds), ("Draw", draw_odds)):
    print(f"{outcome_label}: {outcome_odds:+}")