In [None]:
import pandas as pd
import numpy as np


In [None]:
# Load Manchester United's 2016/17 Premier League match log from FBref.
df_17 = pd.read_html('https://fbref.com/en/squads/19538871/2016-2017/c9/Manchester-United-Stats-Premier-League', attrs={'id': 'matchlogs_for'})[0]
# The same FBref table is parsed with the default (dayfirst=False) for 2015/16
# and 2018/19, and the dates displayed later are plain YYYY-MM-DD values, so
# the default parser is used here too. NOTE(review): recent pandas versions
# warn when an ISO format is inferred while dayfirst=True is set - confirm.
df_17['Date'] = pd.to_datetime(df_17['Date'])
# Dropping irrelevant columns #
columns_to_drop = ['Time', 'Captain', 'Formation', 'Attendance', 'Opp Formation', 'Referee', 'Match Report', 'Notes']
df_17.drop(columns=columns_to_drop, inplace=True, errors='ignore')

# Dropping rows with missing essential info #
df_17.dropna(subset=['Date', 'Opponent', 'Result'], inplace=True)

# Extract matchday number from 'Round' column (e.g., "Matchweek 5" → 5) #
# expand=False yields a Series, so a Series (not a one-column frame) is assigned.
df_17['matchday'] = df_17['Round'].str.extract(r'(\d+)', expand=False).astype(int)

# Create points_won column based on results #
result_to_points = {'W': 3, 'D': 1, 'L': 0}
df_17['points_won'] = df_17['Result'].map(result_to_points)

# Standardize home/away column to binary values: 0 for 'Home', 1 for anything else #
df_17['home_0_away_1'] = df_17['Venue'].apply(lambda x: 0 if x == 'Home' else 1)

# Reorder columns. The explicit copy detaches the subset from the scraped
# frame, so later cells can add columns without SettingWithCopyWarning. #
df_17 = df_17[['matchday', 'Date', 'home_0_away_1', 'Opponent','Poss', 'Result', 'points_won', 'GF', 'GA']].copy()


In [None]:
# Days between each row's date and the previous row's date; the first row has
# no predecessor, so its gap is filled with 0.
gap_days_17 = df_17['Date'].diff().dt.days
df_17['days_since_last_game_united'] = gap_days_17.fillna(0).astype(int)

In [None]:
# Load the club Elo ratings and keep English clubs from 1 July 2013 onwards.
df_elo = pd.read_csv('/content/EloRatings.csv')
df_elo['date'] = pd.to_datetime(df_elo['date'], dayfirst=True)
cutoff = pd.Timestamp('2013-07-01')
# Both filters are applied in one step on the already-parsed 'date' column.
# Re-parsing the column on a filtered slice changes nothing and only raises
# SettingWithCopyWarning, so the dates are parsed exactly once above.
is_english = df_elo['country'] == 'ENG'
on_or_after_cutoff = df_elo['date'] >= cutoff
df_elo = df_elo[is_english & on_or_after_cutoff].reset_index(drop=True)
print(df_elo.country.unique())    # should be just ['ENG']
print(df_elo.date.min(), df_elo.date.max())


In [None]:
# Distinct opponent names, used to check the name mappings in later cells.
print(df_17['Opponent'].unique())

['Bournemouth' 'Southampton' 'Hull City' 'Manchester City' 'Watford'
 'Leicester City' 'Stoke City' 'Liverpool' 'Chelsea' 'Burnley'
 'Swansea City' 'Arsenal' 'West Ham' 'Everton' 'Tottenham'
 'Crystal Palace' 'West Brom' 'Sunderland' 'Middlesbrough']


In [None]:
def round_to_elo_date(dt):
    """Snap a date back to the 1st or the 15th of its own month.

    Days 1-14 map to the 1st; day 15 onwards maps to the 15th. The result is
    a new ``pd.Timestamp`` built from the year, month and chosen day only.
    """
    anchor_day = 1 if dt.day < 15 else 15
    return pd.Timestamp(dt.year, dt.month, anchor_day)
# Join key: each fixture date snapped to the 1st/15th, matched against the
# Elo table's own 'date' column.
df_17['merge_date'] = df_17['Date'].apply(round_to_elo_date)
df_elo['merge_date'] = df_elo['date']

# FBref opponent name -> club name used in the Elo table.
_same_in_both_17 = [
    'Arsenal', 'Bournemouth', 'Burnley', 'Chelsea', 'Crystal Palace',
    'Everton', 'Liverpool', 'Middlesbrough', 'Southampton', 'Sunderland',
    'Tottenham', 'Watford', 'West Brom', 'West Ham',
]
name_map = {club: club for club in _same_in_both_17}
name_map.update({
    'Hull City': 'Hull',
    'Leicester City': 'Leicester',
    'Manchester City': 'Man City',
    'Stoke City': 'Stoke',
    'Swansea City': 'Swansea',
})
df_17['elo_opponent'] = df_17['Opponent'].map(name_map)
df_17['elo_club'] = 'Man United'

# Two left joins against the same lookup: the opponent's rating first, then
# United's. The right-hand 'club' key is dropped after each join.
_elo_lookup_17 = df_elo[['club', 'merge_date', 'elo']]
for _key_col, _rating_col in (('elo_opponent', 'opp_elo'), ('elo_club', 'united_elo')):
    df_17 = df_17.merge(
        _elo_lookup_17.rename(columns={'elo': _rating_col}),
        left_on=[_key_col, 'merge_date'],
        right_on=['club', 'merge_date'],
        how='left',
    ).drop(columns='club')
df_17 = df_17.drop(columns='elo_club')

In [None]:
# Remove the helper columns that were only needed for the Elo join.
df_17 = df_17.drop(columns=['merge_date', 'elo_opponent'], errors='ignore')

In [None]:
# United's Elo minus the opponent's Elo.
df_17["elo_diff"] = df_17["united_elo"].sub(df_17["opp_elo"])

# Opponent -> hard-coded position for the 2016/17 season, listed in rank order.
final_pos = {
    'Chelsea': 1,
    'Tottenham': 2,
    'Manchester City': 3,
    'Liverpool': 4,
    'Arsenal': 5,
    'Everton': 7,
    'Southampton': 8,
    'Bournemouth': 9,
    'West Brom': 10,
    'West Ham': 11,
    'Leicester City': 12,
    'Stoke City': 13,
    'Crystal Palace': 14,
    'Swansea City': 15,
    'Burnley': 16,
    'Watford': 17,
    'Hull City': 18,
    'Middlesbrough': 19,
    'Sunderland': 20,
}
df_17['opp_final_pos'] = df_17['Opponent'].map(final_pos)

In [None]:
# Odds file for 2016/17; its dates are parsed day-first.
df_bet_17 = pd.read_csv('/content/16-17 Odds.csv')
df_bet_17['Date'] = pd.to_datetime(df_bet_17['Date'], dayfirst=True)

# (Date, team) -> B365 price, keyed once by home team and once by away team.
_home_prices_17 = df_bet_17.set_index(['Date', 'HomeTeam'])['B365H']
_away_prices_17 = df_bet_17.set_index(['Date', 'AwayTeam'])['B365A']
home_odds_map = _home_prices_17.to_dict()
away_odds_map = _away_prices_17.to_dict()

def get_united_win_odds(row):
    """Return the price stored for 'Man United' on this row's date.

    Reads the module-level ``home_odds_map`` when ``row['home_0_away_1']`` is 0
    and ``away_odds_map`` otherwise; both are keyed by ``(Date, team)``.
    Returns ``pd.NA`` when the key is absent.
    """
    lookup = home_odds_map if row['home_0_away_1'] == 0 else away_odds_map
    return lookup.get((row['Date'], 'Man United'), pd.NA)

# Row-wise lookup of United's price for every 2016/17 fixture.
df_17['united_win_odds'] = df_17.apply(get_united_win_odds, axis='columns')

In [None]:
# Hand-entered values, one per row of df_17 in row order (38 entries).
df_17["days_since_last_opp_game"] = [0, 6, 7, 13, 8, 7, 8, 16, 8, 7, 6, 13, 8, 7, 8, 4, 3, 9, 5, 2, 13, 7, 10, 5, 7, 7, 15, 14, 3, 5, 8, 8, 12, 8, 7, 9, 4, 7]
# Select and order the columns; .copy() detaches the subset so that later
# cells can add columns to df_17 without SettingWithCopyWarning.
df_17 = df_17[['matchday','Date','home_0_away_1','Opponent','Poss','Result','points_won','GF','GA','united_elo', 'opp_elo', 'elo_diff', 'united_win_odds', 'opp_final_pos', 'days_since_last_game_united', 'days_since_last_opp_game']].copy()

In [None]:
# Load Manchester United's 2017/18 Premier League match log from FBref.
df_18 = pd.read_html('https://fbref.com/en/squads/19538871/2017-2018/matchlogs/c9/schedule/Manchester-United-Scores-and-Fixtures-Premier-League', attrs={"id":"matchlogs_for"})[0]
# The same FBref table is parsed with the default (dayfirst=False) for 2015/16
# and 2018/19, and the dates displayed later are plain YYYY-MM-DD values, so
# the default parser is used here too. NOTE(review): recent pandas versions
# warn when an ISO format is inferred while dayfirst=True is set - confirm.
df_18['Date'] = pd.to_datetime(df_18['Date'])
# Dropping irrelevant columns #
columns_to_drop = ['Time', 'Captain', 'Formation', 'Attendance', 'Opp Formation', 'Referee', 'Match Report', 'Notes']
df_18.drop(columns=columns_to_drop, inplace=True, errors='ignore')

# Dropping rows with missing essential info #
df_18.dropna(subset=['Date', 'Opponent', 'Result'], inplace=True)

# Extract matchday number from 'Round' column (e.g., "Matchweek 5" → 5) #
# expand=False yields a Series, so a Series (not a one-column frame) is assigned.
df_18['matchday'] = df_18['Round'].str.extract(r'(\d+)', expand=False).astype(int)

# Create points_won column based on results #
result_to_points = {'W': 3, 'D': 1, 'L': 0}
df_18['points_won'] = df_18['Result'].map(result_to_points)

# Standardize home/away column to binary values: 0 for 'Home', 1 for anything else #
df_18['home_0_away_1'] = df_18['Venue'].apply(lambda x: 0 if x == 'Home' else 1)

# Reorder columns. The explicit copy detaches the subset from the scraped
# frame, so later cells can add columns without SettingWithCopyWarning. #
df_18 = df_18[['matchday', 'Date', 'home_0_away_1', 'Opponent','Poss', 'Result', 'points_won', 'GF', 'GA','xG','xGA']].copy()

In [None]:
# Distinct opponent names for 2017/18, in order of first appearance.
print(pd.unique(df_18['Opponent']))

['West Ham' 'Swansea City' 'Leicester City' 'Stoke City' 'Everton'
 'Southampton' 'Crystal Palace' 'Liverpool' 'Huddersfield' 'Tottenham'
 'Chelsea' 'Newcastle Utd' 'Brighton' 'Watford' 'Arsenal'
 'Manchester City' 'Bournemouth' 'West Brom' 'Burnley']


In [None]:
# Gap in whole days to the previous row's date; the first row gets 0.
_date_gap_18 = df_18['Date'] - df_18['Date'].shift()
df_18['days_since_last_game_united'] = _date_gap_18.dt.days.fillna(0).astype(int)
def round_to_elo_date(dt):
    """Map a date to the 1st (days 1-14) or the 15th (day 15+) of its month."""
    if dt.day < 15:
        return pd.Timestamp(dt.year, dt.month, 1)
    return pd.Timestamp(dt.year, dt.month, 15)
# Join key: fixture date snapped to the 1st/15th vs. the Elo table's 'date'.
df_18['merge_date'] = df_18['Date'].apply(round_to_elo_date)
df_elo['merge_date'] = df_elo['date']

# FBref opponent name -> club name used in the Elo table (alphabetical).
name_map = {
    'Arsenal': 'Arsenal',
    'Bournemouth': 'Bournemouth',
    'Brighton': 'Brighton',
    'Burnley': 'Burnley',
    'Chelsea': 'Chelsea',
    'Crystal Palace': 'Crystal Palace',
    'Everton': 'Everton',
    'Huddersfield': 'Huddersfield',
    'Leicester City': 'Leicester',
    'Liverpool': 'Liverpool',
    'Manchester City': 'Man City',
    'Newcastle Utd': 'Newcastle',
    'Southampton': 'Southampton',
    'Stoke City': 'Stoke',
    'Swansea City': 'Swansea',
    'Tottenham': 'Tottenham',
    'Watford': 'Watford',
    'West Brom': 'West Brom',
    'West Ham': 'West Ham',
}
df_18['elo_opponent'] = df_18['Opponent'].map(name_map)
df_18['elo_club'] = 'Man United'

# Pre-build the two right-hand tables, then left-join each in turn.
_elo_cols_18 = df_elo[['club', 'merge_date', 'elo']]
_opp_ratings_18 = _elo_cols_18.rename(columns={'elo': 'opp_elo'})
_united_ratings_18 = _elo_cols_18.rename(columns={'elo': 'united_elo'})

df_18 = df_18.merge(
    _opp_ratings_18,
    how='left',
    left_on=['elo_opponent', 'merge_date'],
    right_on=['club', 'merge_date'],
)
df_18 = df_18.drop(columns='club')

df_18 = df_18.merge(
    _united_ratings_18,
    how='left',
    left_on=['elo_club', 'merge_date'],
    right_on=['club', 'merge_date'],
)
df_18 = df_18.drop(columns=['club', 'elo_club'])

In [None]:
# Remove the join helpers, then derive the two difference columns.
df_18 = df_18.drop(columns=['merge_date', 'elo_opponent'], errors='ignore')
df_18["elo_diff"] = df_18["united_elo"].sub(df_18["opp_elo"])
df_18['xG_diff'] = df_18['xG'].sub(df_18['xGA'])

# Opponent -> hard-coded position for the 2017/18 season, listed in rank order.
final_pos = {
    'Manchester City': 1,
    'Tottenham': 3,
    'Liverpool': 4,
    'Chelsea': 5,
    'Arsenal': 6,
    'Burnley': 7,
    'Everton': 8,
    'Leicester City': 9,
    'Newcastle Utd': 10,
    'Crystal Palace': 11,
    'Bournemouth': 12,
    'West Ham': 13,
    'Watford': 14,
    'Brighton': 15,
    'Huddersfield': 16,
    'Southampton': 17,
    'Swansea City': 18,
    'Stoke City': 19,
    'West Brom': 20,
}
df_18['opp_final_pos'] = df_18['Opponent'].map(final_pos)

In [None]:
# Odds file for 2017/18 with its 'Date' column parsed day-first.
df_bet_18 = pd.read_csv('/content/17-18 Odds.csv').assign(
    Date=lambda odds: pd.to_datetime(odds['Date'], dayfirst=True)
)
# (Date, team) -> B365 price, for the home side and the away side.
home_odds_map = df_bet_18.set_index(['Date', 'HomeTeam'])['B365H'].to_dict()
away_odds_map = df_bet_18.set_index(['Date', 'AwayTeam'])['B365A'].to_dict()
def get_united_win_odds(row):
    """Look up the 'Man United' price for the row's date.

    ``home_odds_map`` is consulted when ``row['home_0_away_1'] == 0``,
    ``away_odds_map`` otherwise. A missing key yields ``pd.NA``.
    """
    match_key = (row['Date'], 'Man United')
    if row['home_0_away_1'] != 0:
        return away_odds_map.get(match_key, pd.NA)
    return home_odds_map.get(match_key, pd.NA)
# Row-wise lookup of United's price for every 2017/18 fixture.
df_18['united_win_odds'] = df_18.apply(get_united_win_odds, axis='columns')

In [None]:
# Hand-entered values, one per row of df_18 in row order (38 entries).
df_18["days_since_last_opp_game"] = [0, 7, 7, 13, 8, 7, 7, 13, 7, 6, 8, 14, 5, 3, 3, 7, 4, 4, 7, 3, 4, 2, 14, 7, 10, 4, 7, 13, 8, 7, 21, 7, 8, 4, 7, 6, 5, 8]
# Select and order the columns; .copy() detaches the subset so that later
# cells can add columns to df_18 without SettingWithCopyWarning.
df_18 = df_18[['matchday','Date','home_0_away_1','Opponent','Poss','Result','points_won','GF','GA','xG','xGA','xG_diff','united_elo', 'opp_elo', 'elo_diff', 'united_win_odds', 'opp_final_pos', 'days_since_last_game_united', 'days_since_last_opp_game']].copy()

In [None]:
# Display the 2017/18 frame for a visual check.
df_18

In [None]:
# Hand-entered expected-goals figures for 2016/17, one per row of df_17.
df_17['xG'] = [1.91, 1.37, 2.47, 1.01, 1.75, 2.46, 2.81,
 0.54, 0.74, 2.72, 1.00, 0.51, 1.44, 0.54,
 1.05, 1.27, 0.98, 2.32, 2.54, 1.65, 1.03,
 1.74, 1.48, 1.99, 2.63, 2.54, 2.47, 0.99,
 1.67, 1.69, 0.81, 2.25, 0.42, 1.99, 0.58,
 1.04, 0.46, 1.04]
df_17['xGA'] = [0.52, 0.61, 0.27, 1.89, 1.41, 0.29, 1.20,
 0.27, 2.25, 0.33, 0.51, 0.58, 0.75, 1.65,
 0.72, 0.71, 0.60, 0.49, 0.64, 0.80, 1.46,
 0.35, 0.40, 0.49, 0.64, 0.82, 1.09, 0.20,
 0.59, 0.98, 0.13, 0.34, 1.42, 0.78, 1.04,
 2.11, 1.67, 0.30]
# Must be a subtraction. Writing `a = b = c` here is a chained assignment,
# which would overwrite xG with xGA and leave all three columns identical.
df_17['xG_diff'] = df_17['xG'] - df_17['xGA']
# .copy() detaches the subset so later cells can add columns without
# SettingWithCopyWarning.
df_17 = df_17[['matchday','Date','home_0_away_1','Opponent','Poss','Result','points_won','GF','GA','xG','xGA','xG_diff','united_elo', 'opp_elo', 'elo_diff', 'united_win_odds', 'opp_final_pos', 'days_since_last_game_united', 'days_since_last_opp_game']].copy()

In [None]:
# Fetch the 2015/16 match log table (id "matchlogs_for") from FBref and parse
# its dates month-first / ISO (dayfirst=False).
_matchlog_tables_16 = pd.read_html(
    'https://fbref.com/en/squads/19538871/2015-2016/matchlogs/c9/schedule/Manchester-United-Scores-and-Fixtures-Premier-League',
    attrs={"id": "matchlogs_for"},
)
df_16 = _matchlog_tables_16[0]
df_16['Date'] = pd.to_datetime(df_16['Date'], dayfirst=False)

In [None]:
# Dropping irrelevant columns #
columns_to_drop = ['Time', 'Captain', 'Formation', 'Attendance', 'Opp Formation', 'Referee', 'Match Report', 'Notes']
df_16.drop(columns=columns_to_drop, inplace=True, errors='ignore')

# Dropping rows with missing essential info #
df_16.dropna(subset=['Date', 'Opponent', 'Result'], inplace=True)

# Extract matchday number from 'Round' column (e.g., "Matchweek 5" → 5) #
# expand=False yields a Series, so a Series (not a one-column frame) is assigned.
df_16['matchday'] = df_16['Round'].str.extract(r'(\d+)', expand=False).astype(int)

# Create points_won column based on results #
result_to_points = {'W': 3, 'D': 1, 'L': 0}
df_16['points_won'] = df_16['Result'].map(result_to_points)

# Standardize home/away column to binary values: 0 for 'Home', 1 for anything else #
df_16['home_0_away_1'] = df_16['Venue'].apply(lambda x: 0 if x == 'Home' else 1)

# Reorder columns. The explicit copy detaches the subset from the scraped
# frame, so later cells can add columns without SettingWithCopyWarning. #
df_16 = df_16[['matchday', 'Date', 'home_0_away_1', 'Opponent','Poss', 'Result', 'points_won', 'GF', 'GA']].copy()

In [None]:
# Whole days since the previous row's date; 0 where there is no previous row.
_fixture_dates_16 = df_16['Date']
_gap_16 = _fixture_dates_16.diff()
df_16['days_since_last_game_united'] = _gap_16.dt.days.fillna(0).astype(int)

In [None]:
# Distinct opponent names for 2015/16, in order of first appearance.
print(df_16['Opponent'].unique())

['Tottenham' 'Aston Villa' 'Newcastle Utd' 'Swansea City' 'Liverpool'
 'Southampton' 'Sunderland' 'Arsenal' 'Everton' 'Manchester City'
 'Crystal Palace' 'West Brom' 'Watford' 'Leicester City' 'West Ham'
 'Bournemouth' 'Norwich City' 'Stoke City' 'Chelsea']


In [None]:
def round_to_elo_date(dt):
    """Return the 1st of dt's month for days 1-14, otherwise the 15th."""
    # Index 1 (True) selects day 1, index 0 (False) selects day 15.
    snapshot_day = (15, 1)[dt.day < 15]
    return pd.Timestamp(dt.year, dt.month, snapshot_day)
# Join key: fixture date snapped to the 1st/15th vs. the Elo table's 'date'.
df_16['merge_date'] = df_16['Date'].apply(round_to_elo_date)
df_elo['merge_date'] = df_elo['date']

# FBref opponent name -> Elo club name: identity entries plus the renames.
_identity_16 = dict.fromkeys([
    "Tottenham", "Aston Villa", "Liverpool", "Southampton", "Sunderland",
    "Arsenal", "Everton", "Crystal Palace", "West Brom", "Watford",
    "West Ham", "Bournemouth", "Chelsea",
])
_renamed_16 = {
    "Newcastle Utd": "Newcastle",
    "Swansea City": "Swansea",
    "Manchester City": "Man City",
    "Leicester City": "Leicester",
    "Norwich City": "Norwich",
    "Stoke City": "Stoke",
}
name_map = {**{club: club for club in _identity_16}, **_renamed_16}
df_16['elo_opponent'] = df_16['Opponent'].map(name_map)
df_16['elo_club'] = 'Man United'

# Left-join the opponent's rating, then United's, dropping the right-hand
# 'club' key after each join and the constant 'elo_club' helper at the end.
_elo_lookup_16 = df_elo[['club', 'merge_date', 'elo']]
for _key_col, _rating_col in (('elo_opponent', 'opp_elo'), ('elo_club', 'united_elo')):
    df_16 = df_16.merge(
        _elo_lookup_16.rename(columns={'elo': _rating_col}),
        left_on=[_key_col, 'merge_date'],
        right_on=['club', 'merge_date'],
        how='left',
    ).drop(columns='club')
df_16 = df_16.drop(columns='elo_club')

In [None]:
# Remove the join helpers, then compute United's Elo minus the opponent's.
df_16 = df_16.drop(columns=['merge_date', 'elo_opponent'], errors='ignore')
df_16["elo_diff"] = df_16["united_elo"].sub(df_16["opp_elo"])

# Opponent -> hard-coded position for the 2015/16 season, listed in rank order.
final_pos = {
    "Leicester City": 1,
    "Arsenal": 2,
    "Tottenham": 3,
    "Manchester City": 4,
    "Southampton": 6,
    "West Ham": 7,
    "Liverpool": 8,
    "Stoke City": 9,
    "Chelsea": 10,
    "Everton": 11,
    "Swansea City": 12,
    "Watford": 13,
    "West Brom": 14,
    "Crystal Palace": 15,
    "Bournemouth": 16,
    "Sunderland": 17,
    "Newcastle Utd": 18,
    "Norwich City": 19,
    "Aston Villa": 20,
}
df_16['opp_final_pos'] = df_16['Opponent'].map(final_pos)

In [None]:
# Odds file for 2015/16; its dates are parsed day-first.
df_bet_16 = pd.read_csv('/content/15-16 Odds.csv')
df_bet_16['Date'] = pd.to_datetime(df_bet_16['Date'], dayfirst=True)

# (Date, team) -> B365 price, keyed once by home team and once by away team.
_home_prices_16 = df_bet_16.set_index(['Date', 'HomeTeam'])['B365H']
_away_prices_16 = df_bet_16.set_index(['Date', 'AwayTeam'])['B365A']
home_odds_map = _home_prices_16.to_dict()
away_odds_map = _away_prices_16.to_dict()
def get_united_win_odds(row):
    """Fetch the 'Man United' price for the row's date, or ``pd.NA`` if absent.

    The home table (``home_odds_map``) is used when ``row['home_0_away_1']``
    equals 0; every other value selects ``away_odds_map``.
    """
    played_at_home = row['home_0_away_1'] == 0
    odds_table = home_odds_map if played_at_home else away_odds_map
    return odds_table.get((row['Date'], 'Man United'), pd.NA)
# Row-wise lookup of United's price for every 2015/16 fixture.
df_16['united_win_odds'] = df_16.apply(get_united_win_odds, axis='columns')

In [None]:
# Hand-entered values, one per row of df_16 in row order (38 entries).
df_16["days_since_last_opp_game"] = [0,
    6,     # vs Aston Villa
    8,     # vs Newcastle Utd
    8,     # vs Swansea City
    13,    # vs Liverpool
    8,     # vs Southampton
    6,     # vs Sunderland
    8,     # vs Arsenal
    13,    # vs Everton
    8,     # vs Manchester City
    6,     # vs Crystal Palace
    7,     # vs West Brom
    14,    # vs Watford
    7,     # vs Leicester City
    7,     # vs West Ham
    7,     # vs Bournemouth
    7,     # vs Norwich City
    7,     # vs Stoke City
    2,     # vs Chelsea
    5,     # vs Southampton
    10,    # vs West Ham
    5,     # vs Sunderland
    6,     # vs Everton
    10,    # vs Chelsea
    5,     # vs Watford
    6,     # vs Crystal Palace
    15,    # vs Liverpool
    3,     # vs Man City
    4,     # vs Chelsea
    14,    # vs West Brom
    14,    # vs Tottenham
    7,     # vs Newcastle Utd
    6,     # vs Norwich City
    4,     # vs Swansea City
    11,
    6,
    3,7]
# Hand-entered expected-goals figures for 2015/16, one per row of df_16.
df_16['xG'] = [0.63, 0.66, 1.25, 1.28, 1.46, 1.65, 2.90,
 1.04, 1.48, 0.39, 0.94, 1.28, 1.30, 0.88,
 1.76, 1.97, 0.56, 1.29, 1.35, 1.74, 2.36,
 0.72, 0.84, 2.45, 0.78, 0.91, 1.10, 0.79,
 0.42, 0.90, 1.00, 0.34, 1.09, 1.47, 1.18,
 1.48, 0.64, 1.72]
df_16['xGA'] = [0.67, 0.22, 0.44, 0.97, 1.03, 1.84, 0.83,
 2.17, 0.65, 0.31, 0.39, 0.59, 1.47, 1.06,
 1.10, 1.05, 1.01, 1.18, 0.90, 0.54, 1.81,
 0.95, 0.19, 0.35, 1.62, 1.48, 1.10, 2.16,
 0.57, 1.86, 0.76, 1.80, 0.26, 0.14, 1.18,
 1.37, 2.13, 0.50]
# Must be a subtraction. Writing `a = b = c` here is a chained assignment,
# which would overwrite xG with xGA and leave all three columns identical.
df_16['xG_diff'] = df_16['xG'] - df_16['xGA']
# .copy() detaches the subset so later cells can add columns without
# SettingWithCopyWarning.
df_16 = df_16[['matchday','Date','home_0_away_1','Opponent','Poss','Result','points_won','GF','GA','xG','xGA','xG_diff','united_elo', 'opp_elo', 'elo_diff', 'united_win_odds', 'opp_final_pos', 'days_since_last_game_united', 'days_since_last_opp_game']].copy()

In [None]:
# Fetch the 2018/19 match log table (id "matchlogs_for") from FBref and parse
# its dates month-first / ISO (dayfirst=False).
_matchlog_tables_19 = pd.read_html(
    'https://fbref.com/en/squads/19538871/2018-2019/matchlogs/c9/schedule/Manchester-United-Scores-and-Fixtures-Premier-League',
    attrs={"id": "matchlogs_for"},
)
df_19 = _matchlog_tables_19[0]
df_19['Date'] = pd.to_datetime(df_19['Date'], dayfirst=False)

In [None]:
# Dropping irrelevant columns #
columns_to_drop = ['Time', 'Captain', 'Formation', 'Attendance', 'Opp Formation', 'Referee', 'Match Report', 'Notes']
df_19.drop(columns=columns_to_drop, inplace=True, errors='ignore')

# Dropping rows with missing essential info #
df_19.dropna(subset=['Date', 'Opponent', 'Result'], inplace=True)

# Extract matchday number from 'Round' column (e.g., "Matchweek 5" → 5) #
# expand=False yields a Series, so a Series (not a one-column frame) is assigned.
df_19['matchday'] = df_19['Round'].str.extract(r'(\d+)', expand=False).astype(int)

# Create points_won column based on results #
result_to_points = {'W': 3, 'D': 1, 'L': 0}
df_19['points_won'] = df_19['Result'].map(result_to_points)

# Standardize home/away column to binary values: 0 for 'Home', 1 for anything else #
df_19['home_0_away_1'] = df_19['Venue'].apply(lambda x: 0 if x == 'Home' else 1)

# Reorder columns. The explicit copy detaches the subset from the scraped
# frame, so later cells can add columns without SettingWithCopyWarning. #
df_19 = df_19[['matchday', 'Date', 'home_0_away_1', 'Opponent','Poss', 'Result', 'points_won', 'GF', 'GA','xG','xGA']].copy()

In [None]:
# Distinct opponent names for 2018/19, in order of first appearance.
print(pd.unique(df_19['Opponent']))

['Leicester City' 'Brighton' 'Tottenham' 'Burnley' 'Watford' 'Wolves'
 'West Ham' 'Newcastle Utd' 'Chelsea' 'Everton' 'Bournemouth'
 'Manchester City' 'Crystal Palace' 'Southampton' 'Arsenal' 'Fulham'
 'Liverpool' 'Cardiff City' 'Huddersfield']


In [None]:
# Gap in whole days to the previous row's date; the first row gets 0.
_date_gap_19 = df_19['Date'] - df_19['Date'].shift()
df_19['days_since_last_game_united'] = _date_gap_19.dt.days.fillna(0).astype(int)
def round_to_elo_date(dt):
    """Snap dt to the 1st of its month (days 1-14) or to the 15th (day 15+)."""
    in_first_half = dt.day < 15
    return pd.Timestamp(year=dt.year, month=dt.month, day=1 if in_first_half else 15)
# Join key: fixture date snapped to the 1st/15th vs. the Elo table's 'date'.
df_19['merge_date'] = df_19['Date'].apply(round_to_elo_date)
df_elo['merge_date'] = df_elo['date']

# FBref opponent name -> Elo club name. Only four names differ; every other
# club maps to itself.
_fbref_clubs_19 = [
    'West Ham', 'Wolves', 'Leicester City', 'Cardiff City', 'Everton',
    'Southampton', 'Crystal Palace', 'Liverpool', 'Huddersfield', 'Tottenham',
    'Chelsea', 'Newcastle Utd', 'Brighton', 'Watford', 'Arsenal',
    'Manchester City', 'Bournemouth', 'Fulham', 'Burnley',
]
_renamed_19 = {
    'Leicester City': 'Leicester',
    'Cardiff City': 'Cardiff',
    'Newcastle Utd': 'Newcastle',
    'Manchester City': 'Man City',
}
name_map = {club: _renamed_19.get(club, club) for club in _fbref_clubs_19}
df_19['elo_opponent'] = df_19['Opponent'].map(name_map)
df_19['elo_club'] = 'Man United'

# Pre-build the two right-hand tables, then left-join each in turn.
_elo_cols_19 = df_elo[['club', 'merge_date', 'elo']]
_opp_ratings_19 = _elo_cols_19.rename(columns={'elo': 'opp_elo'})
_united_ratings_19 = _elo_cols_19.rename(columns={'elo': 'united_elo'})

df_19 = df_19.merge(
    _opp_ratings_19,
    how='left',
    left_on=['elo_opponent', 'merge_date'],
    right_on=['club', 'merge_date'],
)
df_19 = df_19.drop(columns='club')

df_19 = df_19.merge(
    _united_ratings_19,
    how='left',
    left_on=['elo_club', 'merge_date'],
    right_on=['club', 'merge_date'],
)
df_19 = df_19.drop(columns=['club', 'elo_club'])

In [None]:
# Remove the join helpers, then derive the two difference columns.
df_19 = df_19.drop(columns=['merge_date', 'elo_opponent'], errors='ignore')
df_19["elo_diff"] = df_19["united_elo"].sub(df_19["opp_elo"])
df_19['xG_diff'] = df_19['xG'].sub(df_19['xGA'])

# Opponent -> hard-coded position for the 2018/19 season, listed in rank order.
final_pos = {
    'Manchester City': 1,
    'Liverpool': 2,
    'Chelsea': 3,
    'Tottenham': 4,
    'Arsenal': 5,
    'Wolves': 7,
    'Everton': 8,
    'Leicester City': 9,
    'West Ham': 10,
    'Watford': 11,
    'Crystal Palace': 12,
    'Newcastle Utd': 13,
    'Bournemouth': 14,
    'Burnley': 15,
    'Southampton': 16,
    'Brighton': 17,
    'Cardiff City': 18,
    'Fulham': 19,
    'Huddersfield': 20,
}
df_19['opp_final_pos'] = df_19['Opponent'].map(final_pos)

In [None]:
# Odds file for 2018/19 with its 'Date' column parsed day-first.
df_bet_19 = pd.read_csv('/content/18-19 Odds.csv').assign(
    Date=lambda odds: pd.to_datetime(odds['Date'], dayfirst=True)
)
# (Date, team) -> B365 price, for the home side and the away side.
home_odds_map = df_bet_19.set_index(['Date', 'HomeTeam'])['B365H'].to_dict()
away_odds_map = df_bet_19.set_index(['Date', 'AwayTeam'])['B365A'].to_dict()
def get_united_win_odds(row):
    """Return the stored 'Man United' price for the row's date.

    Uses ``home_odds_map`` if ``row['home_0_away_1'] == 0`` and
    ``away_odds_map`` otherwise; returns ``pd.NA`` when no entry exists.
    """
    key = (row['Date'], 'Man United')
    return (
        home_odds_map.get(key, pd.NA)
        if row['home_0_away_1'] == 0
        else away_odds_map.get(key, pd.NA)
    )
# Row-wise lookup of United's price for every 2018/19 fixture.
df_19['united_win_odds'] = df_19.apply(get_united_win_odds, axis='columns')

In [None]:
# Distinct opponent names for 2018/19 (same check as earlier in the notebook).
print(pd.unique(df_19['Opponent']))

['Leicester City' 'Brighton' 'Tottenham' 'Burnley' 'Watford' 'Wolves'
 'West Ham' 'Newcastle Utd' 'Chelsea' 'Everton' 'Bournemouth'
 'Manchester City' 'Crystal Palace' 'Southampton' 'Arsenal' 'Fulham'
 'Liverpool' 'Cardiff City' 'Huddersfield']


In [None]:
# Hand-entered values, one per row of df_19 in row order (38 entries).
df_19["days_since_last_opp_game"] = [0, 9, 8, 6, 13, 7, 7, 7, 14, 8, 6, 8, 13, 7, 4, 3, 11, 5, 7, 6, 6, 10, 5, 6, 15, 3, 3, 8, 20, 3, 11, 8, 3, 4, 7, 7, 7, 7]
# Select and order the columns; .copy() detaches the subset so that later
# cells can add columns to df_19 without SettingWithCopyWarning.
df_19 = df_19[['matchday','Date','home_0_away_1','Opponent','Poss','Result','points_won','GF','GA','xG','xGA','xG_diff','united_elo', 'opp_elo', 'elo_diff', 'united_win_odds', 'opp_final_pos', 'days_since_last_game_united', 'days_since_last_opp_game']].copy()

In [None]:
# Display the 2018/19 frame for a visual check.
df_19

In [None]:
# Tag every per-season frame with its season label before they are combined.
for _season_frame, _season_label in (
    (df_16, "2015/16"),
    (df_17, "2016/17"),
    (df_18, "2017/18"),
    (df_19, "2018/19"),
):
    _season_frame['season'] = _season_label

In [None]:
# Display the 2015/16 frame, now including the 'season' column.
df_16

Unnamed: 0,matchday,Date,home_0_away_1,Opponent,Poss,Result,points_won,GF,GA,xG,xGA,xG_diff,united_elo,opp_elo,elo_diff,united_win_odds,opp_final_pos,days_since_last_game_united,days_since_last_opp_game,season
0,1,2015-08-08,0,Tottenham,50,W,3,1,0,0.67,0.67,0.67,1812.81,1733.16,79.65,1.65,3,0,0,2015/16
1,2,2015-08-14,1,Aston Villa,54,W,3,1,0,0.22,0.22,0.22,1812.81,1580.38,232.43,1.73,20,6,6,2015/16
2,3,2015-08-22,0,Newcastle Utd,69,D,1,0,0,0.44,0.44,0.44,1819.01,1589.81,229.2,1.36,18,8,8,2015/16
3,4,2015-08-30,1,Swansea City,65,L,0,1,2,0.97,0.97,0.97,1819.01,1677.19,141.82,2.25,12,8,8,2015/16
4,5,2015-09-12,0,Liverpool,56,W,3,3,1,1.03,1.03,1.03,1807.87,1740.88,66.99,2.0,8,13,13,2015/16
5,6,2015-09-20,1,Southampton,59,W,3,3,2,1.84,1.84,1.84,1815.49,1683.76,131.73,2.5,6,8,8,2015/16
6,7,2015-09-26,0,Sunderland,66,W,3,3,0,0.83,0.83,0.83,1815.49,1577.37,238.12,1.2,17,6,6,2015/16
7,8,2015-10-04,1,Arsenal,62,L,0,0,3,2.17,2.17,2.17,1811.79,1800.82,10.97,3.8,2,8,8,2015/16
8,9,2015-10-17,1,Everton,53,W,3,3,0,0.65,0.65,0.65,1797.86,1719.12,78.74,2.38,11,13,13,2015/16
9,10,2015-10-25,0,Manchester City,59,D,1,0,0,0.31,0.31,0.31,1797.86,1863.57,-65.71,2.6,4,8,8,2015/16


In [None]:
# Display the 2016/17 frame, now including the 'season' column.
df_17

Unnamed: 0,matchday,Date,home_0_away_1,Opponent,Poss,Result,points_won,GF,GA,xG,xGA,xG_diff,united_elo,opp_elo,elo_diff,united_win_odds,opp_final_pos,days_since_last_game_united,days_since_last_opp_game,season
0,1,2016-08-14,1,Bournemouth,53,W,3,3,1,0.52,0.52,0.52,1790.08,1621.22,168.86,1.85,9,0,0,2016/17
1,2,2016-08-19,0,Southampton,43,W,3,2,0,0.61,0.61,0.61,1798.99,1775.83,23.16,1.53,8,5,6,2016/17
2,3,2016-08-27,1,Hull City,62,W,3,1,0,0.27,0.27,0.27,1798.99,1604.09,194.9,1.44,18,8,7,2016/17
3,4,2016-09-10,0,Manchester City,40,L,0,1,2,1.89,1.89,1.89,1806.67,1865.55,-58.88,2.38,3,14,13,2016/17
4,5,2016-09-18,1,Watford,60,L,0,1,3,1.41,1.41,1.41,1801.8,1634.34,167.46,1.62,17,8,8,2016/17
5,6,2016-09-24,0,Leicester City,67,W,3,4,1,0.29,0.29,0.29,1801.8,1801.83,-0.03,1.73,12,6,7,2016/17
6,7,2016-10-02,0,Stoke City,67,D,1,1,1,1.2,1.2,1.2,1791.42,1641.67,149.75,1.33,13,8,8,2016/17
7,8,2016-10-17,1,Liverpool,35,D,1,0,0,0.27,0.27,0.27,1786.13,1829.62,-43.49,3.5,4,15,16,2016/17
8,9,2016-10-23,1,Chelsea,56,L,0,0,4,2.25,2.25,2.25,1786.13,1797.27,-11.14,3.8,1,6,8,2016/17
9,10,2016-10-29,0,Burnley,72,D,1,0,0,0.33,0.33,0.33,1786.13,1638.18,147.95,1.2,16,6,7,2016/17


In [None]:
# Display the 2017/18 frame, now including the 'season' column.
df_18

Unnamed: 0,matchday,Date,home_0_away_1,Opponent,Poss,Result,points_won,GF,GA,xG,xGA,xG_diff,united_elo,opp_elo,elo_diff,united_win_odds,opp_final_pos,days_since_last_game_united,days_since_last_opp_game,season
0,1,2017-08-13,0,West Ham,55,W,3,4,0,2.1,0.5,1.6,1857.48,1672.23,185.25,1.3,13,0,0,2017/18
1,2,2017-08-19,1,Swansea City,58,W,3,4,0,3.0,0.4,2.6,1863.12,1652.69,210.43,1.36,18,6,7,2017/18
2,3,2017-08-26,0,Leicester City,69,W,3,2,0,2.8,0.9,1.9,1863.12,1713.68,149.44,1.33,9,7,7,2017/18
3,4,2017-09-09,1,Stoke City,63,D,1,2,2,2.1,1.2,0.9,1882.75,1677.01,205.74,1.5,19,14,13,2017/18
4,5,2017-09-17,0,Everton,49,W,3,4,0,2.9,1.1,1.8,1884.01,1744.38,139.63,1.36,8,8,8,2017/18
5,6,2017-09-23,1,Southampton,40,W,3,1,0,1.2,1.0,0.2,1884.01,1682.36,201.65,1.73,17,6,7,2017/18
6,7,2017-09-30,0,Crystal Palace,59,W,3,4,0,2.8,0.4,2.4,1884.01,1606.33,277.68,1.17,11,7,7,2017/18
7,8,2017-10-14,1,Liverpool,38,D,1,0,0,0.2,1.5,-1.3,1919.19,1856.85,62.34,2.8,4,14,13,2017/18
8,9,2017-10-21,1,Huddersfield,77,L,0,1,2,0.8,1.2,-0.4,1919.16,1524.06,395.1,1.36,16,7,7,2017/18
9,10,2017-10-28,0,Tottenham,45,W,3,1,0,0.8,1.2,-0.4,1919.16,1919.73,-0.57,2.14,3,7,6,2017/18


In [None]:
# Display the 2018/19 frame, now including the 'season' column.
df_19

Unnamed: 0,matchday,Date,home_0_away_1,Opponent,Poss,Result,points_won,GF,GA,xG,xGA,xG_diff,united_elo,opp_elo,elo_diff,united_win_odds,opp_final_pos,days_since_last_game_united,days_since_last_opp_game,season
0,1,2018-08-10,0,Leicester City,47,W,3,2,1,1.5,1.8,-0.3,1883.61,1696.17,187.44,1.57,9,0,0,2018/19
1,2,2018-08-19,1,Brighton,66,L,0,2,3,1.4,1.7,-0.3,1887.63,1623.59,264.04,1.75,17,9,9,2018/19
2,3,2018-08-27,0,Tottenham,57,L,0,0,3,1.5,1.2,0.3,1887.63,1918.75,-31.12,2.62,4,8,8,2018/19
3,4,2018-09-02,1,Burnley,53,W,3,2,0,2.5,0.8,1.7,1856.08,1651.97,204.11,1.7,15,6,6,2018/19
4,5,2018-09-15,1,Watford,57,W,3,2,1,1.9,1.3,0.6,1862.83,1669.43,193.4,1.95,11,13,13,2018/19
5,6,2018-09-22,0,Wolves,64,D,1,1,1,0.6,1.0,-0.4,1862.83,1603.58,259.25,1.57,7,7,7,2018/19
6,7,2018-09-29,1,West Ham,52,L,0,1,3,0.7,1.4,-0.7,1862.83,1641.62,221.21,1.85,10,7,7,2018/19
7,8,2018-10-06,0,Newcastle Utd,72,W,3,3,2,1.9,1.6,0.3,1851.55,1639.72,211.83,1.44,13,7,7,2018/19
8,9,2018-10-20,1,Chelsea,38,D,1,2,2,0.8,1.8,-1.0,1845.23,1861.61,-16.38,5.25,3,14,14,2018/19
9,10,2018-10-28,0,Everton,54,W,3,2,1,2.3,1.8,0.5,1845.23,1689.47,155.76,1.72,8,8,8,2018/19


In [None]:
# Stack the four seasons in chronological order. ignore_index=True gives the
# combined frame a fresh 0..n-1 index; otherwise each season keeps its own
# labels and every label appears four times, so final_df.loc[i] is ambiguous.
final_df = pd.concat([df_16, df_17, df_18, df_19], ignore_index=True)

In [None]:
# Display the combined four-season frame.
final_df

Unnamed: 0,matchday,Date,home_0_away_1,Opponent,Poss,Result,points_won,GF,GA,xG,xGA,xG_diff,united_elo,opp_elo,elo_diff,united_win_odds,opp_final_pos,days_since_last_game_united,days_since_last_opp_game,season
0,1,2015-08-08,0,Tottenham,50,W,3,1,0,0.67,0.67,0.67,1812.81,1733.16,79.65,1.65,3,0,0,2015/16
1,2,2015-08-14,1,Aston Villa,54,W,3,1,0,0.22,0.22,0.22,1812.81,1580.38,232.43,1.73,20,6,6,2015/16
2,3,2015-08-22,0,Newcastle Utd,69,D,1,0,0,0.44,0.44,0.44,1819.01,1589.81,229.20,1.36,18,8,8,2015/16
3,4,2015-08-30,1,Swansea City,65,L,0,1,2,0.97,0.97,0.97,1819.01,1677.19,141.82,2.25,12,8,8,2015/16
4,5,2015-09-12,0,Liverpool,56,W,3,3,1,1.03,1.03,1.03,1807.87,1740.88,66.99,2.00,8,13,13,2015/16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33,35,2019-04-21,1,Everton,52,L,0,0,4,0.30,1.60,-1.30,1896.66,1727.35,169.31,2.30,8,8,4,2018/19
34,31,2019-04-24,0,Manchester City,37,L,0,0,2,0.50,0.50,0.00,1896.66,2015.55,-118.89,7.50,1,3,7,2018/19
35,36,2019-04-28,0,Chelsea,52,D,1,1,1,1.10,1.70,-0.60,1896.66,1865.08,31.58,2.70,3,4,7,2018/19
36,37,2019-05-05,1,Huddersfield,64,D,1,1,1,1.80,1.00,0.80,1857.66,1500.13,357.53,1.30,20,7,7,2018/19


In [None]:
# Write the combined frame to disk without the index column.
final_df.to_csv(path_or_buf='united_pt2.csv', index=False)