In [None]:
import pandas as pd
import numpy as np


In [None]:
# Scrape Manchester City's 2016-17 Premier League match log (FBref table id "matchlogs_for").
df_17 = pd.read_html('https://fbref.com/en/squads/b8fd03ef/2016-2017/c9/Manchester-City-Stats-Premier-League', attrs={'id': 'matchlogs_for'})[0]
df_17['Date'] = pd.to_datetime(df_17['Date'], dayfirst=False)

# Dropping irrelevant columns (errors='ignore' tolerates columns missing from this table) #
columns_to_drop = ['Time', 'Captain', 'Formation', 'Attendance', 'Opp Formation', 'Referee', 'Match Report', 'Notes']
df_17.drop(columns=columns_to_drop, inplace=True, errors='ignore')

# Dropping rows with missing essential info #
df_17.dropna(subset=['Date', 'Opponent', 'Result'], inplace=True)

# Extract matchday number from 'Round' column (e.g., "Matchweek 5" → 5) #
df_17['matchday'] = df_17['Round'].str.extract(r'(\d+)').astype(int)

# Create points_won column based on results #
result_to_points = {'W': 3, 'D': 1, 'L': 0}
df_17['points_won'] = df_17['Result'].map(result_to_points)

# Standardize home/away column to binary values (anything other than 'Home' is coded 1) #
df_17['home_0_away_1'] = df_17['Venue'].apply(lambda x: 0 if x == 'Home' else 1)

# Reorder columns. The explicit .copy() makes df_17 an independent frame, so the
# columns added to it in later cells cannot trigger SettingWithCopyWarning. #
df_17 = df_17[['matchday', 'Date', 'home_0_away_1', 'Opponent','Poss', 'Result', 'points_won', 'GF', 'GA']].copy()


In [None]:
# City's rest between fixtures: whole days from the previous row's Date to this one.
# The first row has no previous fixture, so its missing gap is stored as 0.
df_17['days_since_last_game_city'] = df_17['Date'].diff().dt.days.fillna(0).astype(int)

In [None]:
# Load the club Elo ratings and parse 'date' exactly once. The original cell
# parsed it a second time with format='%Y-%m-%d' after filtering, which was a
# no-op on an already-datetime column and assigned into a filtered slice.
df_elo = pd.read_csv('/content/EloRatings.csv')
df_elo['date'] = pd.to_datetime(df_elo['date'], dayfirst=True)

# Keep English clubs only, and only ratings dated on or after July 1, 2013.
# Both conditions are applied in one step and the index is rebuilt, so df_elo
# is a fresh frame that later cells can safely add columns to.
cutoff = pd.Timestamp('2013-07-01')
is_english = df_elo['country'] == 'ENG'
on_or_after_cutoff = df_elo['date'] >= cutoff
df_elo = df_elo[is_english & on_or_after_cutoff].reset_index(drop=True)

# Sanity checks on the filter.
print(df_elo.country.unique())    # should be just ['ENG']
print(df_elo.date.min(), df_elo.date.max())


['ENG']
2013-07-01 00:00:00 2025-06-01 00:00:00


In [None]:
# List the distinct opponent labels in df_17 (these are the keys the name/position maps must cover).
print(df_17.loc[:, 'Opponent'].unique())

['Sunderland' 'Stoke City' 'West Ham' 'Manchester Utd' 'Bournemouth'
 'Swansea City' 'Tottenham' 'Everton' 'Southampton' 'West Brom'
 'Middlesbrough' 'Crystal Palace' 'Burnley' 'Chelsea' 'Leicester City'
 'Watford' 'Arsenal' 'Hull City' 'Liverpool']


In [None]:
def round_to_elo_date(dt):
    """Snap a date to the 1st (day < 15) or the 15th (day >= 15) of its own month."""
    snapshot_day = 1 if dt.day < 15 else 15
    return pd.Timestamp(year=dt.year, month=dt.month, day=snapshot_day)
# Join key: each fixture is snapped to the 1st/15th of its month, and the Elo
# table exposes its own 'date' under the same column name.
df_17['merge_date'] = df_17['Date'].apply(round_to_elo_date)
df_elo['merge_date'] = df_elo['date']

# Opponent label in df_17 -> club label used in the Elo table.
# Most clubs are spelled the same in both; only the exceptions are listed explicitly.
same_name_clubs = [
    'Arsenal', 'Bournemouth', 'Burnley', 'Chelsea', 'Crystal Palace',
    'Everton', 'Liverpool', 'Middlesbrough', 'Southampton', 'Sunderland',
    'Tottenham', 'Watford', 'West Brom', 'West Ham',
]
name_map = {club: club for club in same_name_clubs}
name_map.update({
    'Hull City': 'Hull',
    'Leicester City': 'Leicester',
    'Manchester Utd': 'Man United',
    'Stoke City': 'Stoke',
    'Swansea City': 'Swansea',
})

df_17['elo_opponent'] = df_17['Opponent'].map(name_map)
df_17['elo_club'] = 'Man City'

elo_lookup = df_elo[['club', 'merge_date', 'elo']]

# Opponent rating at the snapped date (left join: unmatched fixtures get NaN).
df_17 = pd.merge(
    df_17,
    elo_lookup.rename(columns={'elo': 'opp_elo'}),
    how='left',
    left_on=['elo_opponent', 'merge_date'],
    right_on=['club', 'merge_date'],
)
df_17 = df_17.drop(columns='club')

# City's own rating at the same snapped date.
df_17 = pd.merge(
    df_17,
    elo_lookup.rename(columns={'elo': 'city_elo'}),
    how='left',
    left_on=['elo_club', 'merge_date'],
    right_on=['club', 'merge_date'],
)
df_17 = df_17.drop(columns=['club', 'elo_club'])

In [None]:
# Remove the helper columns that were only needed for the Elo join (ignored if already gone).
df_17 = df_17.drop(columns=['merge_date', 'elo_opponent'], errors='ignore')

In [None]:
# Rating gap from City's side: positive when City's Elo is above the opponent's.
df_17["elo_diff"] = df_17["city_elo"].sub(df_17["opp_elo"])

# Opponent label -> value for 'opp_final_pos' (listed in ascending position order).
final_pos = {
    'Chelsea': 1,
    'Tottenham': 2,
    'Liverpool': 4,
    'Arsenal': 5,
    'Manchester Utd': 6,
    'Everton': 7,
    'Southampton': 8,
    'Bournemouth': 9,
    'West Brom': 10,
    'West Ham': 11,
    'Leicester City': 12,
    'Stoke City': 13,
    'Crystal Palace': 14,
    'Swansea City': 15,
    'Burnley': 16,
    'Watford': 17,
    'Hull City': 18,
    'Middlesbrough': 19,
    'Sunderland': 20,
}
df_17['opp_final_pos'] = df_17['Opponent'].map(final_pos)

In [None]:
# Bet365 odds for the season, keyed by (match date, team) separately for home and away sides.
df_bet_17 = pd.read_csv('/content/16-17 Odds.csv')
df_bet_17['Date'] = pd.to_datetime(df_bet_17['Date'], dayfirst=True)
home_odds_map = df_bet_17.set_index(['Date','HomeTeam'])['B365H'].to_dict()
away_odds_map = df_bet_17.set_index(['Date','AwayTeam'])['B365A'].to_dict()


def get_city_win_odds(row):
    """Return the B365 price on a City win for this fixture, or pd.NA if no odds row matches.

    Home fixtures (home_0_away_1 == 0) read the home-win price, every other
    fixture reads the away-win price; the lookup key is (row['Date'], 'Man City').
    """
    odds_for_venue = home_odds_map if row['home_0_away_1'] == 0 else away_odds_map
    return odds_for_venue.get((row['Date'], 'Man City'), pd.NA)


df_17['city_win_odds'] = df_17.apply(get_city_win_odds, axis=1)

  df_bet_17['Date'] = pd.to_datetime(df_bet_17['Date'], dayfirst=True)


In [None]:
# Hand-entered values for 'days_since_last_opp_game', one per row of df_17 in row order
# (38 values; pandas raises if the length does not match the frame).
df_17["days_since_last_opp_game"] = [
    0, 7, 7, 14, 7, 6, 8, 15, 7, 7,
    7, 14, 5, 7, 7, 4, 5, 9, 4, 2,
    13, 7, 11, 5, 9, 8, 4, 7, 15, 4,
    3, 7, 4, 4, 7, 7, 4, 6,
]

# Reorder columns. The .copy() makes the reordered frame independent of the one it was
# selected from, so the xG columns added to df_17 in a later cell cannot trigger
# SettingWithCopyWarning.
df_17 = df_17[['matchday','Date','home_0_away_1','Opponent','Poss','Result','points_won','GF','GA','city_elo', 'opp_elo', 'elo_diff', 'city_win_odds', 'opp_final_pos', 'days_since_last_game_city', 'days_since_last_opp_game']].copy()

In [None]:
# Scrape Manchester City's 2017-18 Premier League match log (FBref table id "matchlogs_for").
df_18 = pd.read_html('https://fbref.com/en/squads/b8fd03ef/2017-2018/c9/Manchester-City-Stats-Premier-League', attrs={"id":"matchlogs_for"})[0]
df_18['Date'] = pd.to_datetime(df_18['Date'], dayfirst=False)
# Dropping irrelevant columns (errors='ignore' tolerates columns missing from this table) #
columns_to_drop = ['Time', 'Captain', 'Formation', 'Attendance', 'Opp Formation', 'Referee', 'Match Report', 'Notes']
df_18.drop(columns=columns_to_drop, inplace=True, errors='ignore')

# Dropping rows with missing essential info #
df_18.dropna(subset=['Date', 'Opponent', 'Result'], inplace=True)

# Extract matchday number from 'Round' column (e.g., "Matchweek 5" → 5) #
df_18['matchday'] = df_18['Round'].str.extract(r'(\d+)').astype(int)

# Create points_won column based on results #
result_to_points = {'W': 3, 'D': 1, 'L': 0}
df_18['points_won'] = df_18['Result'].map(result_to_points)

# Standardize home/away column to binary values (anything other than 'Home' is coded 1) #
df_18['home_0_away_1'] = df_18['Venue'].apply(lambda x: 0 if x == 'Home' else 1)

# Reorder columns. The explicit .copy() makes df_18 an independent frame; without it,
# adding columns to df_18 in later cells raises SettingWithCopyWarning. #
df_18 = df_18[['matchday', 'Date', 'home_0_away_1', 'Opponent','Poss', 'Result', 'points_won', 'GF', 'GA','xG','xGA']].copy()

In [None]:
# Show the opponent for every row of df_18, in row order.
print(df_18.loc[:, 'Opponent'])

0           Brighton
1            Everton
2        Bournemouth
3          Liverpool
4            Watford
5     Crystal Palace
6            Chelsea
7         Stoke City
8            Burnley
9          West Brom
10           Arsenal
11    Leicester City
12      Huddersfield
13       Southampton
14          West Ham
15    Manchester Utd
16      Swansea City
17         Tottenham
18       Bournemouth
19     Newcastle Utd
20    Crystal Palace
21           Watford
22         Liverpool
23     Newcastle Utd
24         West Brom
25           Burnley
26    Leicester City
27           Arsenal
28           Chelsea
29        Stoke City
30           Everton
31    Manchester Utd
32         Tottenham
33      Swansea City
34          West Ham
35      Huddersfield
36          Brighton
37       Southampton
Name: Opponent, dtype: object


In [None]:
# City's rest between fixtures: whole days from the previous row's Date to this one.
# The first row has no previous fixture, so its missing gap is stored as 0.
df_18['days_since_last_game_city'] = df_18['Date'].diff().dt.days.fillna(0).astype(int)
def round_to_elo_date(dt):
    """Snap a date to the 1st (day < 15) or the 15th (day >= 15) of its own month."""
    snapshot_day = 1 if dt.day < 15 else 15
    return pd.Timestamp(year=dt.year, month=dt.month, day=snapshot_day)
# Join key: each fixture is snapped to the 1st/15th of its month, and the Elo
# table exposes its own 'date' under the same column name.
df_18['merge_date'] = df_18['Date'].apply(round_to_elo_date)
df_elo['merge_date'] = df_elo['date']

# Opponent label in df_18 -> club label used in the Elo table.
# Most clubs are spelled the same in both; only the exceptions are listed explicitly.
same_name_clubs = [
    'West Ham', 'Everton', 'Southampton', 'Crystal Palace', 'Liverpool',
    'Huddersfield', 'Tottenham', 'Chelsea', 'Brighton', 'Watford',
    'Arsenal', 'Bournemouth', 'West Brom', 'Burnley',
]
name_map = {club: club for club in same_name_clubs}
name_map.update({
    'Swansea City': 'Swansea',
    'Leicester City': 'Leicester',
    'Stoke City': 'Stoke',
    'Newcastle Utd': 'Newcastle',
    'Manchester Utd': 'Man United',
})

df_18['elo_opponent'] = df_18['Opponent'].map(name_map)
df_18['elo_club'] = 'Man City'

elo_lookup = df_elo[['club', 'merge_date', 'elo']]

# Opponent rating at the snapped date (left join: unmatched fixtures get NaN).
df_18 = pd.merge(
    df_18,
    elo_lookup.rename(columns={'elo': 'opp_elo'}),
    how='left',
    left_on=['elo_opponent', 'merge_date'],
    right_on=['club', 'merge_date'],
)
df_18 = df_18.drop(columns='club')

# City's own rating at the same snapped date.
df_18 = pd.merge(
    df_18,
    elo_lookup.rename(columns={'elo': 'city_elo'}),
    how='left',
    left_on=['elo_club', 'merge_date'],
    right_on=['club', 'merge_date'],
)
df_18 = df_18.drop(columns=['club', 'elo_club'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_18['days_since_last_game_city'] = (
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_18['merge_date'] = df_18['Date'].apply(round_to_elo_date)


In [None]:
# Remove the helper columns that were only needed for the Elo join (ignored if already gone).
df_18 = df_18.drop(columns=['merge_date', 'elo_opponent'], errors='ignore')

# Gaps from City's side: positive means City's figure is the larger one.
df_18["elo_diff"] = df_18["city_elo"].sub(df_18["opp_elo"])
df_18['xG_diff'] = df_18['xG'].sub(df_18['xGA'])

# Opponent label -> value for 'opp_final_pos' (listed in ascending position order).
final_pos = {
    'Manchester Utd': 2,
    'Tottenham': 3,
    'Liverpool': 4,
    'Chelsea': 5,
    'Arsenal': 6,
    'Burnley': 7,
    'Everton': 8,
    'Leicester City': 9,
    'Newcastle Utd': 10,
    'Crystal Palace': 11,
    'Bournemouth': 12,
    'West Ham': 13,
    'Watford': 14,
    'Brighton': 15,
    'Huddersfield': 16,
    'Southampton': 17,
    'Swansea City': 18,
    'Stoke City': 19,
    'West Brom': 20,
}
df_18['opp_final_pos'] = df_18['Opponent'].map(final_pos)

In [None]:
# Bet365 odds for the season, keyed by (match date, team) separately for home and away sides.
df_bet_18 = pd.read_csv('/content/17-18 Odds.csv')
df_bet_18['Date'] = pd.to_datetime(df_bet_18['Date'], dayfirst=True)
home_odds_map = df_bet_18.set_index(['Date','HomeTeam'])['B365H'].to_dict()
away_odds_map = df_bet_18.set_index(['Date','AwayTeam'])['B365A'].to_dict()


def get_city_win_odds(row):
    """Return the B365 price on a City win for this fixture, or pd.NA if no odds row matches.

    Home fixtures (home_0_away_1 == 0) read the home-win price, every other
    fixture reads the away-win price; the lookup key is (row['Date'], 'Man City').
    """
    odds_for_venue = home_odds_map if row['home_0_away_1'] == 0 else away_odds_map
    return odds_for_venue.get((row['Date'], 'Man City'), pd.NA)


df_18['city_win_odds'] = df_18.apply(get_city_win_odds, axis=1)

In [None]:
# Hand-entered values for 'days_since_last_opp_game', one per row of df_18 in row order.
df_18["days_since_last_opp_game"] = [
    0, 7, 7, 14, 7, 7, 4, 3, 14, 7,
    7, 8, 13, 7, 7, 7, 7, 6, 4, 4,
    2, 12, 7, 10, 4, 7, 7, 7, 7, 7,
    21, 7, 7, 7, 8, 6, 8, 7,
]

# Put the columns into the layout shared by all season frames.
season_columns = [
    'matchday', 'Date', 'home_0_away_1', 'Opponent', 'Poss', 'Result',
    'points_won', 'GF', 'GA', 'xG', 'xGA', 'xG_diff',
    'city_elo', 'opp_elo', 'elo_diff', 'city_win_odds', 'opp_final_pos',
    'days_since_last_game_city', 'days_since_last_opp_game',
]
df_18 = df_18[season_columns]

In [None]:
# Hand-entered expected-goals values, one per row of df_17 in row order.
df_17['xG'] = [1.95, 1.31, 3.22, 1.01, 3.39, 1.11, 1.70,
    2.69, 1.15, 0.60, 2.50, 0.43, 0.48, 2.24,
    2.61, 1.62, 1.52, 0.35, 0.26, 0.62, 0.57,
    2.53, 0.49, 2.88, 0.15, 0.74, 1.30, 2.55,
    0.74, 1.37, 2.34, 0.62, 1.42, 1.18, 2.70,
    1.99, 2.58, 0.83]
df_17['xGA'] = [1.10, 2.17, 1.08, 1.89, 0.39, 2.58, 0.91,
    0.14, 0.56, 2.48, 0.21, 1.39, 2.26, 1.28,
    0.97, 0.50, 0.61, 2.22, 0.35, 0.64, 0.57,
    1.12, 2.66, 0.11, 2.95, 2.48, 0.34, 1.94,
    1.40, 2.26, 0.36, 2.74, 0.42, 3.11, 0.24,
    1.63, 0.39, 3.27]

# xG_diff is xG minus xGA. The previous chained assignment
# (xG_diff = xG = xGA) overwrote xG with xGA and stored xGA as the difference.
df_17['xG_diff'] = df_17['xG'] - df_17['xGA']

# Put the columns into the layout shared by all season frames.
df_17 = df_17[['matchday','Date','home_0_away_1','Opponent','Poss','Result','points_won','GF','GA','xG','xGA','xG_diff','city_elo', 'opp_elo', 'elo_diff', 'city_win_odds', 'opp_final_pos', 'days_since_last_game_city', 'days_since_last_opp_game']]

In [None]:
# Scrape Manchester City's 2015-16 Premier League match log (FBref table id "matchlogs_for")
# and parse its 'Date' column as month-first/ISO dates.
df_16 = pd.read_html(
    io='https://fbref.com/en/squads/b8fd03ef/2015-2016/c9/Manchester-City-Stats-Premier-League',
    attrs={"id":"matchlogs_for"},
)[0]
df_16 = df_16.assign(Date=pd.to_datetime(df_16['Date'], dayfirst=False))

In [None]:
# Dropping irrelevant columns (errors='ignore' tolerates columns missing from this table) #
columns_to_drop = ['Time', 'Captain', 'Formation', 'Attendance', 'Opp Formation', 'Referee', 'Match Report', 'Notes']
df_16.drop(columns=columns_to_drop, inplace=True, errors='ignore')

# Dropping rows with missing essential info #
df_16.dropna(subset=['Date', 'Opponent', 'Result'], inplace=True)

# Extract matchday number from 'Round' column (e.g., "Matchweek 5" → 5) #
df_16['matchday'] = df_16['Round'].str.extract(r'(\d+)').astype(int)

# Create points_won column based on results #
result_to_points = {'W': 3, 'D': 1, 'L': 0}
df_16['points_won'] = df_16['Result'].map(result_to_points)

# Standardize home/away column to binary values (anything other than 'Home' is coded 1) #
df_16['home_0_away_1'] = df_16['Venue'].apply(lambda x: 0 if x == 'Home' else 1)

# Reorder columns. The explicit .copy() makes df_16 an independent frame, so the
# columns added to it in later cells cannot trigger SettingWithCopyWarning. #
df_16 = df_16[['matchday', 'Date', 'home_0_away_1', 'Opponent','Poss', 'Result', 'points_won', 'GF', 'GA']].copy()

In [None]:
# City's rest between fixtures: whole days from the previous row's Date to this one.
# The first row has no previous fixture, so its missing gap is stored as 0.
df_16['days_since_last_game_city'] = df_16['Date'].diff().dt.days.fillna(0).astype(int)

In [None]:
# List the distinct opponent labels in df_16 (these are the keys the name/position maps must cover).
print(df_16.loc[:, 'Opponent'].unique())

['West Brom' 'Chelsea' 'Everton' 'Watford' 'Crystal Palace' 'West Ham'
 'Tottenham' 'Newcastle Utd' 'Bournemouth' 'Manchester Utd' 'Norwich City'
 'Aston Villa' 'Liverpool' 'Southampton' 'Stoke City' 'Swansea City'
 'Arsenal' 'Sunderland' 'Leicester City']


In [None]:
def round_to_elo_date(dt):
    """Snap a date to the 1st (day < 15) or the 15th (day >= 15) of its own month."""
    snapshot_day = 1 if dt.day < 15 else 15
    return pd.Timestamp(year=dt.year, month=dt.month, day=snapshot_day)
# Join key: each fixture is snapped to the 1st/15th of its month, and the Elo
# table exposes its own 'date' under the same column name.
df_16['merge_date'] = df_16['Date'].apply(round_to_elo_date)
df_elo['merge_date'] = df_elo['date']

# Opponent label in df_16 -> club label used in the Elo table.
# Most clubs are spelled the same in both; only the exceptions are listed explicitly.
same_name_clubs = [
    "Tottenham", "Aston Villa", "Liverpool", "Southampton", "Sunderland",
    "Arsenal", "Everton", "Crystal Palace", "West Brom", "Watford",
    "West Ham", "Bournemouth", "Chelsea",
]
name_map = {club: club for club in same_name_clubs}
name_map.update({
    "Newcastle Utd": "Newcastle",
    "Swansea City": "Swansea",
    "Manchester Utd": "Man United",
    "Leicester City": "Leicester",
    "Norwich City": "Norwich",
    "Stoke City": "Stoke",
})

df_16['elo_opponent'] = df_16['Opponent'].map(name_map)
df_16['elo_club'] = 'Man City'

elo_lookup = df_elo[['club', 'merge_date', 'elo']]

# Opponent rating at the snapped date (left join: unmatched fixtures get NaN).
df_16 = pd.merge(
    df_16,
    elo_lookup.rename(columns={'elo': 'opp_elo'}),
    how='left',
    left_on=['elo_opponent', 'merge_date'],
    right_on=['club', 'merge_date'],
)
df_16 = df_16.drop(columns='club')

# City's own rating at the same snapped date.
df_16 = pd.merge(
    df_16,
    elo_lookup.rename(columns={'elo': 'city_elo'}),
    how='left',
    left_on=['elo_club', 'merge_date'],
    right_on=['club', 'merge_date'],
)
df_16 = df_16.drop(columns=['club', 'elo_club'])

In [None]:
# Remove the helper columns that were only needed for the Elo join (ignored if already gone).
df_16 = df_16.drop(columns=['merge_date', 'elo_opponent'], errors='ignore')

# Rating gap from City's side: positive when City's Elo is above the opponent's.
df_16["elo_diff"] = df_16["city_elo"].sub(df_16["opp_elo"])

# Opponent label -> value for 'opp_final_pos' (listed in ascending position order).
final_pos = {
    "Leicester City": 1,
    "Arsenal": 2,
    "Tottenham": 3,
    "Manchester Utd": 5,
    "Southampton": 6,
    "West Ham": 7,
    "Liverpool": 8,
    "Stoke City": 9,
    "Chelsea": 10,
    "Everton": 11,
    "Swansea City": 12,
    "Watford": 13,
    "West Brom": 14,
    "Crystal Palace": 15,
    "Bournemouth": 16,
    "Sunderland": 17,
    "Newcastle Utd": 18,
    "Norwich City": 19,
    "Aston Villa": 20,
}
df_16['opp_final_pos'] = df_16['Opponent'].map(final_pos)

In [None]:
# Bet365 odds for the season, keyed by (match date, team) separately for home and away sides.
df_bet_16 = pd.read_csv('/content/15-16 Odds.csv')
df_bet_16['Date'] = pd.to_datetime(df_bet_16['Date'], dayfirst=True)
home_odds_map = df_bet_16.set_index(['Date','HomeTeam'])['B365H'].to_dict()
away_odds_map = df_bet_16.set_index(['Date','AwayTeam'])['B365A'].to_dict()


def get_city_win_odds(row):
    """Return the B365 price on a City win for this fixture, or pd.NA if no odds row matches.

    Home fixtures (home_0_away_1 == 0) read the home-win price, every other
    fixture reads the away-win price; the lookup key is (row['Date'], 'Man City').
    """
    odds_for_venue = home_odds_map if row['home_0_away_1'] == 0 else away_odds_map
    return odds_for_venue.get((row['Date'], 'Man City'), pd.NA)


df_16['city_win_odds'] = df_16.apply(get_city_win_odds, axis=1)

In [None]:
# Hand-entered values, one per row of df_16 in row order (38 each).
df_16["days_since_last_opp_game"] = [0, 8, 8, 6, 14, 5, 6, 7, 14, 8, 7, 6, 13, 7, 7, 7, 6, 7, 6, 7,
 14, 6, 7, 7, 7, 6, 7, 14, 7, 7, 7, 14, 6, 7, 7, 7, 6, 7]
df_16['xG'] = [1.92, 0.48, 1.58, 0.36, 3.09, 1.00, 1.58, 0.98, 0.17, 0.31,
 1.17, 1.53, 2.62, 1.24, 0.48, 0.96, 0.77, 1.08, 1.67, 0.69,
 0.37, 0.61, 1.73, 0.64, 2.50, 1.24, 0.18, 0.12, 0.56, 0.90,
 0.87, 1.09, 2.63, 0.52, 1.09, 1.13, 1.08, 2.04]
df_16['xGA'] = [0.44, 2.19, 0.58, 1.53, 0.74, 2.98, 2.31, 3.33, 4.71, 0.39,
 2.84, 0.18, 1.09, 2.12, 2.01, 1.34, 1.50, 3.52, 0.81, 0.99,
 1.45, 1.74, 1.01, 0.89, 1.71, 1.98, 1.17, 2.83, 0.14, 1.86,
 1.32, 2.07, 0.49, 0.46, 2.22, 2.84, 0.75, 0.12]

# xG_diff is xG minus xGA. The previous chained assignment
# (xG_diff = xG = xGA) overwrote xG with xGA and stored xGA as the difference.
df_16['xG_diff'] = df_16['xG'] - df_16['xGA']

# Put the columns into the layout shared by all season frames.
df_16 = df_16[['matchday','Date','home_0_away_1','Opponent','Poss','Result','points_won','GF','GA','xG','xGA','xG_diff','city_elo', 'opp_elo', 'elo_diff', 'city_win_odds', 'opp_final_pos', 'days_since_last_game_city', 'days_since_last_opp_game']]

In [None]:
# Scrape Manchester City's 2018-19 Premier League scores-and-fixtures table
# (FBref table id "matchlogs_for") and parse its 'Date' column as month-first/ISO dates.
df_19 = pd.read_html(
    io='https://fbref.com/en/squads/b8fd03ef/2018-2019/matchlogs/c9/schedule/Manchester-City-Scores-and-Fixtures-Premier-League',
    attrs={"id":"matchlogs_for"},
)[0]
df_19 = df_19.assign(Date=pd.to_datetime(df_19['Date'], dayfirst=False))

In [None]:
# Dropping irrelevant columns (errors='ignore' tolerates columns missing from this table) #
columns_to_drop = ['Time', 'Captain', 'Formation', 'Attendance', 'Opp Formation', 'Referee', 'Match Report', 'Notes']
df_19.drop(columns=columns_to_drop, inplace=True, errors='ignore')

# Dropping rows with missing essential info #
df_19.dropna(subset=['Date', 'Opponent', 'Result'], inplace=True)

# Extract matchday number from 'Round' column (e.g., "Matchweek 5" → 5) #
df_19['matchday'] = df_19['Round'].str.extract(r'(\d+)').astype(int)

# Create points_won column based on results #
result_to_points = {'W': 3, 'D': 1, 'L': 0}
df_19['points_won'] = df_19['Result'].map(result_to_points)

# Standardize home/away column to binary values (anything other than 'Home' is coded 1) #
df_19['home_0_away_1'] = df_19['Venue'].apply(lambda x: 0 if x == 'Home' else 1)

# Reorder columns. The explicit .copy() makes df_19 an independent frame, so the
# columns added to it in later cells cannot trigger SettingWithCopyWarning. #
df_19 = df_19[['matchday', 'Date', 'home_0_away_1', 'Opponent','Poss', 'Result', 'points_won', 'GF', 'GA','xG','xGA']].copy()

In [None]:
# Show the opponent for every row of df_19, in row order.
print(df_19.loc[:, 'Opponent'])

0            Arsenal
1       Huddersfield
2             Wolves
3      Newcastle Utd
4             Fulham
5       Cardiff City
6           Brighton
7          Liverpool
8            Burnley
9          Tottenham
10       Southampton
11    Manchester Utd
12          West Ham
13       Bournemouth
14           Watford
15           Chelsea
16           Everton
17    Crystal Palace
18    Leicester City
19       Southampton
20         Liverpool
21            Wolves
22      Huddersfield
23     Newcastle Utd
24           Arsenal
25           Everton
26           Chelsea
27          West Ham
28       Bournemouth
29           Watford
30            Fulham
31      Cardiff City
32    Crystal Palace
33         Tottenham
34    Manchester Utd
35           Burnley
36    Leicester City
37          Brighton
Name: Opponent, dtype: object


In [None]:
# City's rest between fixtures: whole days from the previous row's Date to this one.
# The first row has no previous fixture, so its missing gap is stored as 0.
df_19['days_since_last_game_city'] = df_19['Date'].diff().dt.days.fillna(0).astype(int)
def round_to_elo_date(dt):
    """Snap a date to the 1st (day < 15) or the 15th (day >= 15) of its own month."""
    snapshot_day = 1 if dt.day < 15 else 15
    return pd.Timestamp(year=dt.year, month=dt.month, day=snapshot_day)
# Join key: each fixture is snapped to the 1st/15th of its month, and the Elo
# table exposes its own 'date' under the same column name.
df_19['merge_date'] = df_19['Date'].apply(round_to_elo_date)
df_elo['merge_date'] = df_elo['date']

# Opponent label in df_19 -> club label used in the Elo table.
# Most clubs are spelled the same in both; only the exceptions are listed explicitly.
same_name_clubs = [
    'West Ham', 'Wolves', 'Everton', 'Southampton', 'Crystal Palace',
    'Liverpool', 'Huddersfield', 'Tottenham', 'Chelsea', 'Brighton',
    'Watford', 'Arsenal', 'Bournemouth', 'Fulham', 'Burnley',
]
name_map = {club: club for club in same_name_clubs}
name_map.update({
    'Leicester City': 'Leicester',
    'Cardiff City': 'Cardiff',
    'Newcastle Utd': 'Newcastle',
    'Manchester Utd': 'Man United',
})

df_19['elo_opponent'] = df_19['Opponent'].map(name_map)
df_19['elo_club'] = 'Man City'

elo_lookup = df_elo[['club', 'merge_date', 'elo']]

# Opponent rating at the snapped date (left join: unmatched fixtures get NaN).
df_19 = pd.merge(
    df_19,
    elo_lookup.rename(columns={'elo': 'opp_elo'}),
    how='left',
    left_on=['elo_opponent', 'merge_date'],
    right_on=['club', 'merge_date'],
)
df_19 = df_19.drop(columns='club')

# City's own rating at the same snapped date.
df_19 = pd.merge(
    df_19,
    elo_lookup.rename(columns={'elo': 'city_elo'}),
    how='left',
    left_on=['elo_club', 'merge_date'],
    right_on=['club', 'merge_date'],
)
df_19 = df_19.drop(columns=['club', 'elo_club'])

In [None]:
# Remove the helper columns that were only needed for the Elo join (ignored if already gone).
df_19 = df_19.drop(columns=['merge_date', 'elo_opponent'], errors='ignore')

# Gaps from City's side: positive means City's figure is the larger one.
df_19["elo_diff"] = df_19["city_elo"].sub(df_19["opp_elo"])
df_19['xG_diff'] = df_19['xG'].sub(df_19['xGA'])

# Opponent label -> value for 'opp_final_pos' (listed in ascending position order).
final_pos = {
    'Liverpool': 2,
    'Chelsea': 3,
    'Tottenham': 4,
    'Arsenal': 5,
    'Manchester Utd': 6,
    'Wolves': 7,
    'Everton': 8,
    'Leicester City': 9,
    'West Ham': 10,
    'Watford': 11,
    'Crystal Palace': 12,
    'Newcastle Utd': 13,
    'Bournemouth': 14,
    'Burnley': 15,
    'Southampton': 16,
    'Brighton': 17,
    'Cardiff City': 18,
    'Fulham': 19,
    'Huddersfield': 20,
}
df_19['opp_final_pos'] = df_19['Opponent'].map(final_pos)

In [None]:
# Bet365 odds for the season, keyed by (match date, team) separately for home and away sides.
df_bet_19 = pd.read_csv('/content/18-19 Odds.csv')
df_bet_19['Date'] = pd.to_datetime(df_bet_19['Date'], dayfirst=True)
home_odds_map = df_bet_19.set_index(['Date','HomeTeam'])['B365H'].to_dict()
away_odds_map = df_bet_19.set_index(['Date','AwayTeam'])['B365A'].to_dict()


def get_city_win_odds(row):
    """Return the B365 price on a City win for this fixture, or pd.NA if no odds row matches.

    Home fixtures (home_0_away_1 == 0) read the home-win price, every other
    fixture reads the away-win price; the lookup key is (row['Date'], 'Man City').
    """
    odds_for_venue = home_odds_map if row['home_0_away_1'] == 0 else away_odds_map
    return odds_for_venue.get((row['Date'], 'Man City'), pd.NA)


df_19['city_win_odds'] = df_19.apply(get_city_win_odds, axis=1)

In [None]:
# Hand-entered values for 'days_since_last_opp_game', one per row of df_19 in row order.
df_19["days_since_last_opp_game"] = [
    0, 8, 7, 6, 14, 7, 7, 8, 14, 9, 8, 8, 14, 6, 3, 3, 5, 7, 4,
    3, 5, 12, 8, 10, 5, 4, 8, 5, 3, 6, 13, 3, 8, 7, 3, 6, 8, 7,
]

# Put the columns into the layout shared by all season frames.
season_columns = [
    'matchday', 'Date', 'home_0_away_1', 'Opponent', 'Poss', 'Result',
    'points_won', 'GF', 'GA', 'xG', 'xGA', 'xG_diff',
    'city_elo', 'opp_elo', 'elo_diff', 'city_win_odds', 'opp_final_pos',
    'days_since_last_game_city', 'days_since_last_opp_game',
]
df_19 = df_19[season_columns]

In [None]:
# Preview the first ten rows of the finished 2015-16 frame instead of dumping all of it.
df_16.head(10)

Unnamed: 0,matchday,Date,home_0_away_1,Opponent,Poss,Result,points_won,GF,GA,xG,xGA,xG_diff,city_elo,opp_elo,elo_diff,city_win_odds,opp_final_pos,days_since_last_game_city,days_since_last_opp_game
0,1,2015-08-10,1,West Brom,69,W,3,3,0,0.44,0.44,0.44,1884.95,1638.09,246.86,1.67,14,0,0
1,2,2015-08-16,0,Chelsea,49,W,3,3,0,2.19,2.19,2.19,1888.5,1884.38,4.12,2.1,10,6,8
2,3,2015-08-23,1,Everton,54,W,3,2,0,0.58,0.58,0.58,1888.5,1701.66,186.84,1.73,11,7,8
3,4,2015-08-29,0,Watford,59,W,3,2,0,1.53,1.53,1.53,1888.5,1578.99,309.51,1.2,13,6,6
4,5,2015-09-12,1,Crystal Palace,58,W,3,1,0,0.74,0.74,0.74,1905.42,1670.35,235.07,1.67,15,14,14
5,6,2015-09-19,0,West Ham,72,L,0,1,2,2.98,2.98,2.98,1908.94,1627.77,281.17,1.29,7,7,5
6,7,2015-09-26,1,Tottenham,54,L,0,1,4,2.31,2.31,2.31,1908.94,1723.61,185.33,1.91,3,7,6
7,8,2015-10-03,0,Newcastle Utd,64,W,3,6,1,3.33,3.33,3.33,1861.44,1561.39,300.05,1.25,18,7,7
8,9,2015-10-17,0,Bournemouth,54,W,3,5,1,4.71,4.71,4.71,1863.57,1571.21,292.36,1.33,16,14,14
9,10,2015-10-25,1,Manchester Utd,41,D,1,0,0,0.39,0.39,0.39,1863.57,1797.86,65.71,2.8,5,8,8


In [None]:
# Preview the first ten rows of the finished 2016-17 frame instead of dumping all of it.
df_17.head(10)

Unnamed: 0,matchday,Date,home_0_away_1,Opponent,Poss,Result,points_won,GF,GA,xG,xGA,xG_diff,city_elo,opp_elo,elo_diff,city_win_odds,opp_final_pos,days_since_last_game_city,days_since_last_opp_game
0,1,2016-08-13,0,Sunderland,77,W,3,2,1,1.1,1.1,1.1,1849.89,1656.89,193.0,1.25,20,0,0
1,2,2016-08-20,1,Stoke City,58,W,3,4,1,2.17,2.17,2.17,1854.01,1683.77,170.24,1.73,13,7,7
2,3,2016-08-28,0,West Ham,67,W,3,3,1,1.08,1.08,1.08,1854.01,1727.62,126.39,1.25,11,8,7
3,4,2016-09-10,1,Manchester Utd,60,W,3,2,1,1.89,1.89,1.89,1865.55,1806.67,58.88,3.4,6,13,14
4,5,2016-09-17,0,Bournemouth,64,W,3,4,0,0.39,0.39,0.39,1884.49,1617.8,266.69,1.29,9,7,7
5,6,2016-09-24,1,Swansea City,58,W,3,3,1,2.58,2.58,2.58,1884.49,1677.8,206.69,1.36,15,7,6
6,7,2016-10-02,1,Tottenham,58,L,0,0,2,0.91,0.91,0.91,1889.08,1822.07,67.01,2.5,2,8,8
7,8,2016-10-15,0,Everton,73,D,1,1,1,0.14,0.14,0.14,1877.44,1718.51,158.93,1.5,7,13,15
8,9,2016-10-23,0,Southampton,65,D,1,1,1,0.56,0.56,0.56,1877.44,1778.66,98.78,1.5,8,8,7
9,10,2016-10-29,1,West Brom,70,W,3,4,0,2.48,2.48,2.48,1877.44,1651.42,226.02,1.44,10,6,7


In [None]:
# Preview the first ten rows of the finished 2017-18 frame instead of dumping all of it.
df_18.head(10)

Unnamed: 0,matchday,Date,home_0_away_1,Opponent,Poss,Result,points_won,GF,GA,xG,xGA,xG_diff,city_elo,opp_elo,elo_diff,city_win_odds,opp_final_pos,days_since_last_game_city,days_since_last_opp_game
0,1,2017-08-12,1,Brighton,77,W,3,2,0,1.9,0.3,1.6,1866.48,1583.48,283.0,1.33,15,0,0
1,2,2017-08-21,0,Everton,64,D,1,1,1,1.1,0.6,0.5,1871.53,1755.67,115.86,1.33,8,9,7
2,3,2017-08-26,1,Bournemouth,70,W,3,2,1,1.4,0.5,0.9,1871.53,1647.5,224.03,1.3,12,5,7
3,4,2017-09-09,0,Liverpool,66,W,3,5,0,2.4,0.7,1.7,1876.28,1864.92,11.36,1.85,4,14,14
4,5,2017-09-16,1,Watford,67,W,3,6,0,3.3,0.5,2.8,1899.49,1644.88,254.61,1.33,14,7,7
5,6,2017-09-23,0,Crystal Palace,71,W,3,5,0,4.5,0.6,3.9,1899.49,1606.33,293.16,1.14,11,7,7
6,7,2017-09-30,1,Chelsea,61,W,3,1,0,0.9,0.5,0.4,1899.49,1922.44,-22.95,2.5,5,7,4
7,8,2017-10-14,0,Stoke City,79,W,3,7,2,3.8,0.5,3.3,1934.3,1683.29,251.01,1.14,19,14,3
8,9,2017-10-21,0,Burnley,77,W,3,3,0,3.1,0.2,2.9,1938.48,1675.01,263.47,1.11,7,7,14
9,10,2017-10-28,1,West Brom,77,W,3,3,2,2.3,1.5,0.8,1938.48,1656.52,281.96,1.28,20,7,7


In [None]:
# Preview the first ten rows of the finished 2018-19 frame instead of dumping all of it.
df_19.head(10)

Unnamed: 0,matchday,Date,home_0_away_1,Opponent,Poss,Result,points_won,GF,GA,xG,xGA,xG_diff,city_elo,opp_elo,elo_diff,city_win_odds,opp_final_pos,days_since_last_game_city,days_since_last_opp_game
0,1,2018-08-12,1,Arsenal,58,W,3,2,0,1.7,0.5,1.2,1973.63,1822.67,150.96,1.95,5,0,0
1,2,2018-08-19,0,Huddersfield,76,W,3,6,1,4.2,0.7,3.5,1982.87,1561.06,421.81,1.1,20,7,8
2,3,2018-08-25,1,Wolves,71,D,1,1,1,1.6,1.0,0.6,1982.87,1594.2,388.67,1.28,7,6,7
3,4,2018-09-01,0,Newcastle Utd,78,W,3,2,1,2.0,0.5,1.5,1973.53,1657.48,316.05,1.11,13,7,6
4,5,2018-09-15,0,Fulham,64,W,3,3,0,4.8,0.4,4.4,1974.86,1629.78,345.08,1.12,19,14,14
5,6,2018-09-22,1,Cardiff City,78,W,3,5,0,2.5,0.1,2.4,1974.86,1565.73,409.13,1.16,18,7,7
6,7,2018-09-29,0,Brighton,79,W,3,2,0,2.7,0.2,2.5,1974.86,1629.83,345.03,1.1,17,7,7
7,8,2018-10-07,1,Liverpool,51,D,1,0,0,1.0,0.4,0.6,1965.5,1943.64,21.86,2.8,2,8,8
8,9,2018-10-20,0,Burnley,69,W,3,5,0,4.0,0.4,3.6,1966.16,1649.96,316.2,1.08,15,13,14
9,10,2018-10-29,1,Tottenham,51,W,3,1,0,1.6,0.4,1.2,1966.16,1890.49,75.67,1.75,4,9,9


In [None]:
# Stack the four season frames in season order (df_16 first, df_19 last).
# ignore_index=True renumbers the rows 0..n-1; without it every season keeps its own
# row labels, so each label appears once per season and label-based lookups are ambiguous.
final_df = pd.concat([df_16, df_17, df_18, df_19], ignore_index=True)

In [None]:
# Persist the combined frame as CSV (relative path; the index is written as the first column).
final_df.to_csv(path_or_buf='city_pt2.csv')