<a href="https://colab.research.google.com/github/alwaysneedhelp/Football-AI/blob/main/Football.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [134]:
import pandas as pd
import kagglehub
import os

In [135]:
def load_file(link, name):
  path = kagglehub.dataset_download(link)
  path = os.path.join(path, f'{name}.csv')
  return path

In [136]:
matches = pd.read_csv(load_file('martj42/international-football-results-from-1872-to-2017', 'results'))

**PREPARING DATA FOR AI**

In [137]:
matches['date'] = pd.to_datetime(matches['date'])

In [138]:
matches['opp_code'] = matches['away_team'].astype('category').cat.codes

In [139]:
matches['day_code'] = matches['date'].dt.dayofweek
matches['goal_diff'] = matches['home_score'] - matches['away_score']

In [140]:
matches['result'] = (matches['home_score'] > matches['away_score']).astype('int')

In [141]:
matches['neutral'] = matches['neutral'].apply(lambda x: not x).astype(int)
matches['tournament_id'] = matches['tournament'].astype('category').cat.codes
matches['city_id'] = matches['city'].astype('category').cat.codes
matches['country_id'] = matches['country'].astype('category').cat.codes

**PRIORITIZING TOURNAMENTS**

In [142]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [143]:
rf = xgb.XGBClassifier()

**CALCULATE ELO**

In [144]:
def calculate_elo(my_code, opp_code, rating_a, rating_b, res, weight=1, add_on=0):
  K = 100
  expected_a = 1 / (1 + 10 ** ((rating_b - rating_a) / 400))
  new_ratinga = rating_a + weight * K*(res-expected_a)

  expected_b = 1 / (1 + 10 ** ((rating_a - rating_b) / 400))
  new_ratingb = rating_b + weight * K*(res-expected_b)

  elo_ratings[my_code] = new_ratinga+add_on
  elo_ratings[opp_code] = new_ratingb+add_on

  return new_ratinga, new_ratingb

**ADDING CODE FOR ELO TO BE CALCULATED PROPERLY FOR EACH COUNTRY**

In [145]:
matches['elo'], matches['opp_elo'] = 2000, 2000
matches['my_code'] = matches['home_team'].astype('category').cat.codes
matches['opp_code'] = matches['away_team'].astype('category').cat.codes
all_codes = pd.concat([matches['my_code'], matches['opp_code']]).unique()
elo_ratings = dict(zip(all_codes, [2000] * len(all_codes)))

In [146]:
matches[['elo', 'opp_elo']] = matches.apply(
    lambda row:calculate_elo(
        row['my_code'],
        row['opp_code'],
        elo_ratings[row['my_code']],
        elo_ratings[row['opp_code']],
        row['result'],
        #tournament_weights[row['tournament']],
    ),
    axis=1,
    result_type='expand'
)

STORE INFO TO COMPARE LATER

In [147]:
import logging

# Create and configure logger
logging.basicConfig(filename="accuracies.log",
                    format='%(asctime)s %(message)s',
                    filemode='a',
                    force=True # Reset previous settings
                    )

# Creating an object
logger = logging.getLogger()

# Setting the threshold of logger to INFO
logger.setLevel(logging.INFO)

In [148]:
def log_info(*args):
  for arg in args:
    logger.info(arg)
    logger.info('--------------------------------------------------------------')
  logger.info('---------------------------------------------------------------------------------')

**FUNC TO ALWAYS CHECK IF CHANGES DONE HAD ANY RESULT ON ACC OR PRECISION**

In [151]:
def check_predictions(matches, predictors):
  X_train, X_test, y_train, y_test = train_test_split(
      matches[predictors].values,
      matches['result'].values,
      test_size=0.2,
      random_state=42
  )
  rf.fit(X_train, y_train)
  preds = rf.predict(X_test)
  combined = pd.DataFrame(dict(actual=y_test, predicted=preds))
  acc = accuracy_score(y_test, preds)
  prec = precision_score(y_test, preds)
  recall = recall_score(y_test, preds)
  f1 = f1_score(y_test, preds)
  print(f'Accurasy score: {acc}')
  print(f'Precision score: {prec}')
  print(f'Recall score: {recall}')
  print(f'F1 score: {f1}')
  log_info(acc, prec, recall, f1)

In [152]:
fifa_ranking = pd.read_csv(load_file('cashncarry/fifaworldranking', 'fifa_ranking-2024-06-20'))

**PREPARING FIFA RANKING TABLE TO MERGE WITH MY ELO SYSTEM**

In [153]:
fifa_ranking = fifa_ranking.rename(columns={
    'country_full': 'home_team',
    'rank': 'home_fifa_rank',
    'rank_date': 'date'
})
matches['date'] = pd.to_datetime(matches['date'])
fifa_ranking['date'] = pd.to_datetime(fifa_ranking['date'])
predictors = ['my_code', 'elo', 'opp_elo', 'opp_code', 'day_code', 'neutral', 'tournament_id', 'city_id', 'country_id']
check_predictions(matches, predictors)

Accurasy score: 0.7082555486413608
Precision score: 0.7044066317626527
Recall score: 0.6889268188606785
F1 score: 0.6965807356272247


In [154]:
rankings_merged = pd.merge_asof(
    matches.sort_values('date'),
    fifa_ranking.sort_values('date'),
    on='date',
    by='home_team',
    direction='backward')

In [155]:
predictors.extend(['home_fifa_rank', 'previous_points'])
check_predictions(rankings_merged, predictors)

Accurasy score: 0.7365691765193944
Precision score: 0.7297356346952152
Recall score: 0.719362481154426
F1 score: 0.7245119305856833


In [156]:
more_data = pd.read_csv(load_file('piterfm/fifa-football-world-cup', 'matches_1930_2022'))

In [157]:
more_data = more_data.rename(columns={
    'Date': 'date'
})
more_data['date'] = pd.to_datetime(more_data['date'])

rankings_merged = pd.merge(
    more_data.sort_values('date'),
    rankings_merged.sort_values('date'),
    on=['date', 'home_team', 'away_team'],
)

In [178]:
def top_teams(N, matches):
  top_teams = sorted(elo_ratings.items(), key=lambda x: x[1], reverse=True)[:N]
  for id, elo in top_teams:
    name = matches[matches['elo']==elo]['home_team']
    print(f'{name}:{elo}')

Unnamed: 0,home_team,away_team,home_score_x,home_xg,home_penalty,away_score_x,away_xg,away_penalty,home_manager,home_captain,...,country_id,elo,opp_elo,my_code,home_fifa_rank,country_abrv,total_points,previous_points,rank_change,confederation
0,France,Mexico,4,,,1,,,Raoul Caudron,Alexandre Villaplane,...,251,2023.584370,2085.591187,97,,,,,,
1,Argentina,France,1,,,0,,,Francisco Olazar,Manuel Ferreira,...,251,2354.122104,2384.759453,12,,,,,,
2,Chile,Mexico,3,,,0,,,Gyorgy Orth,Carlos Schneeberger,...,251,1979.013239,2111.731630,58,,,,,,
3,Uruguay,Peru,1,,,0,,,Alberto Suppici,José Nasazzi,...,251,2111.812396,2191.350719,300,,,,,,
4,Argentina,Mexico,6,,,3,,,Francisco Olazar,Ángel Bossio,...,251,2373.978216,2191.875518,12,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
676,Morocco,Portugal,1,1.4,,0,0.9,,Hoalid Regragui,Romain Saïss,...,189,1920.251918,1956.362181,185,22.0,MAR,1563.50,1558.35,-1.0,CAF
677,Argentina,Croatia,3,2.3,,0,0.5,,Lionel Scaloni,Lionel Messi,...,189,1907.734749,1481.098674,12,3.0,ARG,1773.88,1770.65,0.0,CONMEBOL
678,France,Morocco,2,2.0,,0,0.9,,Didier Deschamps,Hugo Lloris,...,189,1721.819082,1716.619141,97,4.0,FRA,1759.78,1764.85,0.0,UEFA
679,Croatia,Morocco,2,0.7,,1,1.2,,Zlatko Dalić,Luka Modrić,...,189,1624.221157,1744.576076,67,12.0,CRO,1645.64,1632.15,-3.0,UEFA
