<a href="https://colab.research.google.com/github/alwaysneedhelp/Football-AI/blob/main/Football.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [88]:
import pandas as pd

In [89]:
matches = pd.read_csv('results.csv')

**PREPARING DATA FOR AI**

In [90]:
matches['date'] = pd.to_datetime(matches['date'])

In [91]:
matches['opp_code'] = matches['away_team'].astype('category').cat.codes

In [92]:
matches['day_code'] = matches['date'].dt.dayofweek
matches['goal_diff'] = matches['home_score'] - matches['away_score']

In [93]:
matches['result'] = (matches['home_score'] > matches['away_score']).astype('int')

In [94]:
matches['neutral'] = matches['neutral'].apply(lambda x: not x)
matches['tournament_id'] = matches['tournament'].astype('category').cat.codes
matches['city_id'] = matches['city'].astype('category').cat.codes
matches['country_id'] = matches['country'].astype('category').cat.codes

In [95]:
matches

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral,opp_code,day_code,goal_diff,result,tournament_id,city_id,country_id
0,1872-11-30,Scotland,England,0,0,Friendly,Glasgow,Scotland,True,87,5,0,0,88,658,204
1,1873-03-08,England,Scotland,4,2,Friendly,London,England,True,241,5,2,1,88,1040,71
2,1874-03-07,Scotland,England,2,1,Friendly,Glasgow,Scotland,True,87,5,1,1,88,658,204
3,1875-03-06,England,Scotland,2,2,Friendly,London,England,True,241,5,0,0,88,1040,71
4,1876-03-04,Scotland,England,3,0,Friendly,Glasgow,Scotland,True,87,5,3,1,88,658,204
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48202,2025-03-25,Israel,Norway,2,4,FIFA World Cup qualification,Debrecen,Hungary,False,201,1,-2,0,83,488,104
48203,2025-03-25,Liechtenstein,Kazakhstan,0,2,FIFA World Cup qualification,Vaduz,Liechtenstein,True,144,1,-2,0,83,1933,131
48204,2025-03-25,North Macedonia,Wales,1,1,FIFA World Cup qualification,Skopje,North Macedonia,True,302,1,0,0,83,1726,168
48205,2025-03-25,Gibraltar,Czech Republic,0,4,FIFA World Cup qualification,Faro-Loulé,Portugal,False,74,1,-4,0,83,585,186


**PRIORITIZING TOURNAMENTS**

In [96]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [97]:
rf = RandomForestClassifier(n_estimators=50, min_samples_split=10, random_state=1)

In [98]:
from sklearn.metrics import accuracy_score

In [99]:
from sklearn.metrics import precision_score

**CALCULATE ELO**

In [108]:
def calculate_elo(my_code, opp_code, rating_a, rating_b, res, weight=1):
  K = 100
  expected_a = 1 / (1 + 10 ** ((rating_b - rating_a) / 400))
  new_ratinga = rating_a + weight * K*(res-expected_a)

  expected_b = 1 / (1 + 10 ** ((rating_a - rating_b) / 400))
  new_ratingb = rating_b + weight * K*(res-expected_b)

  elo_ratings[my_code] = new_ratinga
  elo_ratings[opp_code] = new_ratingb


  return new_ratinga, new_ratingb

In [109]:
matches['elo'], matches['opp_elo'] = 2000, 2000

**ADDING CODE FOR ELO TO BE CALCULATED PROPERLY FOR EACH COUNTRY**

In [110]:
matches['my_code'] = matches['home_team'].astype('category').cat.codes
matches['opp_code'] = matches['away_team'].astype('category').cat.codes
all_codes = pd.concat([matches['my_code'], matches['opp_code']]).unique()
elo_ratings = dict(zip(all_codes, [2000] * len(all_codes)))

In [112]:
matches[['elo', 'opp_elo']] = matches.apply(
    lambda row:calculate_elo(
        row['my_code'],
        row['opp_code'],
        elo_ratings[row['my_code']],
        elo_ratings[row['opp_code']],
        row['result'],
        #tournament_weights[row['tournament']],
    ),
    axis=1,
    result_type='expand'
)

**FUNC TO ALWAYS CHECK IF CHANGES DONE HAD ANY RESULT ON ACC OR PRECISION**

In [113]:
def check_predictions(matches, predictors):
  X_train, X_test, y_train, y_test = train_test_split(matches[predictors], matches['result'], test_size=0.2, random_state=42)
  rf.fit(X_train, y_train)
  preds = rf.predict(X_test)
  combined = pd.DataFrame(dict(actual=y_test, predicted=preds))
  print(combined.head(10))
  print(f'Accurasy score: {accuracy_score(y_test, preds)}')
  print(f'Precision score: {precision_score(y_test, preds)}')
  return X_train

In [114]:
grouped_matches = matches.groupby('home_team').get_group('Uzbekistan')

In [115]:
grouped_matches

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral,opp_code,day_code,goal_diff,result,tournament_id,city_id,country_id,elo,opp_elo,my_code
18356,1992-06-28,Uzbekistan,Turkmenistan,2,1,Friendly,Tashkent,Uzbekistan,True,285,6,1,1,88,1829,252,2000.917070,1868.312652,301
18446,1992-08-23,Uzbekistan,Kyrgyzstan,3,0,Friendly,Tashkent,Uzbekistan,True,150,6,3,1,88,1829,252,2027.706573,1899.484082,301
18570,1992-10-14,Uzbekistan,Kazakhstan,1,1,Friendly,Tashkent,Uzbekistan,True,144,2,0,0,88,1829,252,1944.942798,1737.911097,301
19659,1994-04-11,Uzbekistan,Turkmenistan,4,0,Friendly,Tashkent,Uzbekistan,True,285,0,4,1,88,1829,252,1981.877127,1915.063344,301
19662,1994-04-13,Uzbekistan,Kazakhstan,1,0,Friendly,Tashkent,Uzbekistan,True,144,2,1,1,88,1829,252,2011.844899,1904.451965,301
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47492,2024-09-05,Uzbekistan,North Korea,1,0,FIFA World Cup qualification,Tashkent,Uzbekistan,True,195,3,1,1,83,1829,252,1653.004530,1434.659499,301
47677,2024-10-10,Uzbekistan,Iran,0,0,FIFA World Cup qualification,Tashkent,Uzbekistan,True,131,3,0,0,83,1829,252,1570.012390,1360.641756,301
47814,2024-10-15,Uzbekistan,United Arab Emirates,1,0,FIFA World Cup qualification,Tashkent,Uzbekistan,True,291,1,1,1,83,1829,252,1582.123406,1313.599821,301
48019,2025-01-22,Uzbekistan,Jordan,0,0,Friendly,Doha,Qatar,False,142,2,0,0,88,511,189,1492.874173,1203.706621,301


In [116]:
train = matches[matches['date']>='2018-01-01']

In [117]:
fifa_ranking = pd.read_csv('ranking.csv')

In [118]:
fifa_ranking = fifa_ranking[fifa_ranking['rank_date']>='2018-01-01']

**PREPARING FIFA RANKING TABLE TO MERGE WITH MY ELO SYSTEM**

In [122]:
fifa_ranking = fifa_ranking.rename(columns={
    'country_full': 'home_team',
    'rank': 'home_fifa_rank',
    'rank_date': 'date'
})
matches['date'] = pd.to_datetime(matches['date'])
fifa_ranking['date'] = pd.to_datetime(fifa_ranking['date'])
predictors = ['my_code', 'opp_code', 'elo', 'opp_elo', 'day_code', 'neutral', 'tournament_id', 'city_id', 'country_id']
check_predictions(matches, predictors)

       actual  predicted
31986       1          1
147         1          1
31915       1          1
45490       0          0
11010       0          1
20949       1          1
4969        0          0
24499       0          0
43261       0          0
26422       0          0
Accurasy score: 0.6893797967226717
Precision score: 0.6856892010535558


Unnamed: 0,my_code,opp_code,elo,opp_elo,day_code,neutral,tournament_id,city_id,country_id
36055,158,8,1493.481519,1522.695785,1,True,88,1933,131
48020,139,282,1385.404204,1498.356222,4,True,88,1197,116
33197,89,39,1461.812322,1233.293292,2,True,88,1820,74
22349,21,55,1651.270327,1814.939236,3,False,88,1891,130
44570,159,266,1395.185816,1240.972398,1,True,83,1974,132
...,...,...,...,...,...,...,...,...,...
11284,136,78,1743.289568,1777.305527,2,True,88,803,113
44732,139,293,1342.978820,1322.546425,1,True,83,893,116
38158,229,179,1599.291297,1578.082628,6,True,167,1216,192
860,300,211,1768.810898,1876.066397,6,True,49,1200,251


In [123]:
rankings_merged = pd.merge_asof(
    matches.sort_values('date'),
    fifa_ranking.sort_values('date'),
    on='date',
    by='home_team',
    direction='backward')

In [124]:
rankings_merged

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral,opp_code,...,country_id,elo,opp_elo,my_code,home_fifa_rank,country_abrv,total_points,previous_points,rank_change,confederation
0,1872-11-30,Scotland,England,0,0,Friendly,Glasgow,Scotland,True,87,...,204,1547.207898,1567.673943,247,,,,,,
1,1873-03-08,England,Scotland,4,2,Friendly,London,England,True,241,...,71,1740.643449,1664.151623,86,,,,,,
2,1874-03-07,Scotland,England,2,1,Friendly,Glasgow,Scotland,True,87,...,204,1600.149797,1614.732044,247,,,,,,
3,1875-03-06,England,Scotland,2,2,Friendly,London,England,True,241,...,71,1679.809865,1624.985207,86,,,,,,
4,1876-03-04,Scotland,England,3,0,Friendly,Glasgow,Scotland,True,87,...,204,1652.247120,1662.634722,247,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48202,2025-03-25,Switzerland,Luxembourg,3,1,Friendly,Sankt Gallen,Switzerland,True,161,...,227,1425.685963,1317.079295,272,19.0,SUI,1617.24,1616.41,0.0,UEFA
48203,2025-03-25,Bolivia,Uruguay,0,0,FIFA World Cup qualification,El Alto,Bolivia,True,295,...,27,1271.440431,1230.052377,33,84.0,BOL,1282.43,1283.88,-1.0,CONMEBOL
48204,2025-03-25,Colombia,Paraguay,2,2,FIFA World Cup qualification,Barranquilla,Colombia,True,211,...,49,1473.472626,1313.227624,60,12.0,COL,1669.44,1664.28,0.0,CONMEBOL
48205,2025-03-25,Honduras,Bermuda,2,0,Gold Cup qualification,Tegucigalpa,Honduras,True,32,...,102,1509.525601,1625.751708,125,78.0,HON,1313.05,1301.92,-4.0,CONCACAF


In [125]:
predictors.extend(['home_fifa_rank', 'previous_points'])
check_predictions(rankings_merged, predictors)

       actual  predicted
31986       0          0
147         1          1
31915       0          0
45490       1          1
11010       0          1
20949       1          0
4969        0          0
24499       0          0
43261       0          0
26422       1          1
Accurasy score: 0.6959137108483717
Precision score: 0.684972972972973


Unnamed: 0,my_code,opp_code,elo,opp_elo,day_code,neutral,tournament_id,city_id,country_id,home_fifa_rank,previous_points
36055,158,8,1493.481519,1522.695785,1,True,88,1933,131,,
48020,139,282,1385.404204,1498.356222,4,True,88,1197,116,53.0,1435.33
33197,256,60,1310.500367,1714.060968,2,True,88,1717,210,,
22349,21,55,1651.270327,1814.939236,3,False,88,1891,130,,
44570,185,119,1692.752729,1429.111620,1,True,83,23,155,33.0,1487.89
...,...,...,...,...,...,...,...,...,...,...,...
11284,136,78,1743.289568,1777.305527,2,True,88,803,113,,
44732,26,142,1331.565836,1467.255450,1,True,88,1172,18,95.0,1254.81
38158,89,87,1472.710450,1265.802870,6,True,167,1820,74,,
860,300,211,1768.810898,1876.066397,6,True,49,1200,251,,
