# Data load

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from matplotlib import pyplot as plt
pd.set_option('display.max_columns', None)
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

#hide warning message in jupyter notebook
import warnings
warnings.filterwarnings(action='ignore')

matches = pd.read_csv('dataset.csv') #모델 학습을 위한 dataset
league_matches = pd.read_csv('league_schedules.csv') #2022 월드컵 예측용 dataset(prediction 및 test용 데모 data)
matches = matches.iloc[:,1:] #필요 없는 index 생략
league_matches = league_matches.iloc[:,1:]

# data preprocessing(add rank difference feature to training dataset)
matches['rank_difference'] = matches['home_team_fifa_rank'] - matches['away_team_fifa_rank'] 
league_matches = league_matches.iloc[:,1:]
league_matches['rank_difference'] = league_matches['home_team_fifa_rank'] - league_matches['away_team_fifa_rank'] 


# Modeling

In [2]:
#define features
feature = ['home_team_fifa_rank', 'away_team_fifa_rank',\
                   'past_win_rate','past_draw_rate','past_lose_rate',\
                   'home_isAfrica', 'home_isAsia',\
                   'home_isEurope', 'home_isNorthAmerica', 'home_isSouthAmerica',\
                   'home_isOceania', 'away_isAfrica', 'away_isAsia', 'away_isEurope',\
                   'away_isNorthAmerica', 'away_isSouthAmerica', 'away_isOceania',\
                   ]


#Define input&output
X = matches.loc[:,feature]#input
y = matches['home_team_score']# Goal For

#Linear Regression for Goal For

#Split Train set and Test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Generate Model
linreg = LinearRegression()
features = PolynomialFeatures(degree=3)
model = Pipeline([
    ('polynomial_features', features),
    ('linear_regression', linreg)
])
model = model.fit(X_train, y_train)

print("GoalFor training set score : {:.2f}".format( model.score(X_train, y_train) ))
print("GoalFor set score: {:.2f}".format( model.score(X_test, y_test) ))



GoalFor training set score : 0.25
GoalFor set score: 0.23


# Evaluate model

In [3]:
GoalFor_predict = model.predict(X_test) #Testing data 예측


#initialize
matches['predict_GoalFor'] = 1
matches['predict_GoalAgainst'] = 0
matches['predict_Result'] = 0 
matches['predict_Accuracy'] = 0
acc_sum = 0

for i in range(len(X_test.index)):
    #Testset에 대한 예측득점 계산
    matches.loc[X_test.index[i],'predict_GoalFor'] = GoalFor_predict[i]

    #Testset에 대한 예측실점 계산
    if X_test.index[i] < 23921:
        value = matches.loc[X_test.index[i]+23921,feature]
        matches.loc[X_test.index[i],'predict_GoalAgainst'] = model.predict([value])
    else:
        value = matches.loc[X_test.index[i]-23921,feature]
        matches.loc[X_test.index[i],'predict_GoalAgainst'] = model.predict([value])
    
    #|예측 득점 - 예측 실점| < 0.1이면 'Draw', 예측 득점 > 예측 실점이면 'Win',
    #예측 득점 < 예측 실점이면 'Lose'
    if abs(matches.loc[X_test.index[i]]['predict_GoalFor'] - matches.loc[X_test.index[i]]['predict_GoalAgainst']) < 0.1:
        matches.loc[X_test.index[i],'predict_Result'] = 'Draw'
    elif matches.loc[X_test.index[i]]['predict_GoalFor'] > matches.loc[X_test.index[i]]['predict_GoalAgainst']:
        matches.loc[X_test.index[i],'predict_Result'] = 'Win'
    elif matches.loc[X_test.index[i]]['predict_GoalFor'] < matches.loc[X_test.index[i]]['predict_GoalAgainst']:
        matches.loc[X_test.index[i],'predict_Result'] = 'Lose'
    
    #예측 결과가 실제 결과와 일치하면 1, 그렇지 않으면 0
    if matches.loc[X_test.index[i]]['home_team_result'] == matches.loc[X_test.index[i]]['predict_Result']:
        matches.loc[X_test.index[i],'predict_Accuracy'] = 1
    
    acc_sum = acc_sum + matches.loc[X_test.index[i]]['predict_Accuracy']

#모델 performance 계산
accuracy = acc_sum/len(X_test.index)

    
print('-------------------------------------------------------')
print("Used feature: ",X.columns)
print("Prediction Accuracy :",accuracy)


-------------------------------------------------------
Used feature:  Index(['home_team_fifa_rank', 'away_team_fifa_rank', 'past_win_rate',
       'past_draw_rate', 'past_lose_rate', 'home_isAfrica', 'home_isAsia',
       'home_isEurope', 'home_isNorthAmerica', 'home_isSouthAmerica',
       'home_isOceania', 'away_isAfrica', 'away_isAsia', 'away_isEurope',
       'away_isNorthAmerica', 'away_isSouthAmerica', 'away_isOceania'],
      dtype='object')
Prediction Accuracy : 0.5576340265440485


위에서 학습시킨 model을 이용하여 2022 카타르 월드컵 우승국을 예측한다.
예측용 data set은 첫번째 cell에서 불러온 'league_schedules.csv' 파일을 이용한다.
예측 과정은 실제 월드컵 일정을 토대로 한다.
조별리그를 통해 토너먼트에 진출할 팀을 결정한 후, 16강, 8강, 4강, 결승을 차례대로 예측한다.
아래부터는 2022 카타르 월드컵 우승국 예측을 위한 코드 및 결과이다.

# League matchup

Training and predicting results

In [4]:
X = league_matches.loc[:,['home_team_fifa_rank', 'away_team_fifa_rank',\
                          'past_win_rate', 'past_draw_rate', 'past_lose_rate',\
                          'home_isAfrica', 'home_isAsia', 'home_isEurope',\
                          'home_isNorthAmerica', 'home_isSouthAmerica', 'home_isOceania',\
                          'away_isAfrica', 'away_isAsia', 'away_isEurope',\
                          'away_isNorthAmerica', 'away_isSouthAmerica', 'away_isOceania']]#input
GoalFor_predict = model.predict(X)
league_matches['predict_GoalFor'] = 0
league_matches['predict_GoalAgainst'] = 0
for i in range(len(X.index)):
  #득점 예측
  league_matches.loc[X.index[i],'predict_GoalFor'] = GoalFor_predict[i]
  # print(league_matches['away_team_fifa_rank'][i])
  #실점 예측
  league_matches.loc[X.index[i],'predict_GoalAgainst'] = model.predict([[league_matches['away_team_fifa_rank'][i], #모델의 feature을 결정할 때 득점의 home, away를 뒤바꿔서 대입해야함
                                                                       league_matches['home_team_fifa_rank'][i], 
                                                                       league_matches['past_lose_rate'][i],
                                                                       league_matches['past_draw_rate'][i],
                                                                       league_matches['past_win_rate'][i],
                                                                       league_matches['away_isAfrica'][i],
                                                                       league_matches['away_isAsia'][i],
                                                                       league_matches['away_isEurope'][i],
                                                                       league_matches['away_isNorthAmerica'][i],
                                                                       league_matches['away_isSouthAmerica'][i],
                                                                       league_matches['away_isOceania'][i],
                                                                       league_matches['home_isAfrica'][i],
                                                                       league_matches['home_isAsia'][i],
                                                                       league_matches['home_isEurope'][i],
                                                                       league_matches['home_isNorthAmerica'][i],
                                                                       league_matches['home_isSouthAmerica'][i],
                                                                       league_matches['home_isOceania'][i]]]
                                                                       )  

league_matches['predict_Result'] = 0 # initialize

#Predict Win/Draw/Lose by comparing predicted values
for i in league_matches.index:
    if abs(league_matches.predict_GoalFor[i] - league_matches.predict_GoalAgainst[i]) < 0.1: #draw threshold를 0.1로 설정한 모습
        league_matches.predict_Result[i] = 'Draw'
    elif league_matches.predict_GoalFor[i] > league_matches.predict_GoalAgainst[i]:
        league_matches.predict_Result[i] = 'Win'
    elif league_matches.predict_GoalFor[i] < league_matches.predict_GoalAgainst[i]:
        league_matches.predict_Result[i] = 'Lose'

league_matches

Unnamed: 0,group,home_team,away_team,home_team_fifa_rank,away_team_fifa_rank,home_team_score,away_team_score,home_team_continent,away_team_continent,past_win,past_draw,past_lose,past_win_rate,past_draw_rate,past_lose_rate,phase,match,home_isAfrica,home_isAsia,home_isEurope,home_isNorthAmerica,home_isSouthAmerica,home_isOceania,away_isAfrica,away_isAsia,away_isEurope,away_isNorthAmerica,away_isSouthAmerica,away_isOceania,rank_difference,predict_GoalFor,predict_GoalAgainst,predict_Result
0,A,Qatar,Ecuador,50,44,,,Asia,South America,1,1,1,0.33,0.33,0.33,group matches,1,0,1,0,0,0,0,0,0,0,0,1,0,6,0.920486,1.227647,Lose
1,A,Senegal,Netherlands,18,8,,,Africa,Europe,0,0,0,0.33,0.33,0.33,group matches,2,1,0,0,0,0,0,0,0,1,0,0,0,10,1.026912,1.549215,Lose
2,B,England,IR Iran,5,20,,,Europe,Asia,0,0,0,0.33,0.33,0.33,group matches,3,0,0,1,0,0,0,0,1,0,0,0,0,-15,1.850746,0.597038,Win
3,B,USA,Wales,16,19,,,North America,Europe,1,1,0,0.5,0.5,0.0,group matches,4,0,0,0,1,0,0,0,0,1,0,0,0,-3,1.255175,1.565378,Lose
4,D,France,Australia,4,38,,,Europe,Oceania,3,1,1,0.6,0.2,0.2,group matches,5,0,0,1,0,0,0,0,0,0,0,0,1,-34,1.802783,0.783622,Win
5,D,Denmark,Tunisia,10,30,,,Europe,Africa,1,0,0,1.0,0.0,0.0,group matches,6,0,0,1,0,0,0,1,0,0,0,0,0,-20,1.724562,0.960279,Win
6,C,Mexico,Poland,13,26,,,North America,Europe,1,3,0,0.25,0.75,0.0,group matches,7,0,0,0,1,0,0,0,0,1,0,0,0,-13,1.366895,1.518143,Lose
7,C,Argentina,Saudi Arabia,3,51,,,South America,Asia,0,1,0,0.0,1.0,0.0,group matches,8,0,0,0,0,1,0,0,1,0,0,0,0,-48,2.216448,0.443904,Win
8,F,Belgium,Canada,2,41,,,Europe,North America,0,0,0,0.33,0.33,0.33,group matches,9,0,0,1,0,0,0,0,0,0,1,0,0,-39,1.973508,0.538151,Win
9,E,Spain,Costa Rica,7,31,,,Europe,North America,2,1,0,0.67,0.33,0.0,group matches,10,0,0,1,0,0,0,0,0,0,1,0,0,-24,1.815844,0.694116,Win


League matchup predict & print each group results

In [5]:
group_list = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']
group_result1 = dict()
group_result2 = dict()
for selected_group in group_list:
  result = league_matches[(league_matches['group'] == selected_group)] #조별로 데이터프레임을 새로 생성
  group_tmp = list(set(result.home_team) | set(result.away_team)) #조별 국가 리스트를 group_tmp에 저장
  group_winpoint = dict() #승점 계산을 위한 빈 dictionary 생성
  point_diff = dict() #득실차 계산을 위한 빈 dictionary 생성
  #initialize
  for groups in group_tmp:
    group_winpoint[groups] = 0
    point_diff[groups] = 0
  #예측된 경기 결과에 따라 승점, 득실차 계산
  for groups in group_tmp:
    for i in result.index:
      if (result.home_team[i] == groups and result.predict_Result[i] == 'Win'): #승리 시 승점 + 3 (home_team 기준)
        group_winpoint[groups] += 3
        point_diff[groups] += (result.predict_GoalFor[i] - result.predict_GoalAgainst[i]) #득실 차 계산
      elif (result.away_team[i] == groups and result.predict_Result[i] == 'Lose'): 
        group_winpoint[groups] += 3
        point_diff[groups] += (result.predict_GoalAgainst[i] - result.predict_GoalFor[i])
      elif (result.home_team[i] == groups and result.predict_Result[i] == 'Lose'): #패배 시 승점 그대로 (home_team 기준)
        point_diff[groups] += (result.predict_GoalFor[i] - result.predict_GoalAgainst[i])
      elif (result.away_team[i] == groups and result.predict_Result[i] == 'Win'):
        point_diff[groups] += (result.predict_GoalAgainst[i] - result.predict_GoalFor[i])
      elif(result.home_team[i] == groups and result.predict_Result[i] == 'Draw') or (result.away_team[i] == groups and result.predict_Result[i] == 'Draw'): #무승부 시 승점 + 1
        group_winpoint[groups] += 1
      point_diff[groups] = round(point_diff[groups], 3) #득실 차 소수점 셋 째 자리까지 반올림
  group_result1[selected_group] = group_winpoint
  group_result2[selected_group] = point_diff
  #결과 출력
  print('<', selected_group, 'group match predicted results >')
  print()
  for i in result.index:
    print(result.home_team[i], 'vs', result.away_team[i], result.predict_GoalFor[i], ':', result.predict_GoalAgainst[i])
    if result.predict_Result[i] == 'Win':
      print(result.home_team[i], 'wins')
    elif result.predict_Result[i] == 'Lose':
      print(result.away_team[i], 'wins')
    else:
      print('draw')
    print()
  print(group_winpoint)
  print(point_diff)
  print()

print(group_result1)
print(group_result2)


< A group match predicted results >

Qatar vs Ecuador 0.9204860216445923 : 1.2276472782584733
Ecuador wins

Senegal vs Netherlands 1.0269119560887248 : 1.5492152110475672
Netherlands wins

Qatar vs Senegal 0.9018237516993395 : 1.3089412477318092
Senegal wins

Netherlands vs Ecuador 1.8559110514373742 : 0.702215262102527
Netherlands wins

Ecuador vs Senegal 1.2530991905280189 : 1.8283558492250904
Senegal wins

Netherlands vs Qatar 2.0055368595465097 : 0.5934095873244587
Netherlands wins

{'Netherlands': 9, 'Qatar': 0, 'Ecuador': 3, 'Senegal': 6}
{'Netherlands': 3.088, 'Qatar': -2.126, 'Ecuador': -1.422, 'Senegal': 0.46}

< B group match predicted results >

England vs IR Iran 1.8507464832248388 : 0.5970380253613712
England wins

USA vs Wales 1.2551749957435732 : 1.5653776091050986
Wales wins

Wales vs IR Iran 1.573877450405007 : 0.8385312722036815
Wales wins

England vs USA 1.806798146848223 : 0.8255269373082683
England wins

Wales vs England 0.9127329372361146 : 1.5889357504813688
Engl

Find group best team algorithm

In [6]:
group_first = dict()
max_winpoint = dict()
max_pointdiff = dict()
for selected_group in group_list:
  result = league_matches[(league_matches['group'] == selected_group)]
  group_tmp = list(set(result.home_team) | set(result.away_team))
  max_winpoint[selected_group] = 0
  max_pointdiff[selected_group] = 0
  #승점 먼저 비교 -> 같다면 득실 차 비교
  for groups in group_tmp:
    if (group_result1[selected_group][groups] > max_winpoint[selected_group]):
      max_winpoint[selected_group] = group_result1[selected_group][groups]
      max_pointdiff[selected_group] = group_result2[selected_group][groups]
      group_first[selected_group] = groups
    elif (group_result1[selected_group][groups] == max_winpoint[selected_group]):
      if group_result2[selected_group][groups] > max_pointdiff[selected_group]:
        max_pointdiff[selected_group] = group_result2[selected_group][groups]
        group_first[selected_group] = groups
first_teams = list(group_first.values())

Getting rid of best team from the list

In [7]:
import copy
#조 2위를 위 블록과 같은 방식으로 찾기 위해 이미 찾은 조 1위를 새로 복사된 조에서 제외
group_result1_2 = copy.deepcopy(group_result1)
group_result2_2 = copy.deepcopy(group_result2)
for selected_group in group_list:
  for firsts in first_teams:
    if firsts in group_result1[selected_group].keys():
      group_result1_2[selected_group].pop(firsts)
for selected_group in group_list:
  for firsts in first_teams:
    if firsts in group_result2[selected_group].keys():
      group_result2_2[selected_group].pop(firsts)

Find group best(2nd) team

In [8]:
#1위를 찾은 방식과 같은 방식으로 2위 탐색
group_second = dict()
max_winpoint = dict()
max_pointdiff = dict()
for selected_group in group_list:
  result = league_matches[(league_matches['group'] == selected_group)]
  group_tmp = list(set(result.home_team) | set(result.away_team))
  max_winpoint[selected_group] = 0
  max_pointdiff[selected_group] = 0
  group_tmp = list(set(group_tmp) - set(first_teams))
  for groups in group_tmp:
    if (group_result1_2[selected_group][groups] > max_winpoint[selected_group]):
      max_winpoint[selected_group] = group_result1_2[selected_group][groups]
      max_pointdiff[selected_group] = group_result2_2[selected_group][groups]
      group_second[selected_group] = groups
    elif (group_result1_2[selected_group][groups] == max_winpoint[selected_group]):
      if group_result2_2[selected_group][groups] > max_pointdiff[selected_group]:
        max_pointdiff[selected_group] = group_result2_2[selected_group][groups]
        group_second[selected_group] = groups
second_teams = list(group_second.values())

#조 1, 2위 출력 (A ~ G 순서)
print('1st rank in group: ', first_teams)
print('2nd rank in group: ', second_teams)

1st rank in group:  ['Netherlands', 'England', 'Argentina', 'France', 'Spain', 'Belgium', 'Brazil', 'Uruguay']
2nd rank in group:  ['Senegal', 'Wales', 'Poland', 'Denmark', 'Germany', 'Croatia', 'Switzerland', 'Portugal']


# Round 16 matchup

Empty dataframe with necessary columns

In [9]:
#16강의 결과를 예측하기 위해 기존의 학습모델의 틀에 맞춘 빈 dataframe 생성
round16 = pd.DataFrame(columns=['home_team', 'away_team', 'home_team_fifa_rank',
       'away_team_fifa_rank', 'home_team_score', 'away_team_score',
       'home_team_continent', 'away_team_continent', 'past_win', 'past_draw',
       'past_lose', 'past_win_rate', 'past_draw_rate', 'past_lose_rate',
       'home_isAfrica', 'home_isAsia', 'home_isEurope',
       'home_isNorthAmerica', 'home_isSouthAmerica', 'home_isOceania',
       'away_isAfrica', 'away_isAsia', 'away_isEurope', 'away_isNorthAmerica',
       'away_isSouthAmerica', 'away_isOceania'])


Data preprocessing

In [10]:
match_list16 = [[0, 1], [2, 3], [1, 0], [3, 2], [4, 5], [6, 7], [5, 4], [7, 6]] #리그 성적에 따른 월드컵 16강 진행 규칙에 따라 매치업 결정
for index in range(len(match_list16)):
  home_team = first_teams[match_list16[index][0]]
  away_team = second_teams[match_list16[index][1]]

  #16강 매치업에 맞춰서 data preprocessing
  round16.loc[index, :2] = [home_team, away_team] 
  round16.loc[index, ['home_team', 'home_team_fifa_rank', 
                  'home_team_continent', 'home_isAfrica', 'home_isAsia', 
                  'home_isEurope', 'home_isNorthAmerica', 'home_isSouthAmerica', 
                  'home_isOceania']] = league_matches[(league_matches['home_team'] == home_team)].loc[:, ['home_team', 'home_team_fifa_rank', 
                  'home_team_continent', 'home_isAfrica', 'home_isAsia', 
                  'home_isEurope', 'home_isNorthAmerica', 'home_isSouthAmerica', 
                  'home_isOceania']].iloc[0]
  round16.loc[index, ['away_team', 'away_team_fifa_rank', 
                  'away_team_continent', 'away_isAfrica', 'away_isAsia', 
                  'away_isEurope', 'away_isNorthAmerica', 'away_isSouthAmerica', 
                  'away_isOceania']] = league_matches[(league_matches['away_team'] == away_team)].loc[:, ['away_team', 'away_team_fifa_rank', 
                  'away_team_continent', 'away_isAfrica', 'away_isAsia', 
                  'away_isEurope', 'away_isNorthAmerica', 'away_isSouthAmerica', 
                  'away_isOceania']].iloc[0]

  #역대 전적을 가져오기 위해 matches dataset에서 조건절 사용
  match = matches[((matches['home_team'] == home_team) & (matches['away_team'] == away_team)) | ((matches['home_team'] == away_team) & (matches['away_team'] == home_team))].iloc[-1:]
  resultcolumns = [past_win, past_draw, past_lose, past_win_rate, past_draw_rate, past_lose_rate] = [0, 0, 0, 0.33, 0.33, 0.33] #역대 전적이 0승 0무 0패인 경우 승, 무, 패 확률을 0.33으로 같게 설정
  if list(match.index) == []:
    round16.loc[index, ['past_win', 'past_draw','past_lose', 'past_win_rate', 
                        'past_draw_rate', 'past_lose_rate']] = resultcolumns
  else:
    if home_team == match.iloc[0]['home_team']:
      #역대 전적 업데이트(과거 전적이기 떄문에 matches 내 마지막 매치업들은 적용이 안된 상태이기 때문)
      resultcolumns = match.loc[:, ['past_win', 'past_draw', 'past_lose', 'past_win_rate', 
                          'past_draw_rate', 'past_lose_rate']].iloc[0]
      if match.iloc[0]['home_team_result'] == 'Win':
        resultcolumns[past_win] += 1
      elif match.iloc[0]['home_team_result'] == 'Lose':
        resultcolumns[past_lose] += 1
      elif match.iloc[0]['home_team_result'] == 'Draw':
        resultcolumns[past_draw] += 1

      #역대 전적을 총합 1로 normalize (e.g. 4승 1무 0패 -> 0.8 : 0.2 : 0)
      if (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]) != 0:
        resultcolumns[3] = round(resultcolumns[0] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
        resultcolumns[5] = round(resultcolumns[2] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
        resultcolumns[4] = round(resultcolumns[1] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
      round16.loc[index, ['past_win', 'past_draw','past_lose', 'past_win_rate', 
                          'past_draw_rate', 'past_lose_rate']] = resultcolumns
    else:
      #역대 전적 업데이트(과거 전적이기 떄문에 matches 내 마지막 매치업들은 적용이 안된 상태이기 때문) -> 거꾸로 되어 있을 경우 뒤집기
      resultcolumns = list(match.loc[:, ['past_lose', 'past_draw', 'past_win', 'past_lose_rate', 
                          'past_draw_rate', 'past_win_rate']].iloc[0])
      if match.iloc[0]['home_team_result'] == 'Win':
        resultcolumns[past_lose] += 1
      elif match.iloc[0]['home_team_result'] == 'Lose':
        resultcolumns[past_win] += 1
      elif match.iloc[0]['home_team_result'] == 'Draw':
        resultcolumns[past_draw] += 1
      if (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]) != 0:
        resultcolumns[3] = round(resultcolumns[0] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
        resultcolumns[5] = round(resultcolumns[2] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
        resultcolumns[4] = round(resultcolumns[1] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
      round16.loc[index, ['past_win', 'past_draw','past_lose', 'past_win_rate', 
                          'past_draw_rate', 'past_lose_rate']] = resultcolumns

round16

Unnamed: 0,home_team,away_team,home_team_fifa_rank,away_team_fifa_rank,home_team_score,away_team_score,home_team_continent,away_team_continent,past_win,past_draw,past_lose,past_win_rate,past_draw_rate,past_lose_rate,home_isAfrica,home_isAsia,home_isEurope,home_isNorthAmerica,home_isSouthAmerica,home_isOceania,away_isAfrica,away_isAsia,away_isEurope,away_isNorthAmerica,away_isSouthAmerica,away_isOceania
0,Netherlands,Wales,8,19,,,Europe,Europe,7.0,0.0,0.0,1.0,0.0,0.0,0,0,1,0,0,0,0,0,1,0,0,0
1,Argentina,Denmark,3,10,,,South America,Europe,1.0,0.0,0.0,1.0,0.0,0.0,0,0,0,0,1,0,0,0,1,0,0,0
2,England,Senegal,5,18,,,Europe,Africa,0.0,0.0,0.0,0.33,0.33,0.33,0,0,1,0,0,0,1,0,0,0,0,0
3,France,Poland,4,26,,,Europe,Europe,2.0,3.0,0.0,0.4,0.6,0.0,0,0,1,0,0,0,0,0,1,0,0,0
4,Spain,Croatia,7,12,,,Europe,Europe,5.0,1.0,3.0,0.56,0.11,0.33,0,0,1,0,0,0,0,0,1,0,0,0
5,Brazil,Portugal,1,9,,,South America,Europe,2.0,2.0,2.0,0.33,0.33,0.33,0,0,0,0,1,0,0,0,1,0,0,0
6,Belgium,Germany,2,11,,,Europe,Europe,1.0,0.0,5.0,0.17,0.0,0.83,0,0,1,0,0,0,0,0,1,0,0,0
7,Uruguay,Switzerland,14,15,,,South America,Europe,1.0,0.0,0.0,1.0,0.0,0.0,0,0,0,0,1,0,0,0,1,0,0,0


Predicting results of round 16

In [11]:
X = round16.loc[:,['home_team_fifa_rank', 'away_team_fifa_rank',\
                          'past_win_rate', 'past_draw_rate', 'past_lose_rate',\
                          'home_isAfrica', 'home_isAsia', 'home_isEurope',\
                          'home_isNorthAmerica', 'home_isSouthAmerica', 'home_isOceania',\
                          'away_isAfrica', 'away_isAsia', 'away_isEurope',\
                          'away_isNorthAmerica', 'away_isSouthAmerica', 'away_isOceania']]#input
GoalFor_predict = model.predict(X)
round16['predict_GoalFor'] = 0
round16['predict_GoalAgainst'] = 0
for i in range(len(X.index)):
  #16강 득점 예측
  round16.loc[X.index[i],'predict_GoalFor'] = GoalFor_predict[i]
  # print(league_matches['away_team_fifa_rank'][i])
  #16강 실점 예측
  round16.loc[X.index[i],'predict_GoalAgainst'] = model.predict([[round16['away_team_fifa_rank'][i], 
                                                                       round16['home_team_fifa_rank'][i], 
                                                                       round16['past_lose_rate'][i],
                                                                       round16['past_draw_rate'][i],
                                                                       round16['past_win_rate'][i],
                                                                       round16['away_isAfrica'][i],
                                                                       round16['away_isAsia'][i],
                                                                       round16['away_isEurope'][i],
                                                                       round16['away_isNorthAmerica'][i],
                                                                       round16['away_isSouthAmerica'][i],
                                                                       round16['away_isOceania'][i],
                                                                       round16['home_isAfrica'][i],
                                                                       round16['home_isAsia'][i],
                                                                       round16['home_isEurope'][i],
                                                                       round16['home_isNorthAmerica'][i],
                                                                       round16['home_isSouthAmerica'][i],
                                                                       round16['home_isOceania'][i]]]
                                                                       )  


round16['predict_Result'] = 0 # initialize

#Predict Win/Lose by comparing predicted values
#16강부터는 무승부가 존재하지 않기 때문에 win과 lose만 결정
for i in round16.index:
    if round16.predict_GoalFor[i] >= round16.predict_GoalAgainst[i]:
        round16.predict_Result[i] = 'Win'
    else:
        round16.predict_Result[i] = 'Lose'

round16

Unnamed: 0,home_team,away_team,home_team_fifa_rank,away_team_fifa_rank,home_team_score,away_team_score,home_team_continent,away_team_continent,past_win,past_draw,past_lose,past_win_rate,past_draw_rate,past_lose_rate,home_isAfrica,home_isAsia,home_isEurope,home_isNorthAmerica,home_isSouthAmerica,home_isOceania,away_isAfrica,away_isAsia,away_isEurope,away_isNorthAmerica,away_isSouthAmerica,away_isOceania,predict_GoalFor,predict_GoalAgainst,predict_Result
0,Netherlands,Wales,8,19,,,Europe,Europe,7.0,0.0,0.0,1.0,0.0,0.0,0,0,1,0,0,0,0,0,1,0,0,0,1.526607,0.965341,Win
1,Argentina,Denmark,3,10,,,South America,Europe,1.0,0.0,0.0,1.0,0.0,0.0,0,0,0,0,1,0,0,0,1,0,0,0,1.654929,0.926666,Win
2,England,Senegal,5,18,,,Europe,Africa,0.0,0.0,0.0,0.33,0.33,0.33,0,0,1,0,0,0,1,0,0,0,0,0,1.622096,0.975477,Win
3,France,Poland,4,26,,,Europe,Europe,2.0,3.0,0.0,0.4,0.6,0.0,0,0,1,0,0,0,0,0,1,0,0,0,1.602924,0.850349,Win
4,Spain,Croatia,7,12,,,Europe,Europe,5.0,1.0,3.0,0.56,0.11,0.33,0,0,1,0,0,0,0,0,1,0,0,0,1.438457,1.191179,Win
5,Brazil,Portugal,1,9,,,South America,Europe,2.0,2.0,2.0,0.33,0.33,0.33,0,0,0,0,1,0,0,0,1,0,0,0,1.417857,1.054711,Win
6,Belgium,Germany,2,11,,,Europe,Europe,1.0,0.0,5.0,0.17,0.0,0.83,0,0,1,0,0,0,0,0,1,0,0,0,1.341857,1.201568,Win
7,Uruguay,Switzerland,14,15,,,South America,Europe,1.0,0.0,0.0,1.0,0.0,0.0,0,0,0,0,1,0,0,0,1,0,0,0,1.438219,1.014417,Win


Printing results

In [12]:
win_teams16 = list()
#결과 출력(승리팀 wins 형태)
for i in round16.index:
  if round16.predict_Result[i] == 'Win':
    win_teams16.append(round16.home_team[i])
  else:
    win_teams16.append(round16.away_team[i])
  print(round16.home_team[i], 'vs', round16.away_team[i], round16.predict_GoalFor[i], ':', round16.predict_GoalAgainst[i])
  if round16.predict_GoalFor[i] > round16.predict_GoalAgainst[i]:
    print(round16.home_team[i], ' wins')
  else:
    print(round16.away_team[i], ' wins')
  print()

Netherlands vs Wales 1.5266069407455234 : 0.9653406974177869
Netherlands  wins

Argentina vs Denmark 1.6549285046021396 : 0.9266659713014178
Argentina  wins

England vs Senegal 1.6220964893593646 : 0.9754772249955295
England  wins

France vs Poland 1.602924397754009 : 0.8503492508800718
France  wins

Spain vs Croatia 1.4384567214149229 : 1.1911785499489724
Spain  wins

Brazil vs Portugal 1.4178570845787135 : 1.0547111220364513
Brazil  wins

Belgium vs Germany 1.3418572750281328 : 1.201567732241756
Belgium  wins

Uruguay vs Switzerland 1.4382194460226856 : 1.0144171297799858
Uruguay  wins



In [13]:
#8강 진출국가 출력
win_teams16

['Netherlands',
 'Argentina',
 'England',
 'France',
 'Spain',
 'Brazil',
 'Belgium',
 'Uruguay']

# Quarter-Finals matchup

Empty dataframe with necessary columns

In [14]:
#8강의 결과를 예측하기 위해 기존의 학습모델의 틀에 맞춘 빈 dataframe 생성
round8 = pd.DataFrame(columns=['home_team', 'away_team', 'home_team_fifa_rank',
       'away_team_fifa_rank', 'home_team_score', 'away_team_score',
       'home_team_continent', 'away_team_continent', 'past_win', 'past_draw',
       'past_lose', 'past_win_rate', 'past_draw_rate', 'past_lose_rate',
       'home_isAfrica', 'home_isAsia', 'home_isEurope',
       'home_isNorthAmerica', 'home_isSouthAmerica', 'home_isOceania',
       'away_isAfrica', 'away_isAsia', 'away_isEurope', 'away_isNorthAmerica',
       'away_isSouthAmerica', 'away_isOceania'])
round8

Unnamed: 0,home_team,away_team,home_team_fifa_rank,away_team_fifa_rank,home_team_score,away_team_score,home_team_continent,away_team_continent,past_win,past_draw,past_lose,past_win_rate,past_draw_rate,past_lose_rate,home_isAfrica,home_isAsia,home_isEurope,home_isNorthAmerica,home_isSouthAmerica,home_isOceania,away_isAfrica,away_isAsia,away_isEurope,away_isNorthAmerica,away_isSouthAmerica,away_isOceania


Data preprocessing

In [15]:
match_list8 = [[0, 1], [4, 5], [2, 3], [6, 7]] #16강 경기 매치업 결과로 월드컵 8강 규칙을 따라 8강 매치업 결정

#동일하게 data preprocessing 진행
for index in range(len(match_list8)):
  home_team = win_teams16[match_list8[index][0]]
  away_team = win_teams16[match_list8[index][1]]
  round8.loc[index, :2] = [home_team, away_team]
  round8.loc[index, ['home_team', 'home_team_fifa_rank', 
                  'home_team_continent', 'home_isAfrica', 'home_isAsia', 
                  'home_isEurope', 'home_isNorthAmerica', 'home_isSouthAmerica', 
                  'home_isOceania']] = league_matches[(league_matches['home_team'] == home_team)].loc[:, ['home_team', 'home_team_fifa_rank', 
                  'home_team_continent', 'home_isAfrica', 'home_isAsia', 
                  'home_isEurope', 'home_isNorthAmerica', 'home_isSouthAmerica', 
                  'home_isOceania']].iloc[0]
  round8.loc[index, ['away_team', 'away_team_fifa_rank', 
                  'away_team_continent', 'away_isAfrica', 'away_isAsia', 
                  'away_isEurope', 'away_isNorthAmerica', 'away_isSouthAmerica', 
                  'away_isOceania']] = league_matches[(league_matches['away_team'] == away_team)].loc[:, ['away_team', 'away_team_fifa_rank', 
                  'away_team_continent', 'away_isAfrica', 'away_isAsia', 
                  'away_isEurope', 'away_isNorthAmerica', 'away_isSouthAmerica', 
                  'away_isOceania']].iloc[0]
  match = matches[((matches['home_team'] == home_team) & (matches['away_team'] == away_team)) | ((matches['home_team'] == away_team) & (matches['away_team'] == home_team))].iloc[-1:]
  resultcolumns = [past_win, past_draw, past_lose, past_win_rate, past_draw_rate, past_lose_rate] = [0, 0, 0, 0.33, 0.33, 0.33]
  if list(match.index) == []:
    round8.loc[index, ['past_win', 'past_draw','past_lose', 'past_win_rate', 
                        'past_draw_rate', 'past_lose_rate']] = resultcolumns
  else:
    if home_team == match.iloc[0]['home_team']:
      resultcolumns = match.loc[:, ['past_win', 'past_draw', 'past_lose', 'past_win_rate', 
                          'past_draw_rate', 'past_lose_rate']].iloc[0]
      if match.iloc[0]['home_team_result'] == 'Win':
        resultcolumns[past_win] += 1
      elif match.iloc[0]['home_team_result'] == 'Lose':
        resultcolumns[past_lose] += 1
      elif match.iloc[0]['home_team_result'] == 'Draw':
        resultcolumns[past_draw] += 1
      if (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]) != 0:
        resultcolumns[3] = round(resultcolumns[0] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
        resultcolumns[5] = round(resultcolumns[2] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
        resultcolumns[4] = round(resultcolumns[1] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
      round8.loc[index, ['past_win', 'past_draw','past_lose', 'past_win_rate', 
                          'past_draw_rate', 'past_lose_rate']] = resultcolumns
    else:
      resultcolumns = list(match.loc[:, ['past_lose', 'past_draw', 'past_win', 'past_lose_rate', 
                          'past_draw_rate', 'past_win_rate']].iloc[0])
      if match.iloc[0]['home_team_result'] == 'Win':
        resultcolumns[past_lose] += 1
      elif match.iloc[0]['home_team_result'] == 'Lose':
        resultcolumns[past_win] += 1
      elif match.iloc[0]['home_team_result'] == 'Draw':
        resultcolumns[past_draw] += 1
      if (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]) != 0:
        resultcolumns[3] = round(resultcolumns[0] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
        resultcolumns[5] = round(resultcolumns[2] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
        resultcolumns[4] = round(resultcolumns[1] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
      round8.loc[index, ['past_win', 'past_draw','past_lose', 'past_win_rate', 
                          'past_draw_rate', 'past_lose_rate']] = resultcolumns

round8

Unnamed: 0,home_team,away_team,home_team_fifa_rank,away_team_fifa_rank,home_team_score,away_team_score,home_team_continent,away_team_continent,past_win,past_draw,past_lose,past_win_rate,past_draw_rate,past_lose_rate,home_isAfrica,home_isAsia,home_isEurope,home_isNorthAmerica,home_isSouthAmerica,home_isOceania,away_isAfrica,away_isAsia,away_isEurope,away_isNorthAmerica,away_isSouthAmerica,away_isOceania
0,Netherlands,Argentina,8,3,,,Europe,South America,3.0,2.0,0.0,0.6,0.4,0.0,0,0,1,0,0,0,0,0,0,0,1,0
1,Spain,Brazil,7,1,,,Europe,South America,1.0,1.0,0.0,0.5,0.5,0.0,0,0,1,0,0,0,0,0,0,0,1,0
2,England,France,5,4,,,Europe,Europe,3.0,2.0,4.0,0.33,0.22,0.44,0,0,1,0,0,0,0,0,1,0,0,0
3,Belgium,Uruguay,2,14,,,Europe,South America,0.0,0.0,0.0,0.33,0.33,0.33,0,0,1,0,0,0,0,0,0,0,1,0


Predicting results of Quarter-Finals(round 8)

In [16]:
#동일하게 결과 예측
X = round8.loc[:,['home_team_fifa_rank', 'away_team_fifa_rank',\
                          'past_win_rate', 'past_draw_rate', 'past_lose_rate',\
                          'home_isAfrica', 'home_isAsia', 'home_isEurope',\
                          'home_isNorthAmerica', 'home_isSouthAmerica', 'home_isOceania',\
                          'away_isAfrica', 'away_isAsia', 'away_isEurope',\
                          'away_isNorthAmerica', 'away_isSouthAmerica', 'away_isOceania']]#input
GoalFor_predict = model.predict(X)
round8['predict_GoalFor'] = 0
round8['predict_GoalAgainst'] = 0
for i in range(len(X.index)):
  round8.loc[X.index[i],'predict_GoalFor'] = GoalFor_predict[i]
  # print(league_matches['away_team_fifa_rank'][i])
  round8.loc[X.index[i],'predict_GoalAgainst'] = model.predict([[round8['away_team_fifa_rank'][i], 
                                                                       round8['home_team_fifa_rank'][i], 
                                                                       round8['past_lose_rate'][i],
                                                                       round8['past_draw_rate'][i],
                                                                       round8['past_win_rate'][i],
                                                                       round8['away_isAfrica'][i],
                                                                       round8['away_isAsia'][i],
                                                                       round8['away_isEurope'][i],
                                                                       round8['away_isNorthAmerica'][i],
                                                                       round8['away_isSouthAmerica'][i],
                                                                       round8['away_isOceania'][i],
                                                                       round8['home_isAfrica'][i],
                                                                       round8['home_isAsia'][i],
                                                                       round8['home_isEurope'][i],
                                                                       round8['home_isNorthAmerica'][i],
                                                                       round8['home_isSouthAmerica'][i],
                                                                       round8['home_isOceania'][i]]]
                                                                       )  


round8['predict_Result'] = 0 # initialize

#Predict Win/Draw/Lose by comparing predicted values
for i in round8.index:
    if round8.predict_GoalFor[i] >= round8.predict_GoalAgainst[i]:
        round8.predict_Result[i] = 'Win'
    else:
        round8.predict_Result[i] = 'Lose'

round8

Unnamed: 0,home_team,away_team,home_team_fifa_rank,away_team_fifa_rank,home_team_score,away_team_score,home_team_continent,away_team_continent,past_win,past_draw,past_lose,past_win_rate,past_draw_rate,past_lose_rate,home_isAfrica,home_isAsia,home_isEurope,home_isNorthAmerica,home_isSouthAmerica,home_isOceania,away_isAfrica,away_isAsia,away_isEurope,away_isNorthAmerica,away_isSouthAmerica,away_isOceania,predict_GoalFor,predict_GoalAgainst,predict_Result
0,Netherlands,Argentina,8,3,,,Europe,South America,3.0,2.0,0.0,0.6,0.4,0.0,0,0,1,0,0,0,0,0,0,0,1,0,1.265433,1.165059,Win
1,Spain,Brazil,7,1,,,Europe,South America,1.0,1.0,0.0,0.5,0.5,0.0,0,0,1,0,0,0,0,0,0,0,1,0,1.169306,1.115804,Win
2,England,France,5,4,,,Europe,Europe,3.0,2.0,4.0,0.33,0.22,0.44,0,0,1,0,0,0,0,0,1,0,0,0,1.186431,1.434803,Lose
3,Belgium,Uruguay,2,14,,,Europe,South America,0.0,0.0,0.0,0.33,0.33,0.33,0,0,1,0,0,0,0,0,0,0,1,0,1.362667,0.949564,Win


Printing results

In [17]:
#동일하게 출력
win_teams8 = list()
for i in round8.index:
  if round8.predict_Result[i] == 'Win':
    win_teams8.append(round8.home_team[i])
  else:
    win_teams8.append(round8.away_team[i])
  print(round8.home_team[i], 'vs', round8.away_team[i], round8.predict_GoalFor[i], ':', round8.predict_GoalAgainst[i])
  if round8.predict_GoalFor[i] > round8.predict_GoalAgainst[i]:
    print(round8.home_team[i], ' wins')
  else:
    print(round8.away_team[i], ' wins')
  print()

Netherlands vs Argentina 1.2654332150401615 : 1.1650590504623324
Netherlands  wins

Spain vs Brazil 1.1693056512070825 : 1.1158044965077352
Spain  wins

England vs France 1.1864314400852436 : 1.4348030780299723
France  wins

Belgium vs Uruguay 1.36266666205006 : 0.9495636220904089
Belgium  wins



In [18]:
#4강 진출 국가 출력
win_teams8

['Netherlands', 'Spain', 'France', 'Belgium']

# Semi-Finals matchup

Empty dataframe with necessary columns

In [19]:
#4강의 결과를 예측하기 위해 기존의 학습모델의 틀에 맞춘 빈 dataframe 생성
round4 = pd.DataFrame(columns=['home_team', 'away_team', 'home_team_fifa_rank',
       'away_team_fifa_rank', 'home_team_score', 'away_team_score',
       'home_team_continent', 'away_team_continent', 'past_win', 'past_draw',
       'past_lose', 'past_win_rate', 'past_draw_rate', 'past_lose_rate',
       'home_isAfrica', 'home_isAsia', 'home_isEurope',
       'home_isNorthAmerica', 'home_isSouthAmerica', 'home_isOceania',
       'away_isAfrica', 'away_isAsia', 'away_isEurope', 'away_isNorthAmerica',
       'away_isSouthAmerica', 'away_isOceania'])

Data Preprocessing

In [20]:
match_list4 = [[0, 1], [2, 3]] #8강 경기 매치업 결과로 월드컵 4강 규칙을 따라 4강 매치업 결정

#동일하게 data preprocessing
for index in range(len(match_list4)):
  home_team = win_teams8[match_list4[index][0]]
  away_team = win_teams8[match_list4[index][1]]
  round4.loc[index, :2] = [home_team, away_team]
  round4.loc[index, ['home_team', 'home_team_fifa_rank', 
                  'home_team_continent', 'home_isAfrica', 'home_isAsia', 
                  'home_isEurope', 'home_isNorthAmerica', 'home_isSouthAmerica', 
                  'home_isOceania']] = league_matches[(league_matches['home_team'] == home_team)].loc[:, ['home_team', 'home_team_fifa_rank', 
                  'home_team_continent', 'home_isAfrica', 'home_isAsia', 
                  'home_isEurope', 'home_isNorthAmerica', 'home_isSouthAmerica', 
                  'home_isOceania']].iloc[0]
  round4.loc[index, ['away_team', 'away_team_fifa_rank', 
                  'away_team_continent', 'away_isAfrica', 'away_isAsia', 
                  'away_isEurope', 'away_isNorthAmerica', 'away_isSouthAmerica', 
                  'away_isOceania']] = league_matches[(league_matches['away_team'] == away_team)].loc[:, ['away_team', 'away_team_fifa_rank', 
                  'away_team_continent', 'away_isAfrica', 'away_isAsia', 
                  'away_isEurope', 'away_isNorthAmerica', 'away_isSouthAmerica', 
                  'away_isOceania']].iloc[0]
  match = matches[((matches['home_team'] == home_team) & (matches['away_team'] == away_team)) | ((matches['home_team'] == away_team) & (matches['away_team'] == home_team))].iloc[-1:]
  resultcolumns = [past_win, past_draw, past_lose, past_win_rate, past_draw_rate, past_lose_rate] = [0, 0, 0, 0.33, 0.33, 0.33]
  if list(match.index) == []:
    round4.loc[index, ['past_win', 'past_draw','past_lose', 'past_win_rate', 
                        'past_draw_rate', 'past_lose_rate']] = resultcolumns
  else:
    if home_team == match.iloc[0]['home_team']:
      resultcolumns = match.loc[:, ['past_win', 'past_draw', 'past_lose', 'past_win_rate', 
                          'past_draw_rate', 'past_lose_rate']].iloc[0]
      if match.iloc[0]['home_team_result'] == 'Win':
        resultcolumns[past_win] += 1
      elif match.iloc[0]['home_team_result'] == 'Lose':
        resultcolumns[past_lose] += 1
      elif match.iloc[0]['home_team_result'] == 'Draw':
        resultcolumns[past_draw] += 1
      if (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]) != 0:
        resultcolumns[3] = round(resultcolumns[0] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
        resultcolumns[5] = round(resultcolumns[2] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
        resultcolumns[4] = round(resultcolumns[1] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
      round4.loc[index, ['past_win', 'past_draw','past_lose', 'past_win_rate', 
                          'past_draw_rate', 'past_lose_rate']] = resultcolumns
    else:
      resultcolumns = list(match.loc[:, ['past_lose', 'past_draw', 'past_win', 'past_lose_rate', 
                          'past_draw_rate', 'past_win_rate']].iloc[0])
      if match.iloc[0]['home_team_result'] == 'Win':
        resultcolumns[past_lose] += 1
      elif match.iloc[0]['home_team_result'] == 'Lose':
        resultcolumns[past_win] += 1
      elif match.iloc[0]['home_team_result'] == 'Draw':
        resultcolumns[past_draw] += 1
      if (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]) != 0:
        resultcolumns[3] = round(resultcolumns[0] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
        resultcolumns[5] = round(resultcolumns[2] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
        resultcolumns[4] = round(resultcolumns[1] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
      round4.loc[index, ['past_win', 'past_draw','past_lose', 'past_win_rate', 
                          'past_draw_rate', 'past_lose_rate']] = resultcolumns

round4

Unnamed: 0,home_team,away_team,home_team_fifa_rank,away_team_fifa_rank,home_team_score,away_team_score,home_team_continent,away_team_continent,past_win,past_draw,past_lose,past_win_rate,past_draw_rate,past_lose_rate,home_isAfrica,home_isAsia,home_isEurope,home_isNorthAmerica,home_isSouthAmerica,home_isOceania,away_isAfrica,away_isAsia,away_isEurope,away_isNorthAmerica,away_isSouthAmerica,away_isOceania
0,Netherlands,Spain,8,7,,,Europe,Europe,5.0,0.0,1.0,0.83,0.0,0.17,0,0,1,0,0,0,0,0,1,0,0,0
1,France,Belgium,4,2,,,Europe,Europe,5.0,2.0,2.0,0.56,0.22,0.22,0,0,1,0,0,0,0,0,1,0,0,0


Predicting results of Semi_Finals(round4)

In [21]:
#동일하게 결과 예측
X = round4.loc[:,['home_team_fifa_rank', 'away_team_fifa_rank',\
                          'past_win_rate', 'past_draw_rate', 'past_lose_rate',\
                          'home_isAfrica', 'home_isAsia', 'home_isEurope',\
                          'home_isNorthAmerica', 'home_isSouthAmerica', 'home_isOceania',\
                          'away_isAfrica', 'away_isAsia', 'away_isEurope',\
                          'away_isNorthAmerica', 'away_isSouthAmerica', 'away_isOceania']]#input
GoalFor_predict = model.predict(X)
round4['predict_GoalFor'] = 0
round4['predict_GoalAgainst'] = 0
for i in range(len(X.index)):
  round4.loc[X.index[i],'predict_GoalFor'] = GoalFor_predict[i]
  # print(league_matches['away_team_fifa_rank'][i])
  round4.loc[X.index[i],'predict_GoalAgainst'] = model.predict([[round4['away_team_fifa_rank'][i], 
                                                                       round4['home_team_fifa_rank'][i], 
                                                                       round4['past_lose_rate'][i],
                                                                       round4['past_draw_rate'][i],
                                                                       round4['past_win_rate'][i],
                                                                       round4['away_isAfrica'][i],
                                                                       round4['away_isAsia'][i],
                                                                       round4['away_isEurope'][i],
                                                                       round4['away_isNorthAmerica'][i],
                                                                       round4['away_isSouthAmerica'][i],
                                                                       round4['away_isOceania'][i],
                                                                       round4['home_isAfrica'][i],
                                                                       round4['home_isAsia'][i],
                                                                       round4['home_isEurope'][i],
                                                                       round4['home_isNorthAmerica'][i],
                                                                       round4['home_isSouthAmerica'][i],
                                                                       round4['home_isOceania'][i]]]
                                                                       )  


round4['predict_Result'] = 0 # initialize

#Predict Win/Draw/Lose by comparing predicted values
for i in round4.index:
    if round4.predict_GoalFor[i] >= round4.predict_GoalAgainst[i]:
        round4.predict_Result[i] = 'Win'
    else:
        round4.predict_Result[i] = 'Lose'

round4

Unnamed: 0,home_team,away_team,home_team_fifa_rank,away_team_fifa_rank,home_team_score,away_team_score,home_team_continent,away_team_continent,past_win,past_draw,past_lose,past_win_rate,past_draw_rate,past_lose_rate,home_isAfrica,home_isAsia,home_isEurope,home_isNorthAmerica,home_isSouthAmerica,home_isOceania,away_isAfrica,away_isAsia,away_isEurope,away_isNorthAmerica,away_isSouthAmerica,away_isOceania,predict_GoalFor,predict_GoalAgainst,predict_Result
0,Netherlands,Spain,8,7,,,Europe,Europe,5.0,0.0,1.0,0.83,0.0,0.17,0,0,1,0,0,0,0,0,1,0,0,0,1.350277,1.210637,Win
1,France,Belgium,4,2,,,Europe,Europe,5.0,2.0,2.0,0.56,0.22,0.22,0,0,1,0,0,0,0,0,1,0,0,0,1.33892,1.253242,Win


Printing results

In [22]:
#동일하게 결과 출력
win_teams4 = list()
for i in round4.index:
  if round4.predict_Result[i] == 'Win':
    win_teams4.append(round4.home_team[i])
  else:
    win_teams4.append(round4.away_team[i])
  print(round4.home_team[i], 'vs', round4.away_team[i], round4.predict_GoalFor[i], ':', round4.predict_GoalAgainst[i])
  if round4.predict_GoalFor[i] > round4.predict_GoalAgainst[i]:
    print(round4.home_team[i], ' wins')
  else:
    print(round4.away_team[i], ' wins')
  print()

Netherlands vs Spain 1.3502766794138097 : 1.2106369822315628
Netherlands  wins

France vs Belgium 1.3389200900262495 : 1.2532421951639208
France  wins



In [23]:
#결승 진출 국가 출력
win_teams4

['Netherlands', 'France']

# Final matchup

Empty dataframe with necessary columns

In [24]:
#결승 결과를 예측하기 위해 기존의 학습모델의 틀에 맞춘 빈 dataframe 생성
round2 = pd.DataFrame(columns=['home_team', 'away_team', 'home_team_fifa_rank',
       'away_team_fifa_rank', 'home_team_score', 'away_team_score',
       'home_team_continent', 'away_team_continent', 'past_win', 'past_draw',
       'past_lose', 'past_win_rate', 'past_draw_rate', 'past_lose_rate',
       'home_isAfrica', 'home_isAsia', 'home_isEurope',
       'home_isNorthAmerica', 'home_isSouthAmerica', 'home_isOceania',
       'away_isAfrica', 'away_isAsia', 'away_isEurope', 'away_isNorthAmerica',
       'away_isSouthAmerica', 'away_isOceania'])

Data Preprocessing

In [25]:
match_list2 = [[0, 1]] #4강 결과로 결승 매치업 결정

#동일하게 data preprocessing
for index in range(len(match_list2)):
  home_team = win_teams4[match_list4[index][0]]
  away_team = win_teams4[match_list4[index][1]]
  round2.loc[index, :2] = [home_team, away_team]
  round2.loc[index, ['home_team', 'home_team_fifa_rank', 
                  'home_team_continent', 'home_isAfrica', 'home_isAsia', 
                  'home_isEurope', 'home_isNorthAmerica', 'home_isSouthAmerica', 
                  'home_isOceania']] = league_matches[(league_matches['home_team'] == home_team)].loc[:, ['home_team', 'home_team_fifa_rank', 
                  'home_team_continent', 'home_isAfrica', 'home_isAsia', 
                  'home_isEurope', 'home_isNorthAmerica', 'home_isSouthAmerica', 
                  'home_isOceania']].iloc[0]
  round2.loc[index, ['away_team', 'away_team_fifa_rank', 
                  'away_team_continent', 'away_isAfrica', 'away_isAsia', 
                  'away_isEurope', 'away_isNorthAmerica', 'away_isSouthAmerica', 
                  'away_isOceania']] = league_matches[(league_matches['away_team'] == away_team)].loc[:, ['away_team', 'away_team_fifa_rank', 
                  'away_team_continent', 'away_isAfrica', 'away_isAsia', 
                  'away_isEurope', 'away_isNorthAmerica', 'away_isSouthAmerica', 
                  'away_isOceania']].iloc[0]
  match = matches[((matches['home_team'] == home_team) & (matches['away_team'] == away_team)) | ((matches['home_team'] == away_team) & (matches['away_team'] == home_team))].iloc[-1:]
  resultcolumns = [past_win, past_draw, past_lose, past_win_rate, past_draw_rate, past_lose_rate] = [0, 0, 0, 0.33, 0.33, 0.33]
  if list(match.index) == []:
    round2.loc[index, ['past_win', 'past_draw','past_lose', 'past_win_rate', 
                        'past_draw_rate', 'past_lose_rate']] = resultcolumns
  else:
    if home_team == match.iloc[0]['home_team']:
      resultcolumns = match.loc[:, ['past_win', 'past_draw', 'past_lose', 'past_win_rate', 
                          'past_draw_rate', 'past_lose_rate']].iloc[0]
      if match.iloc[0]['home_team_result'] == 'Win':
        resultcolumns[past_win] += 1
      elif match.iloc[0]['home_team_result'] == 'Lose':
        resultcolumns[past_lose] += 1
      elif match.iloc[0]['home_team_result'] == 'Draw':
        resultcolumns[past_draw] += 1
      if (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]) != 0:
        resultcolumns[3] = round(resultcolumns[0] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
        resultcolumns[5] = round(resultcolumns[2] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
        resultcolumns[4] = round(resultcolumns[1] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
      round2.loc[index, ['past_win', 'past_draw','past_lose', 'past_win_rate', 
                          'past_draw_rate', 'past_lose_rate']] = resultcolumns
    else:
      resultcolumns = list(match.loc[:, ['past_lose', 'past_draw', 'past_win', 'past_lose_rate', 
                          'past_draw_rate', 'past_win_rate']].iloc[0])
      if match.iloc[0]['home_team_result'] == 'Win':
        resultcolumns[past_lose] += 1
      elif match.iloc[0]['home_team_result'] == 'Lose':
        resultcolumns[past_win] += 1
      elif match.iloc[0]['home_team_result'] == 'Draw':
        resultcolumns[past_draw] += 1
      if (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]) != 0:
        resultcolumns[3] = round(resultcolumns[0] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
        resultcolumns[5] = round(resultcolumns[2] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
        resultcolumns[4] = round(resultcolumns[1] / (resultcolumns[0] + resultcolumns[2] + resultcolumns[1]), 2)
      round2.loc[index, ['past_win', 'past_draw','past_lose', 'past_win_rate', 
                          'past_draw_rate', 'past_lose_rate']] = resultcolumns

round2

Unnamed: 0,home_team,away_team,home_team_fifa_rank,away_team_fifa_rank,home_team_score,away_team_score,home_team_continent,away_team_continent,past_win,past_draw,past_lose,past_win_rate,past_draw_rate,past_lose_rate,home_isAfrica,home_isAsia,home_isEurope,home_isNorthAmerica,home_isSouthAmerica,home_isOceania,away_isAfrica,away_isAsia,away_isEurope,away_isNorthAmerica,away_isSouthAmerica,away_isOceania
0,Netherlands,France,8,4,,,Europe,Europe,3.0,1.0,8.0,0.25,0.08,0.67,0,0,1,0,0,0,0,0,1,0,0,0


Predicting results of Final(round2)

In [26]:
#동일하게 결과 예측(우승국 예측)
X = round2.loc[:,['home_team_fifa_rank', 'away_team_fifa_rank',\
                          'past_win_rate', 'past_draw_rate', 'past_lose_rate',\
                          'home_isAfrica', 'home_isAsia', 'home_isEurope',\
                          'home_isNorthAmerica', 'home_isSouthAmerica', 'home_isOceania',\
                          'away_isAfrica', 'away_isAsia', 'away_isEurope',\
                          'away_isNorthAmerica', 'away_isSouthAmerica', 'away_isOceania']]#input
GoalFor_predict = model.predict(X)
round2['predict_GoalFor'] = 0
round2['predict_GoalAgainst'] = 0
for i in range(len(X.index)):
  round2.loc[X.index[i],'predict_GoalFor'] = GoalFor_predict[i]
  # print(league_matches['away_team_fifa_rank'][i])
  round2.loc[X.index[i],'predict_GoalAgainst'] = model.predict([[round2['away_team_fifa_rank'][i], 
                                                                       round2['home_team_fifa_rank'][i], 
                                                                       round2['past_lose_rate'][i],
                                                                       round2['past_draw_rate'][i],
                                                                       round2['past_win_rate'][i],
                                                                       round2['away_isAfrica'][i],
                                                                       round2['away_isAsia'][i],
                                                                       round2['away_isEurope'][i],
                                                                       round2['away_isNorthAmerica'][i],
                                                                       round2['away_isSouthAmerica'][i],
                                                                       round2['away_isOceania'][i],
                                                                       round2['home_isAfrica'][i],
                                                                       round2['home_isAsia'][i],
                                                                       round2['home_isEurope'][i],
                                                                       round2['home_isNorthAmerica'][i],
                                                                       round2['home_isSouthAmerica'][i],
                                                                       round2['home_isOceania'][i]]]
                                                                       )  


round2['predict_Result'] = 0 # initialize

#Predict Win/Draw/Lose by comparing predicted values
for i in round2.index:
    if round2.predict_GoalFor[i] >= round2.predict_GoalAgainst[i]:
        round2.predict_Result[i] = 'Win'
    else:
        round2.predict_Result[i] = 'Lose'

round2

Unnamed: 0,home_team,away_team,home_team_fifa_rank,away_team_fifa_rank,home_team_score,away_team_score,home_team_continent,away_team_continent,past_win,past_draw,past_lose,past_win_rate,past_draw_rate,past_lose_rate,home_isAfrica,home_isAsia,home_isEurope,home_isNorthAmerica,home_isSouthAmerica,home_isOceania,away_isAfrica,away_isAsia,away_isEurope,away_isNorthAmerica,away_isSouthAmerica,away_isOceania,predict_GoalFor,predict_GoalAgainst,predict_Result
0,Netherlands,France,8,4,,,Europe,Europe,3.0,1.0,8.0,0.25,0.08,0.67,0,0,1,0,0,0,0,0,1,0,0,0,1.16328,1.454049,Lose


Printing result

In [27]:
#동일하게 결과 출력
final_winner = list()
for i in round2.index:
  if round2.predict_Result[i] == 'Win':
    final_winner.append(round2.home_team[i])
  else:
    final_winner.append(round2.away_team[i])
  print(round2.home_team[i], 'vs', round2.away_team[i], round2.predict_GoalFor[i], ':', round2.predict_GoalAgainst[i])
  if round2.predict_GoalFor[i] > round2.predict_GoalAgainst[i]:
    print(round2.home_team[i], ' wins')
  else:
    print(round2.away_team[i], ' wins')
  print()

Netherlands vs France 1.1632795009811616 : 1.4540490400897852
France  wins



In [28]:
#이번 카타르 월드컵의 우승국 출력 (프랑스 우승)
print('The winner of 2022 Qatar Worldcup is', final_winner[0], '!')

The winner of 2022 Qatar Worldcup is France !
