# 필요한 라이브러리 import

In [1]:
import csv
import pandas as pd
import os
import re
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from collections import defaultdict
import warnings
import pickle
# Render all matplotlib text with the 'Malgun Gothic' font at size 10.
# Presumably chosen so the Korean column names display correctly in plots — confirm.
plt.rc('font',family='Malgun Gothic')
plt.rc('font',size=10)
from sklearn.metrics import mean_absolute_error
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

from tensorflow.keras.models import load_model
import tensorflow as tf

# Silence every warning for the rest of the session. No category is given, so
# this also hides deprecation and chained-assignment warnings from libraries.
warnings.filterwarnings(action='ignore')

In [2]:
# Restore the pickled feature transformer used for defenders.
# NOTE: pickle.load can execute arbitrary code from the file; only load a
# pickle that was produced by a trusted training run.
with open('./models/defender_qt.pickle','rb') as qt_file:
    defender_qt = pickle.load(qt_file)

# Restore the saved Keras network that scores defenders.
defender_model = load_model('./models/defender_nn.h5')

# Read the 2021-2022 Premier League statistics table and preview its first row.
premier_df = pd.read_csv(
    './datas/NEW_FM_DATA_MERGED/2021-2022/2021-2022-PremierLeague.csv'
)
premier_df.head(1)

Unnamed: 0,이름,시즌,포지션,구단,리그,골,도움,임대 기간,승,무,...,공중 A/90,태클 성공률,경기당 패스 시도/90,경기당 패스 성공/90,K Ps/90,태클 성공/90,드리블 성공/90,헤더 성공/90,Cr A/90,평균획득승점/경기
0,Teemu Pukki,2021-2022,공격수,노리치,잉글랜드 프리미어 리그,7,2,0,7,11,...,7.28,100,30.64,26.0,0.8,0.88,0.98,3.39,0.58,0.94


In [3]:
# Replace the abbreviated stat headers with descriptive Korean column names.
column_aliases = {
    'Cr C/A': '크로스 성공률',
    'FA': '파울 획득',
    'Int/90': '인터셉트/90',
    '공중 A/90': '공중볼시도/90',
    'K Ps/90': '키패스/90',
    'Cr A/90': '크로스 시도/90',
}
# Columns that are forced to 64-bit integers.
integer_columns = ['골', '도움', '파울 획득', '태클 성공률']

premier_df = premier_df.rename(columns=column_aliases)
premier_df = premier_df.astype({column: np.int64 for column in integer_columns})
premier_df.head(1)

Unnamed: 0,이름,시즌,포지션,구단,리그,골,도움,임대 기간,승,무,...,공중볼시도/90,태클 성공률,경기당 패스 시도/90,경기당 패스 성공/90,키패스/90,태클 성공/90,드리블 성공/90,헤더 성공/90,크로스 시도/90,평균획득승점/경기
0,Teemu Pukki,2021-2022,공격수,노리치,잉글랜드 프리미어 리그,7,2,0,7,11,...,7.28,100,30.64,26.0,0.8,0.88,0.98,3.39,0.58,0.94


In [4]:
# Keep only the rows whose position is '수비수' (defender).
# .copy() gives the notebook an independent frame: later cells add columns to
# defender_df, and writing into a filtered selection of premier_df would be
# chained assignment (SettingWithCopyWarning, which this notebook silences).
defender_df = premier_df[premier_df['포지션']=='수비수'].copy()

In [5]:
# Numeric columns are the ones stored as int64 or float64.
num_col = [
    column
    for column, dtype in zip(defender_df.columns, defender_df.dtypes)
    if dtype in ('int64', 'float64')
]

# Prediction target (average points earned per game).
dependent_value = '평균획득승점/경기'

# Candidate model inputs: every numeric column except the target, the playing
# time column and the win/draw/loss and team goal columns.
independent_value = set(num_col).difference(
    ['평균획득승점/경기', '출장시간', '승', '무', '패', '경기당 팀 득점/90', '경기당 팀 실점/90']
)

In [6]:
# Header accuracy (HA): headers won per 90 divided by aerial attempts per 90.
headers_won = defender_df['헤더 성공/90']
aerial_attempts = defender_df['공중볼시도/90']
defender_df['HA'] = headers_won.div(aerial_attempts)

# HA takes the place of the two raw columns it is derived from.
independent_value = (independent_value | {'HA'}) - {'헤더 성공/90', '공중볼시도/90'}

# Show the five rows with the highest HA.
defender_df[['이름','HA']].sort_values(by='HA',ascending=False).head()

Unnamed: 0,이름,HA
87,Rodri,1.40241
89,Fernandinho,1.386819
277,Marc Guehi,1.201472
104,Scott McTominay,1.16094
54,Joël Matip,1.144772


In [7]:
# "WPA" (labelled Weighted Pass Accuracy): completed passes per 90 plus
# attempted passes per 90.
# NOTE(review): this is a sum, not a ratio, so it reflects passing volume rather
# than accuracy — confirm the formula is what was intended.
completed_passes = defender_df['경기당 패스 성공/90']
attempted_passes = defender_df['경기당 패스 시도/90']
defender_df['WPA'] = completed_passes.add(attempted_passes)

# WPA takes the place of the two raw passing columns.
independent_value = (independent_value | {'WPA'}) - {'경기당 패스 시도/90', '경기당 패스 성공/90'}

# Show the five rows with the highest WPA.
defender_df[['이름','WPA']].sort_values(by='WPA',ascending=False).head()

Unnamed: 0,이름,WPA
287,William Carvalho,124.76
87,Rodri,124.45
275,Luka Milivojević,121.94
19,Diego Carlos,121.3
44,Jonny Evans,121.07


In [8]:
# Tackles attempted per 90: tackles won per 90 divided by the tackle success rate.
# NOTE(review): the preview row shows 태클 성공률 as a whole number (100), which
# looks like a percentage; if so this quotient is 100x smaller than a real
# attempt count. Confirm how the model was trained before changing the formula.
defender_df['태클 시도/90'] = defender_df['태클 성공/90'].div(defender_df['태클 성공률'])

In [9]:
# Composite tackle feature: tackles won + tackles attempted - tackle success rate.
won_plus_attempted = defender_df['태클 성공/90'].add(defender_df['태클 시도/90'])
defender_df['태클'] = won_plus_attempted.sub(defender_df['태클 성공률'])

# Correlation between the composite feature and average points per game.
defender_df[['태클','평균획득승점/경기']].corr()

Unnamed: 0,태클,평균획득승점/경기
태클,1.0,0.023498
평균획득승점/경기,0.023498,1.0


In [10]:
# Swap the raw tackle columns for the composite '태클' feature. '태클 시도/90' is
# excluded together with the other raw columns, so only '태클' joins the set.
independent_value = (independent_value | {'태클'}) - {'태클 성공률', '태클 성공/90', '태클 시도/90'}

In [11]:
# Remove the features that are not used as model inputs, then display the result.
excluded_features = {'크로스 시도/90', '인터셉트/90', 'WPA', '달린 거리/90분', '반칙 수/90'}
independent_value = independent_value.difference(excluded_features)
independent_value

{'HA',
 '경기당 슈팅/90',
 '경기당 유효 슈팅/90',
 '골',
 '기회 창출/90',
 '도움',
 '드리블 성공/90',
 '크로스 성공률',
 '키패스/90',
 '태클',
 '파울 획득'}

In [12]:
# Exclude the distance-covered column from the candidate features. The In [11]
# cell removes the same name, so on a top-to-bottom run this changes nothing.
independent_value = independent_value.difference({'달린 거리/90분'})

In [13]:
# Display the current candidate feature set.
independent_value

{'HA',
 '경기당 슈팅/90',
 '경기당 유효 슈팅/90',
 '골',
 '기회 창출/90',
 '도움',
 '드리블 성공/90',
 '크로스 성공률',
 '키패스/90',
 '태클',
 '파울 획득'}

In [14]:
# Scale the candidate features with the pre-fitted transformer.
#
# independent_value is a set up to this point. String sets have no stable
# iteration order across interpreter sessions and pandas >= 2.0 rejects a set
# as an indexer, so the names are frozen into a list here. The transformer maps
# columns by position, so the list follows the order recorded at fit time
# whenever the transformer exposes it and it names the same features.
fitted_names = getattr(defender_qt, 'feature_names_in_', None)
if fitted_names is not None and {str(name) for name in fitted_names} == set(independent_value):
    independent_value = [str(name) for name in fitted_names]
else:
    # NOTE(review): no usable fit-time order is available; sorted() is only a
    # reproducible stand-in — confirm it matches the order used in training.
    independent_value = sorted(independent_value)

# Assign whole columns (instead of .loc[:, cols]) so integer columns such as
# '골' can take the float output without an in-place dtype conflict.
defender_df[independent_value] = defender_qt.transform(defender_df[independent_value])

In [15]:
# Renumber the rows from 0 and discard the index labels carried over from the
# filtered source frame (drop=True keeps the old index out of the columns).
defender_df = defender_df.reset_index(drop=True)

In [16]:
# Columns passed to the network, in this order. Ten of the eleven candidate
# features are listed; '경기당 유효 슈팅/90' is not among them.
# NOTE(review): the order presumably has to match the input layout the model
# was trained with — confirm before reordering.
good_features = ('드리블 성공/90','골','키패스/90','파울 획득','HA','도움','경기당 슈팅/90','태클','크로스 성공률','기회 창출/90')

In [17]:
# Feed the selected feature columns to the network and store its output in the
# '예측PPG' (predicted PPG) column.
model_inputs = tf.constant(defender_df.loc[:,good_features].values)
defender_df['예측PPG'] = defender_model.predict(model_inputs)

In [18]:
# Show the five defenders with the highest predicted PPG.
defender_df.sort_values(by='예측PPG',ascending=False).head(5)

Unnamed: 0,이름,시즌,포지션,구단,리그,골,도움,임대 기간,승,무,...,태클 성공/90,드리블 성공/90,헤더 성공/90,크로스 시도/90,평균획득승점/경기,HA,WPA,태클 시도/90,태클,예측PPG
42,Takehiro Tomiyasu,2021-2022,수비수,아스날,잉글랜드 프리미어 리그,1.0,0.9499,0,13,5,...,2.51,1.0,7.83,8.06,1.69,0.016241,80.98,0.031772,0.0,1.506004
58,Jonny,2021-2022,수비수,울브스,잉글랜드 프리미어 리그,1.0,1.0,0,9,5,...,2.6,0.915832,5.05,8.83,1.33,0.003395,85.56,0.030233,0.0,1.48613
68,Aaron Cresswell,2021-2022,수비수,웨스트 햄,잉글랜드 프리미어 리그,1.0,1.0,0,16,12,...,2.45,0.948898,4.06,8.12,1.5,0.003089,87.81,0.029878,0.0,1.470238
62,Vladimír Coufal,2021-2022,수비수,웨스트 햄,잉글랜드 프리미어 리그,1.0,0.999885,0,16,13,...,3.06,0.952906,6.12,8.76,1.39,0.003832,75.57,0.036429,0.0,1.467891
79,Japhet Tanganga,2021-2022,수비수,토트넘,잉글랜드 프리미어 리그,1.0,0.999885,0,18,8,...,1.49,0.905812,8.05,3.26,1.88,0.003675,93.58,0.018625,0.0,1.459505


In [19]:
# Lowest predicted PPG among the defenders, taken over float64 values.
predicted_ppg = defender_df['예측PPG'].to_numpy(dtype=float)
defender_min = np.min(predicted_ppg)

In [20]:
# WAR: predicted PPG shifted so that the lowest prediction maps to exactly 1.
# The subtraction is done in float64, matching per-element Python-float math.
ppg_as_float64 = defender_df['예측PPG'].astype('float64')
defender_df['WAR'] = ppg_as_float64 - defender_min + 1

In [21]:
# Show up to 60 defenders ordered by WAR, highest first.
defender_df.sort_values(by='WAR',ascending=False).head(60)

Unnamed: 0,이름,시즌,포지션,구단,리그,골,도움,임대 기간,승,무,...,드리블 성공/90,헤더 성공/90,크로스 시도/90,평균획득승점/경기,HA,WPA,태클 시도/90,태클,예측PPG,WAR
42,Takehiro Tomiyasu,2021-2022,수비수,아스날,잉글랜드 프리미어 리그,1.0,0.9499,0,13,5,...,1.0,7.83,8.06,1.69,0.016241,80.98,0.031772,0.0,1.506004,1.620304
58,Jonny,2021-2022,수비수,울브스,잉글랜드 프리미어 리그,1.0,1.0,0,9,5,...,0.915832,5.05,8.83,1.33,0.003395,85.56,0.030233,0.0,1.48613,1.60043
68,Aaron Cresswell,2021-2022,수비수,웨스트 햄,잉글랜드 프리미어 리그,1.0,1.0,0,16,12,...,0.948898,4.06,8.12,1.5,0.003089,87.81,0.029878,0.0,1.470238,1.584538
62,Vladimír Coufal,2021-2022,수비수,웨스트 햄,잉글랜드 프리미어 리그,1.0,0.999885,0,16,13,...,0.952906,6.12,8.76,1.39,0.003832,75.57,0.036429,0.0,1.467891,1.582191
79,Japhet Tanganga,2021-2022,수비수,토트넘,잉글랜드 프리미어 리그,1.0,0.999885,0,18,8,...,0.905812,8.05,3.26,1.88,0.003675,93.58,0.018625,0.0,1.459505,1.573805
27,James Tarkowski,2021-2022,수비수,번리,잉글랜드 프리미어 리그,1.0,0.999885,0,13,14,...,0.583166,8.65,0.05,1.32,0.003525,111.45,0.008846,0.0,1.445621,1.559922
80,Davinson Sánchez,2021-2022,수비수,토트넘,잉글랜드 프리미어 리그,1.0,0.9499,0,17,9,...,0.713427,10.72,0.09,2.0,0.016097,116.3,0.01193,0.0,1.444298,1.558598
50,Mason Holgate,2021-2022,수비수,에버튼,잉글랜드 프리미어 리그,1.0,0.999885,0,15,13,...,0.703407,7.2,2.0,1.71,0.003517,103.47,0.015,0.0,1.444067,1.558368
51,Calum Chambers,2021-2022,수비수,에버튼,잉글랜드 프리미어 리그,1.0,0.9499,0,4,6,...,0.952906,7.18,7.06,1.2,0.016194,78.26,0.032371,0.0,1.436472,1.550773
48,Ben Godfrey,2021-2022,수비수,에버튼,잉글랜드 프리미어 리그,1.0,0.999885,0,8,7,...,0.787575,8.8,1.25,1.63,0.003729,106.53,0.012738,0.0,1.431152,1.545452


In [23]:
# Undo the scaling so the feature columns are back on their pre-transform scale.
# list() keeps the iteration order of independent_value while giving pandas an
# indexer it accepts (pandas >= 2.0 raises when a set is used as an indexer).
feature_columns = list(independent_value)
defender_df[feature_columns] = defender_qt.inverse_transform(defender_df[feature_columns])

In [24]:
# Show name, season and WAR for the ten defenders with the highest WAR.
defender_df[['이름','시즌','WAR']].sort_values(by='WAR',ascending=False).head(10)

Unnamed: 0,이름,시즌,WAR
42,Takehiro Tomiyasu,2021-2022,1.620304
58,Jonny,2021-2022,1.60043
68,Aaron Cresswell,2021-2022,1.584538
62,Vladimír Coufal,2021-2022,1.582191
79,Japhet Tanganga,2021-2022,1.573805
27,James Tarkowski,2021-2022,1.559922
80,Davinson Sánchez,2021-2022,1.558598
50,Mason Holgate,2021-2022,1.558368
51,Calum Chambers,2021-2022,1.550773
48,Ben Godfrey,2021-2022,1.545452
