# 선수 개인별 리그 / 팀 추천 시스템 구현

#### import packages

In [2]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
%matplotlib inline

import numpy as np
from sklearn.decomposition import PCA
from sklearn import linear_model, decomposition
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report

## 0) 데이터전처리

In [19]:
# Load merged_player.csv from the current working directory.
merged_player = pd.read_csv(
    filepath_or_buffer="./merged_player.csv",
    encoding="utf-8",
)

In [20]:
# Columns removed from the loaded table before any further processing.
drop_column_list = [
    'Unnamed: 0',
    'Unnamed: 0.1',
    'full_time',
    'half_time',
    'player_number',
    'motm',
    'flag',
    'mins',
    'ps_y',
    'rating',
]
merged_player = merged_player.drop(columns=drop_column_list)

In [21]:
# Build one DataFrame per position group (forward / midfield / defense)
# and write each of them to its own CSV file.

# Vectorised strip; unlike apply(lambda x: x.strip()) it does not raise on
# missing values.
merged_player["position"] = merged_player["position"].str.strip()

# Bucket every distinct position label by its first letter:
# "F" and "A" labels go to the forward group, "M" to midfield, "D" to defense.
# Labels starting with any other letter are left out of all three groups.
forward_pos = []
mid_pos = []
defense_pos = []

for position in merged_player["position"].dropna().unique():
    if position.startswith(("F", "A")):
        forward_pos.append(position)
    elif position.startswith("M"):
        mid_pos.append(position)
    elif position.startswith("D"):
        defense_pos.append(position)


def _rows_for_positions(players, positions):
    """Return the rows of ``players`` whose position is one of ``positions``.

    Rows are grouped label by label, in the order the labels appear in
    ``positions``; original index labels are kept.

    A single ``pd.concat`` over the per-label slices replaces growing a frame
    seeded with an empty object-dtype DataFrame, so the accumulator is not
    re-copied per label and the result dtypes come only from ``players``.
    """
    if not positions:
        # pd.concat rejects an empty list; return a zero-row frame that
        # still carries the columns of the source table.
        return players.iloc[0:0]
    return pd.concat([players[players["position"] == label] for label in positions])


forward_df = _rows_for_positions(merged_player, forward_pos)
mid_df = _rows_for_positions(merged_player, mid_pos)
defense_df = _rows_for_positions(merged_player, defense_pos)

forward_df.to_csv("./forward.csv")
mid_df.to_csv("./midfielder.csv")
defense_df.to_csv("./defense.csv")

## 1) 특성변수 파악

In [30]:
def check_important_var(df):
    """Print the extreme values of each column and the leagues they occur in.

    For every column except the last one, prints the column name, then the
    minimum and the maximum of the column together with the column mean, the
    absolute distance of that extreme from the mean, and the ``league``
    entries of the rows holding the extreme value.

    Columns whose mean or distance cannot be computed (for example text
    columns) are reported as skipped instead of aborting the whole report.

    Args:
        df: DataFrame containing a ``league`` column. The last column is never
            analysed. NOTE(review): callers appear to move ``league`` to the
            last position for this reason -- confirm.

    Returns:
        None. The report is written to standard output.
    """
    for column in df.columns[:-1]:
        values = df[column]
        try:
            min_value, max_value, avg_value = values.min(), values.max(), values.mean()
            dif_min_value = np.abs(min_value - avg_value)
            dif_max_value = np.abs(max_value - avg_value)
        except (TypeError, ValueError):
            # Non-numeric column: there is no mean to compare against.
            print(column)
            print("-----------------------------------")
            print("skipped: column is not numeric")
            print("\n")
            continue

        # League entries of every row that attains the minimum / maximum.
        league_name_in_min_value = df[values == min_value]['league']
        league_name_in_max_value = df[values == max_value]['league']

        print(column)
        print("-----------------------------------")
        print("리그이름    최소값     평균     오차")
        print(league_name_in_min_value, min_value, avg_value, dif_min_value)
        print("리그이름    최대값     평균     오차")
        print(league_name_in_max_value, max_value, avg_value, dif_max_value)
        print("\n")

In [24]:
# Remove the identifying columns, then re-attach 'league' so that it ends up
# as the last column of the frame.
forward_df0 = forward_df.drop(columns=['league', 'name', 'team_name'])
forward_df0 = forward_df0.assign(league=forward_df['league'])
forward_df0.columns

Index(['age', 'asists', 'avgp', 'aw', 'blocks', 'clear', 'crosses', 'disp',
       'drb', 'fouled', 'fouls', 'goals', 'inter', 'keyp', 'longb', 'off',
       'offsides', 'owng', 'position', 'ps_x', 'red', 'spg', 'tackles', 'tall',
       'thrb', 'unstch', 'weight', 'yel', 'league'],
      dtype='object')

In [None]:
forw