In [7]:
import warnings

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, ShuffleSplit
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler

warnings.filterwarnings('ignore')


In [8]:
# Load the training table; the file is EUC-KR encoded, so the encoding must be
# given explicitly.
signature_csv = 'signature_table_final.csv'
final = pd.read_csv(signature_csv, encoding='euc-kr')

In [9]:
# Column 0 of `final` is used as the target; every remaining column is a feature.
x = final.iloc[:, 1:]
# Select the target as a 1-D Series. `iloc[:, :1]` yields an (n, 1) DataFrame,
# which scikit-learn estimators only accept after a DataConversionWarning
# (hidden in this notebook by the global warnings filter).
y = final.iloc[:, 0]
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

In [11]:
# Baseline: logistic regression with scikit-learn defaults, giving a reference
# accuracy on the training split and then on the held-out split.
baseline = LogisticRegression().fit(x_train, y_train)
for features, labels in ((x_train, y_train), (x_test, y_test)):
    print(baseline.score(features, labels))

0.7106151813243283
0.698184309695101


In [19]:
# Compare four ensemble classifiers with default hyperparameters.
# Each estimator gets a fixed seed so the reported accuracies are reproducible
# across kernel restarts, matching the seeded train/test split.
model1 = AdaBoostClassifier(random_state=42)
model2 = BaggingClassifier(random_state=42)
model3 = GradientBoostingClassifier(random_state=42)
model4 = RandomForestClassifier(random_state=42)

# One fit/report loop instead of four copy-pasted stanzas; the labels are the
# exact prefixes used in the printed report.
for label, model in (
    ("AdaBoostClassifier", model1),
    ("BaggingClassifier", model2),
    ("GradientBoosting", model3),
    ("Randomforest", model4),
):
    model.fit(x_train, y_train)
    print(label + " train", model.score(x_train, y_train))
    print(label + " test", model.score(x_test, y_test))

AdaBoostClassifier train 0.7090001468213184
AdaBoostClassifier test 0.6954436450839329
BaggingClassifier train 0.9856115107913669
BaggingClassifier test 0.7858855772524838
GradientBoosting train 0.7480546175304654
GradientBoosting test 0.7050359712230215
Randomforest train 0.9901629716634855
Randomforest test 0.7882836587872559


In [12]:
# Cross-validation scheme for the grid searches: 5 random shuffles, each
# holding out 30% of the rows passed to it, seeded for repeatability.
cv = ShuffleSplit(test_size=0.3, n_splits=5, random_state=42)

In [13]:
# Exhaustive search over tree depth and forest size on the unscaled features,
# scored with the shuffle-split `cv`.
param_grid = {'max_depth': [10, 50, 100, 150, 200, 250, 300], 'n_estimators': [100, 200, 300]}
# Seed the forest so candidates differ only in their hyperparameters; with an
# unseeded estimator the winning combination can change from run to run.
# n_jobs=-1 spreads the 21 candidates x 5 splits over all available cores.
grid = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=cv, n_jobs=-1)
grid.fit(x_train, y_train)
print(grid.best_params_)
# `score` evaluates the best estimator, refit on the whole training split.
print(grid.score(x_train, y_train))
print(grid.score(x_test, y_test))

{'max_depth': 200, 'n_estimators': 200}
0.9991190720892673
0.815690304898938


In [14]:
# Standardize the features. The scaler learns its statistics from the training
# split only and the same transform is then applied to the test split, so no
# test-set information leaks into preprocessing.
# StandardScaler is imported in the notebook's top imports cell.
scaler = StandardScaler()
scaled_x_train = scaler.fit_transform(x_train)
scaled_x_test = scaler.transform(x_test)

In [15]:
# Repeat the depth / forest-size search on the standardized features.
param_grid = {'max_depth': [10, 50, 100, 150, 200, 250, 300], 'n_estimators': [100, 200, 300]}
# Seed the forest so candidates differ only in their hyperparameters; with an
# unseeded estimator the winning combination can change from run to run.
# n_jobs=-1 spreads the 21 candidates x 5 splits over all available cores.
grid = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=cv, n_jobs=-1)
grid.fit(scaled_x_train, y_train)
print(grid.best_params_)
# `score` evaluates the best estimator, refit on the whole scaled training split.
print(grid.score(scaled_x_train, y_train))
print(grid.score(scaled_x_test, y_test))

{'max_depth': 50, 'n_estimators': 100}
0.9991190720892673
0.8139773895169579


In [55]:
# Load the match statistics that the final model is asked to predict on; the
# file is EUC-KR encoded.
match_csv = 'gloryManU2.csv'
MU_vs_TT = pd.read_csv(match_csv, encoding='euc-kr')

In [56]:
# Inspect the loaded match rows.
# NOTE(review): the recorded output shows an 'Unnamed: 0' column, which looks
# like a saved index. If it is a real column, it is fed to the model as a
# feature when the whole frame is passed to predict/predict_proba — confirm the
# column layout matches the training features.
MU_vs_TT

Unnamed: 0,tackleTry,tackleSuccess,yellowCards,redCards,shootTotal,shootOutPenalty,shootInPenalty,goalTotal,goalInPenalty,goalOutPenalty,...,shortPassSuccess,longpassRate,shortpassRate,possession,passRate,shootSetPiece,shootPenaltyKick,effectiveShootTotal,goalPenaltykick,ownGoal
0,20,41,0,0,12,5.04,6.96,2,2,0,...,390,0.12,0.85,47,75,1,1,7,1,0
1,18,59,1,0,8,3.04,5.04,1,1,0,...,437,0.11,0.83,53,79,1,0,5,0,0


In [61]:
# Final model: a random forest using the hyperparameters reported by the grid
# search on the unscaled features (max_depth=200, n_estimators=200).
# The seed makes the fitted forest, and the predictions made from it,
# reproducible across runs.
model_real = RandomForestClassifier(max_depth=200, n_estimators=200, random_state=42)
model_real.fit(x_train, y_train)
print("Randomforest train", model_real.score(x_train, y_train))
print("Randomforest test", model_real.score(x_test, y_test))

Randomforest train 0.9991190720892673
Randomforest test 0.817060637204522


In [62]:
# Per-class probabilities for each match row; the columns follow the order of
# `model_real.classes_`.
model_real.predict_proba(MU_vs_TT)

array([[0.185, 0.145, 0.67 ],
       [0.195, 0.16 , 0.645]])

In [63]:
# Predicted class label for each match row.
model_real.predict(MU_vs_TT)

array([3, 3], dtype=int64)