In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

In [2]:
soccerdf = pd.read_csv('data.csv', index_col='Unnamed: 0')
# soccerdf.columns
needed = ['Name', 'Club', 'Nationality', 'Age', 'Overall', 'Potential']
soccerdf = soccerdf[needed]
soccerdf.head()

Unnamed: 0,Name,Club,Nationality,Age,Overall,Potential
0,L. Messi,FC Barcelona,Argentina,31,94,94
1,Cristiano Ronaldo,Juventus,Portugal,33,94,94
2,Neymar Jr,Paris Saint-Germain,Brazil,26,92,93
3,De Gea,Manchester United,Spain,27,91,93
4,K. De Bruyne,Manchester City,Belgium,27,91,92


In [3]:
# TESTING
soccerdf['Age <= 25'] = soccerdf.apply(
    lambda row: 1 if row['Age'] <= 25 else 0,
    axis=1
)
soccerdf.head()
soccerdf['Overall >= 80'] = soccerdf.apply(
    lambda row: 1 if row['Overall'] >= 80 else 0,
    axis=1
)
soccerdf['Potential >= 80'] = soccerdf.apply(
    lambda row: 1 if row['Potential'] >= 80 else 0,
    axis=1
)
# ACTUAL TARGET CREATION
soccerdf['Accepted'] = soccerdf.apply(
    lambda row: 1 if (row['Age'] <= 25)&(row['Overall'] >= 80)&(row['Potential']>=80) else 0,
    axis=1
)
soccerdf.head()

Unnamed: 0,Name,Club,Nationality,Age,Overall,Potential,Age <= 25,Overall >= 80,Potential >= 80,Accepted
0,L. Messi,FC Barcelona,Argentina,31,94,94,0,1,1,0
1,Cristiano Ronaldo,Juventus,Portugal,33,94,94,0,1,1,0
2,Neymar Jr,Paris Saint-Germain,Brazil,26,92,93,0,1,1,0
3,De Gea,Manchester United,Spain,27,91,93,0,1,1,0
4,K. De Bruyne,Manchester City,Belgium,27,91,92,0,1,1,0


## Splitting

In [16]:
from sklearn.model_selection import train_test_split, cross_val_score
xtr, xts, ytr, yts = train_test_split(
    soccerdf[['Age', 'Overall', 'Potential']], 
    soccerdf['Accepted'],
    test_size=.1
)

## KNeighborsClassifier

In [17]:
def nilai_k():
    k = round((len(soccerdf['Accepted'])) ** .5)
    if k % 2 == 0:
        return k + 1
    else:
        return k
modelK = KNeighborsClassifier(
    n_neighbors=nilai_k()
)
modelK.fit(xtr, ytr)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=135, p=2,
                     weights='uniform')

## RandomForestClassifier

In [18]:
modelR = RandomForestClassifier(
    n_estimators=100
)
modelR.fit(xtr,ytr)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

## LogisticRegression

In [19]:
modelL = LogisticRegression(
    solver='lbfgs',
    multi_class='auto',
    max_iter=100000
)
modelL.fit(xtr,ytr)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100000,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

## Scoring

In [20]:
nilaiK = np.mean(cross_val_score(modelK, xts, yts, cv=10)) * 100 
nilaiR = np.mean(cross_val_score(modelR, xts, yts, cv=10)) * 100 
nilaiL = np.mean(cross_val_score(modelL, xts, yts, cv=10)) * 100 
print(
    f'KNeighborsClassifier score: {nilaiK}%\nRandomForestClassifier score: {nilaiR}\nLogistic Regression score: {nilaiL}%')

KNeighbors Classifier score: 99.39620488800817%
RandomForestClassifier score: 99.94505494505495
Logistic Regression score: 99.28631477811805%


##### Setelah beberapa kali nge-run, RandomForest Classifer paling akurat, jadi itu yang dipakai

# Prediction

In [9]:
predictDict = [
    {'Name': 'Andik Vermansyah', 'Club': 'Madura United FC', 'Nationality': 'Indonesia', 'Age': 27, 'Overall': 87, 'Potential': 90},
    {'Name': 'Awan Setho Raharjo', 'Club': 'Bhayangkara FC', 'Nationality': 'Indonesia', 'Age': 22, 'Overall': 75, 'Potential': 83},
    {'Name': 'Bambang Pamungkas', 'Club': 'Persija Jakarta', 'Nationality': 'Indonesia', 'Age': 38, 'Overall': 85, 'Potential': 75},
    {'Name': 'Cristian Gonzales', 'Club': 'PSS Sleman', 'Nationality': 'Indonesia', 'Age': 43, 'Overall': 90, 'Potential': 85},
    {'Name': 'Egy Maulana Vikri', 'Club': 'Lechia Gdansk', 'Nationality': 'Indonesia', 'Age': 18, 'Overall': 88, 'Potential': 90},
    {'Name': 'Evan Dimas', 'Club': 'Barito Putra', 'Nationality': 'Indonesia', 'Age': 24, 'Overall': 85, 'Potential': 87},
    {'Name': 'Febri Hariyadi', 'Club': 'Persib Bandung', 'Nationality': 'Indonesia', 'Age': 23, 'Overall': 77, 'Potential': 80},
    {'Name': 'Hansamu Yama Pranata', 'Club': 'Persebaya Surabaya', 'Nationality': 'Indonesia', 'Age': 24, 'Overall': 82, 'Potential': 85},
    {'Name': 'Septian David Maulana', 'Club': 'PSIS Semarang', 'Nationality': 'Indonesia', 'Age': 22, 'Overall': 83, 'Potential': 80},
    {'Name': 'Stefano Lilipaly', 'Club': 'Bali United', 'Nationality': 'Indonesia', 'Age': 29, 'Overall': 88, 'Potential': 86}
]
predictdf = pd.DataFrame(predictDict, columns=predictDict[0].keys())
predictdf

Unnamed: 0,Name,Club,Nationality,Age,Overall,Potential
0,Andik Vermansyah,Madura United FC,Indonesia,27,87,90
1,Awan Setho Raharjo,Bhayangkara FC,Indonesia,22,75,83
2,Bambang Pamungkas,Persija Jakarta,Indonesia,38,85,75
3,Cristian Gonzales,PSS Sleman,Indonesia,43,90,85
4,Egy Maulana Vikri,Lechia Gdansk,Indonesia,18,88,90
5,Evan Dimas,Barito Putra,Indonesia,24,85,87
6,Febri Hariyadi,Persib Bandung,Indonesia,23,77,80
7,Hansamu Yama Pranata,Persebaya Surabaya,Indonesia,24,82,85
8,Septian David Maulana,PSIS Semarang,Indonesia,22,83,80
9,Stefano Lilipaly,Bali United,Indonesia,29,88,86


In [10]:
predictdf["Prediction"] = predictdf[["Age", "Overall", "Potential"]].apply(
    lambda row: modelR.predict([row.values])[0], axis=1
)

# Labelling
predictdf["Prediction"] = predictdf.apply(
    lambda row: 'Accepted' if row['Prediction'] == 1 else 'Not Accepted',
    axis=1
)
predictdf

Unnamed: 0,Name,Club,Nationality,Age,Overall,Potential,Prediction
0,Andik Vermansyah,Madura United FC,Indonesia,27,87,90,Not Accepted
1,Awan Setho Raharjo,Bhayangkara FC,Indonesia,22,75,83,Not Accepted
2,Bambang Pamungkas,Persija Jakarta,Indonesia,38,85,75,Not Accepted
3,Cristian Gonzales,PSS Sleman,Indonesia,43,90,85,Not Accepted
4,Egy Maulana Vikri,Lechia Gdansk,Indonesia,18,88,90,Accepted
5,Evan Dimas,Barito Putra,Indonesia,24,85,87,Accepted
6,Febri Hariyadi,Persib Bandung,Indonesia,23,77,80,Not Accepted
7,Hansamu Yama Pranata,Persebaya Surabaya,Indonesia,24,82,85,Accepted
8,Septian David Maulana,PSIS Semarang,Indonesia,22,83,80,Accepted
9,Stefano Lilipaly,Bali United,Indonesia,29,88,86,Not Accepted
