## Scikit-Learn 라이브러리

- 분류, 회귀, 클러스터링 등 다양한 알고리즘 제공.
- 아나콘다 배포판에 포함되어 있음.

#### 농구선수 포지션 예측

In [1]:
import pandas as pd

df = pd.read_csv("./basketball_stat.csv")
data_df = df[0:100]
data_df.head()

Unnamed: 0,Player,Pos,3P,2P,TRB,AST,STL,BLK
0,Alex Abrines,SG,1.3,0.5,1.5,0.6,0.5,0.2
1,Steven Adams,C,0.0,6.0,9.5,1.6,1.5,1.0
2,Bam Adebayo,C,0.0,3.4,7.3,2.2,0.9,0.8
3,DeVaughn Akoon-Purcell,SG,0.0,0.4,0.6,0.9,0.3,0.0
4,LaMarcus Aldridge,C,0.1,8.3,9.2,2.4,0.5,1.3


In [2]:
from sklearn.model_selection import train_test_split


# 학습데이터 80%, 테스트데이터 20%로 분리
train, test = train_test_split(data_df, test_size=0.2)
train.shape

(80, 8)

In [3]:
train_data_df = train[['3P', 'BLK', 'TRB']]
train_label_df = train[['Pos']]

train_data = train_data_df.values
train_data

array([[ 0. ,  0.3,  1. ],
       [ 2.1,  0. ,  2.9],
       [ 0.9,  0.2,  2.7],
       [ 2.6,  0.1,  2. ],
       [ 0. ,  0.9, 10.3],
       [ 2.5,  0.7,  5. ],
       [ 0.3,  0.3,  0.9],
       [ 1.9,  0.1,  2.5],
       [ 0. ,  0.6,  3.8],
       [ 0.8,  0. ,  0.8],
       [ 0.2,  1.4,  7.4],
       [ 0. ,  0.6,  8.4],
       [ 1.4,  0.6,  3.9],
       [ 0.1,  0.2,  1.9],
       [ 1.7,  0.1,  3.6],
       [ 0. ,  0. ,  2. ],
       [ 0.3,  0. ,  2.8],
       [ 1.3,  1.1,  7.5],
       [ 2.5,  0.7,  4. ],
       [ 0. ,  1.3,  6.5],
       [ 0.5,  0.1,  1.6],
       [ 0.1,  0.1,  0.7],
       [ 0. ,  0.4,  5.6],
       [ 2.3,  0.3,  3.4],
       [ 0.9,  0.1,  2.1],
       [ 2.1,  0.2,  4.1],
       [ 0. ,  0. ,  1.2],
       [ 0. ,  0.6,  4. ],
       [ 1.4,  0.3,  2.8],
       [ 1.8,  0.2,  3.3],
       [ 1.3,  0.2,  1.5],
       [ 1.1,  0.4,  4.2],
       [ 2.5,  0.1,  3.4],
       [ 1.6,  0.5,  4.6],
       [ 0.5,  0.9,  6.3],
       [ 0.9,  1.5,  8.2],
       [ 0. ,  0.3,  0.8],
 

In [4]:
train_label = train_label_df.values.ravel()
train_label

array(['C', 'SG', 'SG', 'SG', 'C', 'SG', 'SG', 'SG', 'C', 'SG', 'C', 'C',
       'SG', 'SG', 'SG', 'C', 'SG', 'C', 'SG', 'C', 'SG', 'SG', 'C', 'SG',
       'SG', 'SG', 'SG', 'C', 'SG', 'SG', 'SG', 'SG', 'SG', 'SG', 'C',
       'C', 'SG', 'C', 'SG', 'SG', 'C', 'C', 'C', 'SG', 'SG', 'SG', 'SG',
       'SG', 'C', 'C', 'SG', 'C', 'C', 'C', 'SG', 'SG', 'SG', 'C', 'SG',
       'SG', 'SG', 'SG', 'SG', 'C', 'SG', 'SG', 'C', 'C', 'SG', 'C', 'SG',
       'C', 'SG', 'C', 'SG', 'SG', 'C', 'SG', 'C', 'C'], dtype=object)

In [5]:
test_data_df = test[['3P', 'BLK', 'TRB']]
test_label_df = test[['Pos']]

test_data = test_data_df.values
test_label = test_label_df.values.ravel()

### KNN 모델로 학습

In [6]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=3) # KNN 분류기 객체 생성, 최근접 이웃의 수를 3을 가정
knn.fit(train_data, train_label) # KNN 모델 학습

pred_knn = knn.predict(test_data)
pred_knn

array(['C', 'C', 'C', 'SG', 'SG', 'C', 'SG', 'SG', 'SG', 'SG', 'C', 'SG',
       'SG', 'SG', 'C', 'SG', 'SG', 'C', 'SG', 'SG'], dtype=object)

#### 모델 정확도 확인

In [7]:
from sklearn import metrics # 예측값 비교

ac_score = metrics.accuracy_score(test_label, pred_knn)
print('accuracy : ', ac_score)

accuracy :  0.9


In [8]:
comparison = pd.DataFrame({'Prediction' : pred_knn, 'Ground_Truth' : test_label})
comparison

Unnamed: 0,Prediction,Ground_Truth
0,C,C
1,C,SG
2,C,C
3,SG,SG
4,SG,C
5,C,C
6,SG,SG
7,SG,SG
8,SG,SG
9,SG,SG


---

### SVM 모델로 학습

In [9]:
from sklearn import svm

clf = svm.SVC(C = 1, gamma = 0.1)
clf.fit(train_data, train_label)

pred_svm = clf.predict(test_data)

ac_score = metrics.accuracy_score(test_label, pred_svm)
print('accuracy : ', ac_score)

comparison = pd.DataFrame({'Prediction' : pred_knn, 'Ground_Truth' : test_label})
comparison

accuracy :  0.9


Unnamed: 0,Prediction,Ground_Truth
0,C,C
1,C,SG
2,C,C
3,SG,SG
4,SG,C
5,C,C
6,SG,SG
7,SG,SG
8,SG,SG
9,SG,SG
