In [23]:
import pandas as pd

# Mount Google Drive so the preprocessed CSV files become reachable
from google.colab import drive
drive.mount('/content/drive')

# Folder on the mounted drive that holds the per-grade CSV files
drive_folder_path = '/content/drive/My Drive/dataset/preprocessed'

# One CSV path per grade
csv_file1 = f'{drive_folder_path}/data_grade1.csv'
csv_file2 = f'{drive_folder_path}/data_grade2.csv'
csv_file3 = f'{drive_folder_path}/data_grade3.csv'

# Grade 1: read the file, discard the STUID column, and show the frame
df = pd.read_csv(csv_file1).drop(columns=['STUID'])
print(df)

# Grade 2: read the file and discard the STUID column
df2 = pd.read_csv(csv_file2).drop(columns=['STUID'])

# Grade 3: read the file and discard the STUID column
df3 = pd.read_csv(csv_file3).drop(columns=['STUID'])

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
      ST0  ST14  ST15  ST1_1  ST1_2  ST2_3  ST2_4  ST2_5  ST2_6  ST3  ...  \
0       1  48.0     0      2      4      4      3      4      0    1  ...   
1       1  45.0     1      2      4      1      1      3      3    2  ...   
2       1  48.0     4      1      2      0      8      1      2    2  ...   
3       0  44.0     2      2      5      4      1      3      0    2  ...   
4       0  50.0     0      2      3      0      4      3      4    2  ...   
...   ...   ...   ...    ...    ...    ...    ...    ...    ...  ...  ...   
1927    1  55.0     4      2      3      1      8      3      4    1  ...   
1928    1  35.0     4      2      3      3      8      4      5    0  ...   
1929    1  62.0     0      2      2      1      3      3      1    2  ...   
1930    1  43.0     0      2      3      2      3      4      8    3  ...   
1931    1  50.0     4   

In [24]:
# 김동명님 코드 활용
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, cohen_kappa_score, confusion_matrix

def model_predict(model, df, subject, test_size=0.2, random_state=42):
    """Fit ``model`` to predict one subject's achievement flag and score it.

    The frame is split into train and test parts, the model is fitted on the
    training part (the passed ``model`` object itself is fitted, not a copy),
    and five metrics are computed on the test part.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit`` and ``predict``, plus either
        ``predict_proba`` or ``decision_function`` for the AUC score.
    df : pandas.DataFrame
        Must contain the columns ``KOR_HIGH``, ``ENG_HIGH``, ``MATH_HIGH``,
        ``KOR_S``, ``ENG_S`` and ``MATH_S``; every other column is used as a
        feature.
    subject : str
        Name of the target column (one of the ``*_HIGH`` columns in the
        visible callers). The target is expected to be coded 0/1.
    test_size : float, default 0.2
        Forwarded to ``train_test_split``.
    random_state : int, default 42
        Forwarded to ``train_test_split``.

    Returns
    -------
    tuple
        ``(accuracy, sensitivity, specificity, kappa, auc)``. Sensitivity,
        specificity and AUC are ``nan`` when the test split does not contain
        the class they need, instead of failing.

    Raises
    ------
    AttributeError
        If ``model`` offers neither ``predict_proba`` nor
        ``decision_function``.
    """
    # The per-subject achievement flag (0/1) is the prediction target.
    y = df[subject]
    # Remove every subject's raw score and achievement flag so that no
    # outcome column is left among the features; all remaining columns are X.
    X = df.drop(columns=['KOR_HIGH', 'ENG_HIGH', 'MATH_HIGH', 'KOR_S', 'ENG_S', 'MATH_S'])

    # Split into training and test data.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    # Continuous score for the positive class. Estimators without
    # probability output (for example SVC with its default settings) fall
    # back to decision_function, which roc_auc_score also accepts.
    if hasattr(model, 'predict_proba'):
        y_score = model.predict_proba(X_test)[:, 1]
    else:
        y_score = model.decision_function(X_test)

    # Pin the label order so the matrix is always 2x2, even when one class
    # is missing from both y_test and y_pred.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()

    # Evaluation metrics.
    accuracy = accuracy_score(y_test, y_pred)
    # A rate is undefined when its denominator class is absent from the split.
    sensitivity = tp / (tp + fn) if (tp + fn) else float('nan')
    specificity = tn / (tn + fp) if (tn + fp) else float('nan')
    kappa = cohen_kappa_score(y_test, y_pred)
    # ROC AUC needs both classes in the true labels.
    if len(set(y_test)) > 1:
        auc = roc_auc_score(y_test, y_score)
    else:
        auc = float('nan')

    return accuracy, sensitivity, specificity, kappa, auc

In [32]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

# Define the KNN model (default hyper-parameters)
knn_model = KNeighborsClassifier()

# One frame per grade, in grade order, and the target column of each subject
grade_list = [df, df2, df3]
subject_list = ['KOR_HIGH', 'ENG_HIGH', 'MATH_HIGH']

# enumerate(start=1) yields the 1-based grade number alongside each frame
for grade_count, grade in enumerate(grade_list, start=1):
    print('학년 : ', grade_count)
    # Evaluate each subject separately
    for subject in subject_list:
        # Pass knn_model as the first argument of model_predict() and
        # collect the evaluation metrics it returns
        accuracy, sensitivity, specificity, kappa, auc = model_predict(knn_model, grade, subject)
        print(f"Subject: {subject}")
        print(f"Accuracy: {accuracy:.4f}")
        # Label spelling corrected from "Sensitvity"
        print(f"Sensitivity: {sensitivity:.4f}")
        print(f"Specificity: {specificity:.4f}")
        print(f"Kappa: {kappa:.4f}")
        print(f"AUC: {auc:.4f} \n")

학년 :  1
Subject: KOR_HIGH
Accuracy: 0.5762
Sensitvity: 0.6573
Specificity: 0.4770
Kappa: 0.1356
AUC: 0.5894 

Subject: ENG_HIGH
Accuracy: 0.6486
Sensitvity: 0.6959
Specificity: 0.6010
Kappa: 0.2970
AUC: 0.6911 

Subject: MATH_HIGH
Accuracy: 0.6873
Sensitvity: 0.7647
Specificity: 0.5843
Kappa: 0.3535
AUC: 0.7261 

학년 :  2
Subject: KOR_HIGH
Accuracy: 0.5285
Sensitvity: 0.6379
Specificity: 0.3929
Kappa: 0.0312
AUC: 0.5522 

Subject: ENG_HIGH
Accuracy: 0.6446
Sensitvity: 0.6568
Specificity: 0.6305
Kappa: 0.2867
AUC: 0.7056 

Subject: MATH_HIGH
Accuracy: 0.6105
Sensitvity: 0.6330
Specificity: 0.5882
Kappa: 0.2212
AUC: 0.6662 

학년 :  3
Subject: KOR_HIGH
Accuracy: 0.5819
Sensitvity: 0.6368
Specificity: 0.5263
Kappa: 0.1632
AUC: 0.6256 

Subject: ENG_HIGH
Accuracy: 0.6865
Sensitvity: 0.6759
Specificity: 0.6976
Kappa: 0.3731
AUC: 0.7199 

Subject: MATH_HIGH
Accuracy: 0.7150
Sensitvity: 0.6734
Specificity: 0.7523
Kappa: 0.4267
AUC: 0.7637 

