In [17]:
import joblib
import os
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report

# 모델 로드
current_directory = os.getcwd()
model_path1 = os.path.join(current_directory, 'steelplate_model1.pkl')
model1 = joblib.load(model_path1)

model_path2 = os.path.join(current_directory, 'binary1.pkl')
model2 = joblib.load(model_path2)

model_path3 = os.path.join(current_directory, 'steelplate_model2.pkl')
model3 = joblib.load(model_path3)

#  Load val, test set 

In [18]:
x_val = pd.read_csv('val_test_set/x_val.csv')
x_test = pd.read_csv('val_test_set/x_test.csv')
y_val = pd.read_csv('val_test_set/y_val.csv')
y_test = pd.read_csv('val_test_set/y_test.csv')

In [19]:
print(x_val.shape, y_val.shape)
print(x_test.shape, y_test.shape)

(311, 14) (311, 1)
(389, 14) (389, 1)


# 예측

In [20]:
x = x_val
y = y_val

# 1차 예측
pred1 = model1.predict(x)

# 1차 예측 결과 정리
result = pd.concat([pd.DataFrame(pred1, columns=['Pred'])], axis=1)
result = result.set_index(x.index)  
result

Unnamed: 0,Pred
0,6
1,2
2,6
3,5
4,5
...,...
306,5
307,6
308,3
309,2


In [21]:
# 2차 예측을 위한 데이터 정리
x['Pred'] = result['Pred'] # x set과 결과값 합치기
selected_rows = x[(x['Pred'] == 0) | (x['Pred'] == 5) | (x['Pred'] == 6)] # 예측값이 5와 6인 데이터 추출
selected_rows.drop(['Pred', 'Empty_Index'], axis=1, inplace=True) # 필요없는 컬럼 삭제

# 2차 예측
pred2 = model2.predict(selected_rows)

# 2차 예측 결과 정리
for idx, pred in zip(selected_rows.index, pred2) :
    if pred == 0 :
        result.loc[idx, 'Pred'] = pred
    else : # 2차 model이 5 또는 6이라고 판별한 경우
        result.loc[idx, 'Pred'] = 56
result


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_rows.drop(['Pred', 'Empty_Index'], axis=1, inplace=True) # 필요없는 컬럼 삭제


Unnamed: 0,Pred
0,56
1,2
2,56
3,0
4,56
...,...
306,56
307,56
308,3
309,2


In [22]:
# 3차 예측을 위한 데이터 정리
x['Pred'] = result['Pred'] # x set과 결과값 합치기
selected_rows2 = x[(x['Pred'] == 56)] # 2차 모델이 5 또는 6이라고 예측한 데이터 추출
selected_rows2.drop(['Pred', 'Empty_Index'], axis=1, inplace=True) # 필요없는 컬럼 삭제

# 3차 예측
pred3 = model3.predict(selected_rows2)

# 3차 예측 결과 정리
for idx, pred in zip(selected_rows2.index, pred3) :
    result.loc[idx, 'Pred'] = pred
result

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_rows2.drop(['Pred', 'Empty_Index'], axis=1, inplace=True) # 필요없는 컬럼 삭제


Unnamed: 0,Pred
0,6
1,2
2,6
3,0
4,5
...,...
306,6
307,6
308,3
309,2


In [23]:
# classification_report 
report_final = classification_report(y, result['Pred'])
print(report_final)

              precision    recall  f1-score   support

           0       0.96      0.93      0.95        28
           1       0.91      0.91      0.91        22
           2       1.00      0.95      0.97        58
           3       1.00      1.00      1.00        12
           4       0.89      1.00      0.94         8
           5       0.87      0.96      0.91        75
           6       0.93      0.89      0.91       108

    accuracy                           0.93       311
   macro avg       0.94      0.95      0.94       311
weighted avg       0.93      0.93      0.93       311



# 성적 확인을 위한 함수

In [24]:
def model_score(x, y) :
    # 1차 예측
    pred1 = model1.predict(x)

    # 1차 예측 결과 정리
    result = pd.concat([pd.DataFrame(pred1, columns=['Pred'])], axis=1)
    result = result.set_index(x.index)  

    # 2차 예측을 위한 데이터 정리
    x['Pred'] = pred1 # x set과 결과값 합치기
    selected_rows = x[(x['Pred'] == 5) | (x['Pred'] == 6)] # 예측값이 5와 6인 데이터 추출
    selected_rows.drop(['Pred', 'Empty_Index'], axis=1, inplace=True) # 필요없는 컬럼 삭제

    # 2차 예측
    pred2 = model2.predict(selected_rows)

    # 2차 예측 결과 정리
    for idx, pred in zip(selected_rows.index, pred2) :
        result.loc[idx, 'Pred'] = pred

    # classification_report 
    report_final = classification_report(y, result['Pred'])

    return report_final



In [25]:
x = x_val
y = y_val

# 1차 예측
pred1 = model1.predict(x)

# 1차 예측 결과 정리
result = pd.concat([pd.DataFrame(pred1, columns=['Pred'])], axis=1)
result = result.set_index(x.index)  
result


ValueError: Feature shape mismatch, expected: 14, got 15

# Classification Report를 통한 성능 확인

In [None]:
print(model_score(x_val, y_val))

              precision    recall  f1-score   support

           0       0.87      0.46      0.60        28
           1       0.91      0.91      0.91        22
           2       1.00      0.95      0.97        58
           3       1.00      1.00      1.00        12
           4       0.89      1.00      0.94         8
           5       0.83      0.96      0.89        75
           6       0.86      0.88      0.87       108

    accuracy                           0.88       311
   macro avg       0.91      0.88      0.88       311
weighted avg       0.89      0.88      0.88       311



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_rows.drop(['Pred', 'Empty_Index'], axis=1, inplace=True) # 필요없는 컬럼 삭제


In [None]:
print(model_score(x_test, y_test))

              precision    recall  f1-score   support

           0       0.64      0.52      0.57        27
           1       0.94      0.98      0.96        46
           2       0.96      0.97      0.96        89
           3       0.92      1.00      0.96        11
           4       1.00      0.82      0.90        11
           5       0.86      0.83      0.84        81
           6       0.82      0.86      0.84       124

    accuracy                           0.87       389
   macro avg       0.88      0.85      0.86       389
weighted avg       0.87      0.87      0.87       389



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_rows.drop(['Pred', 'Empty_Index'], axis=1, inplace=True) # 필요없는 컬럼 삭제
