## mobile 예제
### import

In [43]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

### CSV 파일 로딩하기

In [46]:
# Location of the dataset, relative to the notebook's working directory.
file_path = "./mobile.csv"
# Parse the CSV into a DataFrame with pandas' default settings.
df = pd.read_csv(filepath_or_buffer=file_path)
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,794,1,0.5,1,0,1,2,0.8,106,6,...,1222,1890,668,13,4,19,1,1,0,0
1996,1965,1,2.6,1,0,0,39,0.2,187,4,...,915,1965,2032,11,10,16,1,1,1,2
1997,1911,0,0.9,1,1,1,36,0.7,108,8,...,868,1632,3057,9,1,5,1,1,0,3
1998,1512,0,0.9,0,4,1,46,0.1,145,5,...,336,670,869,18,10,19,1,1,1,0


In [48]:
# Print the frame's (rows, columns) shape followed by its column labels.
for summary in (df.shape, df.columns):
    print(summary)

(2000, 21)
Index(['battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g',
       'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height',
       'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g',
       'touch_screen', 'wifi', 'price_range'],
      dtype='object')


### 결측치 확인

In [51]:
# Header line, then the number of missing values in each column.
# isna() is the pandas alias of isnull(), so the counts are the same.
print("===결측치 현황===", df.isna().sum(), sep="\n")

===결측치 현황===
battery_power    0
blue             0
clock_speed      0
dual_sim         0
fc               0
four_g           0
int_memory       0
m_dep            0
mobile_wt        0
n_cores          0
pc               0
px_height        0
px_width         0
ram              0
sc_h             0
sc_w             0
talk_time        0
three_g          0
touch_screen     0
wifi             0
price_range      0
dtype: int64


### 레이블 분포 확인 

In [54]:
# Header line, then how many rows fall into each price_range class.
print("===레이블 분포===", df['price_range'].value_counts(), sep="\n")

===레이블 분포===
price_range
1    500
2    500
3    500
0    500
Name: count, dtype: int64


### 불필요한 칼럼 제거

In [57]:
# Columns excluded from the feature set for this analysis.
cols_to_drop = ['blue', 'dual_sim', 'touch_screen', 'wifi', 'sc_h', 'sc_w']
# This cell rebinds `df`, so on a second run the columns are already gone.
# errors='ignore' makes the drop a no-op in that case instead of raising
# KeyError, which keeps the cell safe to re-run.
# NOTE(review): it also silently skips misspelled names — check the
# column listing displayed below after editing cols_to_drop.
df = df.drop(columns=cols_to_drop, errors='ignore')

# Show the remaining columns to confirm the result.
df.columns

Index(['battery_power', 'clock_speed', 'fc', 'four_g', 'int_memory', 'm_dep',
       'mobile_wt', 'n_cores', 'pc', 'px_height', 'px_width', 'ram',
       'talk_time', 'three_g', 'price_range'],
      dtype='object')

### 데이터 분할

In [60]:
# Target labels: the price_range column.
Y = df['price_range']
# Features: every remaining column.
X = df.drop(columns=['price_range'])
# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

### 데이터 분류기 초기화
solver는 LogisticRegression의 최적화 알고리즘을 선택하는 매개변수이다.

saga는 확률적 경사 하강법의 변형 알고리즘으로, L1·L2 규제를 모두 지원한다.
lbfgs를 사용했을 때 경고 문구가 떠서, 아래 코드에서는 liblinear를 사용함.

In [96]:
# Candidate models, keyed by the display name used in the results report.
# The estimators that involve randomness get a fixed random_state so that
# accuracies are reproducible across kernel restarts; the seed value matches
# the one used for the train/test split.
classifiers = {
    'Random Forest': RandomForestClassifier(random_state=42),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    # max_iter is raised to 1000 to give the solver more iterations to converge.
    # NOTE(review): price_range has 4 classes, so liblinear fits this as
    # one-vs-rest rather than a single multinomial model. Recent scikit-learn
    # releases reportedly deprecate liblinear for multiclass targets —
    # TODO confirm against the installed version.
    'Logistic Regression': LogisticRegression(
        solver='liblinear', max_iter=1000, random_state=42
    ),
    'KNN': KNeighborsClassifier(),
    'SVM': SVC()
}

### 모델 학습 및 평가

In [99]:
# Collect one (name, accuracy, confusion matrix, report) tuple per model.
results = []
for name, clf in classifiers.items():
    # fit() returns the estimator itself, so training and prediction chain.
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    # Score the held-out predictions: accuracy, confusion matrix
    # (rows = true class, columns = predicted class), and the text report.
    acc, cm, cr = (
        metric(y_test, y_pred)
        for metric in (accuracy_score, confusion_matrix, classification_report)
    )
    results.append((name, acc, cm, cr))


### 결과 출력

In [102]:
# Banner, then one section per model in the order they were evaluated.
print("\n=== 모델 성능 비교 ===")
for name, acc, cm, cr in results:
    # A single print call with a newline separator emits the same five
    # lines as five consecutive print calls would.
    print(
        f"\n======={name}=======",
        f"Accuracy: {acc:.4f}",
        cr,
        "Confusion Matrix:",
        cm,
        sep="\n",
    )


=== 모델 성능 비교 ===

Accuracy: 0.8925
              precision    recall  f1-score   support

           0       0.95      0.95      0.95       105
           1       0.86      0.87      0.86        91
           2       0.80      0.86      0.83        92
           3       0.95      0.88      0.92       112

    accuracy                           0.89       400
   macro avg       0.89      0.89      0.89       400
weighted avg       0.90      0.89      0.89       400

Confusion Matrix:
[[100   5   0   0]
 [  5  79   7   0]
 [  0   8  79   5]
 [  0   0  13  99]]

Accuracy: 0.8300
              precision    recall  f1-score   support

           0       0.93      0.85      0.89       105
           1       0.74      0.86      0.80        91
           2       0.78      0.71      0.74        92
           3       0.86      0.89      0.88       112

    accuracy                           0.83       400
   macro avg       0.83      0.83      0.83       400
weighted avg       0.83      0.83   