In [None]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# 🎲 Fix NumPy's global random seed (for reproducibility)
np.random.seed(42)

# 📂 Location of the dataset.
# NOTE: this is an absolute, machine-specific path; point it at your own
# copy of iris.data before running the script on another machine.
file_path = '/Users/dean/Desktop/iris/iris.data'

# The file is read with header=None, so the column names are supplied here
columns = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'species',
]
df = pd.read_csv(file_path, names=columns, header=None)

# Print five randomly chosen rows as a quick look at the data
random_samples = df.sample(5)
print("Random five samples:", random_samples, sep="\n")

# 🎨 Replace the species labels with their integer category codes
#    (intended mapping: Setosa → 0, Versicolor → 1, Virginica → 2)
df['species'] = (
    df['species']
    .astype('category')
    .cat.codes
)

# 🧪 Separate target and features, then hold out 20% of the rows for testing
y = df['species']  # Target (model output)
X = df.drop(columns='species')  # Features (model input)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 🤖 Build a Random Forest classifier and fit it on the training split
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# 🔍 Predict on the held-out test split and report the accuracy
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# 📊 Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Pairwise plots of the columns in df, coloured by the 'species' column.
# NOTE: 'species' was converted to integer category codes earlier in this
# script, so the hue groups are those codes rather than the original labels.
sns.pairplot(data=df, hue='species', palette='coolwarm')
plt.show()