In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import precision_score, recall_score
from sklearn import tree
import graphviz

# Load the data set and drop every row that has a missing value in any column.
gun_death = pd.read_csv('gun_deaths.csv')
gd_cleaned = gun_death.dropna()

# Features used by the model, and the label to predict.
data = gd_cleaned[['age', 'race', 'education']]
target = gd_cleaned['intent']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
train_input, test_input, train_target, test_target = train_test_split(
    data,
    target,
    test_size=0.2,
    random_state=42,
)

# Scale the numeric feature with StandardScaler and one-hot encode the
# categorical features with OneHotEncoder. Both transformers are fitted on the
# training split only and then applied unchanged to the test split.
#
# Every transformed array is wrapped in a DataFrame that reuses the index of the
# split it came from. train_test_split shuffles rows, so the splits keep their
# original (non-contiguous) index labels; pd.concat(axis=1) aligns on the index,
# and a fresh 0..n-1 index on one side would misalign rows and introduce NaNs.
ss = StandardScaler()
train_age = pd.DataFrame(
    ss.fit_transform(train_input[['age']]),
    columns=['age'],
    index=train_input.index,
)
test_age = pd.DataFrame(
    ss.transform(test_input[['age']]),
    columns=['age'],
    index=test_input.index,
)

# `sparse_output` replaces the removed `sparse` argument and
# `get_feature_names_out` replaces the removed `get_feature_names`
# (requires scikit-learn >= 1.2).
encoder = OneHotEncoder(sparse_output=False, drop='first')
train_encoded_values = encoder.fit_transform(train_input[['race', 'education']])
test_encoded_values = encoder.transform(test_input[['race', 'education']])
encoded_columns = encoder.get_feature_names_out(['race', 'education'])

train_input_encoded = pd.DataFrame(
    train_encoded_values,
    columns=encoded_columns,
    index=train_input.index,
)
test_input_encoded = pd.DataFrame(
    test_encoded_values,
    columns=encoded_columns,
    index=test_input.index,
)

# Final feature matrices: scaled age first, followed by the one-hot columns.
train_input = pd.concat([train_age, train_input_encoded], axis=1)
test_input = pd.concat([test_age, test_input_encoded], axis=1)

# Train a depth-limited decision tree (fit returns the fitted estimator itself).
dt = DecisionTreeClassifier(max_depth=3, random_state=1).fit(train_input, train_target)

# Report accuracy on the training and test splits.
train_accuracy = dt.score(train_input, train_target)
test_accuracy = dt.score(test_input, test_target)
print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)

# Visualize the fitted decision tree.
# feature_names and class_names are passed as plain lists of str rather than a
# pandas Index / NumPy array: lists are accepted by every scikit-learn release,
# whereas some releases reject other array types during parameter validation.
dot_data = tree.export_graphviz(
    dt,
    out_file=None,  # return the DOT source as a string instead of writing a file
    feature_names=list(train_input.columns),
    class_names=[str(label) for label in dt.classes_],
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = graphviz.Source(dot_data)
graph.render("decision_tree")  # Save the visualization to a file

# Compute precision and recall on both splits.
# 'weighted' averages the per-class scores weighted by each class's support.
metric_options = {'average': 'weighted'}

train_pred = dt.predict(train_input)
test_pred = dt.predict(test_input)

train_precision = precision_score(train_target, train_pred, **metric_options)
train_recall = recall_score(train_target, train_pred, **metric_options)

test_precision = precision_score(test_target, test_pred, **metric_options)
test_recall = recall_score(test_target, test_pred, **metric_options)

# Report the training metrics first, then the test metrics.
print("Train Precision:", train_precision)
print("Train Recall:", train_recall)

print("Test Precision:", test_precision)
print("Test Recall:", test_recall)

# Predict the intent for one new record (age, race, education).
# The record is built as a one-row DataFrame with the same column names the
# scaler and encoder were fitted on, so they can be applied to it directly.
new_record = pd.DataFrame(
    {'age': [40], 'race': ['Black'], 'education': ['Less than HS']}
)

# Scale age with the scaler fitted on the training split.
new_age_scaled = ss.transform(new_record[['age']])[0][0]

# The encoder was fitted on the raw 'race' / 'education' values, so the raw
# values are passed as-is; no separate mapping step is required.
# NOTE(review): the encoder is created without handle_unknown, so transform
# raises on a category it did not see during fit -- confirm 'Black' and
# 'Less than HS' match the values stored in gun_deaths.csv.
new_data_encoded = encoder.transform(new_record[['race', 'education']])

# Assemble the row in the training column order (scaled age first, then the
# one-hot columns) and reuse the training column names so the model receives
# the same feature names it was fitted with.
new_data = pd.DataFrame(
    [[new_age_scaled, *new_data_encoded[0]]],
    columns=train_input.columns,
)
pred = dt.predict(new_data)
print("Predicted Intent:", pred[0])