In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

# Load the dataset
data = pd.read_csv('finance_data.csv')

# Label encoding.
# The integer codes are kept in their own Series rather than written back into
# data['labels'], so the DataFrame keeps its original label values. Overwriting the
# column would leave the CSV saved later with integer `labels` next to decoded
# string `predict_labels`, which cannot be compared directly.
label_encoder = LabelEncoder()
encoded_labels = pd.Series(
    label_encoder.fit_transform(data['labels']),
    index=data.index,  # keep row alignment with data['kor_sentence']
    name='labels',
)

# Train/test split (20% held out, fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    data['kor_sentence'], encoded_labels, test_size=0.2, random_state=42
)

# TF-IDF vectorization: the vocabulary is fitted on the training split only and
# then reused for the test split, so no test-set statistics leak into training.
tfidf = TfidfVectorizer(max_features=5000, ngram_range=(1, 2))
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# Naive Bayes model initialization and training
nb = MultinomialNB()
nb.fit(X_train_tfidf, y_train)

# Predict on the held-out split
y_pred = nb.predict(X_test_tfidf)

# Performance evaluation
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

# Prediction function
def predict_review(sentence, model, tfidf, label_encoder):
    """Return the decoded label predicted for a single sentence.

    Args:
        sentence: The text to classify; it is wrapped in a one-element list
            before being passed to ``tfidf.transform``.
        model: Object whose ``predict`` method accepts the vectorized input.
        tfidf: Object whose ``transform`` method vectorizes a list of texts.
        label_encoder: Object whose ``inverse_transform`` method maps the
            model's predictions back to labels.

    Returns:
        The first (and only) element of the decoded predictions.
    """
    # Vectorize -> predict -> decode, then unwrap the single result.
    features = tfidf.transform([sentence])
    encoded = model.predict(features)
    return label_encoder.inverse_transform(encoded)[0]

Accuracy: 0.7123711340206186
Classification Report:
              precision    recall  f1-score   support

    negative       0.77      0.22      0.34       110
     neutral       0.71      0.97      0.82       571
    positive       0.70      0.38      0.50       289

    accuracy                           0.71       970
   macro avg       0.73      0.53      0.55       970
weighted avg       0.72      0.71      0.67       970



In [2]:
# Classify one example sentence with the trained model and show the decoded label
example_sentence = "오늘 주식시장은 긍정적인 반응을 보였습니다."
predicted_label = predict_review(
    sentence=example_sentence,
    model=nb,
    tfidf=tfidf,
    label_encoder=label_encoder,
)
print(f"Predicted label: {predicted_label}")

Predicted label: positive


In [3]:
# Save the results.
# All sentences are vectorized and classified in one batch call instead of
# running a separate transform/predict for every row via `apply`; the decoded
# labels are the same, but the per-row overhead is gone.
# NOTE: these predictions cover every row in `data`, including rows the model was
# trained on, so agreement with the true labels in this file is not a held-out score.
all_sentences_tfidf = tfidf.transform(data['kor_sentence'])
data['predict_labels'] = label_encoder.inverse_transform(nb.predict(all_sentences_tfidf))
data.to_csv('finance_data_with_nb_predictions.csv', index=False)