In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score



In [2]:
# Load the Iris dataset bundled with scikit-learn and pull out the feature
# matrix (X) and the class labels (y).
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)



In [3]:
# Gaussian Naive Bayes trained on the training split. fit() returns the
# estimator itself, so training and predicting on the held-out split can be
# chained while `gnb` still refers to the fitted model.
gnb = GaussianNB()
y_pred = gnb.fit(X_train, y_train).predict(X_test)


In [4]:
# Score the current predictions (y_pred) against the held-out labels.
# Precision and recall use average='macro': each class is scored separately
# and the per-class scores are averaged with equal weight.
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='macro')
rec = recall_score(y_test, y_pred, average='macro')

# Labels are bilingual: English metric name with the Kazakh term in
# parentheses. The text was previously mis-encoded (UTF-8 read as Mac Roman).
print(f"📊 Accuracy (Дәлдік):  {acc:.4f}")
print(f"🎯 Precision (Нақтылық): {prec:.4f}")
print(f"📡 Recall (Толымдылық):  {rec:.4f}")



📊 Accuracy (Дәлдік):  1.0000
🎯 Precision (Нақтылық): 1.0000
📡 Recall (Толымдылық):  1.0000


In [6]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline



In [7]:
# Two-class text problem: keep only the space and medicine newsgroups.
categories = ['sci.space', 'sci.med']
# NOTE(review): no `remove=` argument is passed, so the posts are presumably
# kept with their headers/footers/quotes; the scikit-learn docs suggest
# remove=('headers', 'footers', 'quotes') for a more realistic evaluation.
# Confirm whether that is wanted here before comparing scores.
data = fetch_20newsgroups(subset='all', categories=categories, shuffle=True, random_state=42)

# Report how many documents were loaded and which topics they cover.
# Label indices follow data.target_names, which need not match the order of
# `categories` (the recorded output lists 'sci.med' first).
print(f"📝 Жүктелген мәтіндер саны: {len(data.data)}")
print(f"Тақырыптар: {data.target_names}")

# Split the data: 20% held out for testing, fixed seed for reproducibility.
# This rebinds X_train / X_test / y_train / y_test, replacing the Iris split
# created earlier in the notebook.
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)



📝 Жүктелген мәтіндер саны: 1977
Тақырыптар: ['sci.med', 'sci.space']


In [9]:
# Text classifier: TF-IDF features feeding a multinomial Naive Bayes model.
# Wrapping both steps in one pipeline lets `model` accept raw strings.
model = make_pipeline(
    TfidfVectorizer(),
    MultinomialNB(),
)

# fit() returns the pipeline itself, so training and predicting on the
# held-out documents can be chained.
y_pred = model.fit(X_train, y_train).predict(X_test)



In [10]:
# Accuracy of the text predictions on the held-out documents.
acc = accuracy_score(y_test, y_pred)
print(f"\n✅ Мәтіндік классификация Accuracy: {acc:.4f}")

# Sanity check on two hand-written sentences. Raw strings are passed to
# `model`, so it must be the TF-IDF + Naive Bayes pipeline (which vectorizes
# internally), not a bare classifier.
test_sms = [
    "Doctors found a cure for the virus",   # medicine
    "NASA landed a rover on Mars"           # space
]
pred_sms = model.predict(test_sms)
# Map each predicted label index back to its newsgroup name.
for text, label in zip(test_sms, pred_sms):
    print(f"Мәтін: '{text}' -> Класс: {data.target_names[label]}")


✅ Мәтіндік классификация Accuracy: 0.9773
Мәтін: 'Doctors found a cure for the virus' -> Класс: sci.med
Мәтін: 'NASA landed a rover on Mars' -> Класс: sci.space


In [11]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

# Banner for the comparison table ("model comparison, for text").
print("\n" + "="*40)
print("⚔️ МОДЕЛЬДЕР САЛЫСТЫРУЫ (Мәтін үшін)")
print("="*40)

# Vectorize once, outside any pipeline, so every model below is trained and
# scored on exactly the same TF-IDF features. The vocabulary is fitted on the
# training documents only and then applied to the test documents.
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Models to compare, keyed by the label shown in the table.
models = {
    "Naive Bayes (Multinomial)": MultinomialNB(),
    "K-Nearest Neighbors (KNN)": KNeighborsClassifier(n_neighbors=5),
    # Seeded so the tree (and therefore its score) is reproducible across runs.
    "Decision Tree": DecisionTreeClassifier(max_depth=10, random_state=42)
}

# The loop variable is deliberately NOT called `model`: that name holds the
# TF-IDF + Naive Bayes pipeline used by the ad-hoc prediction cell, and
# rebinding it here would break that cell when it is re-run.
for name, clf in models.items():
    clf.fit(X_train_vec, y_train)
    acc = accuracy_score(y_test, clf.predict(X_test_vec))
    print(f"{name:30} | Accuracy: {acc:.4f}")


⚔️ МОДЕЛЬДЕР САЛЫСТЫРУЫ (Мәтін үшін)
Naive Bayes (Multinomial)      | Accuracy: 0.9773
K-Nearest Neighbors (KNN)      | Accuracy: 0.9596
Decision Tree                  | Accuracy: 0.8965
