In [None]:
import numpy as np

class NaiveBayesClassifier:
    """Multinomial Naive Bayes text classifier with add-one (Laplace) smoothing.

    Documents are plain strings that are tokenised on whitespace. After
    ``fit`` the following attributes are populated:

    * ``classes``     - sorted unique class labels.
    * ``class_probs`` - prior P(class), aligned with ``classes``.
    * ``vocabulary``  - set of every distinct token seen in training.
    * ``word_probs``  - dict mapping word -> array of P(word | class),
      aligned with ``classes``.
    """

    def __init__(self):
        self.class_probs = None
        self.word_probs = None
        self.classes = None
        self.vocabulary = None

    def fit(self, X, y):
        """Estimate class priors and smoothed per-class word likelihoods.

        Args:
            X: sequence (list or array) of document strings.
            y: sequence (list or array) of class labels, one per document.

        Raises:
            ValueError: if ``X`` and ``y`` have different lengths.
        """
        # Boolean-mask indexing below needs arrays; plain lists would silently
        # select a single element instead of the documents of one class.
        X = np.asarray(X, dtype=object)
        y = np.asarray(y)
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )

        self.classes, class_counts = np.unique(y, return_counts=True)
        num_classes = len(self.classes)
        num_documents = len(y)

        # Prior probability of each class.
        self.class_probs = class_counts / num_documents

        # Vocabulary: every distinct whitespace-separated token.
        self.vocabulary = set(word for doc in X for word in doc.split())
        vocab_size = len(self.vocabulary)

        # One likelihood slot per class for every word.
        self.word_probs = {word: np.zeros(num_classes) for word in self.vocabulary}

        for i, cls in enumerate(self.classes):
            class_docs = X[y == cls]

            # Count every token of this class in a single pass.
            word_counts = {}
            for doc in class_docs:
                for word in doc.split():
                    word_counts[word] = word_counts.get(word, 0) + 1

            # Laplace smoothing: the denominator is the total number of tokens
            # in the class plus the vocabulary size, so that the likelihoods of
            # all words sum to 1 for each class.
            denominator = sum(word_counts.values()) + vocab_size
            for word in self.vocabulary:
                self.word_probs[word][i] = (word_counts.get(word, 0) + 1) / denominator

    def predict(self, X):
        """Return the most probable class label for each document in ``X``.

        Words that were not seen during training are ignored. Scores are
        accumulated in log space to avoid floating-point underflow.

        Args:
            X: iterable of document strings.

        Returns:
            list with one predicted label (an element of ``classes``) per document.
        """
        log_priors = np.log(self.class_probs)
        predictions = []
        for doc in X:
            # Start from the log prior; copy so documents do not share state.
            scores = log_priors.copy()
            for word in doc.split():
                if word in self.vocabulary:
                    scores += np.log(self.word_probs[word])
            predictions.append(self.classes[np.argmax(scores)])
        return predictions

# Toy training corpus, its labels, and the documents to classify
# (placeholder strings).
X_train = [
    "  1",
    "  2",
    "  3",
    "  4",
]
y_train = [
    "  1",
    "  2",
    "  1",
    "  2",
]
X_test = [
    "  5",
    "  6",
]

# Build the classifier and train it on the toy corpus.
classifier = NaiveBayesClassifier()
classifier.fit(X=X_train, y=y_train)

# Classify the held-out documents and show the predicted labels.
y_pred = classifier.predict(X=X_test)
print("    :", y_pred)


In [None]:
import hashlib
import json
import os

import requests

def get_word_definition(word):
    """Look up ``word`` through the Youdao OpenAPI and print the result.

    Prints each entry of ``basic.explains`` when the response has a ``basic``
    section, otherwise each entry of ``translation``. When the API reports a
    non-zero ``errorCode`` the code is printed instead.

    Args:
        word: the text to look up.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
    """
    # Read credentials from the environment so real keys never live in the
    # notebook; the placeholders are used when the variables are unset.
    app_key = os.environ.get('YOUDAO_APP_KEY', 'YOUR_APP_KEY')
    app_secret = os.environ.get('YOUDAO_APP_SECRET', 'YOUR_APP_SECRET')

    url = 'https://openapi.youdao.com/api'
    salt = 'random_salt'

    # Request signature: MD5 over appKey + query + salt + appSecret.
    sign_str = app_key + word + salt + app_secret
    params = {
        'q': word,
        'from': 'auto',
        'to': 'auto',
        'appKey': app_key,
        'salt': salt,
        'sign': hashlib.md5(sign_str.encode('utf-8')).hexdigest(),
    }

    # A timeout keeps the cell from hanging forever on a stalled connection;
    # raise_for_status surfaces HTTP errors before JSON parsing is attempted.
    response = requests.get(url, params=params, timeout=10)
    response.raise_for_status()
    result = response.json()

    # errorCode '0' means success; .get avoids a KeyError on unexpected payloads.
    error_code = result.get('errorCode')
    if error_code == '0':
        if 'basic' in result:
            for explanation in result['basic']['explains']:
                print(explanation)
        elif 'translation' in result:
            for translation in result['translation']:
                print(translation)
    else:
        print('    ，    ：', error_code)

# Demo entry point: look up one sample word when the cell/script is executed.
if __name__ == "__main__":
    word = "sequences"
    get_word_definition(word=word)


: 

In [None]:
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Example corpus with exactly one label per document. train_test_split raises
# ValueError when the two lists differ in length.
# NOTE(review): the label values are placeholders - replace with the real classes.
documents = ["Buffer in overflows in PL/SQL module ","in the conteudo parameter."]
labels = ["1", "2"]

# 1. Text preprocessing
def preprocess_text(text):
    """Return ``text`` lower-cased with punctuation and digits removed.

    Characters that are neither word characters nor whitespace are dropped
    first, then every digit is dropped; whitespace is left untouched.
    """
    cleaned = text
    for pattern in (r'[^\w\s]', r'\d'):
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned.lower()

# Clean every document before vectorization.
documents = [preprocess_text(doc) for doc in documents]

# 2. Build the TF-IDF matrix. The vectorizer is fitted once and the resulting
#    matrix is reused instead of refitting on the same documents.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(documents)
print(X)
tfidf_matrix = X

# For every document, print its (term, weight) pairs ordered by descending
# TF-IDF weight.
feature_names = vectorizer.get_feature_names_out()
for doc_weights in tfidf_matrix.toarray():
    word2tfidf = dict(zip(feature_names, doc_weights))
    sorted_word2tfidf = sorted(word2tfidf.items(), key=lambda x: x[1], reverse=True)
    print(sorted_word2tfidf)

# 3. Split into train and test sets (labels must hold one entry per document).
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

# 4. Train the multinomial Naive Bayes classifier.
naive_bayes_classifier = MultinomialNB()
naive_bayes_classifier.fit(X_train, y_train)

# 5. Predict labels for the held-out documents.
y_pred = naive_bayes_classifier.predict(X_test)

# 6. Evaluate the predictions.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Per-class precision / recall / F1 summary.
print("Classification Report:\n", classification_report(y_test, y_pred))


In [None]:
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Example corpus for the TF-IDF ranking below; no labels are needed in this cell.
documents = [
    "Buffer buffer in overflows in PL/SQL module ",
    "in the conteudo parameter.",
]
# labels = ["  1", "  2", ..., "  N"]

# 1. Text preprocessing
def preprocess_text(text):
    """Strip punctuation and digits from ``text`` and lower-case the result.

    Whitespace and word characters other than digits (including underscores)
    are kept as they are.
    """
    without_punctuation = re.sub(r'[^\w\s]', '', text)
    without_digits = re.sub(r'\d', '', without_punctuation)
    return without_digits.lower()

# Clean every document before vectorization.
documents = [preprocess_text(doc) for doc in documents]

# 2. Build the TF-IDF matrix (one row per document, one column per term).
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(documents)

# 3. Rank the terms of each document by TF-IDF weight.
feature_names = vectorizer.get_feature_names_out()

# Dense copy of the weights so rows can be indexed and sorted directly.
tfidf_values = X.toarray()

# Column indices of each row ordered by descending weight. The array's own
# argsort method is used so this cell does not rely on `np` having been
# imported by an earlier cell.
sorted_indices = (-tfidf_values).argsort(axis=1)

# Print every term of every document together with its weight, highest first.
for i, indices in enumerate(sorted_indices):
    print(f"  {i + 1}       TF-IDF   ：")
    for j in indices:
        word = feature_names[j]
        tfidf = tfidf_values[i, j]
        print(f"{word}: {tfidf}")
    print("\n")



# # 3.      
# X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

# # 4.          
# naive_bayes_classifier = MultinomialNB()
# naive_bayes_classifier.fit(X_train, y_train)

# # 5.   
# y_pred = naive_bayes_classifier.predict(X_test)

# # 6.     
# accuracy = accuracy_score(y_test, y_pred)
# print("Accuracy:", accuracy)

# #           
# print("Classification Report:\n", classification_report(y_test, y_pred))


: 