In [1]:
import spacy
from spacy.lang.tr.stop_words import STOP_WORDS

import nltk
from nltk.tokenize import PunktSentenceTokenizer

In [2]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# Raw English reference text for the demo; a later cell passes it to
# preprocess_text() to build `knowledge_base`.
knowledge = """
Natural Language Processing (NLP) is a field of artificial intelligence that focuses on the interaction between computers and humans using natural language. It involves the development of algorithms and models to enable machines to understand, interpret, and generate human-like text.

NLP tasks include text classification, named entity recognition, sentiment analysis, machine translation, and question answering. Techniques like tokenization, stemming, and lemmatization are commonly used in NLP to process and analyze text.

spaCy is a popular Python library for NLP tasks, providing tools for text processing and analysis. It includes various models for part-of-speech tagging, parsing, and more.

To build an NLP model, you can use techniques like Bag of Words (BoW) or Word Embeddings. BoW represents text as a vector of word frequencies, while Word Embeddings capture semantic relationships between words.

Feel free to ask questions about NLP, spaCy, or any related topics!
"""

In [4]:
# The knowledge text and the questions are English, so load the English
# pipeline (install once with: python -m spacy download en_core_web_sm).
nlp = spacy.load('en_core_web_sm')

# Tokenize and remove stopwords
def preprocess_text(text):
    """Normalise ``text`` into a space-separated string of content words.

    The text is lower-cased and tokenized with the loaded spaCy pipeline;
    only purely alphabetic tokens that are not stop words are kept.

    Args:
        text: Raw input string.

    Returns:
        The surviving tokens joined by single spaces (an empty string when
        nothing survives the filter).
    """
    doc = nlp(text.lower())
    # token.is_stop consults the stop-word list of the loaded pipeline's
    # language, so the filter always matches the language of the model.
    filtered_words = [token.text for token in doc if token.is_alpha and not token.is_stop]
    return ' '.join(filtered_words)

knowledge_base = preprocess_text(knowledge)

In [5]:
# Use NLTK's Punkt tokenizer for sentence tokenization
sentence_tokenizer = PunktSentenceTokenizer()
sentences = sentence_tokenizer.tokenize(knowledge_base)

In [6]:
# Vectorize using Bag of Words (BoW) model
vectorizer = CountVectorizer()
knowledge_vector = vectorizer.fit_transform([knowledge_base])

In [7]:
# Define a function to answer questions
def answer_question(question):
    """Return the knowledge-base entry most similar to ``question``.

    The question goes through ``preprocess_text`` and the fitted
    ``vectorizer``; its bag-of-words vector is compared with every row of
    ``knowledge_vector`` using cosine similarity.

    Args:
        question: The user's question as a plain string.

    Returns:
        The stripped entry of ``sentences`` with the highest similarity, or a
        fallback message when the question shares no vocabulary with the
        knowledge base (all similarities are zero).
    """
    # Preprocess and vectorize the question
    question_vector = vectorizer.transform([preprocess_text(question)])

    # cosine_similarity returns one row per question; there is exactly one.
    scores = cosine_similarity(question_vector, knowledge_vector)[0]

    # Find the most similar entry; assumes row i of knowledge_vector
    # corresponds to sentences[i].
    best_index = int(scores.argmax())

    # With all-zero scores argmax would silently pick index 0, which is not
    # an answer -- report that nothing matched instead.
    if scores[best_index] <= 0:
        return "Sorry, I could not find an answer to that question."

    return sentences[best_index].strip()

In [8]:
# Test the model: ask each sample question and print the question/answer pair.
for index, question in enumerate(["What is spaCy?", "Tell me about NLP tasks."]):
    # Only the first pair is printed without a separating blank line.
    if index == 0:
        print(f"Question: {question}")
    else:
        print(f"\nQuestion: {question}")
    print("Answer:", answer_question(question))

Question: What is spaCy?
Answer: natural language processing nlp field artificial intelligence focuses interaction computers humans natural language involves development algorithms models enable machines understand interpret generate human like text nlp tasks include text classification named entity recognition sentiment analysis machine translation question answering techniques like tokenization stemming lemmatization commonly nlp process analyze text spacy popular python library nlp tasks providing tools text processing analysis includes models speech tagging parsing build nlp model use techniques like bag words bow word embeddings bow represents text vector word frequencies word embeddings capture semantic relationships words feel free ask questions nlp spacy related topics

Question: Tell me about NLP tasks.
Answer: natural language processing nlp field artificial intelligence focuses interaction computers humans natural language involves development algorithms models enable machin