In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def query_aware_chunks_simple(document: str, query: str, max_words: int = 10) -> str:
    """Return the fixed-size chunk of ``document`` most similar to ``query``.

    The document is split on whitespace into consecutive chunks of at most
    ``max_words`` words. Each chunk and the query are embedded with a TF-IDF
    model fitted on the chunks only, and the chunk with the highest cosine
    similarity to the query is returned.

    Args:
        document: Text to split into chunks.
        query: Text the chunks are ranked against.
        max_words: Maximum number of whitespace-separated words per chunk.

    Returns:
        The best-matching chunk, with its words re-joined by single spaces.
        An empty string is returned when ``document`` contains no words.
        When several chunks tie (including the case where the query shares
        no vocabulary with any chunk and every score is 0), the earliest
        chunk wins.

    Raises:
        ValueError: If ``max_words`` is smaller than 1.
    """
    if max_words < 1:
        # Fail early with a clear message instead of an opaque error from
        # range() (step 0) or from the vectorizer (no chunks at all).
        raise ValueError(
            f"max_words must be a positive integer, got {max_words!r}"
        )

    words = document.split()
    if not words:
        # Nothing to chunk: fitting TF-IDF on an empty corpus would raise.
        return ""

    # Simple chunking: consecutive, non-overlapping windows of max_words words.
    chunks = [
        " ".join(words[start:start + max_words])
        for start in range(0, len(words), max_words)
    ]

    # TF-IDF vectors; the vocabulary is learned from the chunks only, so
    # query terms that never occur in the document are ignored.
    vectorizer = TfidfVectorizer()
    chunk_vectors = vectorizer.fit_transform(chunks)
    query_vector = vectorizer.transform([query])

    # Cosine similarity of the single query row against every chunk row.
    scores = cosine_similarity(query_vector, chunk_vectors)[0]

    # argmax returns the first index of the maximum, so ties favour the
    # earliest chunk.
    return chunks[scores.argmax()]


# Example: pick the chunk of a short document that best matches a query.
doc = "AI is transforming industries. Machine learning is core. Deep learning is used in vision and speech recognition."
query = "core part of AI"

# Uses the default chunk size (max_words=10).
best_chunk = query_aware_chunks_simple(document=doc, query=query)
print("Most Relevant Chunk:", best_chunk)


Most Relevant Chunk: AI is transforming industries. Machine learning is core. Deep learning
