In [None]:
!pip install langchain
!pip install chromadb
!pip install sentence-transformers

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from langchain_community.embeddings import HuggingFaceEmbeddings
import numpy as np

# Initialize the embeddings model
# NOTE(review): "all-MiniLM-L6-v2" is presumably a sentence-transformers model id
# that is downloaded on first use — confirm network/cache availability.
model_name = "all-MiniLM-L6-v2"
# Single shared embedder; the script further down uses it for both the textbook
# segments and the learning objectives so their vectors are comparable.
embeddings = HuggingFaceEmbeddings(model_name=model_name)

# Function to read textbook content
def read_textbook(file_path):
    """Return the entire contents of the text file at *file_path*.

    The file is opened in text mode and decoded as UTF-8. Errors from
    ``open`` or decoding (e.g. ``FileNotFoundError``,
    ``UnicodeDecodeError``) are not caught and propagate to the caller.
    """
    with open(file_path, mode='r', encoding='utf-8') as textbook:
        contents = textbook.read()
    return contents

# Preprocess and segment the textbook content
def preprocess_and_segment(text, segment_size=200):
    """Cut *text* into consecutive chunks of at most *segment_size* characters.

    Chunks are taken back-to-back with no overlap; the final chunk holds
    whatever remains and may be shorter. An empty *text* yields ``[]``.

    This is a deliberately naive, purely size-based split: it pays no
    attention to words, sentences or chapters. A smarter splitter (chapter
    titles, natural breaks in the text) can be substituted as long as it
    still returns a list of strings.
    """
    chunks = []
    for start in range(0, len(text), segment_size):
        stop = start + segment_size
        chunks.append(text[start:stop])
    return chunks

# Calculate similarity
def calculate_similarity(text_embeddings, objectives_embeddings):
    """Return the pairwise cosine similarity between two sets of embeddings.

    Both arguments are forwarded unchanged to scikit-learn's
    ``cosine_similarity``; the result has one row per entry of
    *text_embeddings* and one column per entry of *objectives_embeddings*.
    """
    return cosine_similarity(X=text_embeddings, Y=objectives_embeddings)

# Read and preprocess textbook content
textbook_path = 'path_to_your_textbook.txt'  # Update this path
textbook_content = read_textbook(textbook_path)
segments = preprocess_and_segment(textbook_content)
if not segments:
    # An empty file produces no segments; stop here with a clear message
    # instead of an opaque array-shape error from the similarity step.
    raise ValueError(f"No text found in {textbook_path!r}; nothing to embed.")

# Generate embeddings for each segment.
# LangChain embedding classes expose embed_documents(list_of_texts), which
# returns one vector per input text; they have no get_embeddings method.
segments_embeddings = embeddings.embed_documents(segments)

# Learning objectives
learning_objectives = [
    "Understand the basics of quantum mechanics",
    "Explore the principles of relativity",
    # Add more objectives as needed
]

# Generate embeddings for the learning objectives (same model, same method,
# so the vectors live in the same space as the segment vectors).
objectives_embeddings = embeddings.embed_documents(learning_objectives)

# Calculate similarity scores between each segment and the learning objectives.
# Rows correspond to segments, columns to learning objectives.
similarity_scores = calculate_similarity(np.array(segments_embeddings), np.array(objectives_embeddings))

# Process and print the most relevant segments for each learning objective
for obj_index, objective in enumerate(learning_objectives):
    print(f"Objective: {objective}")
    # Column obj_index holds this objective's score against every segment.
    objective_scores = similarity_scores[:, obj_index]
    most_relevant_segment_index = np.argmax(objective_scores)
    # Only the first 150 characters of the winning segment are shown.
    print(f"Most relevant segment: {segments[most_relevant_segment_index][:150]}... [Score: {objective_scores[most_relevant_segment_index]}]\n")
