In [60]:
import os
import torch
from sentence_transformers import SentenceTransformer, util
import os
import pickle
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
from transformers import pipeline

In [61]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [62]:
print("\n Loading Sentence-BERT model...")
model = SentenceTransformer("all-MiniLM-L6-v2")
print("Loaded Model...")

print("Loading Generated Questions...")
generated_qs_path = "data/intermediate/generated_questions.txt"
if os.path.exists(generated_qs_path):
    with open(generated_qs_path, "r", encoding="utf-8") as f:
        generated_questions = [line.strip() for line in f if line.strip()]
    generated_embeddings = model.encode(generated_questions, convert_to_tensor=True)
    print("Loaded Generated Questions.")
else:
    print("'generated_questions.txt' not found!")
    exit()


print("Loading Courses and Institutions Data")
with open("data/processed/data.pkl", "rb") as f:
    df = pickle.load(f)


df_unique = df.drop_duplicates(subset=["reviews"]).reset_index(drop=True)
print("Loaded courses and institutions data.")

print("Loading Intent Classifier Model..")
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
print("Loaded Intent Classifier Model")


 Loading Sentence-BERT model...
Loaded Model...
Loading Generated Questions...
Loaded Generated Questions.
Loading Courses and Institutions Data
Loaded courses and institutions data.
Loading Intent Classifier Model..


Device set to use mps:0


Loaded Intent Classifier Model


## Ask User to select a institution and Course

In [63]:
# --- Ask user to select an institution ---
institutions = sorted(df_unique["institution"].dropna().unique())
print("\n🏫 Available Institutions:")
for idx, inst in enumerate(institutions):
    print(f"{idx + 1}. {inst}")

inst_idx = int(input("\n👉 Select an institution by number: ")) - 1
selected_institution = institutions[inst_idx]
print(f"\n✅ You selected: {selected_institution}")

# --- Filter courses ---
inst_courses = sorted(df_unique[df_unique["institution"] == selected_institution]["name"].dropna().unique())
print("\n📚 Courses in this institution:")
for idx, course in enumerate(inst_courses):
    print(f"{idx + 1}. {course}")

course_idx = int(input("\n👉 Select a course by number: ")) - 1
selected_course = inst_courses[course_idx]
print(f"\n✅ You selected course: {selected_course}")


🏫 Available Institutions:
1. (ISC)²
2. Alberta Machine Intelligence Institute
3. Amazon Web Services
4. Arizona State University
5. Atlassian
6. Autodesk
7. BCG
8. Berklee College of Music
9. Birkbeck, University of London
10. California Institute of the Arts
11. Case Western Reserve University
12. Columbia University
13. Copenhagen Business School
14. DeepLearning.AI
15. Duke University
16. ESSEC Business School
17. Eindhoven University of Technology
18. Emory University
19. Erasmus University Rotterdam
20. Facebook
21. Georgia Institute of Technology
22. GitLab
23. Google
24. Google - Spectrum Sharing
25. Google Cloud
26. HEC Paris
27. Hebrew University of Jerusalem
28. IBM
29. IE School of Architecture & Design
30. Imperial College London
31. JetBrains
32. Johns Hopkins University
33. London Business School
34. Ludwig-Maximilians-Universität München (LMU)
35. Macquarie University
36. Nanyang Technological University, Singapore
37. New York University
38. Northwestern University
39.

## Evaluationg Models

In [64]:
course_review_questions = [
    "How was the instructor’s teaching?",
    "Was the course content clear?",
    "How effective were the assignments and projects?",
    "Would you recommend it?",
     "Were videos easy to follow?",
    "How well did the instructor explain complex topics?",
    "Were the assignments helpful for practice?",
    "Was the course organized and easy to navigate?",
    "Did you find the learning platform user-friendly?",
    "What improvements would you suggest for this course?"
]

non_course_review_questions = [
    "What’s your hobby?",
    "Do you like music?",
    "How are you?",
    "Beach or mountains?",
    "What do you enjoy doing in your free time?",
    "Have you traveled anywhere interesting recently?",
    "What’s your favorite way to relax after studying or working?",
    "Is there a skill you'd love to master one day?",
    "Do you prefer reading books or watching shows?",
    "Favorite movie?"
]

# Combine questions and assign labels
all_questions = course_review_questions + non_course_review_questions
true_labels = [1]*len(course_review_questions) + [0]*len(non_course_review_questions)

# Generate embeddings for reference course-related questions
reference_embeddings = generated_embeddings.cpu()

# Store predictions and scores
predicted_labels = []
cosine_scores_list = []

print("\n--- Evaluation Results ---\n")

# Calculate cosine similarity for each question
for i, question in enumerate(all_questions):
    query_embedding = model.encode(question, convert_to_tensor=True).cpu()
    cosine_scores = util.pytorch_cos_sim(query_embedding, reference_embeddings)
    max_score = torch.max(cosine_scores).item()
    cosine_scores_list.append(max_score)

# Evaluate different thresholds and calculate accuracy
thresholds = np.arange(0.5, 1.1, 0.1)
accuracy_results = []

for threshold in thresholds:
    # Predict labels based on cosine similarity threshold
    predictions = (np.array(cosine_scores_list) >= threshold).astype(int)
    
    # Calculate accuracy
    accuracy = (predictions == true_labels).mean()
    accuracy_results.append((threshold, accuracy))

# Create a DataFrame for displaying results
accuracy_df = pd.DataFrame(accuracy_results, columns=["Threshold", "Accuracy"])

# Find the best threshold and its accuracy
best_threshold = accuracy_df.loc[accuracy_df["Accuracy"].idxmax()]

# Display results in a table format
print("\nThresholds and their corresponding accuracies:")
print(accuracy_df)

print(f"\nBest Threshold: {best_threshold['Threshold']:.2f} with Accuracy: {best_threshold['Accuracy']*100:.2f}%")

print("Course Related Questions:")
for question in course_review_questions[-5:]:
    print(f" - {question}")
print()
print(" - ...")

print("Non Course Related Questions:")
for question in non_course_review_questions[-5:]:
    print(f" - {question}")
print(" - ...")


# Display the last 3 questions from each category and cosine similarity
print("Output of model from Course Related questions:")
for question in course_review_questions[:3]:
    query_embedding = model.encode(question, convert_to_tensor=True).cpu()
    cosine_scores = util.pytorch_cos_sim(query_embedding, reference_embeddings)
    max_score = torch.max(cosine_scores).item()
    predicted_label = 1 if max_score >= best_threshold['Threshold'] else 0
    related = "Related" if predicted_label == 1 else "Not Related"
    print(f"Question: {question}")
    print(f"Cosine Similarity: {max_score:.4f}")
    print(f"Prediction: {related}")
    print()

print("Output of model from Non-Course Related questions:")
for question in non_course_review_questions[:3]:
    query_embedding = model.encode(question, convert_to_tensor=True).cpu()
    cosine_scores = util.pytorch_cos_sim(query_embedding, reference_embeddings)
    max_score = torch.max(cosine_scores).item()
    predicted_label = 1 if max_score >= best_threshold['Threshold'] else 0
    related = "Related" if predicted_label == 1 else "Not Related"
    print(f"Question: {question}")
    print(f"Cosine Similarity: {max_score:.4f}")
    print(f"Prediction: {related}")
    print()


--- Evaluation Results ---


Thresholds and their corresponding accuracies:
   Threshold  Accuracy
0        0.5      0.55
1        0.6      0.70
2        0.7      0.90
3        0.8      0.70
4        0.9      0.50
5        1.0      0.50
6        1.1      0.50

Best Threshold: 0.70 with Accuracy: 90.00%
Course Related Questions:
 - How well did the instructor explain complex topics?
 - Were the assignments helpful for practice?
 - Was the course organized and easy to navigate?
 - Did you find the learning platform user-friendly?
 - What improvements would you suggest for this course?

 - ...
Non Course Related Questions:
 - Have you traveled anywhere interesting recently?
 - What’s your favorite way to relax after studying or working?
 - Is there a skill you'd love to master one day?
 - Do you prefer reading books or watching shows?
 - Favorite movie?
 - ...
Output of model from Course Related questions:
Question: How was the instructor’s teaching?
Cosine Similarity: 0.8680
Prediction:

## Intent Classification

In [66]:
intent_actions = {
    "yes_no": {
        "sentiment": True,
        "nlg": True,
        "summarization": False
    },
    "instructor": {
        "sentiment": True,
        "nlg": True,
        "summarization": False
    },
    "content": {
        "sentiment": True,
        "nlg": True,
        "summarization": False
    },
    "difficulty": {
        "sentiment": True,
        "nlg": True,
        "summarization": False
    },
    "career": {
        "sentiment": True,
        "nlg": True,
        "summarization": False
    },
    "general_opinion": {
        "sentiment": True,
        "nlg": True,
        "summarization": False
    },
    "course_overview": {
        "sentiment": False,
        "nlg": False,
        "summarization": True
    },
    "prerequisites": {
        "sentiment": False,
        "nlg": False,
        "summarization": True
    },
    "schedule": {
        "sentiment": False,
        "nlg": False,
        "summarization": True
    },
    "fees": {
        "sentiment": True,
        "nlg": True,
        "summarization": False
    },
    "certification": {
        "sentiment": True,
        "nlg": True,
        "summarization": False
    }
}

print(f"{'Intent Type':<20} {'Sentiment':<10} {'NLG':<10} {'Summarization':<15}")
print("-" * 55)
for intent, actions in intent_actions.items():
    print(f"{intent:<20} {str(actions['sentiment']):<10} {str(actions['nlg']):<10} {str(actions['summarization']):<15}")


intent_labels_readable = [
    "Is the user asking a yes or no question?",
    "Is the user asking about the instructor?",
    "Is the user asking about the course content or topics?",
    "Is the user asking about how difficult the course is?",
    "Is the user asking about career outcomes or job relevance?",
    "Is the user asking for general opinions from students?",
    "Is the user asking for a summary or overview of the course?",
    "Is the user asking about course prerequisites?",
    "Is the user asking about the course schedule or duration?",
    "Is the user asking about course fees or costs?",
    "Is the user asking about course certification or accreditation?"
]

label_map = {
    "Is the user asking a yes or no question?": "yes_no",
    "Is the user asking about the instructor?": "instructor",
    "Is the user asking about the course content or topics?": "content",
    "Is the user asking about how difficult the course is?": "difficulty",
    "Is the user asking about career outcomes or job relevance?": "career",
    "Is the user asking for general opinions from students?": "general_opinion",
    "Is the user asking for a summary or overview of the course?": "course_overview",
    "Is the user asking about course prerequisites?": "prerequisites",
    "Is the user asking about the course schedule or duration?": "schedule",
    "Is the user asking about course fees or costs?": "fees",
    "Is the user asking about course certification or accreditation?": "certification"
}

Intent Type          Sentiment  NLG        Summarization  
-------------------------------------------------------
yes_no               True       True       False          
instructor           True       True       False          
content              True       True       False          
difficulty           True       True       False          
career               True       True       False          
general_opinion      True       True       False          
course_overview      False      False      True           
prerequisites        False      False      True           
schedule             False      False      True           
fees                 True       True       False          
certification        True       True       False          


In [67]:
query = input("Enter your question related to the course: ")

# Encode the query
query_embedding = model.encode(query, convert_to_tensor=True).cpu()

# Compute cosine similarity with reference (course-related) embeddings
reference_embeddings = generated_embeddings.cpu()
cosine_scores = util.pytorch_cos_sim(query_embedding, reference_embeddings)
max_score = torch.max(cosine_scores).item()

# Set threshold
threshold = 0.7
print("Cosine distance of the question: ", max_score)
is_related = max_score >= threshold

if is_related:
    print("Related to the course reviews. Proceeding...")
    print("Question:", query)
    
    result = classifier(query, candidate_labels=intent_labels_readable, multi_label=False)
    # Print scores for each label
    print("Intent Scores:")
    for label, score in zip(result["labels"], result["scores"]):
        print(f"{label:<60} {score:.4f}")

    # Get top intent and its score
    predicted_label = result["labels"][0]
    predicted_score = result["scores"][0]
    predicted_intent = label_map[predicted_label]
    actions = intent_actions[predicted_intent]

    print(f"\nBest Intent: {predicted_intent} (Score: {predicted_score:.4f})")

    if actions["sentiment"]:
        print("Perform sentiment analysis")

    if actions["nlg"]:
        print("Generate answer using NLG")
    else:
        print("Summarize relevant content instead of generating an answer")

else:
    print(f"Question is not related to the course (Similarity Score: {max_score:.2f}). Please enter a course-related question.")


Cosine distance of the question:  0.8560590744018555
Related to the course reviews. Proceeding...
Question: Can you give me an overview of the course?
Intent Scores:
Is the user asking for a summary or overview of the course?  0.7674
Is the user asking about the course content or topics?       0.1231
Is the user asking a yes or no question?                     0.0227
Is the user asking for general opinions from students?       0.0214
Is the user asking about the course schedule or duration?    0.0194
Is the user asking about course prerequisites?               0.0119
Is the user asking about course certification or accreditation? 0.0116
Is the user asking about career outcomes or job relevance?   0.0099
Is the user asking about the instructor?                     0.0065
Is the user asking about course fees or costs?               0.0051
Is the user asking about how difficult the course is?        0.0011

Best Intent: course_overview (Score: 0.7674)
Summarize relevant content instead of

In [68]:
questions = [
    "Is this course suitable for beginners?",
    "How experienced is the instructor?",
    "What topics are covered in this course?",
    "Is this course difficult?",
    "Will this course help me in my career?",
    "What do students think about this course?",
    "Can you give me an overview of the course?",
    "What are the prerequisites for this course?",
    "What is the course schedule?",
    "How much does the course cost?",
    "Will I receive a certificate after completing the course?"
]

true_intents = [
    "yes_no",
    "instructor",
    "content",
    "difficulty",
    "career",
    "general_opinion",
    "course_overview",
    "prerequisites",
    "schedule",
    "fees",
    "certification"
]

correct_predictions = 0
total_questions = len(questions)
mispredicted_questions = []

for i, question in enumerate(questions):
    print(f"\nQuestion: {question}")
    result = classifier(question, candidate_labels=intent_labels_readable, multi_label=False)
    predicted_label = result["labels"][0]
    predicted_intent = label_map[predicted_label]
    print(f"Predicted Intent: {predicted_intent}")
    if predicted_intent == true_intents[i]:
        correct_predictions += 1
    else:
        mispredicted_questions.append((question, predicted_intent, true_intents[i]))

accuracy = (correct_predictions / total_questions) * 100
print(f"\nAccuracy: {accuracy:.2f}%")

if mispredicted_questions:
    print("\nMispredicted Questions:")
    for question, predicted, true in mispredicted_questions:
        print(f"Question: {question}\nPredicted: {predicted}, Correct: {true}\n")



Question: Is this course suitable for beginners?
Predicted Intent: general_opinion

Question: How experienced is the instructor?
Predicted Intent: instructor

Question: What topics are covered in this course?
Predicted Intent: content

Question: Is this course difficult?
Predicted Intent: difficulty

Question: Will this course help me in my career?
Predicted Intent: career

Question: What do students think about this course?
Predicted Intent: general_opinion

Question: Can you give me an overview of the course?
Predicted Intent: course_overview

Question: What are the prerequisites for this course?
Predicted Intent: prerequisites

Question: What is the course schedule?
Predicted Intent: content

Question: How much does the course cost?
Predicted Intent: fees

Question: Will I receive a certificate after completing the course?
Predicted Intent: certification

Accuracy: 81.82%

Mispredicted Questions:
Question: Is this course suitable for beginners?
Predicted: general_opinion, Correct: 

In [69]:
print("\n📄 Fetching reviews for the selected course...")
# Filter reviews for the selected course and institution
filtered_reviews = df_unique[ (df_unique["institution"] == selected_institution) & (df_unique["name"] == selected_course) ]["reviews"].dropna().tolist()

if not filtered_reviews: 
    print("No reviews found for the selected course.") 
    exit()
else:
    print("Total Reviews for the course:", len(filtered_reviews))

# Encode reviews using Sentence-BERT
print("Encoding reviews using Sentence-BERT...") 
review_embeddings = model.encode(filtered_reviews, convert_to_tensor=True).cpu()
print("Encoded all reviews.")

# Compute cosine similarity between query and each review
cosine_scores = util.pytorch_cos_sim(query_embedding, review_embeddings)[0]

# Filter reviews with similarity ≥ 0.5
threshold = 0.5
related_reviews = [
    (filtered_reviews[i], score.item())
    for i, score in enumerate(cosine_scores) if score >= threshold
]

print("Question: ", query)
print("Related Sentences Count: ", len(related_reviews))
if related_reviews:
    print(f"\nReviews related to the query (threshold ≥ {threshold}): \n")
    for review, score in sorted(related_reviews, key=lambda x: x[1], reverse=True):
        print(f"- Score: {score:.4f} | Review: {review}")
else:
    print(f"\nNo reviews found with similarity ≥ {threshold}.")


📄 Fetching reviews for the selected course...
Total Reviews for the course: 308
Encoding reviews using Sentence-BERT...
Encoded all reviews.
Question:  Can you give me an overview of the course?
Related Sentences Count:  126

Reviews related to the query (threshold ≥ 0.5): 

- Score: 0.7471 | Review: Good overview course. 
- Score: 0.7326 | Review: This course is brilliant. Organised, well paced and has really consolidated the direction of study I would like to pursue. Thank you!
- Score: 0.7266 | Review: This course is very informative and inspiring
- Score: 0.7159 | Review: This was  one of the best courses I've taken so far. Thanks for providing this great material and information to students worldwide.
- Score: 0.7138 | Review: This is a very informative course, highly recommend!
- Score: 0.7098 | Review: It is a very interesting course. The lecture is easy to understand and the lecturer is very friendly. I got a lot of interesting and useful knowledge not only for my jobs but als

In [70]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
import pandas as pd

MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(MODEL)

def polarity_scores_roberta(example):
    encoded_text = tokenizer(example, return_tensors='pt', truncation=True, max_length=512)
    try:
        output = sentiment_model(**encoded_text)
    except IndexError as e:
        print(f"Error processing input: {e}")
        return {"error": "index_error", "details": str(e)}
    scores = output[0][0].detach().numpy()
    scores = softmax(scores)
    return {
        'negative': scores[0],
        'neutral': scores[1],
        'positive': scores[2]
    }

In [71]:
sentiment_results = []
if actions["sentiment"]:
    print("Performing sentiment analysis...")

    for review, score in sorted(related_reviews, key=lambda x: x[1], reverse=True):
        sentiment = polarity_scores_roberta(review)
        sentiment_results.append({
            "review": review,
            "similarity_score": score,
            "positive": sentiment["positive"],
            "neutral": sentiment["neutral"],
            "negative": sentiment["negative"]
        })

        print(f"- Score: {score:.4f} | Review: {review}")
        print(f"  Sentiment → Positive: {sentiment['positive']:.3f}, Neutral: {sentiment['neutral']:.3f}, Negative: {sentiment['negative']:.3f}\n")



In [72]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import tqdm

model_name = "t5-small"
tokenizer_t5 = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

def analyze_sentiment(text, max_length=512):
    """Analyze sentiment of text using RoBERTa."""
    # Handle texts that are too long
    if len(text.split()) > max_length:
        # Simple truncation for demonstration purposes
        text = " ".join(text.split()[:max_length])
        
    # Encode text
    encoded_input = tokenizer(text, return_tensors='pt', truncation=True, max_length=max_length)
    
    # Get sentiment prediction
    with torch.no_grad():
        output = sentiment_model(**encoded_input)
        scores = output.logits[0].softmax(0)
        
    # RoBERTa sentiment model typically returns: [negative, neutral, positive]
    sentiment_labels = ['negative', 'neutral', 'positive']
    sentiment_scores = {label: float(score) for label, score in zip(sentiment_labels, scores)}
    
    # Get the predicted sentiment
    predicted_sentiment = sentiment_labels[scores.argmax().item()]
    
    return {
        'sentiment': predicted_sentiment,
        'scores': sentiment_scores
    }

def chunk_text(text, max_tokens=450):
    """Split the text into manageable chunks for the model."""
    words = text.split()
    for i in range(0, len(words), max_tokens):
        yield " ".join(words[i:i + max_tokens])

def summarize_with_t5(text, sentiment_info=None):
    """Abstractive summarization using T5 with sentiment guidance."""
    # Add sentiment information to prompt if available
    if sentiment_info:
        prompt_prefix = f"summarize this {sentiment_info['sentiment']} review: "
    else:
        prompt_prefix = "summarize: "
        
    input_text = prompt_prefix + text
    
    input_ids = tokenizer_t5.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
    
    # Adjust generation parameters based on sentiment
    length_penalty = 2.0
    num_beams = 4
    
    if sentiment_info:
        # Example: more detailed summaries for positive reviews
        if sentiment_info['sentiment'] == 'positive':
            length_penalty = 2.5  # Encourage slightly longer summaries for positive reviews
        # More concise summaries for negative reviews
        elif sentiment_info['sentiment'] == 'negative':
            length_penalty = 1.5  # Slightly shorter summaries for negative reviews
    
    summary_ids = model.generate(
        input_ids, 
        max_length=100, 
        min_length=30, 
        length_penalty=length_penalty,
        num_beams=num_beams, 
        early_stopping=True
    )
    
    return tokenizer_t5.decode(summary_ids[0], skip_special_tokens=True)

def summarize_in_chunks(text, sentiment_info=None):
    """Summarize large texts by dividing and merging summaries, with sentiment guidance."""
    chunks = list(chunk_text(text))
    partial_summaries = []
    print(f"Processing {len(chunks)} chunks")
    
    for chunk in chunks:
        summary = summarize_with_t5(chunk, sentiment_info)
        partial_summaries.append(summary)
        
    # Merge partial summaries and summarize again if needed
    combined_summary = " ".join(partial_summaries)
    
    # Optionally do a final summarization
    if len(combined_summary.split()) > 400:
        return summarize_with_t5(combined_summary, sentiment_info)
    else:
        return combined_summary

def process_reviews(reviews):
    if not reviews:
        return "Empty Reviews"

    # Analyze sentiment for each review
    sentiment_results = []
    print("Analyzing sentiment of reviews...")
    for review in reviews:
        if review and len(review.strip()) > 0:
            sentiment = analyze_sentiment(review)
            sentiment_results.append(sentiment)
            print(f"\n📄 Review: {review}")
            print(f"🔍 Predicted Sentiment: {sentiment['sentiment'].upper()}")
    
    # Calculate overall sentiment distribution
    sentiment_counts = {
        'positive': sum(1 for res in sentiment_results if res['sentiment'] == 'positive'),
        'neutral': sum(1 for res in sentiment_results if res['sentiment'] == 'neutral'),
        'negative': sum(1 for res in sentiment_results if res['sentiment'] == 'negative')
    }
    
    total_reviews = len(sentiment_results)
    sentiment_distribution = {
        label: count / total_reviews if total_reviews > 0 else 0 
        for label, count in sentiment_counts.items()
    }
    
    # Determine overall sentiment
    overall_sentiment = max(sentiment_distribution, key=sentiment_distribution.get)
    
    print(f"\nSentiment Analysis Results:")
    print(f"Positive: {sentiment_distribution['positive']:.2%}")
    print(f"Neutral: {sentiment_distribution['neutral']:.2%}")
    print(f"Negative: {sentiment_distribution['negative']:.2%}")
    print(f"Overall sentiment: {overall_sentiment}")
    
    # Group reviews by sentiment
    positive_reviews = [reviews[i] for i, res in enumerate(sentiment_results) if res['sentiment'] == 'positive']
    neutral_reviews = [reviews[i] for i, res in enumerate(sentiment_results) if res['sentiment'] == 'neutral']
    negative_reviews = [reviews[i] for i, res in enumerate(sentiment_results) if res['sentiment'] == 'negative']
    
    # Create sentiment information for summarization
    sentiment_info = {
        'sentiment': overall_sentiment,
        'distribution': sentiment_distribution
    }
    
    # Summarize each sentiment group
    results = {}
    
    if positive_reviews:
        print("\nSummarizing positive reviews...")
        positive_text = " ".join(positive_reviews)
        results['positive'] = summarize_in_chunks(positive_text, {'sentiment': 'positive'})
    
    if neutral_reviews:
        print("\nSummarizing neutral reviews...")
        neutral_text = " ".join(neutral_reviews)
        results['neutral'] = summarize_in_chunks(neutral_text, {'sentiment': 'neutral'})
    
    if negative_reviews:
        print("\nSummarizing negative reviews...")
        negative_text = " ".join(negative_reviews)
        results['negative'] = summarize_in_chunks(negative_text, {'sentiment': 'negative'})
    
    # Create overall summary with sentiment guidance
    print("\nCreating overall summary...")
    all_reviews_text = " ".join(reviews)
    results['overall'] = summarize_in_chunks(all_reviews_text, sentiment_info)
    
    return results

related_reviews_without_score = [review for review, _ in related_reviews]
for i in related_reviews_without_score:
    print(i)
results = process_reviews(related_reviews_without_score)
    
if isinstance(results, str):
    print(results)
else:
    print("\n📊 Sentiment-Based Summaries:")
        
    if 'positive' in results:
        print("\n✅ POSITIVE REVIEWS SUMMARY:")
        print(results['positive'])
    
    if 'neutral' in results:
        print("\n⚖️ NEUTRAL REVIEWS SUMMARY:")
        print(results['neutral'])
        
    if 'negative' in results:
        print("\n❌ NEGATIVE REVIEWS SUMMARY:")
        print(results['negative'])
        
    print("\n📌 OVERALL SUMMARY:")
    print(results['overall'])

Overall a very good course in terms of covering a broad range of topics. Suitable as a foundation to go onto to do more specialized courses in various parts of modelling. Some of the mathematical and quantitative methods could be better explained with more structured processes and formulae. For the learner that isn't familiar with these methodologies, some specific background reading on how best to approach the math would have been very useful. Having said that, well done to the lecturer and the rest of the staff and course organizers who tried their best to cover what are some quite complicated concepts. 
The course is interesting as it shows some new ways of thinking. It has some fun models which seem to be interesting to play with. The concepts given in the course may be used in many different ways. Nevertheless, too much time of the course is wasted on simple calculations like 1/3+1/3 = 2/3 which doesn't help to undrestand the concepts or to memorise them. Some practice in building