<a href="https://colab.research.google.com/github/nidjosep/student-feedback-analysis/blob/master/Model_3_Actionable_Insights_from_Student_Feedback_Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Aspect-Based Sentiment Analysis for Student Feedback**

In [1]:
!pip install --upgrade transformers sentencepiece tensorflow

Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
Collecting tensorflow
  Downloading tensorflow-2.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (475.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m475.2/475.2 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Collecting tensorboard<2.16,>=2.15 (from tensorflow)
  Downloading tensorboard-2.15.1-py3-none-any.whl (5.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m73.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorflow-estimator<2.16,>=2.15.0 (from tensorflow)
  Downloading tensorflow_estimator-2.15.0-py2.py3-none-any.whl (441 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m442.0/442.0 kB[0m [31m29.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollec

In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch.nn.functional as F
from transformers import pipeline
from collections import Counter
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch.nn.functional as F
from transformers import pipeline

import nltk
from nltk.corpus import stopwords

# Fetch the NLTK data used below: the English stop-word list, the Punkt
# sentence tokenizer and the averaged-perceptron POS tagger.
# NOTE: newer NLTK releases (3.9+) look the tokenizer and tagger up under the
# 'punkt_tab' / 'averaged_perceptron_tagger_eng' identifiers, so both the
# legacy and the current names are requested to keep the notebook runnable
# across versions.
for _resource in (
    'stopwords',
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(_resource)

# Set (not list) so membership tests during aspect extraction are O(1).
stop_words = set(stopwords.words('english'))


def extract_aspects_from_sentence(sentence):
    """Extract candidate aspect terms from a piece of feedback text.

    The text is lower-cased, split into sentences and POS-tagged with NLTK.
    Two adjacent tokens that are both tagged ``NN`` form a two-word aspect
    (e.g. "course content").  Any other ``NN`` token becomes a single-word
    aspect provided it is purely alphabetic, is not in ``stop_words`` and is
    not a member of any two-word aspect found in the text.

    Args:
        sentence: Feedback text; may contain more than one sentence.

    Returns:
        A sorted list of unique, lower-case aspect strings.  The list is
        empty when no ``NN`` token is found.
    """
    noun_pairs = set()      # "word1 word2" compound aspects
    paired_nouns = set()    # individual words that belong to some compound
    single_nouns = set()    # stand-alone noun candidates

    for line in nltk.sent_tokenize(sentence.lower()):
        tagged = nltk.pos_tag(nltk.word_tokenize(line))

        # Adjacent NN/NN tokens are treated as one compound aspect.
        for (word, tag), (next_word, next_tag) in zip(tagged, tagged[1:]):
            if tag == "NN" and next_tag == "NN":
                noun_pairs.add(f"{word} {next_word}")
                paired_nouns.update((word, next_word))

        for word, tag in tagged:
            if tag == "NN" and word.isalpha() and word not in stop_words:
                single_nouns.add(word)

    # Compound members are removed only after every sentence has been scanned,
    # so the result does not depend on whether the compound appears before or
    # after the stand-alone mention of the same noun.
    aspects = (single_nouns - paired_nouns) | noun_pairs

    # Sorted so callers get a reproducible order (set iteration order for
    # strings varies with the interpreter's hash seed).
    return sorted(aspects)

def identify_aspects(feedbacks, top_n=3):
    """Return the most frequently mentioned aspects across ``feedbacks``.

    Aspects are extracted from every feedback with
    ``extract_aspects_from_sentence`` and tallied; each feedback contributes
    one count per distinct aspect it mentions.

    Args:
        feedbacks: Iterable of feedback strings.
        top_n: Number of aspects to return (defaults to 3, the previously
            hard-coded value).

    Returns:
        Up to ``top_n`` aspect strings, upper-cased, most frequent first.
    """
    aspect_counter = Counter()

    for feedback in feedbacks:
        # Counter.most_common orders equal counts by first insertion.  Sorting
        # each feedback's aspects fixes that insertion order, so ties are
        # broken the same way on every run.
        aspect_counter.update(sorted(extract_aspects_from_sentence(feedback)))

    return [aspect.upper() for aspect, _ in aspect_counter.most_common(top_n)]

# Example feedback used to demonstrate the pipeline: one free-text comment
# per list entry.
feedbacks = [
    "The lecturing experience was really good, but the lab materials were hard to understand.",
    "The lecturing was kind of good, but I struggled a lot with the lab.",
    "I really enjoyed the course overall.",
    "Lectures were informative but labs were not up to the mark.",
    "Excellent teaching but lab sessions were not well organized.",
    "The course content was engaging, but the lab equipment was often faulty.",
    "Found the lectures to be quite enlightening, though lab support was lacking.",
    "Lectures were brilliant, but the lab assistants were not helpful.",
    "Enjoyed the teaching methods, but the lab experiments were confusing.",
    "The professor was knowledgeable, but lab sessions were often rushed.",
    "Lectures were okay, but lab sessions lacked clarity.",
    "The teaching was great, but lab facilities need improvement.",
    "Found the course content interesting, but the labs were uninspiring.",
    "The teacher was excellent, but the labs were poorly equipped.",
    "Loved the lectures, but the labs were a bit disorganized.",
    "The lectures were engaging, but I didn't learn much from the labs.",
    "Good teaching, but lab sessions were not very useful.",
    "The professor was great at teaching, but lab activities were confusing.",
    "Lectures were insightful, but labs were not very interesting.",
    "Really liked the lectures, but the labs could be better."
]

# Aspects mentioned most often across all feedback; these are the aspects
# scored for sentiment further down.
top_aspects = identify_aspects(feedbacks)
print(f"Top 3 aspects identified are : {top_aspects}\n")


# Load the Aspect-Based Sentiment Analysis model and its tokenizer.
import torch  # needed here only for torch.no_grad() during inference

ABSA_MODEL_NAME = "yangheng/deberta-v3-base-absa-v1.1"
absa_tokenizer = AutoTokenizer.from_pretrained(ABSA_MODEL_NAME)
absa_model = AutoModelForSequenceClassification.from_pretrained(ABSA_MODEL_NAME)

# Label for each logit index, in the order this script has always used.
# NOTE(review): presumably matches absa_model.config.id2label - confirm.
SENTIMENT_LABELS = ("negative", "neutral", "positive")

# Score every feedback against each of the top aspects.
for feedback in feedbacks:
    print(f"\nFeedback: {feedback}\n")
    for aspect in top_aspects:
        # NOTE(review): the tokenizer adds its own special tokens by default,
        # so the literal [CLS]/[SEP] markers may end up duplicated; passing
        # the aspect as `text_pair` might be the intended usage - confirm
        # against the model card before changing, as scores would shift.
        inputs = absa_tokenizer(f"[CLS] {feedback} [SEP] {aspect} [SEP]", return_tensors="pt")

        # Inference only: skip autograd bookkeeping for the forward pass.
        with torch.no_grad():
            logits = absa_model(**inputs).logits

        # Single input per call, so take row 0 of the class probabilities.
        probs = F.softmax(logits, dim=1)[0].numpy()

        # Report the most probable sentiment class and its probability.
        max_index = int(probs.argmax())
        emotion = SENTIMENT_LABELS[max_index]
        score = probs[max_index]

        print(f"Sentiment of aspect '{aspect}' is {emotion} with score {score:.4f}")

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


Top 3 aspects identified are : ['LAB', 'TEACHING', 'COURSE CONTENT']



tokenizer_config.json:   0%|          | 0.00/372 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/18.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/156 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/1.03k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/738M [00:00<?, ?B/s]


Feedback: The lecturing experience was really good, but the lab materials were hard to understand.

Sentiment of aspect 'LAB' is negative with score 0.9960
Sentiment of aspect 'TEACHING' is positive with score 0.9899
Sentiment of aspect 'COURSE CONTENT' is positive with score 0.8197

Feedback: The lecturing was kind of good, but I struggled a lot with the lab.

Sentiment of aspect 'LAB' is negative with score 0.9974
Sentiment of aspect 'TEACHING' is positive with score 0.9470
Sentiment of aspect 'COURSE CONTENT' is positive with score 0.3759

Feedback: I really enjoyed the course overall.

Sentiment of aspect 'LAB' is neutral with score 0.7085
Sentiment of aspect 'TEACHING' is positive with score 0.9710
Sentiment of aspect 'COURSE CONTENT' is positive with score 0.9940

Feedback: Lectures were informative but labs were not up to the mark.

Sentiment of aspect 'LAB' is negative with score 0.9947
Sentiment of aspect 'TEACHING' is positive with score 0.9670
Sentiment of aspect 'COURSE CO