In [4]:

# 1. PROJECT OVERVIEW
# -----------------------------------------------------------
# This notebook performs NLP analysis on student feedback data.
# Goals:
# - Extract top themes from Wins, Losses, and Blockers
# - Perform sentiment analysis
# - Visualize trends
# - Provide actionable recommendations
# -----------------------------------------------------------
#installing required packages
# install nltk if not already installed
!pip install nltk
#install wordcloud if not already installed
!pip install wordcloud
# install textblob if not already installed
!pip install textblob



In [5]:

# -----------------------------------------------------------
# 2. IMPORT LIBRARIES
# -----------------------------------------------------------
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from textblob import TextBlob
import seaborn as sns


In [6]:

# Download NLTK resources (run once)
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Thuto\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Thuto\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Thuto\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [7]:

# -----------------------------------------------------------
# 3. LOAD AND INSPECT DATA
# -----------------------------------------------------------
# Load the CSV file
df = pd.read_csv("Copy of Umuzi XB1 Check in (Responses) - Form Responses 1 - Copy of Umuzi XB1 Check in (Responses) - Form Responses 1.csv")


In [8]:
# show first few rows
df.head()

Unnamed: 0,Timestamp,Column 2,Full name,Please enter the date today,"Share a win from the last week (what went well, something you enjoyed)",Share a loss (something that was challenging or did not go well),"Share a blocker, if any (anything that stopped you from doing what you needed to do)",Anything else you would like to share or ask
0,7/9/2025 14:34:49,,Student 1,7/9/2025,Completing my first week with Umuzi gave me co...,I didn’t get opportunities from two companies ...,Being financially unstable has been draining m...,I appreciate Umuzi for this opportunity to sho...
1,7/9/2025 14:43:15,,Student 2,7/9/2025,I enjoyed introspecting myself on the basis of...,Except for being sick and experiencing challen...,"None, only temporary set backs (reception and ...",Nothing for now.
2,7/9/2025 14:49:40,,Student 3,7/9/2025,Submitting all my work in time and completing ...,I don’t have any,"Data , I couldn’t join some meetings because I...",No thank you
3,7/9/2025 14:50:41,,Student 4,7/9/2025,I submitted most of the assigned assignments,I did not understand some assignments s well a...,Spending most time in class leading to having ...,"In overall, I am doing well and trying to do a..."
4,7/9/2025 15:14:46,,Student 5,7/9/2025,I enjoyed the Life Lifeline activity. I got to...,,"I forgot to login to Google classroom, until I...",No.


In [9]:

# Check columns
print(df.columns)


Index(['Timestamp', 'Column 2', 'Full name', 'Please enter the date today',
       'Share a win from the last week (what went well, something you enjoyed)',
       'Share a loss (something that was challenging or did not go well)',
       'Share a blocker, if any (anything that stopped you from doing what you needed to do)',
       'Anything else you would like to share or ask'],
      dtype='object')


In [16]:

# Focus on relevant columns
Win = "Share a win from the last week (what went well, something you enjoyed)"
Loss = "Share a loss (something that was challenging or did not go well)"
Blocker = "Share a blocker, if any (anything that stopped you from doing what you needed to do)"
#copy_df = df[[Win, Loss, Blocker]]
print(copy_df.head())

  Share a win from the last week (what went well, something you enjoyed)  \
0  Completing my first week with Umuzi gave me co...                       
1  I enjoyed introspecting myself on the basis of...                       
2  Submitting all my work in time and completing ...                       
3       I submitted most of the assigned assignments                       
4  I enjoyed the Life Lifeline activity. I got to...                       

  Share a loss (something that was challenging or did not go well)  \
0  I didn’t get opportunities from two companies ...                 
1  Except for being sick and experiencing challen...                 
2                                   I don’t have any                 
3  I did not understand some assignments s well a...                 
4                                                NaN                 

  Share a blocker, if any (anything that stopped you from doing what you needed to do)  
0  Being financially unstable has

In [17]:

# -----------------------------------------------------------
# 4. DATA CLEANING & PREPROCESSING
# -----------------------------------------------------------
# ensure we're worrking on a copy
df = copy_df.copy()

# Handle missing values

df[Win] = df[Win].fillna("", inplace=True)
df[Loss] = df[Loss].fillna("", inplace=True)
df[Blocker] = df[Blocker].fillna("", inplace=True)
# Combine all feedback copy into one column for analysis
df['All_Copy'] = df[Win].astype(str) + " " + df[Loss].astype(str) + " " + df[Blocker].astype(str)
print(df['All_Copy'].head())

0    None None None
1    None None None
2    None None None
3    None None None
4    None None None
Name: All_Copy, dtype: object


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[Win] = df[Win].fillna("", inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[Loss] = df[Loss].fillna("", inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alway

In [None]:

# -----------------------------------------------------------
# 5. TOKENIZATION, STOPWORD REMOVAL, LEMMATIZATION
# -----------------------------------------------------------
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess(text):
    # Lowercase
    text = text.lower()
    # Remove punctuation and numbers
    text = re.sub(r'[^a-z\\s]', '', text)
    # Tokenize
    tokens = word_tokenize(text)
    # Remove stopwords and lemmatize
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
    return tokens


In [None]:

# Apply preprocessing
wins_tokens = [token for sentence in df[win_col] for token in preprocess(sentence)]
losses_tokens = [token for sentence in df[loss_col] for token in preprocess(sentence)]
blockers_tokens = [token for sentence in df[blocker_col] for token in preprocess(sentence)]


In [None]:

# -----------------------------------------------------------
# 6. FREQUENCY ANALYSIS (TOP THEMES)
# -----------------------------------------------------------
wins_top5 = Counter(wins_tokens).most_common(5)
losses_top5 = Counter(losses_tokens).most_common(5)
blockers_top5 = Counter(blockers_tokens).most_common(5)

print("Top 5 Wins Themes:", wins_top5)
print("Top 5 Losses Themes:", losses_top5)
print("Top 5 Blockers Themes:", blockers_top5)


In [None]:

# -----------------------------------------------------------
# 7. SENTIMENT ANALYSIS
# -----------------------------------------------------------
wins_sentiment = [TextBlob(str(text)).sentiment.polarity for text in df[win_col]]
losses_sentiment = [TextBlob(str(text)).sentiment.polarity for text in df[loss_col]]

wins_summary = {
    'positive': sum(1 for s in wins_sentiment if s > 0),
    'neutral': sum(1 for s in wins_sentiment if s == 0),
    'negative': sum(1 for s in wins_sentiment if s < 0)
}

losses_summary = {
    'positive': sum(1 for s in losses_sentiment if s > 0),
    'neutral': sum(1 for s in losses_sentiment if s == 0),
    'negative': sum(1 for s in losses_sentiment if s < 0)
}

print("Wins Sentiment Summary:", wins_summary)
print("Losses Sentiment Summary:", losses_summary)


In [None]:

# -----------------------------------------------------------
# 8. VISUALIZATIONS
# -----------------------------------------------------------
# Bar charts for top themes
sns.barplot(x=[w[0] for w in wins_top5], y=[w[1] for w in wins_top5])
plt.title("Top 5 Wins Themes")
plt.show()

sns.barplot(x=[l[0] for l in losses_top5], y=[l[1] for l in losses_top5])
plt.title("Top 5 Losses Themes")
plt.show()

sns.barplot(x=[b[0] for b in blockers_top5], y=[b[1] for b in blockers_top5])
plt.title("Top 5 Blockers Themes")
plt.show()


In [None]:

# Word Clouds
for name, tokens in [('Wins', wins_tokens), ('Losses', losses_tokens), ('Blockers', blockers_tokens)]:
    wc = WordCloud(width=800, height=400, background_color='white').generate(' '.join(tokens))
    plt.figure(figsize=(10,5))
    plt.imshow(wc, interpolation='bilinear')
    plt.axis('off')
    plt.title(f"{name} Word Cloud")
    plt.show()

# -----------------------------------------------------------
# 9. INSIGHTS & RECOMMENDATIONS
# -----------------------------------------------------------
print(\"\\nRecommendations:\")
print(\"1. Improve internet/data support for students (frequent blocker).\")
print(\"2. Offer time management workshops (common loss theme).\")
print(\"3. Provide financial assistance or guidance (blocker and loss theme).\")
print(\"4. Enhance clarity in instructions and resources (loss theme).\")
print(\"5. Continue motivational and career planning activities (win theme).\")

# -----------------------------------------------------------
# 10. EXPORT RESULTS
# -----------------------------------------------------------
# Save summary as JSON
import json
summary = {
    'wins_top5': wins_top5,
    'losses_top5': losses_top5,
    'blockers_top5': blockers_top5,
    'wins_sentiment': wins_summary,
    'losses_sentiment': losses_summary
}
with open('analysis_summary.json', 'w') as f:
    json.dump(summary, f, indent=4)
