In [31]:
from google.colab import files

# Colab does not overwrite an existing file on upload: a repeated upload is
# stored under a suffixed name such as "reviews (2).csv", which would leave
# /content/reviews.csv (the path the analysis reads) holding stale data.
uploaded = files.upload()  # Select reviews.csv from Downloads or another folder

# Persist the freshly uploaded bytes under the canonical path so that later
# cells always read the newest data.
if len(uploaded) == 1:
    (uploaded_bytes,) = uploaded.values()
    with open('/content/reviews.csv', 'wb') as canonical_file:
        canonical_file.write(uploaded_bytes)
    print("Wrote the uploaded file to /content/reviews.csv")
elif uploaded:
    # Ambiguous selection: do not guess which file is the review export.
    print("Several files were uploaded; /content/reviews.csv was left unchanged")

Saving reviews.csv to reviews (2).csv


In [32]:
import os
import re

import pandas as pd
import spacy
from google.colab import files
from transformers import pipeline

# Install dependencies
!pip install pandas transformers spacy google-play-scraper
!python -m spacy download en_core_web_sm

# Load data
# Prefer the previously analysed file when it exists; otherwise fall back to
# the raw reviews.csv export.
analysis_csv_path = '/content/reviews_with_analysis.csv'
input_csv_path = analysis_csv_path if os.path.exists(analysis_csv_path) else '/content/reviews.csv'
df = pd.read_csv(input_csv_path)

# Missing reviews become empty strings so every value is a str downstream.
review_text = df['review'].fillna('')
df['review'] = review_text.astype(str)

# Sentiment analysis
# Build the Hugging Face pipeline once; get_sentiment reuses it for every review.
SENTIMENT_MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
sentiment_analyzer = pipeline(
    task="sentiment-analysis",
    model=SENTIMENT_MODEL_NAME,
    device="cpu",
)
def get_sentiment(text, analyzer=None):
    """Classify one review and return ``(label, score)``.

    Args:
        text: Review text. Only the first 512 characters are sent to the
            classifier (a character slice, not a token count).
        analyzer: Optional callable that takes a string and returns a list
            whose first item is a dict with ``'label'`` and ``'score'`` keys.
            Defaults to the notebook-level ``sentiment_analyzer``.

    Returns:
        A ``(label, score)`` tuple taken from the classifier result, or
        ``("NEUTRAL", 0.0)`` when the text is blank / not a string or the
        classifier raises an error.
    """
    # A blank review carries no sentiment; skip the model instead of letting
    # it attach an arbitrary label to an empty string.
    if not isinstance(text, str) or not text.strip():
        return "NEUTRAL", 0.0

    if analyzer is None:
        analyzer = sentiment_analyzer

    try:
        result = analyzer(text[:512])[0]
        return result['label'], result['score']
    except Exception:
        # Best-effort fallback for classifier failures. Catching Exception
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate so
        # a long run can still be interrupted.
        return "NEUTRAL", 0.0

# Apply sentiment if not already in df
# Both columns are needed downstream, so recompute when either one is missing.
if not {'sentiment_label', 'sentiment_score'}.issubset(df.columns):
    sentiment_pairs = [get_sentiment(review_text) for review_text in df['review']]
    # Build each column explicitly: unpacking zip(*pairs) into two targets
    # raises ValueError when the frame has no rows.
    df['sentiment_label'] = [label for label, _ in sentiment_pairs]
    df['sentiment_score'] = pd.Series(
        [score for _, score in sentiment_pairs], index=df.index, dtype='float64'
    )

# Thematic analysis with keyword-based themes
themes = {
    "Account Access Issues": ["login", "access", "account", "password", "registration"],
    "Transaction Performance": ["transfer", "payment", "deposit", "withdrawal", "fee"],
    "User Interface & Experience": ["ui", "design", "navigation", "layout", "usability"],
    "Customer Support": ["support", "help", "contact", "response", "service"],
    "Security": ["security", "safe", "fraud", "protection", "privacy"],
    "Features": ["feature", "function", "update", "new", "improvement"],
    "Other": []
}

# Endings accepted after a keyword so that simple inflections ("payments",
# "updated", "helping") still count as a hit.
_THEME_KEYWORD_SUFFIX = r"(?:s|es|d|ed|ing)?"


def _mentions_keyword(keyword, text_lower):
    """Return True when ``keyword`` occurs in ``text_lower`` as a whole word."""
    pattern = rf"\b{re.escape(keyword.lower())}{_THEME_KEYWORD_SUFFIX}\b"
    return re.search(pattern, text_lower) is not None


def assign_themes(review):
    """Return the themes whose keywords appear in ``review``, joined by ``;``.

    Keywords are matched as whole words (optionally followed by a common
    inflection ending) rather than as raw substrings, so "ui" no longer
    matches "quick" or "build" and "fee" no longer matches "feel" or
    "feedback". Themes are reported in the order they are declared in
    ``themes``.

    Args:
        review: Review text. Values that are not strings are treated as
            having no text.

    Returns:
        A ``;``-separated string of theme names, or ``"Other"`` when no
        keyword matches.
    """
    if not isinstance(review, str):
        return "Other"

    review_lower = review.lower()
    assigned_themes = [
        theme
        for theme, keywords in themes.items()
        if any(_mentions_keyword(keyword, review_lower) for keyword in keywords)
    ]
    return ";".join(assigned_themes) if assigned_themes else "Other"

# Tag every review with its matching theme names.
df['theme_categories'] = df['review'].map(assign_themes)

# Add review_id
# Sequential 1-based identifier in the current row order.
review_count = len(df)
df['review_id'] = range(1, review_count + 1)

# Aggregate sentiment scores
# sentiment_score is the classifier's confidence in whichever label it chose,
# so its plain mean blends POSITIVE and NEGATIVE confidences and says little
# about polarity. Keep that column for compatibility and add signed_sentiment
# (+score for POSITIVE, -score for NEGATIVE, 0 for anything else).
label_sign = df['sentiment_label'].map({'POSITIVE': 1.0, 'NEGATIVE': -1.0}).fillna(0.0)
aggregated = (
    df.assign(signed_sentiment=df['sentiment_score'] * label_sign)
    .groupby(['bank', 'rating'])[['sentiment_score', 'signed_sentiment']]
    .mean()
    .reset_index()
)
aggregated.to_csv('/content/sentiment_aggregation.csv', index=False)

# Save results
df.to_csv('/content/reviews_with_analysis.csv', index=False)
print(f"Saved {len(df)} reviews with sentiment and themes to reviews_with_analysis.csv")

# Download files
files.download('/content/reviews_with_analysis.csv')
files.download('/content/sentiment_aggregation.csv')

# Verify files
!ls /content

# Display first few rows
print(df.head())

# Sentiment distribution
print("Sentiment Label Counts:")
print(df['sentiment_label'].value_counts())

# Theme distribution
print("Theme Category Counts:")
print(df['theme_categories'].value_counts())

# Total number of reviews
print(f"Number of reviews: {len(df)}")  # Should be 1185

# Display DataFrame columns
print("Columns in DataFrame:")
print(df.columns)  # Should include review_id, sentiment_label, sentiment_score, theme_categories

# Aggregated sentiment score by bank and rating
print("Aggregated Sentiment Scores:")
print(aggregated)
print("Missing values:")
print(df.isnull().sum())

print("Sample of themes and sentiments:")
# DataFrame.sample raises when asked for more rows than the frame holds, so
# cap the request; the fixed seed keeps the printed sample stable across runs.
sample_size = min(5, len(df))
print(df[['review', 'sentiment_label', 'theme_categories']].sample(sample_size, random_state=42))

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m49.4 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


Device set to use cpu


Saved 1185 reviews with sentiment and themes to reviews_with_analysis.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

 10-academy-week2   reviews.csv		        sentiment_aggregation.csv
'reviews (1).csv'   reviews_with_analysis.csv
'reviews (2).csv'   sample_data
                                            review  rating        date  \
0                      what is this app problem???       1  2025-06-05   
1     the app is proactive and a good connections.       5  2025-06-05   
2  I cannot send to cbebirr app. through this app.       3  2025-06-05   
3                                             good       4  2025-06-05   
4                                   not functional       1  2025-06-05   

                          bank       source sentiment_label  sentiment_score  \
0  Commercial Bank of Ethiopia  Google Play        NEGATIVE         0.999623   
1  Commercial Bank of Ethiopia  Google Play        POSITIVE         0.999868   
2  Commercial Bank of Ethiopia  Google Play        NEGATIVE         0.995335   
3  Commercial Bank of Ethiopia  Google Play        POSITIVE         0.999816   
4  Commerc

In [None]:
from google.colab import files

# Colab does not overwrite an existing file on upload: a repeated upload is
# stored under a suffixed name such as "reviews (2).csv", which would leave
# /content/reviews.csv (the path the analysis reads) holding stale data.
uploaded = files.upload()  # Select reviews.csv from Downloads or another folder

# Persist the freshly uploaded bytes under the canonical path so that later
# cells always read the newest data.
if len(uploaded) == 1:
    (uploaded_bytes,) = uploaded.values()
    with open('/content/reviews.csv', 'wb') as canonical_file:
        canonical_file.write(uploaded_bytes)
    print("Wrote the uploaded file to /content/reviews.csv")
elif uploaded:
    # Ambiguous selection: do not guess which file is the review export.
    print("Several files were uploaded; /content/reviews.csv was left unchanged")

In [36]:
!pwd
!ls -R /content
!mv /content/Week2_Task2.ipynb /content/10-academy-week2/
!ls /content/10-academy-week2

/content
/content:
 10-academy-week2   reviews.csv		        sentiment_aggregation.csv
'reviews (1).csv'   reviews_with_analysis.csv
'reviews (2).csv'   sample_data

/content/10-academy-week2:
README.md    reviews_with_analysis.csv	task2_notes.md
reviews.csv  sentiment_aggregation.csv

/content/sample_data:
anscombe.json		      mnist_test.csv
california_housing_test.csv   mnist_train_small.csv
california_housing_train.csv  README.md
mv: cannot stat '/content/Week2_Task2.ipynb': No such file or directory
README.md    reviews_with_analysis.csv	task2_notes.md
reviews.csv  sentiment_aggregation.csv


# New Section

# New Section