<h1 style="font-size:3rem;">App Sentiment Analysis</h1>
<h3 style="font-size:2rem;">Extract reviews for various apps</h3>
<h5 style="font-size:1rem;">Catima — Loyalty Card Wallet</h5>

In [1]:
!pip install google-play-scraper
!pip install pandas numpy plotly



In [2]:
pip install pyyaml



In [3]:
import pandas as pd
import numpy as np
import time
from google_play_scraper import app, Sort, reviews_all
import plotly.express as px

In [4]:
app_id = 'me.hackerchick.catima'
reviews = reviews_all(
    app_id,
    sleep_milliseconds=0,
    lang='en',
    country='US',
    sort=Sort.NEWEST
)

if not reviews:
    print("No reviews found for the specified app ID.")
else:
    # Parse reviews
    parsed_reviews = []
    for review in reviews:
        reviewer_name = review.get('userName', '')
        review_text = review.get('content', '')
        rating = review.get('score', '')
        parsed_reviews.append({
            'Reviewer Name': reviewer_name,
            'Review Text': review_text,
            'Rating': rating
        })

    df = pd.DataFrame(parsed_reviews)

    file_name = "Catima_Reviews.csv"
    df.to_csv(file_name, index=False)
    print(f"CSV file '{file_name}' has been created successfully.")


CSV file 'Catima_Reviews.csv' has been created successfully.


<h5 style="font-size:1rem;">Stocard - Rewards Cards Wallet</h5>

In [5]:
def fetch_reviews(app_id, min_reviews=1000, retry_attempts=3):
    fetched_reviews = []
    total_reviews = 0

    for attempt in range(1, retry_attempts + 1):
        try:
            while total_reviews < min_reviews:
                # Fetch reviews in batches
                reviews = reviews_all(
                    app_id,
                    sleep_milliseconds=0,
                    lang='en',
                    country='US',
                    sort=Sort.NEWEST,
                    count=min_reviews - total_reviews
                )

                if not reviews:
                    break

                parsed_reviews = []
                for review in reviews:
                    reviewer_name = review['userName']
                    review_text = review['content']
                    rating = review['score']
                    parsed_reviews.append({
                        'Reviewer Name': reviewer_name,
                        'Review Text': review_text,
                        'Rating': rating
                    })

                total_reviews += len(parsed_reviews)
                fetched_reviews.extend(parsed_reviews)
                print(f"Fetched {total_reviews} reviews...")
                if total_reviews >= min_reviews:
                    break

            df = pd.DataFrame(fetched_reviews)
            file_name = "Stocard_reviews.csv"
            df.to_csv(file_name, index=False)
            print(f"CSV file '{file_name}' has been created successfully.")

            return True  # Reviews fetched successfully

        except Exception as e:
            print(f"Attempt {attempt}/{retry_attempts} failed. Error: {e}")
            if attempt < retry_attempts:
                print("Retrying after 5 seconds...")
                time.sleep(5)
            else:
                print("All retry attempts failed. Unable to fetch reviews.")
                return False  # Failed to fetch reviews

app_id = 'de.stocard.stocard'
fetch_reviews(app_id, min_reviews=1000)


Fetched 46929 reviews...
CSV file 'Stocard_reviews.csv' has been created successfully.


True

<h5 style="font-size:1rem;">Key Ring: Loyalty Card App</h5>

In [6]:
def fetch_reviews(app_id, min_reviews=1000, retry_attempts=3):
    fetched_reviews = []
    total_reviews = 0

    for attempt in range(1, retry_attempts + 1):
        try:
            while total_reviews < min_reviews:
                # Fetch reviews in batches
                reviews = reviews_all(
                    app_id,
                    sleep_milliseconds=0,
                    lang='en',
                    country='US',
                    sort=Sort.NEWEST,
                    count=min_reviews - total_reviews
                )

                if not reviews:
                    break

                parsed_reviews = []
                for review in reviews:
                    reviewer_name = review['userName']
                    review_text = review['content']
                    rating = review['score']
                    parsed_reviews.append({
                        'Reviewer Name': reviewer_name,
                        'Review Text': review_text,
                        'Rating': rating
                    })

                total_reviews += len(parsed_reviews)
                fetched_reviews.extend(parsed_reviews)
                print(f"Fetched {total_reviews} reviews...")
                if total_reviews >= min_reviews:
                    break

            df = pd.DataFrame(fetched_reviews)
            file_name = "KeyRing_reviews.csv"
            df.to_csv(file_name, index=False)
            print(f"CSV file '{file_name}' has been created successfully.")

            return True  # Reviews fetched successfully

        except Exception as e:
            print(f"Attempt {attempt}/{retry_attempts} failed. Error: {e}")
            if attempt < retry_attempts:
                print("Retrying after 5 seconds...")
                time.sleep(5)
            else:
                print("All retry attempts failed. Unable to fetch reviews.")
                return False  # Failed to fetch reviews

app_id = 'com.froogloid.kring.google.zxing.client.android'
fetch_reviews(app_id, min_reviews=1000)


Fetched 12981 reviews...
CSV file 'KeyRing_reviews.csv' has been created successfully.


True

<h5 style="font-size:1rem;">FidMe Loyalty Cards & Cashback</h5>

In [7]:
def fetch_reviews(app_id, min_reviews=1000, retry_attempts=3):
    fetched_reviews = []
    total_reviews = 0  #

    for attempt in range(1, retry_attempts + 1):
        try:
            while total_reviews < min_reviews:
                # Fetch reviews in batches
                reviews = reviews_all(
                    app_id,
                    sleep_milliseconds=0,
                    lang='en',
                    country='US',
                    sort=Sort.NEWEST,
                    count=min_reviews - total_reviews
                )

                if not reviews:
                    break

                parsed_reviews = []
                for review in reviews:
                    reviewer_name = review['userName']
                    review_text = review['content']
                    rating = review['score']
                    parsed_reviews.append({
                        'Reviewer Name': reviewer_name,
                        'Review Text': review_text,
                        'Rating': rating
                    })

                total_reviews += len(parsed_reviews)
                fetched_reviews.extend(parsed_reviews)
                print(f"Fetched {total_reviews} reviews...")

                if total_reviews >= min_reviews:
                    break

            df = pd.DataFrame(fetched_reviews)
            file_name = "FidMe_reviews.csv"
            df.to_csv(file_name, index=False)
            print(f"CSV file '{file_name}' has been created successfully.")

            return True

        except Exception as e:
            print(f"Attempt {attempt}/{retry_attempts} failed. Error: {e}")
            if attempt < retry_attempts:
                print("Retrying after 5 seconds...")
                time.sleep(5)
            else:
                print("All retry attempts failed. Unable to fetch reviews.")
                return False  # Failed to fetch reviews

app_id = 'fr.snapp.fidme'
fetch_reviews(app_id, min_reviews=1350)


Fetched 726 reviews...
Fetched 1452 reviews...
CSV file 'FidMe_reviews.csv' has been created successfully.


True

<h5 style="font-size:1rem;">Pass2U Wallet - digitize cards</h5>

In [8]:
def fetch_reviews(app_id, min_reviews=1000, retry_attempts=3):
    fetched_reviews = []
    total_reviews = 0

    for attempt in range(1, retry_attempts + 1):
        try:
            while total_reviews < min_reviews:
                # Fetch reviews in batches
                reviews = reviews_all(
                    app_id,
                    sleep_milliseconds=0,
                    lang='en',
                    country='US',
                    sort=Sort.NEWEST,
                    count=min_reviews - total_reviews
                )

                if not reviews:
                    break

                parsed_reviews = []
                for review in reviews:
                    reviewer_name = review['userName']
                    review_text = review['content']
                    rating = review['score']
                    parsed_reviews.append({
                        'Reviewer Name': reviewer_name,
                        'Review Text': review_text,
                        'Rating': rating
                    })

                total_reviews += len(parsed_reviews)
                fetched_reviews.extend(parsed_reviews)
                print(f"Fetched {total_reviews} reviews...")
                if total_reviews >= min_reviews:
                    break

            df = pd.DataFrame(fetched_reviews)
            file_name = "Pass2U_reviews.csv"
            df.to_csv(file_name, index=False)
            print(f"CSV file '{file_name}' has been created successfully.")
            return True

        except Exception as e:
            print(f"Attempt {attempt}/{retry_attempts} failed. Error: {e}")
            if attempt < retry_attempts:
                print("Retrying after 5 seconds...")
                time.sleep(5)
            else:
                print("All retry attempts failed. Unable to fetch reviews.")
                return False  # Failed to fetch reviews

# Example usage
app_id = 'com.passesalliance.wallet'
fetch_reviews(app_id, min_reviews=1000)


Fetched 1155 reviews...
CSV file 'Pass2U_reviews.csv' has been created successfully.


True

<h3 style="font-size:2rem;">Processing words in reviews</h3>

<h5 style="font-size:1rem;">Stocard - Rewards Cards Wallet</h5>

In [9]:
!pip install num2words
!pip install nltk



In [10]:
import pandas as pd
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from num2words import num2words

# Download nltk resources
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [11]:
# Function to preprocess text
def preprocess_text(text):
    if isinstance(text, str):
        # Remove punctuations
        text = text.translate(str.maketrans('', '', string.punctuation))

        # Remove special characters and emojis
        text = re.sub(r'[^\w\s]', '', text)

        # Turn numbers into text
        words = []
        for word in nltk.word_tokenize(text):
            if re.fullmatch(r"\d+(\.\d+)?", word):
                try:
                    words.append(num2words(float(word)))
                except (InvalidOperation, ValueError):
                    words.append(word)
            else:
                words.append(word)
        text = ' '.join(words)

        # Remove extra white spaces
        text = ' '.join(text.split())

        # Turn all words into lowercase
        text = text.lower()

        # Remove stop words
        stop_words = set(stopwords.words('english'))
        text = ' '.join([word for word in nltk.word_tokenize(text) if word not in stop_words])

        # Lemmatize the reviews
        lemmatizer = WordNetLemmatizer()
        text = ' '.join([lemmatizer.lemmatize(word) for word in nltk.word_tokenize(text)])

    return text

input_file = 'Stocard_reviews.csv'
output_file = 'Stocard_reviews_preprocessed_reviews.csv'
df = pd.read_csv(input_file)

df['Preprocessed Text'] = df['Review Text'].apply(preprocess_text)

# Output 15 sample preprocessed reviews
sample_preprocessed_reviews = df['Preprocessed Text'].sample(15).tolist()
for i, review in enumerate(sample_preprocessed_reviews, 1):
    print(f"Preprocessed Review {i}: {review}")

df.to_csv(output_file, index=False)
print(f"Preprocessed data saved to '{output_file}'")


Preprocessed Review 1: best card application
Preprocessed Review 2: outstanding app far
Preprocessed Review 3: carrying around loyalty card everywhere
Preprocessed Review 4: handy app dont carry heap card around
Preprocessed Review 5: easy use
Preprocessed Review 6: much convenient carrying around store card shame rarely scan need manual entry
Preprocessed Review 7: put scanner detect barcode phone
Preprocessed Review 8: convient allows room key
Preprocessed Review 9: app great carry card around card transferred new phone log account suck reload card
Preprocessed Review 10: simple easy time space saving love sure cloud backup
Preprocessed Review 11: brilliant app used lot sold klarna app closed moved card google wallet deleted account deleted app dont want klarna account shame
Preprocessed Review 12: take photo card enter number create barcodes shop accept image barcode reinstall app today automatic backup restore worked logged installing
Preprocessed Review 13: good app
Preprocessed R

<h5 style="font-size:1rem;">Key Ring: Loyalty Card App</h5>

In [12]:
# Function to preprocess text
def preprocess_text(text):
    if isinstance(text, str):
        # Remove punctuations
        text = text.translate(str.maketrans('', '', string.punctuation))

        # Remove special characters and emojis
        text = re.sub(r'[^\w\s]', '', text)

        # Turn numbers into text
        words = []
        for word in nltk.word_tokenize(text):
            if word.isdigit():
                words.append(num2words(word))
            else:
                words.append(word)
        text = ' '.join(words)

        # Remove extra white spaces
        text = ' '.join(text.split())

        # Turn all words into lowercase
        text = text.lower()

        # Remove stop words
        stop_words = set(stopwords.words('english'))
        text = ' '.join([word for word in nltk.word_tokenize(text) if word not in stop_words])

        # Lemmatize the reviews
        lemmatizer = WordNetLemmatizer()
        text = ' '.join([lemmatizer.lemmatize(word) for word in nltk.word_tokenize(text)])

    return text

input_file = 'KeyRing_reviews.csv'
output_file = 'KeyRing_reviews_preprocessed_reviews.csv'
df = pd.read_csv(input_file)

df['Preprocessed Text'] = df['Review Text'].apply(preprocess_text)

# Output 15 sample preprocessed reviews
sample_preprocessed_reviews = df['Preprocessed Text'].sample(15).tolist()
for i, review in enumerate(sample_preprocessed_reviews, 1):
    print(f"Preprocessed Review {i}: {review}")

df.to_csv(output_file, index=False)
print(f"Preprocessed data saved to '{output_file}'")


Preprocessed Review 1: 
Preprocessed Review 2: always count able get back copy reward card
Preprocessed Review 3: convenient
Preprocessed Review 4: loved getting rid card much convenient
Preprocessed Review 5: one useful apps ever created
Preprocessed Review 6: app work great faded one scanned
Preprocessed Review 7: great way keep loyalty card
Preprocessed Review 8: cool idea scanner dont read bar code phone food lion wasnt able make work
Preprocessed Review 9: cant scan store boo
Preprocessed Review 10: great app ad android wear support
Preprocessed Review 11: google wallet loyalty card wont work without internet connection king ring reward
Preprocessed Review 12: get new one hold money id thats reward card key wallet try agree promise u get coupon even better
Preprocessed Review 13: love app sooo convenient dont carry around annoying tag
Preprocessed Review 14: work
Preprocessed Review 15: great app
Preprocessed data saved to 'KeyRing_reviews_preprocessed_reviews.csv'


<h5 style="font-size:1rem;">FidMe Loyalty Cards & Cashback</h5>

In [13]:
# Function to preprocess text
def preprocess_text(text):
    if isinstance(text, str):
        # Remove punctuations
        text = text.translate(str.maketrans('', '', string.punctuation))

        # Remove special characters and emojis
        text = re.sub(r'[^\w\s]', '', text)

        # Turn numbers into text
        words = []
        for word in nltk.word_tokenize(text):
            if word.isdigit():
                words.append(num2words(word))
            else:
                words.append(word)
        text = ' '.join(words)

        # Remove extra white spaces
        text = ' '.join(text.split())

        # Turn all words into lowercase
        text = text.lower()

        # Remove stop words
        stop_words = set(stopwords.words('english'))
        text = ' '.join([word for word in nltk.word_tokenize(text) if word not in stop_words])

        # Lemmatize the reviews
        lemmatizer = WordNetLemmatizer()
        text = ' '.join([lemmatizer.lemmatize(word) for word in nltk.word_tokenize(text)])

    return text

input_file = 'FidMe_reviews.csv'
output_file = 'FidMe_reviews_preprocessed_reviews.csv'
df = pd.read_csv(input_file)

df['Preprocessed Text'] = df['Review Text'].apply(preprocess_text)

# Output 15 sample preprocessed reviews
sample_preprocessed_reviews = df['Preprocessed Text'].sample(15).tolist()
for i, review in enumerate(sample_preprocessed_reviews, 1):
    print(f"Preprocessed Review {i}: {review}")

df.to_csv(output_file, index=False)
print(f"Preprocessed data saved to '{output_file}'")


Preprocessed Review 1: country listed take time fifteen sec connect server sometimes failed connect say fidme currently trying server please try second barcode generates manual feeding look different loyalty card happens ten loyalty card asks lot permission consume lot battery including gps option customize camera picture specially logo setup option add back card loyalty card info back satisfied customer service trying best help definitely use sometime face problem reason uninstall
Preprocessed Review 2: cest cool davoir une version française de stocard mais je suis assez déçu que tout tourne autour de grandes chaînes de magasins je pense quil est temp de mettre davantage en avant no commerce de proximité comme lapp fidelatoo le fait très bien avec une app simple rapide et sans pub
Preprocessed Review 3: here thing even though app design seems made photoshop fails terribly material guideline terribly ugly even probably least ugly app kind keep forced use mobilepocket competitor support

<h5 style="font-size:1rem;">Pass2U Wallet - digitize cards</h5>

In [14]:
# Function to preprocess text
def preprocess_text(text):
    if isinstance(text, str):
        # Remove punctuations
        text = text.translate(str.maketrans('', '', string.punctuation))

        # Remove special characters and emojis
        text = re.sub(r'[^\w\s]', '', text)

        # Turn numbers into text
        words = []
        for word in nltk.word_tokenize(text):
            if word.isdigit():
                words.append(num2words(word))
            else:
                words.append(word)
        text = ' '.join(words)

        # Remove extra white spaces
        text = ' '.join(text.split())

        # Turn all words into lowercase
        text = text.lower()

        # Remove stop words
        stop_words = set(stopwords.words('english'))
        text = ' '.join([word for word in nltk.word_tokenize(text) if word not in stop_words])

        # Lemmatize the reviews
        lemmatizer = WordNetLemmatizer()
        text = ' '.join([lemmatizer.lemmatize(word) for word in nltk.word_tokenize(text)])

    return text

input_file = 'Pass2U_reviews.csv'
output_file = 'Pass2U_reviews_preprocessed_reviews.csv'
df = pd.read_csv(input_file)

df['Preprocessed Text'] = df['Review Text'].apply(preprocess_text)

# Output 15 sample preprocessed reviews
sample_preprocessed_reviews = df['Preprocessed Text'].sample(15).tolist()
for i, review in enumerate(sample_preprocessed_reviews, 1):
    print(f"Preprocessed Review {i}: {review}")

df.to_csv(output_file, index=False)
print(f"Preprocessed data saved to '{output_file}'")


Preprocessed Review 1: great
Preprocessed Review 2: buena para eventos
Preprocessed Review 3: track date expected track payment okay app
Preprocessed Review 4: good handy app loyalty card
Preprocessed Review 5: nice app developer really listens user
Preprocessed Review 6: convenient app
Preprocessed Review 7: convenient app easy store discount card add disturbing great option storing visit card
Preprocessed Review 8: would great add auto restore option like existing auto backup option
Preprocessed Review 9: great app easy use
Preprocessed Review 10: quick access
Preprocessed Review 11: improve
Preprocessed Review 12: wouldnt upload pkpass file wouldnt upload photo tried add loyalty card kept upload loop useful
Preprocessed Review 13: default setting app apparently popup full screen every time unlock phone annoying however alarm went seven get flight physically unable turn due app popping instead even annoying know stupid setting disabled app get one star interfering phone basic behavio

<h3 style="font-size:2rem;">Generate Review Polarity</h3>

<h5 style="font-size:1rem;">Stocard - Rewards Cards Wallet</h5>

In [15]:
!pip install textblob



In [16]:
from textblob import TextBlob
from IPython.display import display


In [17]:
# Load preprocessed data from CSV
preprocessed_file = 'Stocard_reviews_preprocessed_reviews.csv'
df = pd.read_csv(preprocessed_file)

# Function to calculate polarity using TextBlob
def calculate_polarity(review):
    blob = TextBlob(review)
    polarity = blob.sentiment.polarity
    return polarity

# Replace missing values in 'Preprocessed Text' column with an empty string
df['Preprocessed Text'].fillna('', inplace=True)

# Calculate polarity
df['Polarity'] = df['Preprocessed Text'].apply(lambda x: calculate_polarity(x) if isinstance(x, str) else 0)

# app's package name
app_package_name = 'de.stocard.stocard'

# Create a DataFrame with the explicit app package name
output_df = pd.DataFrame({
    'App’s package name': [app_package_name] * len(df),
    'Review': df['Preprocessed Text'],
    'Polarity': df['Polarity']
})

# Display at least 250 reviews as required
display(output_df.head(250))


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Preprocessed Text'].fillna('', inplace=True)


Unnamed: 0,App’s package name,Review,Polarity
0,de.stocard.stocard,understand taking perfectly working app perfec...,0.568182
1,de.stocard.stocard,ok sold klarna,0.500000
2,de.stocard.stocard,great app usable anymore moved klarna app acce...,0.400000
3,de.stocard.stocard,love app,0.500000
4,de.stocard.stocard,used great connected payment company supercard...,0.900000
...,...,...,...
245,de.stocard.stocard,stocard great easy use whats hear youre moving...,-0.193333
246,de.stocard.stocard,used best bought klarna disintegrating good lu...,0.850000
247,de.stocard.stocard,best loyalty card app imho used year shut bad ...,0.200000
248,de.stocard.stocard,best service,1.000000


<h5 style="font-size:1rem;">Key Ring: Loyalty Card App</h5>

In [18]:
# Load preprocessed data from CSV
preprocessed_file = 'KeyRing_reviews_preprocessed_reviews.csv'
df = pd.read_csv(preprocessed_file)

# Function to calculate polarity using TextBlob
def calculate_polarity(review):
    blob = TextBlob(review)
    polarity = blob.sentiment.polarity
    return polarity

# Replace missing values in 'Preprocessed Text' column with an empty string
df['Preprocessed Text'].fillna('', inplace=True)

# Calculate polarity
df['Polarity'] = df['Preprocessed Text'].apply(lambda x: calculate_polarity(x) if isinstance(x, str) else 0)

# app's package name
app_package_name = 'com.froogloid.kring.google.zxing.client.android'

# Create a DataFrame with the explicit app package name
output_df = pd.DataFrame({
    'App’s package name': [app_package_name] * len(df),
    'Review': df['Preprocessed Text'],
    'Polarity': df['Polarity']
})

# Display at least 250 reviews as required
display(output_df.head(250))


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Preprocessed Text'].fillna('', inplace=True)


Unnamed: 0,App’s package name,Review,Polarity
0,com.froogloid.kring.google.zxing.client.android,great,0.800000
1,com.froogloid.kring.google.zxing.client.android,work,0.000000
2,com.froogloid.kring.google.zxing.client.android,good app would great app could access card via...,0.700000
3,com.froogloid.kring.google.zxing.client.android,app work like five star,0.000000
4,com.froogloid.kring.google.zxing.client.android,best app ive app year ive never issue app cant...,0.350000
...,...,...,...
245,com.froogloid.kring.google.zxing.client.android,work fine,0.416667
246,com.froogloid.kring.google.zxing.client.android,ever came app hat youit made life easier,0.000000
247,com.froogloid.kring.google.zxing.client.android,love electronic copy add insurance card covid ...,0.500000
248,com.froogloid.kring.google.zxing.client.android,excellent place track membership loyalty card ...,1.000000


<h5 style="font-size:1rem;">FidMe Loyalty Cards & Cashback</h5>

In [19]:
# Load preprocessed data from CSV
preprocessed_file = 'FidMe_reviews_preprocessed_reviews.csv'
df = pd.read_csv(preprocessed_file)

# Function to calculate polarity using TextBlob
def calculate_polarity(review):
    blob = TextBlob(review)
    polarity = blob.sentiment.polarity
    return polarity

# Replace missing values in 'Preprocessed Text' column with an empty string
df['Preprocessed Text'].fillna('', inplace=True)

# Calculate polarity
df['Polarity'] = df['Preprocessed Text'].apply(lambda x: calculate_polarity(x) if isinstance(x, str) else 0)

# app's package name
app_package_name = 'fr.snapp.fidme'

# Create a DataFrame with the explicit app package name
output_df = pd.DataFrame({
    'App’s package name': [app_package_name] * len(df),
    'Review': df['Preprocessed Text'],
    'Polarity': df['Polarity']
})

# Display at least 250 reviews as required
display(output_df.head(250))


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Preprocessed Text'].fillna('', inplace=True)


Unnamed: 0,App’s package name,Review,Polarity
0,fr.snapp.fidme,import card copy paste number ikea card recogn...,0.000000
1,fr.snapp.fidme,lento leggere coidici barre per importarli e p...,0.000000
2,fr.snapp.fidme,import screenshot sounded like useful feature ...,-0.166667
3,fr.snapp.fidme,u retailer,0.000000
4,fr.snapp.fidme,ik op zoek naar een vervanger voor stocard omd...,0.000000
...,...,...,...
245,fr.snapp.fidme,amazing support ending changing email lost old...,0.272727
246,fr.snapp.fidme,since update cant scan card anymore old one wo...,0.300000
247,fr.snapp.fidme,dont really find coupon useful idea point app ...,0.200000
248,fr.snapp.fidme,great idea tried didnt work card wouldnt scan ...,0.060000


<h5 style="font-size:1rem;">Pass2U Wallet - digitize cards</h5>

In [20]:
# Load preprocessed data from CSV
preprocessed_file = 'FidMe_reviews_preprocessed_reviews.csv'
df = pd.read_csv(preprocessed_file)

# Function to calculate polarity using TextBlob
def calculate_polarity(review):
    blob = TextBlob(review)
    polarity = blob.sentiment.polarity
    return polarity

# Replace missing values in 'Preprocessed Text' column with an empty string
df['Preprocessed Text'].fillna('', inplace=True)

# Calculate polarity
df['Polarity'] = df['Preprocessed Text'].apply(lambda x: calculate_polarity(x) if isinstance(x, str) else 0)

# app's package name
app_package_name = 'com.passesalliance.wallet'

# Create a DataFrame with the explicit app package name
output_df = pd.DataFrame({
    'App’s package name': [app_package_name] * len(df),
    'Review': df['Preprocessed Text'],
    'Polarity': df['Polarity']
})

# Display at least 250 reviews as required
display(output_df.head(250))


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Preprocessed Text'].fillna('', inplace=True)


Unnamed: 0,App’s package name,Review,Polarity
0,com.passesalliance.wallet,import card copy paste number ikea card recogn...,0.000000
1,com.passesalliance.wallet,lento leggere coidici barre per importarli e p...,0.000000
2,com.passesalliance.wallet,import screenshot sounded like useful feature ...,-0.166667
3,com.passesalliance.wallet,u retailer,0.000000
4,com.passesalliance.wallet,ik op zoek naar een vervanger voor stocard omd...,0.000000
...,...,...,...
245,com.passesalliance.wallet,amazing support ending changing email lost old...,0.272727
246,com.passesalliance.wallet,since update cant scan card anymore old one wo...,0.300000
247,com.passesalliance.wallet,dont really find coupon useful idea point app ...,0.200000
248,com.passesalliance.wallet,great idea tried didnt work card wouldnt scan ...,0.060000


<h3 style="font-size:2rem;">VADER and TextBlob Sentiment Analysis</h3>

<h5 style="font-size:1rem;">Stocard - Rewards Cards Wallet</h5>

In [21]:
!pip install vaderSentiment



In [22]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from IPython.display import display

In [23]:
# Load preprocessed data from CSV
preprocessed_file = 'Stocard_reviews_preprocessed_reviews.csv'
df = pd.read_csv(preprocessed_file)

# Initialize VADER sentiment analyzer
sid = SentimentIntensityAnalyzer()

# Function to calculate polarity using VADER
def calculate_polarity_vader(review):
    scores = sid.polarity_scores(review)
    return scores['compound']

# Replace missing values in 'Preprocessed Text' column with an empty string
df['Preprocessed Text'].fillna('', inplace=True)

# Calculate polarity
df['Polarity'] = df['Preprocessed Text'].apply(lambda x: calculate_polarity_vader(x) if isinstance(x, str) else 0)

# app's package name
app_package_name = 'de.stocard.stocard'

# Create a DataFrame with the explicit app package name
output_df_vader = pd.DataFrame({
    'App’s package name': [app_package_name] * len(df),
    'Review': df['Preprocessed Text'],
    'Polarity': df['Polarity']
})

# Limit to 250 reviews as required
output_df_vader = output_df_vader.head(250)

# Display
pd.set_option('display.max_rows', None)
display(output_df_vader)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Preprocessed Text'].fillna('', inplace=True)


Unnamed: 0,App’s package name,Review,Polarity
0,de.stocard.stocard,understand taking perfectly working app perfec...,0.9062
1,de.stocard.stocard,ok sold klarna,0.296
2,de.stocard.stocard,great app usable anymore moved klarna app acce...,0.7906
3,de.stocard.stocard,love app,0.6369
4,de.stocard.stocard,used great connected payment company supercard...,0.8316
5,de.stocard.stocard,great app easy place quickly pull reward card ...,0.8074
6,de.stocard.stocard,good useful used long time,0.7003
7,de.stocard.stocard,ease use,0.3612
8,de.stocard.stocard,shame one best loyalty apps destroyed klarna w...,0.0936
9,de.stocard.stocard,used said tin easy set perfect move klarna rui...,0.9217


<h5 style="font-size:1rem;">Key Ring: Loyalty Card App</h5>

In [24]:
# Load preprocessed data from CSV
preprocessed_file = 'KeyRing_reviews_preprocessed_reviews.csv'
df = pd.read_csv(preprocessed_file)

# Initialize VADER sentiment analyzer
sid = SentimentIntensityAnalyzer()

# Function to calculate polarity using VADER
def calculate_polarity_vader(review):
    scores = sid.polarity_scores(review)
    return scores['compound']

# Replace missing values in 'Preprocessed Text' column with an empty string
df['Preprocessed Text'].fillna('', inplace=True)

# Calculate polarity
df['Polarity'] = df['Preprocessed Text'].apply(lambda x: calculate_polarity_vader(x) if isinstance(x, str) else 0)

# app's package name
app_package_name = 'com.froogloid.kring.google.zxing.client.android'

# Create a DataFrame with the explicit app package name
output_df_vader = pd.DataFrame({
    'App’s package name': [app_package_name] * len(df),
    'Review': df['Preprocessed Text'],
    'Polarity': df['Polarity']
})

# Limit to 250 reviews as required
output_df_vader = output_df_vader.head(250)

# Display
pd.set_option('display.max_rows', None)
display(output_df_vader)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Preprocessed Text'].fillna('', inplace=True)


Unnamed: 0,App’s package name,Review,Polarity
0,com.froogloid.kring.google.zxing.client.android,great,0.6249
1,com.froogloid.kring.google.zxing.client.android,work,0.0
2,com.froogloid.kring.google.zxing.client.android,good app would great app could access card via...,0.8957
3,com.froogloid.kring.google.zxing.client.android,app work like five star,0.3612
4,com.froogloid.kring.google.zxing.client.android,best app ive app year ive never issue app cant...,0.8658
5,com.froogloid.kring.google.zxing.client.android,love new key chain wallet,0.6369
6,com.froogloid.kring.google.zxing.client.android,great,0.6249
7,com.froogloid.kring.google.zxing.client.android,simple easy claim,0.4404
8,com.froogloid.kring.google.zxing.client.android,used app long time far best app market ease us...,0.7906
9,com.froogloid.kring.google.zxing.client.android,ive app long time seemed great however get lot...,0.4767


<h5 style="font-size:1rem;">FidMe Loyalty Cards & Cashback</h5>

In [25]:
# Load preprocessed data from CSV
preprocessed_file = 'FidMe_reviews_preprocessed_reviews.csv'
df = pd.read_csv(preprocessed_file)

# Initialize VADER sentiment analyzer
sid = SentimentIntensityAnalyzer()

# Function to calculate polarity using VADER
def calculate_polarity_vader(review):
    scores = sid.polarity_scores(review)
    return scores['compound']

# Replace missing values in 'Preprocessed Text' column with an empty string
df['Preprocessed Text'].fillna('', inplace=True)

# Calculate polarity
df['Polarity'] = df['Preprocessed Text'].apply(lambda x: calculate_polarity_vader(x) if isinstance(x, str) else 0)

# app's package name
app_package_name = 'fr.snapp.fidme'

# Create a DataFrame with the explicit app package name
output_df_vader = pd.DataFrame({
    'App’s package name': [app_package_name] * len(df),
    'Review': df['Preprocessed Text'],
    'Polarity': df['Polarity']
})

# Limit to 250 reviews as required
output_df_vader = output_df_vader.head(250)

pd.set_option('display.max_rows', None)
display(output_df_vader)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Preprocessed Text'].fillna('', inplace=True)


Unnamed: 0,App’s package name,Review,Polarity
0,fr.snapp.fidme,import card copy paste number ikea card recogn...,0.0772
1,fr.snapp.fidme,lento leggere coidici barre per importarli e p...,0.0
2,fr.snapp.fidme,import screenshot sounded like useful feature ...,0.3182
3,fr.snapp.fidme,u retailer,0.0
4,fr.snapp.fidme,ik op zoek naar een vervanger voor stocard omd...,0.0
5,fr.snapp.fidme,buggy interface cant import properly seems mix...,0.4019
6,fr.snapp.fidme,fidme removed company logo rebooted phone toda...,0.0
7,fr.snapp.fidme,widget dont work sits wont give log screen,0.0
8,fr.snapp.fidme,application très pratique surtout pour enregis...,0.0
9,fr.snapp.fidme,add dark mode two thousand twenty-four add dar...,0.0772


<h5 style="font-size:1rem;">Pass2U Wallet - digitize cards</h5>

In [26]:
# Load preprocessed data from CSV
preprocessed_file = 'Pass2U_reviews_preprocessed_reviews.csv'
df = pd.read_csv(preprocessed_file)

# Initialize VADER sentiment analyzer
sid = SentimentIntensityAnalyzer()

# Function to calculate polarity using VADER
def calculate_polarity_vader(review):
    scores = sid.polarity_scores(review)
    return scores['compound']

# Replace missing values in 'Preprocessed Text' column with an empty string
df['Preprocessed Text'].fillna('', inplace=True)

# Calculate polarity
df['Polarity'] = df['Preprocessed Text'].apply(lambda x: calculate_polarity_vader(x) if isinstance(x, str) else 0)

# app's package name
app_package_name = 'com.passesalliance.wallet'

# Create a DataFrame with the explicit app package name
output_df_vader = pd.DataFrame({
    'App’s package name': [app_package_name] * len(df),
    'Review': df['Preprocessed Text'],
    'Polarity': df['Polarity']
})

# Limit to 250 reviews as required
output_df_vader = output_df_vader.head(250)

# Display
pd.set_option('display.max_rows', None)
display(output_df_vader)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Preprocessed Text'].fillna('', inplace=True)


Unnamed: 0,App’s package name,Review,Polarity
0,com.passesalliance.wallet,zoo belong say use app hold membership card fi...,0.3182
1,com.passesalliance.wallet,good simple effective update happened ui geniu...,0.9238
2,com.passesalliance.wallet,everything work layout seems old android versi...,0.5267
3,com.passesalliance.wallet,work,0.0
4,com.passesalliance.wallet,easy use,0.4404
5,com.passesalliance.wallet,updated recently midnov two thousand twenty-fo...,-0.1531
6,com.passesalliance.wallet,love,0.6369
7,com.passesalliance.wallet,app simple easy use love clean interface altho...,0.959
8,com.passesalliance.wallet,folk one thousand ten,0.0
9,com.passesalliance.wallet,use wear app upgrade please purchase phone,0.3182


<h3 style="font-size:2rem;">Sentiment Analysis Comparison</h3>

One can find various trends when we compare sentiment analysis data from tools such as TextBlob and VADER with an app's rating. App ratings usually reflect the thoughts of the collective user base. However, sentiment analysis technologies such as TextBlob and VADER evaluate the emotional content of individual reviews, providing a more detailed picture of user sentiment. One can frequently observe a relationship between sentiment analysis findings and app ratings. Sentiment analysis algorithms typically give higher-rated apps higher sentiment scores, while lower-rated apps may exhibit a greater prevalence of negative sentiment in user reviews. Nonetheless, disparities may emerge from a multitude of sources. For example, an app with a good overall rating may contain individual ratings that show discontent or specific difficulties. In such circumstances, sentiment analysis techniques may detect negative attitudes despite the app's overall positive rating. Conversely, an app with a lower rating may have some positive reviews highlighting specific features or experiences, leading to a more balanced sentiment analysis outcome. An in-depth knowledge of the rating distribution of the app as well as the attitude expressed in individual reviews is necessary to interpret these similarities and discrepancies. Though sentiment analysis offers more in-depth insights into the underlying feelings and ideas influencing those evaluations, app ratings still give a comprehensive picture of user happiness. Stakeholders can uncover opportunities for improvement and more research by combining the two methods to have a thorough grasp of user feedback.

<h3 style="font-size:2rem;">Prompt Engineering</h3>

a) The prompt that I would use for for GPT-3 sentiment evaluation is the following: "Please provide a sentiment analysis for the given the review: (insert review (preprocessed text review))."
b) I would make a table with the review text, the TextBlob sentiment polarity score, and the sentiment label (e.g., positive, negative, or neutral) in order to compare the sentiment produced by the GPT model with the results of TextBlob. After that, we can include a second column for the sentiment label that GPT-3 produced.
c) Similarly, I would make another table with the review text, Vader sentiment polarity score, and sentiment label in order to compare the sentiment produced by the GPT model with the results of Vader. I will once more add a column to represent the sentiment label produced by GPT-3 and contrast it with Vader's labels. This comparison will shed light on how well the sentiment analysis carried out by GPT-3 matches the Vader findings.


GPT-3 vs. TextBlob Examples

In [27]:
data = {
    'App Package Name': ['de.stocard.stocard', 'com.froogloid.kring.google.zxing.client.android', 'fr.snapp.fidme'],
    #'Review Text': ['Great option', 'Never a problem, easy to use, very helpful. I tell my friends', 'I was unable to scan it in any of the shops I went to.'],
    'Preprocessed Text': ['great option', 'never problem easy use helpful tell friend', 'unable scan shop went'],
    'TextBlob Polarity': [0.800000, 0.433333, -0.500000],
    'GPT-3 Sentiment': ['positive', 'positive', 'negative']
}

df = pd.DataFrame(data)
print(df.to_string(index=False))


                               App Package Name                          Preprocessed Text  TextBlob Polarity GPT-3 Sentiment
                             de.stocard.stocard                               great option           0.800000        positive
com.froogloid.kring.google.zxing.client.android never problem easy use helpful tell friend           0.433333        positive
                                 fr.snapp.fidme                      unable scan shop went          -0.500000        negative


GPT-3 vs. Vader Examples

In [28]:
data = {
    'App Package Name': ['de.stocard.stocard', 'fr.snapp.fidme', 'com.passesalliance.wallet'],
    'Preprocessed Text': ['convenient', 'junk freezing phone watch', 'uninstalled within two min awful'],
    'Vader': [0.0000, -0.1027, -0.4588],
    'GPT-3 Sentiment': ['positive', 'negative', 'negative']
}

df = pd.DataFrame(data)
print(df.to_string(index=False))


         App Package Name                Preprocessed Text   Vader GPT-3 Sentiment
       de.stocard.stocard                       convenient  0.0000        positive
           fr.snapp.fidme        junk freezing phone watch -0.1027        negative
com.passesalliance.wallet uninstalled within two min awful -0.4588        negative


<h3 style="font-size:2rem;">Create LDA Model</h3>

In [29]:
!pip install --upgrade --force-reinstall numpy
!pip install --upgrade --force-reinstall gensim

Collecting numpy
  Downloading numpy-2.2.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-2.2.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.4/16.4 MB[0m [31m28.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.26.4
    Uninstalling numpy-1.26.4:
      Successfully uninstalled numpy-1.26.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gensim 4.3.3 requires numpy<2.0,>=1.18.5, but you have numpy 2.2.4 which is incompatible.
numba 0.60.0 requires numpy<2.1,>=1.22, but you have numpy 2.2.4 which is incom

In [30]:
from gensim import corpora
from gensim.models import LdaModel

<h5 style="font-size:1rem;">Stocard - Rewards Cards Wallet</h5>

In [31]:
# Load preprocessed data from CSV
input_file = 'Stocard_reviews_preprocessed_reviews.csv'
df = pd.read_csv(input_file)

df['split_reviews'] = df['Preprocessed Text'].str.split()
df.dropna(subset=['split_reviews'], inplace=True)

corpora_reviews = df['split_reviews'].tolist()
dictionary = corpora.Dictionary(corpora_reviews)
corpus = [dictionary.doc2bow(review) for review in corpora_reviews]

num_topics = 15
lda_model = LdaModel(corpus=corpus, id2word=dictionary, num_topics=num_topics, passes=12)

for topic in lda_model.print_topics(num_words=7):
    print(topic)


(0, '0.124*"really" + 0.079*"code" + 0.078*"better" + 0.053*"bar" + 0.050*"problem" + 0.048*"using" + 0.044*"app"')
(1, '0.124*"scanner" + 0.111*"key" + 0.046*"barcodes" + 0.043*"brilliant" + 0.041*"little" + 0.037*"chain" + 0.032*"needed"')
(2, '0.128*"card" + 0.075*"one" + 0.067*"wallet" + 0.053*"app" + 0.048*"place" + 0.045*"much" + 0.042*"handy"')
(3, '0.466*"great" + 0.241*"app" + 0.027*"cloud" + 0.025*"wallet" + 0.011*"must" + 0.009*"everyone" + 0.009*"car"')
(4, '0.072*"phone" + 0.069*"card" + 0.053*"get" + 0.047*"stocard" + 0.039*"wallet" + 0.029*"got" + 0.027*"new"')
(5, '0.342*"work" + 0.108*"well" + 0.043*"app" + 0.041*"excellent" + 0.027*"doesnt" + 0.026*"job" + 0.020*"time"')
(6, '0.340*"use" + 0.310*"easy" + 0.087*"convenient" + 0.065*"simple" + 0.019*"set" + 0.018*"cool" + 0.012*"user"')
(7, '0.058*"card" + 0.047*"app" + 0.020*"would" + 0.017*"time" + 0.015*"store" + 0.014*"like" + 0.014*"one"')
(8, '0.130*"card" + 0.061*"scan" + 0.060*"store" + 0.032*"like" + 0.027*"num

<h5 style="font-size:1rem;">Key Ring: Loyalty Card App</h5>

In [32]:
input_file = 'KeyRing_reviews_preprocessed_reviews.csv'
df = pd.read_csv(input_file)

df['split_reviews'] = df['Preprocessed Text'].str.split()
df.dropna(subset=['split_reviews'], inplace=True)

corpora_reviews = df['split_reviews'].tolist()
dictionary = corpora.Dictionary(corpora_reviews)
corpus = [dictionary.doc2bow(review) for review in corpora_reviews]

num_topics = 15
lda_model = LdaModel(corpus=corpus, id2word=dictionary, num_topics=num_topics, passes=12)

for topic in lda_model.print_topics(num_words=7):
    print(topic)


(0, '0.121*"code" + 0.100*"bar" + 0.034*"fix" + 0.027*"please" + 0.018*"galaxy" + 0.017*"must" + 0.017*"ok"')
(1, '0.088*"nice" + 0.071*"yet" + 0.067*"havent" + 0.038*"app" + 0.028*"working" + 0.026*"really" + 0.024*"notification"')
(2, '0.112*"droid" + 0.058*"card" + 0.048*"tried" + 0.036*"scanned" + 0.031*"worked" + 0.028*"store" + 0.023*"retailer"')
(3, '0.062*"would" + 0.049*"great" + 0.038*"star" + 0.038*"five" + 0.038*"concept" + 0.030*"could" + 0.026*"idea"')
(4, '0.144*"scan" + 0.095*"scanner" + 0.074*"store" + 0.049*"screen" + 0.044*"barcode" + 0.043*"read" + 0.037*"cant"')
(5, '0.418*"work" + 0.118*"doesnt" + 0.093*"great" + 0.038*"well" + 0.029*"evo" + 0.026*"didnt" + 0.018*"tried"')
(6, '0.061*"app" + 0.038*"card" + 0.030*"update" + 0.023*"force" + 0.022*"time" + 0.021*"close" + 0.020*"try"')
(7, '0.073*"card" + 0.049*"like" + 0.045*"cool" + 0.037*"app" + 0.025*"htc" + 0.022*"one" + 0.021*"store"')
(8, '0.204*"great" + 0.141*"app" + 0.091*"idea" + 0.087*"key" + 0.050*"ring"

<h5 style="font-size:1rem;">FidMe Loyalty Cards & Cashback</h5>

In [33]:
input_file = 'FidMe_reviews_preprocessed_reviews.csv'
df = pd.read_csv(input_file)

df['split_reviews'] = df['Preprocessed Text'].str.split()
df.dropna(subset=['split_reviews'], inplace=True)

corpora_reviews = df['split_reviews'].tolist()
dictionary = corpora.Dictionary(corpora_reviews)
corpus = [dictionary.doc2bow(review) for review in corpora_reviews]

num_topics = 15
lda_model = LdaModel(corpus=corpus, id2word=dictionary, num_topics=num_topics, passes=12)

for topic in lda_model.print_topics(num_words=7):
    print(topic)


(0, '0.074*"card" + 0.050*"app" + 0.027*"easy" + 0.024*"loyalty" + 0.021*"dont" + 0.019*"carry" + 0.018*"store"')
(1, '0.049*"de" + 0.022*"et" + 0.013*"le" + 0.013*"carte" + 0.011*"je" + 0.011*"card" + 0.011*"cant"')
(2, '0.058*"card" + 0.045*"work" + 0.040*"app" + 0.020*"scan" + 0.015*"doesnt" + 0.014*"great" + 0.013*"barcode"')
(3, '0.064*"card" + 0.044*"app" + 0.011*"please" + 0.010*"watch" + 0.010*"update" + 0.010*"add" + 0.010*"get"')
(4, '0.046*"app" + 0.022*"work" + 0.021*"good" + 0.020*"card" + 0.014*"time" + 0.013*"logo" + 0.011*"account"')
(5, '0.031*"work" + 0.026*"card" + 0.025*"use" + 0.020*"app" + 0.017*"tried" + 0.017*"useful" + 0.016*"scan"')
(6, '0.022*"card" + 0.017*"good" + 0.010*"number" + 0.009*"use" + 0.009*"dont" + 0.009*"discount" + 0.009*"different"')
(7, '0.036*"great" + 0.022*"card" + 0.018*"app" + 0.016*"use" + 0.013*"way" + 0.012*"lot" + 0.010*"language"')
(8, '0.041*"card" + 0.035*"app" + 0.022*"phone" + 0.016*"wallet" + 0.015*"time" + 0.015*"store" + 0.01

<h3 style="font-size:1rem;">Topic Modelling</h3>

<h5 style="font-size:1rem;">Pass2U Wallet - digitize cards</h5>

In [34]:
input_file = 'Pass2U_reviews_preprocessed_reviews.csv'
df = pd.read_csv(input_file)

df['split_reviews'] = df['Preprocessed Text'].str.split()
df.dropna(subset=['split_reviews'], inplace=True)

corpora_reviews = df['split_reviews'].tolist()
dictionary = corpora.Dictionary(corpora_reviews)
corpus = [dictionary.doc2bow(review) for review in corpora_reviews]

num_topics = 15
lda_model = LdaModel(corpus=corpus, id2word=dictionary, num_topics=num_topics, passes=12)

for topic in lda_model.print_topics(num_words=7):
    print(topic)


(0, '0.027*"card" + 0.023*"app" + 0.012*"ticket" + 0.012*"add" + 0.010*"brilliant" + 0.010*"really" + 0.009*"would"')
(1, '0.033*"app" + 0.024*"like" + 0.018*"background" + 0.016*"best" + 0.014*"wallet" + 0.014*"pass" + 0.014*"scan"')
(2, '0.034*"app" + 0.030*"convenient" + 0.027*"handy" + 0.022*"time" + 0.020*"one" + 0.018*"get" + 0.012*"ten"')
(3, '0.035*"app" + 0.021*"add" + 0.020*"ticket" + 0.018*"boarding" + 0.017*"pas" + 0.015*"like" + 0.014*"pass"')
(4, '0.083*"good" + 0.048*"app" + 0.048*"great" + 0.021*"cant" + 0.021*"version" + 0.019*"restore" + 0.016*"card"')
(5, '0.105*"use" + 0.101*"easy" + 0.062*"work" + 0.029*"well" + 0.012*"app" + 0.011*"dont" + 0.010*"helpful"')
(6, '0.041*"pas" + 0.038*"app" + 0.029*"boarding" + 0.027*"awesome" + 0.019*"use" + 0.018*"easy" + 0.017*"work"')
(7, '0.022*"app" + 0.017*"work" + 0.016*"pass" + 0.012*"far" + 0.011*"ticket" + 0.010*"cant" + 0.010*"passbook"')
(8, '0.034*"love" + 0.027*"app" + 0.017*"pas" + 0.012*"great" + 0.011*"used" + 0.011

<h5 style="font-size:1rem;">Stocard - Rewards Cards Wallet</h5>

In [35]:
data = {
    'Functionality': [
        'Digitize Your Rewards Cards',
        'Collect Rewards Points in Stocard',
        'Discover Exclusive Offers',
        'Use Advanced Features'
    ],
    'Related Topic IDs': [
        '0, 12',
        '3, 11',
        '5, 10',
        '2, 8'
    ]
}

df = pd.DataFrame(data)
pd.set_option('display.max_colwidth', None)
print(df.to_string(index=False))

                    Functionality Related Topic IDs
      Digitize Your Rewards Cards             0, 12
Collect Rewards Points in Stocard             3, 11
        Discover Exclusive Offers             5, 10
            Use Advanced Features              2, 8


<h5 style="font-size:1rem;">Key Ring: Loyalty Card App</h5>

In [36]:
data = {
    'Functionality': [
        'Barcode Scanner',
        'Loyalty Card Database',
        'Remote Cloud Backup',
        'Favorites and Shopping Lists',
        'Sharing and Notifications'
    ],
    'Related Topic IDs': [
        '5, 11',
        '2, 11',
        '7, 13',
        '2, 9, 10',
        '8, 10'
    ]
}

df = pd.DataFrame(data)
pd.set_option('display.max_colwidth', None)
print(df.to_string(index=False))

               Functionality Related Topic IDs
             Barcode Scanner             5, 11
       Loyalty Card Database             2, 11
         Remote Cloud Backup             7, 13
Favorites and Shopping Lists          2, 9, 10
   Sharing and Notifications             8, 10


<h5 style="font-size:1rem;">FidMe Loyalty Cards & Cashback</h5>

In [37]:
data = {
    'Functionality': [
        'Digitize Your Loyalty Cards',
        'Organize Your Paper Receipts',
        'Discover Deals and Discounts'
    ],
    'Related Topic IDs': [
        '0, 4, 7, 9, 11, 12, 13',
        '2, 6',
        '1, 3, 5, 8, 10, 14'
    ]
}
df = pd.DataFrame(data)
pd.set_option('display.max_colwidth', None)
print(df.to_string(index=False))

               Functionality      Related Topic IDs
 Digitize Your Loyalty Cards 0, 4, 7, 9, 11, 12, 13
Organize Your Paper Receipts                   2, 6
Discover Deals and Discounts     1, 3, 5, 8, 10, 14


<h5 style="font-size:1rem;">Pass2U Wallet - digitize cards</h5>

In [38]:
data = {
    'Functionality': [
        'Collect and manage membership cards, coupons, event tickets, movies tickets, transport cards, and etc.',
        'Support QR Code, Aztec, PDF417 2D barcodes, and Code 128 1D barcode.',
        'Show the relevant passes on the lock screen according to current location or time.',
        'Support for iBeacon.',
        'Support changing notifications of Apple Wallet pass.',
        'Localization of passes.',
        'Scan/Enter the barcodes on your cards or tickets to make passes and save them in Pass2U Wallet.',
        'Free Pass update API for Pass2U Wallet issuers.',
        'Google Drive backup and restore.',
        'Wear OS app support for pro user.'
    ],
    'Related Topic IDs': [
        '1, 4, 6, 7, 11, 12, 14',
        '3',
        '5',
        '3',
        '5',
        '6',
        '7',
        '8',
        '8',
        '10'
    ]
}

df = pd.DataFrame(data)
pd.set_option('display.max_colwidth', None)
print(df.to_string(index=False))

                                                                                         Functionality      Related Topic IDs
Collect and manage membership cards, coupons, event tickets, movies tickets, transport cards, and etc. 1, 4, 6, 7, 11, 12, 14
                                  Support QR Code, Aztec, PDF417 2D barcodes, and Code 128 1D barcode.                      3
                    Show the relevant passes on the lock screen according to current location or time.                      5
                                                                                  Support for iBeacon.                      3
                                                  Support changing notifications of Apple Wallet pass.                      5
                                                                               Localization of passes.                      6
       Scan/Enter the barcodes on your cards or tickets to make passes and save them in Pass2U Wallet.                

<h3 style="font-size:2rem;">About Topic Modelling</h3>

The topics comparing to the topics extracted from reviews of other apps (similar/competitor) is done above.

Topic modeling is a powerful technique that allows us to uncover hidden patterns and topics within large volumes of text data efficiently. By applying topic modeling to user reviews of mobile applications like Stocard, KeyRing, FidMe, and Pass2U, we can gain valuable insights into how users perceive these apps and how their functionalities align with the feedback provided.

Reviews from users of Stocard have been examined to extract a variety of themes, including the app's performance, ease of use, convenience, and digitization of loyalty cards. These subjects closely resemble the features that Stocard has been promoting on Google Play. For instance, Stocard's feature that lets users scan and save their loyalty cards digitally, clearing up space in their physical wallets and streamlining the rewards collection procedure, is a perfect fit with the highlighted subjects about digitizing rewards cards and earning rewards points.

In the same way, topic modeling provides information about how users perceive and interact with Key Ring, FidMe, and Pass2U.

User reviews on barcode scanning and loyalty card databases are consistent with Key Ring's capabilities of scanning barcode loyalty cards and offering remote cloud backup.

FidMe's emphasis on deal-finding, receipt organization, and card management is seen in its themes on finding special offers, managing cards, and dealings.

Features of Pass2U, like its simplicity of use and support for multiple pass kinds, align with discussions about the app's use and compatibility with various pass types.

When it comes to comprehending user input and evaluating how well an app's features match users' expectations and experiences, topic modeling is a useful technique. Developers can increase user satisfaction, fix bugs, and improve their apps by identifying recurring themes and issues in user evaluations.

In [39]:
def format_topics_sentences(ldamodel, corpus, texts):

    sent_topics_df = pd.DataFrame()


    for i, row in enumerate(ldamodel[corpus]):
        row = sorted(row, key=lambda x: (x[1]), reverse=True)

        for j, (topic_num, prop_topic) in enumerate(row):
            if j == 0:  # Dominant topic
                wp = ldamodel.show_topic(topic_num)
                topic_keywords = ", ".join([word for word, prop in wp])
                row_data = [int(topic_num), round(prop_topic, 4), topic_keywords]
                sent_topics_df = pd.concat([sent_topics_df, pd.DataFrame([row_data], columns=['Dominant_Topic', 'Perc_Contribution', 'Topic_Keywords'])], ignore_index=True)
                break

    sent_topics_df['Review'] = texts

    sent_topics_df = sent_topics_df.sort_values(by=['Dominant_Topic', 'Perc_Contribution'], ascending=[False, False])

    return sent_topics_df

df_topic_sents_keywords = format_topics_sentences(ldamodel=lda_model, corpus=corpus, texts=data)

print(df_topic_sents_keywords.head(1200))


      Dominant_Topic  Perc_Contribution  \
604               14             0.9783   
85                14             0.9689   
551               14             0.9641   
395               14             0.9556   
371               14             0.9533   
229               14             0.9481   
539               14             0.9417   
542               14             0.9417   
1002              14             0.9417   
368               14             0.9151   
744               14             0.9151   
471               14             0.9067   
532               14             0.9067   
283               14             0.8963   
112               14             0.8833   
422               14             0.8833   
684               14             0.8833   
56                14             0.8667   
822               14             0.8667   
247               14             0.8444   
375               14             0.8444   
416               14             0.8444   
394        

The review with the highest percentage contribution is review # 271.

The dominant topic is: 15 (because the first one starts from 0).

The highest percent contribution is: 0.9806.

The dominant topic #15 is: card, app, store, excellent, need, carry, wallet, phone, one, love.

The preprocessed review is: scanned store card day later wallet stolen convenient card barcodes qr code smartphone used virtual nectar card sainsburys selfcheckout tonight worked dream recommend stocard intuitive scanned physical card phone camera using scan interface built app even mini version card scanned problem thats one le thing carry around.