In [1]:
#Import der Bibliotheken & des spacy-Packets
import pandas as pd
import re
import spacy
import numpy as np
import gensim
from gensim.models.coherencemodel import CoherenceModel
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation as LDA, TruncatedSVD
from collections import Counter

nlp = spacy.load('en_core_web_sm')

In [2]:
#Kontrolle und Anpassen der Stoppwörter:
print("Original Stoppwörter:")
print(nlp.Defaults.stop_words)

Original Stoppwörter:
{'back', 'name', 'such', 'over', 'during', 'against', 'become', "'re", 'otherwise', 'none', 'also', 'until', '‘ll', 'below', 'was', 'indeed', 'hereafter', 'go', 'two', 'empty', 'them', 'however', 'thereupon', 'are', 'mostly', 'beforehand', 'just', 'others', 'per', 'am', 'him', 'not', 'enough', 'never', 'latter', 'serious', 'few', 'ours', 'between', 'have', 'five', 'seem', 'must', 'her', 'whither', 'yourselves', 'nobody', '’m', 'see', 'than', 'becomes', 'out', 'somehow', 'cannot', 'thru', 'hers', 'can', 'beyond', 'these', 'on', 'himself', 'say', 'might', 'well', 'its', 'where', 'which', 'herein', 'former', 'if', 'nevertheless', 'did', '’ll', '’ve', 'to', 'along', 'whereafter', 'something', 'seemed', 'whereas', 'the', 'is', 'through', 'everyone', 'he', 'toward', 'no', 'you', 'rather', 'behind', 'whence', 'above', 'we', 'same', 'do', 'who', 'each', 'wherever', 'nine', 'in', 'hereby', 'yet', 'hundred', 'further', 'either', 'else', 'does', 'front', 'wherein', 'or', 'to

In [3]:
additional_stop_words = {'card', 'sandisk', 'phone'}
nlp.Defaults.stop_words |= additional_stop_words

remove_stop_words = {'no', 'not'}
nlp.Defaults.stop_words -= remove_stop_words

print("Stoppwörter:")
print(nlp.Defaults.stop_words)

Stoppwörter:
{'back', 'name', 'such', 'over', 'during', 'against', 'become', "'re", 'otherwise', 'none', 'also', 'until', '‘ll', 'below', 'was', 'indeed', 'hereafter', 'go', 'two', 'empty', 'them', 'however', 'thereupon', 'are', 'mostly', 'beforehand', 'just', 'others', 'per', 'am', 'him', 'enough', 'never', 'latter', 'serious', 'few', 'ours', 'between', 'have', 'five', 'seem', 'must', 'her', 'whither', 'yourselves', 'nobody', '’m', 'see', 'than', 'becomes', 'out', 'somehow', 'cannot', 'thru', 'hers', 'can', 'beyond', 'these', 'on', 'himself', 'say', 'might', 'well', 'its', 'where', 'which', 'herein', 'former', 'if', 'nevertheless', 'did', '’ll', '’ve', 'to', 'along', 'whereafter', 'something', 'seemed', 'whereas', 'the', 'is', 'through', 'everyone', 'he', 'toward', 'you', 'rather', 'behind', 'whence', 'above', 'we', 'same', 'do', 'who', 'each', 'wherever', 'nine', 'in', 'hereby', 'yet', 'hundred', 'further', 'either', 'else', 'does', 'front', 'wherein', 'or', 'together', 'noone', 'any

In [4]:
#Vorverarbeitung des Textes:
def preprocess_text(text):
    #Textformat muss ein String sein:
    if not isinstance(text, str):
        text = ""
    #Sonderzeichen entfernen:
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    #Umwandlung in Kleinbuchstaben: 
    doc = nlp(text.lower())
    #Stoppwörter entfernen und Lemmatisierung:
    tokens = [token.lemma_ for token in doc if not token.is_stop]
    return ' '.join(tokens)

In [5]:
#CSV-Datei einlesen & ausführen der definierten Vorverarbeitung:
csv_datei = 'amazon_reviews_I.csv'

try:
    df = pd.read_csv(csv_datei, on_bad_lines='skip')
    print("Einlesen erfolgreich.")
except FileNotFoundError:
    print(f"Fehler: Datei unter '{csv_datei}' nicht gefunden.")
except pd.errors.ParserError:
    print("Fehler beim Parsen der CSV-Datei. Bitte überprüfen Sie das Dateiformat.")
except Exception as e:
    print(f"Ein unerwarteter Fehler ist aufgetreten: {e}")
    
if 'df' in locals():
    #Auswahl der Spalte 'reviewText'
    if 'reviewText' in df.columns:
        #Überprüfen und Bereinigen der Daten:
        df['reviewText'] = df['reviewText'].astype(str)
        df['cleaned_reviewText'] = df['reviewText'].apply(preprocess_text)
    else:
        print("Fehler")

Einlesen erfolgreich.


In [6]:
#Implementierung der BoW-Methode mittels scikit-Learn:
if 'cleaned_reviewText' in df.columns:
    vectorizer = CountVectorizer()
    X_bow = vectorizer.fit_transform(df['cleaned_reviewText'])
else:
    print("Fehler")

In [7]:
#Implementierung der BoW-Methode mittels numpy:
def bow_with_numpy(texts):
    #Erstellen des Vokabulars
    vocab = list(set(" ".join(texts).split()))
    vocab.sort()
    vocab_dict = {word: idx for idx, word in enumerate(vocab)}
    
    #Erstellen der BoW-Matrix
    bow_matrix = np.zeros((len(texts), len(vocab)))
    for i, text in enumerate(texts):
        word_count = Counter(text.split())
        for word, count in word_count.items():
            if word in vocab_dict:
                bow_matrix[i, vocab_dict[word]] = count
    return bow_matrix, vocab

if 'cleaned_reviewText' in df.columns:
    bow_matrix_np, vocab_np = bow_with_numpy(df['cleaned_reviewText'])
else:
    print("Fehler")

In [8]:
#Anzeigen der beiden erstellten Vektoren:
#scikit-Learn:
if 'X_bow' in locals():
    print("BoW-Vektoren (scikit-learn):")
    print(X_bow.toarray())
else:
    print("Fehler")

#numpy:
if 'bow_matrix_np' in locals():
    print("BoW-Vektoren (numpy):")
    print(bow_matrix_np)
else:
    print("Fehler")

BoW-Vektoren (scikit-learn):
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
BoW-Vektoren (numpy):
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [9]:
#Implementierung der Tf-idf-Methode:
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(df['cleaned_reviewText'])

In [10]:
#Funktion zum Anzeigen der Themen:
def print_topics(model, vectorizer, top_n=10):
    words = vectorizer.get_feature_names_out()
    for idx, topic in enumerate(model.components_):
        print(f"Topic {idx}:")
        topic_words = [words[i] for i in topic.argsort()[:-top_n - 1:-1] if i < len(words)]
        print(" ".join(topic_words)) 

#Häufigste Themen mittels LSA:
n_topics = 3

if 'tfidf_matrix' in locals():
    lsa_model = TruncatedSVD(n_components=n_topics, random_state=42)
    lsa_topic_matrix = lsa_model.fit_transform(tfidf_matrix)
    
#Häufigste Themen mittels LDA:
    lda_model = LDA(n_components=n_topics, random_state=42)
    lda_topic_matrix = lda_model.fit_transform(tfidf_matrix)

else:
    print("Fehler")

In [11]:
#Berechnung des Coherence-Scores für LSA: 
lsa_topic_words = []
for topic_idx, topic in enumerate(lsa_model.components_):
    lsa_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

dictionary = gensim.corpora.Dictionary([text.split() for text in df['cleaned_reviewText']])
corpus = [dictionary.doc2bow(text.split()) for text in df['cleaned_reviewText']]

lsa_coherence_model = CoherenceModel(topics=lsa_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lsa_coherence_score = lsa_coherence_model.get_coherence()

#Berechnung des Coherence-Scores für LDA: 
lda_topic_words = []
for topic_idx, topic in enumerate(lda_model.components_):
    lda_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

lda_coherence_model = CoherenceModel(topics=lda_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lda_coherence_score = lda_coherence_model.get_coherence()

#Ausgabe der berechneten Scores für LSA & LDA:
print(f"LSA Coherence-Score: {lsa_coherence_score}")
print(f"LDA Coherence-Score: {lda_coherence_score}")

LSA Coherence-Score: 0.7297212921835815
LDA Coherence-Score: 0.5727879294566075


In [12]:
#Häufigste Themen mittels LSA:
n_topics = 4

if 'tfidf_matrix' in locals():
    lsa_model = TruncatedSVD(n_components=n_topics, random_state=42)
    lsa_topic_matrix = lsa_model.fit_transform(tfidf_matrix)
    
#Häufigste Themen mittels LDA:
    lda_model = LDA(n_components=n_topics, random_state=42)
    lda_topic_matrix = lda_model.fit_transform(tfidf_matrix)

else:
    print("Fehler")

#Berechnung des Coherence-Scores für LSA: 
lsa_topic_words = []
for topic_idx, topic in enumerate(lsa_model.components_):
    lsa_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

dictionary = gensim.corpora.Dictionary([text.split() for text in df['cleaned_reviewText']])
corpus = [dictionary.doc2bow(text.split()) for text in df['cleaned_reviewText']]

lsa_coherence_model = CoherenceModel(topics=lsa_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lsa_coherence_score = lsa_coherence_model.get_coherence()

#Berechnung des Coherence-Scores für LDA: 
lda_topic_words = []
for topic_idx, topic in enumerate(lda_model.components_):
    lda_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

lda_coherence_model = CoherenceModel(topics=lda_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lda_coherence_score = lda_coherence_model.get_coherence()

#Ausgabe der berechneten Scores für LSA & LDA:
print(f"LSA Coherence-Score: {lsa_coherence_score}")
print(f"LDA Coherence-Score: {lda_coherence_score}")

LSA Coherence-Score: 0.7175079177344561
LDA Coherence-Score: 0.5098698133379772


In [13]:
#Häufigste Themen mittels LSA:
n_topics = 5

if 'tfidf_matrix' in locals():
    lsa_model = TruncatedSVD(n_components=n_topics, random_state=42)
    lsa_topic_matrix = lsa_model.fit_transform(tfidf_matrix)
    
#Häufigste Themen mittels LDA:
    lda_model = LDA(n_components=n_topics, random_state=42)
    lda_topic_matrix = lda_model.fit_transform(tfidf_matrix)

else:
    print("Fehler")

#Berechnung des Coherence-Scores für LSA: 
lsa_topic_words = []
for topic_idx, topic in enumerate(lsa_model.components_):
    lsa_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

dictionary = gensim.corpora.Dictionary([text.split() for text in df['cleaned_reviewText']])
corpus = [dictionary.doc2bow(text.split()) for text in df['cleaned_reviewText']]

lsa_coherence_model = CoherenceModel(topics=lsa_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lsa_coherence_score = lsa_coherence_model.get_coherence()

#Berechnung des Coherence-Scores für LDA: 
lda_topic_words = []
for topic_idx, topic in enumerate(lda_model.components_):
    lda_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

lda_coherence_model = CoherenceModel(topics=lda_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lda_coherence_score = lda_coherence_model.get_coherence()

#Ausgabe der berechneten Scores für LSA & LDA:
print(f"LSA Coherence-Score: {lsa_coherence_score}")
print(f"LDA Coherence-Score: {lda_coherence_score}")

LSA Coherence-Score: 0.6789818732117567
LDA Coherence-Score: 0.5725315427436661


In [14]:
#Häufigste Themen mittels LSA:
n_topics = 6

if 'tfidf_matrix' in locals():
    lsa_model = TruncatedSVD(n_components=n_topics, random_state=42)
    lsa_topic_matrix = lsa_model.fit_transform(tfidf_matrix)
    
#Häufigste Themen mittels LDA:
    lda_model = LDA(n_components=n_topics, random_state=42)
    lda_topic_matrix = lda_model.fit_transform(tfidf_matrix)

else:
    print("Fehler")

#Berechnung des Coherence-Scores für LSA: 
lsa_topic_words = []
for topic_idx, topic in enumerate(lsa_model.components_):
    lsa_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

dictionary = gensim.corpora.Dictionary([text.split() for text in df['cleaned_reviewText']])
corpus = [dictionary.doc2bow(text.split()) for text in df['cleaned_reviewText']]

lsa_coherence_model = CoherenceModel(topics=lsa_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lsa_coherence_score = lsa_coherence_model.get_coherence()

#Berechnung des Coherence-Scores für LDA: 
lda_topic_words = []
for topic_idx, topic in enumerate(lda_model.components_):
    lda_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

lda_coherence_model = CoherenceModel(topics=lda_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lda_coherence_score = lda_coherence_model.get_coherence()

#Ausgabe der berechneten Scores für LSA & LDA:
print(f"LSA Coherence-Score: {lsa_coherence_score}")
print(f"LDA Coherence-Score: {lda_coherence_score}")

LSA Coherence-Score: 0.7054908253615059
LDA Coherence-Score: 0.6779561882538624


In [15]:
#Häufigste Themen mittels LSA:
n_topics = 7

if 'tfidf_matrix' in locals():
    lsa_model = TruncatedSVD(n_components=n_topics, random_state=42)
    lsa_topic_matrix = lsa_model.fit_transform(tfidf_matrix)
    
#Häufigste Themen mittels LDA:
    lda_model = LDA(n_components=n_topics, random_state=42)
    lda_topic_matrix = lda_model.fit_transform(tfidf_matrix)

else:
    print("Fehler")

#Berechnung des Coherence-Scores für LSA: 
lsa_topic_words = []
for topic_idx, topic in enumerate(lsa_model.components_):
    lsa_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

dictionary = gensim.corpora.Dictionary([text.split() for text in df['cleaned_reviewText']])
corpus = [dictionary.doc2bow(text.split()) for text in df['cleaned_reviewText']]

lsa_coherence_model = CoherenceModel(topics=lsa_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lsa_coherence_score = lsa_coherence_model.get_coherence()

#Berechnung des Coherence-Scores für LDA: 
lda_topic_words = []
for topic_idx, topic in enumerate(lda_model.components_):
    lda_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

lda_coherence_model = CoherenceModel(topics=lda_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lda_coherence_score = lda_coherence_model.get_coherence()

#Ausgabe der berechneten Scores für LSA & LDA:
print(f"LSA Coherence-Score: {lsa_coherence_score}")
print(f"LDA Coherence-Score: {lda_coherence_score}")

LSA Coherence-Score: 0.7220728892583133
LDA Coherence-Score: 0.6161614261493206


In [16]:
#Häufigste Themen mittels LSA:
n_topics = 8

if 'tfidf_matrix' in locals():
    lsa_model = TruncatedSVD(n_components=n_topics, random_state=42)
    lsa_topic_matrix = lsa_model.fit_transform(tfidf_matrix)
    
#Häufigste Themen mittels LDA:
    lda_model = LDA(n_components=n_topics, random_state=42)
    lda_topic_matrix = lda_model.fit_transform(tfidf_matrix)

else:
    print("Fehler")

#Berechnung des Coherence-Scores für LSA: 
lsa_topic_words = []
for topic_idx, topic in enumerate(lsa_model.components_):
    lsa_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

dictionary = gensim.corpora.Dictionary([text.split() for text in df['cleaned_reviewText']])
corpus = [dictionary.doc2bow(text.split()) for text in df['cleaned_reviewText']]

lsa_coherence_model = CoherenceModel(topics=lsa_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lsa_coherence_score = lsa_coherence_model.get_coherence()

#Berechnung des Coherence-Scores für LDA: 
lda_topic_words = []
for topic_idx, topic in enumerate(lda_model.components_):
    lda_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

lda_coherence_model = CoherenceModel(topics=lda_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lda_coherence_score = lda_coherence_model.get_coherence()

#Ausgabe der berechneten Scores für LSA & LDA:
print(f"LSA Coherence-Score: {lsa_coherence_score}")
print(f"LDA Coherence-Score: {lda_coherence_score}")

LSA Coherence-Score: 0.71978819600301
LDA Coherence-Score: 0.4976688488594736


In [17]:
#Häufigste Themen mittels LSA:
n_topics = 9

if 'tfidf_matrix' in locals():
    lsa_model = TruncatedSVD(n_components=n_topics, random_state=42)
    lsa_topic_matrix = lsa_model.fit_transform(tfidf_matrix)
    
#Häufigste Themen mittels LDA:
    lda_model = LDA(n_components=n_topics, random_state=42)
    lda_topic_matrix = lda_model.fit_transform(tfidf_matrix)

else:
    print("Fehler")

#Berechnung des Coherence-Scores für LSA: 
lsa_topic_words = []
for topic_idx, topic in enumerate(lsa_model.components_):
    lsa_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

dictionary = gensim.corpora.Dictionary([text.split() for text in df['cleaned_reviewText']])
corpus = [dictionary.doc2bow(text.split()) for text in df['cleaned_reviewText']]

lsa_coherence_model = CoherenceModel(topics=lsa_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lsa_coherence_score = lsa_coherence_model.get_coherence()

#Berechnung des Coherence-Scores für LDA: 
lda_topic_words = []
for topic_idx, topic in enumerate(lda_model.components_):
    lda_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

lda_coherence_model = CoherenceModel(topics=lda_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lda_coherence_score = lda_coherence_model.get_coherence()

#Ausgabe der berechneten Scores für LSA & LDA:
print(f"LSA Coherence-Score: {lsa_coherence_score}")
print(f"LDA Coherence-Score: {lda_coherence_score}")

LSA Coherence-Score: 0.7119185436527763
LDA Coherence-Score: 0.5247521596564902


In [18]:
#Häufigste Themen mittels LSA:
n_topics = 10

if 'tfidf_matrix' in locals():
    lsa_model = TruncatedSVD(n_components=n_topics, random_state=42)
    lsa_topic_matrix = lsa_model.fit_transform(tfidf_matrix)
    
#Häufigste Themen mittels LDA:
    lda_model = LDA(n_components=n_topics, random_state=42)
    lda_topic_matrix = lda_model.fit_transform(tfidf_matrix)

else:
    print("Fehler")

#Berechnung des Coherence-Scores für LSA: 
lsa_topic_words = []
for topic_idx, topic in enumerate(lsa_model.components_):
    lsa_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

dictionary = gensim.corpora.Dictionary([text.split() for text in df['cleaned_reviewText']])
corpus = [dictionary.doc2bow(text.split()) for text in df['cleaned_reviewText']]

lsa_coherence_model = CoherenceModel(topics=lsa_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lsa_coherence_score = lsa_coherence_model.get_coherence()

#Berechnung des Coherence-Scores für LDA: 
lda_topic_words = []
for topic_idx, topic in enumerate(lda_model.components_):
    lda_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

lda_coherence_model = CoherenceModel(topics=lda_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lda_coherence_score = lda_coherence_model.get_coherence()

#Ausgabe der berechneten Scores für LSA & LDA:
print(f"LSA Coherence-Score: {lsa_coherence_score}")
print(f"LDA Coherence-Score: {lda_coherence_score}")

LSA Coherence-Score: 0.6666381583967793
LDA Coherence-Score: 0.5608451153894137


In [19]:
#Häufigste Themen mittels LSA:
n_topics = 11

if 'tfidf_matrix' in locals():
    lsa_model = TruncatedSVD(n_components=n_topics, random_state=42)
    lsa_topic_matrix = lsa_model.fit_transform(tfidf_matrix)
    
#Häufigste Themen mittels LDA:
    lda_model = LDA(n_components=n_topics, random_state=42)
    lda_topic_matrix = lda_model.fit_transform(tfidf_matrix)

else:
    print("Fehler")

#Berechnung des Coherence-Scores für LSA: 
lsa_topic_words = []
for topic_idx, topic in enumerate(lsa_model.components_):
    lsa_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

dictionary = gensim.corpora.Dictionary([text.split() for text in df['cleaned_reviewText']])
corpus = [dictionary.doc2bow(text.split()) for text in df['cleaned_reviewText']]

lsa_coherence_model = CoherenceModel(topics=lsa_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lsa_coherence_score = lsa_coherence_model.get_coherence()

#Berechnung des Coherence-Scores für LDA: 
lda_topic_words = []
for topic_idx, topic in enumerate(lda_model.components_):
    lda_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

lda_coherence_model = CoherenceModel(topics=lda_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lda_coherence_score = lda_coherence_model.get_coherence()

#Ausgabe der berechneten Scores für LSA & LDA:
print(f"LSA Coherence-Score: {lsa_coherence_score}")
print(f"LDA Coherence-Score: {lda_coherence_score}")

LSA Coherence-Score: 0.6780945350992795
LDA Coherence-Score: 0.514846427000723


In [20]:
#Häufigste Themen mittels LSA:
n_topics = 12

if 'tfidf_matrix' in locals():
    lsa_model = TruncatedSVD(n_components=n_topics, random_state=42)
    lsa_topic_matrix = lsa_model.fit_transform(tfidf_matrix)
    
#Häufigste Themen mittels LDA:
    lda_model = LDA(n_components=n_topics, random_state=42)
    lda_topic_matrix = lda_model.fit_transform(tfidf_matrix)

else:
    print("Fehler")

#Berechnung des Coherence-Scores für LSA: 
lsa_topic_words = []
for topic_idx, topic in enumerate(lsa_model.components_):
    lsa_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

dictionary = gensim.corpora.Dictionary([text.split() for text in df['cleaned_reviewText']])
corpus = [dictionary.doc2bow(text.split()) for text in df['cleaned_reviewText']]

lsa_coherence_model = CoherenceModel(topics=lsa_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lsa_coherence_score = lsa_coherence_model.get_coherence()

#Berechnung des Coherence-Scores für LDA: 
lda_topic_words = []
for topic_idx, topic in enumerate(lda_model.components_):
    lda_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

lda_coherence_model = CoherenceModel(topics=lda_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lda_coherence_score = lda_coherence_model.get_coherence()

#Ausgabe der berechneten Scores für LSA & LDA:
print(f"LSA Coherence-Score: {lsa_coherence_score}")
print(f"LDA Coherence-Score: {lda_coherence_score}")

LSA Coherence-Score: 0.6410692530401212
LDA Coherence-Score: 0.45025296141795645


In [21]:
#Häufigste Themen mittels LSA:
n_topics = 13

if 'tfidf_matrix' in locals():
    lsa_model = TruncatedSVD(n_components=n_topics, random_state=42)
    lsa_topic_matrix = lsa_model.fit_transform(tfidf_matrix)
    
#Häufigste Themen mittels LDA:
    lda_model = LDA(n_components=n_topics, random_state=42)
    lda_topic_matrix = lda_model.fit_transform(tfidf_matrix)

else:
    print("Fehler")

#Berechnung des Coherence-Scores für LSA: 
lsa_topic_words = []
for topic_idx, topic in enumerate(lsa_model.components_):
    lsa_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

dictionary = gensim.corpora.Dictionary([text.split() for text in df['cleaned_reviewText']])
corpus = [dictionary.doc2bow(text.split()) for text in df['cleaned_reviewText']]

lsa_coherence_model = CoherenceModel(topics=lsa_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lsa_coherence_score = lsa_coherence_model.get_coherence()

#Berechnung des Coherence-Scores für LDA: 
lda_topic_words = []
for topic_idx, topic in enumerate(lda_model.components_):
    lda_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

lda_coherence_model = CoherenceModel(topics=lda_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lda_coherence_score = lda_coherence_model.get_coherence()

#Ausgabe der berechneten Scores für LSA & LDA:
print(f"LSA Coherence-Score: {lsa_coherence_score}")
print(f"LDA Coherence-Score: {lda_coherence_score}")

LSA Coherence-Score: 0.6087004739276651
LDA Coherence-Score: 0.4217297128186494


In [22]:
#Häufigste Themen mittels LSA:
n_topics = 14

if 'tfidf_matrix' in locals():
    lsa_model = TruncatedSVD(n_components=n_topics, random_state=42)
    lsa_topic_matrix = lsa_model.fit_transform(tfidf_matrix)
    
#Häufigste Themen mittels LDA:
    lda_model = LDA(n_components=n_topics, random_state=42)
    lda_topic_matrix = lda_model.fit_transform(tfidf_matrix)

else:
    print("Fehler")

#Berechnung des Coherence-Scores für LSA: 
lsa_topic_words = []
for topic_idx, topic in enumerate(lsa_model.components_):
    lsa_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

dictionary = gensim.corpora.Dictionary([text.split() for text in df['cleaned_reviewText']])
corpus = [dictionary.doc2bow(text.split()) for text in df['cleaned_reviewText']]

lsa_coherence_model = CoherenceModel(topics=lsa_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lsa_coherence_score = lsa_coherence_model.get_coherence()

#Berechnung des Coherence-Scores für LDA: 
lda_topic_words = []
for topic_idx, topic in enumerate(lda_model.components_):
    lda_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

lda_coherence_model = CoherenceModel(topics=lda_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lda_coherence_score = lda_coherence_model.get_coherence()

#Ausgabe der berechneten Scores für LSA & LDA:
print(f"LSA Coherence-Score: {lsa_coherence_score}")
print(f"LDA Coherence-Score: {lda_coherence_score}")

LSA Coherence-Score: 0.621554438480831
LDA Coherence-Score: 0.5312768740209545


In [23]:
#Häufigste Themen mittels LSA:
n_topics = 15

if 'tfidf_matrix' in locals():
    lsa_model = TruncatedSVD(n_components=n_topics, random_state=42)
    lsa_topic_matrix = lsa_model.fit_transform(tfidf_matrix)
    
#Häufigste Themen mittels LDA:
    lda_model = LDA(n_components=n_topics, random_state=42)
    lda_topic_matrix = lda_model.fit_transform(tfidf_matrix)

else:
    print("Fehler")

#Berechnung des Coherence-Scores für LSA: 
lsa_topic_words = []
for topic_idx, topic in enumerate(lsa_model.components_):
    lsa_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

dictionary = gensim.corpora.Dictionary([text.split() for text in df['cleaned_reviewText']])
corpus = [dictionary.doc2bow(text.split()) for text in df['cleaned_reviewText']]

lsa_coherence_model = CoherenceModel(topics=lsa_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lsa_coherence_score = lsa_coherence_model.get_coherence()

#Berechnung des Coherence-Scores für LDA: 
lda_topic_words = []
for topic_idx, topic in enumerate(lda_model.components_):
    lda_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

lda_coherence_model = CoherenceModel(topics=lda_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lda_coherence_score = lda_coherence_model.get_coherence()

#Ausgabe der berechneten Scores für LSA & LDA:
print(f"LSA Coherence-Score: {lsa_coherence_score}")
print(f"LDA Coherence-Score: {lda_coherence_score}")

LSA Coherence-Score: 0.6149188633005382
LDA Coherence-Score: 0.43836645263402835


In [24]:
#Häufigste Themen mittels LSA:
n_topics = 16

if 'tfidf_matrix' in locals():
    lsa_model = TruncatedSVD(n_components=n_topics, random_state=42)
    lsa_topic_matrix = lsa_model.fit_transform(tfidf_matrix)
    
#Häufigste Themen mittels LDA:
    lda_model = LDA(n_components=n_topics, random_state=42)
    lda_topic_matrix = lda_model.fit_transform(tfidf_matrix)

else:
    print("Fehler")

#Berechnung des Coherence-Scores für LSA: 
lsa_topic_words = []
for topic_idx, topic in enumerate(lsa_model.components_):
    lsa_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

dictionary = gensim.corpora.Dictionary([text.split() for text in df['cleaned_reviewText']])
corpus = [dictionary.doc2bow(text.split()) for text in df['cleaned_reviewText']]

lsa_coherence_model = CoherenceModel(topics=lsa_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lsa_coherence_score = lsa_coherence_model.get_coherence()

#Berechnung des Coherence-Scores für LDA: 
lda_topic_words = []
for topic_idx, topic in enumerate(lda_model.components_):
    lda_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

lda_coherence_model = CoherenceModel(topics=lda_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lda_coherence_score = lda_coherence_model.get_coherence()

#Ausgabe der berechneten Scores für LSA & LDA:
print(f"LSA Coherence-Score: {lsa_coherence_score}")
print(f"LDA Coherence-Score: {lda_coherence_score}")

LSA Coherence-Score: 0.5684022502488646
LDA Coherence-Score: 0.5081699573959502


In [25]:
#Häufigste Themen mittels LSA:
n_topics = 17

if 'tfidf_matrix' in locals():
    lsa_model = TruncatedSVD(n_components=n_topics, random_state=42)
    lsa_topic_matrix = lsa_model.fit_transform(tfidf_matrix)
    
#Häufigste Themen mittels LDA:
    lda_model = LDA(n_components=n_topics, random_state=42)
    lda_topic_matrix = lda_model.fit_transform(tfidf_matrix)

else:
    print("Fehler")

#Berechnung des Coherence-Scores für LSA: 
lsa_topic_words = []
for topic_idx, topic in enumerate(lsa_model.components_):
    lsa_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

dictionary = gensim.corpora.Dictionary([text.split() for text in df['cleaned_reviewText']])
corpus = [dictionary.doc2bow(text.split()) for text in df['cleaned_reviewText']]

lsa_coherence_model = CoherenceModel(topics=lsa_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lsa_coherence_score = lsa_coherence_model.get_coherence()

#Berechnung des Coherence-Scores für LDA: 
lda_topic_words = []
for topic_idx, topic in enumerate(lda_model.components_):
    lda_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

lda_coherence_model = CoherenceModel(topics=lda_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lda_coherence_score = lda_coherence_model.get_coherence()

#Ausgabe der berechneten Scores für LSA & LDA:
print(f"LSA Coherence-Score: {lsa_coherence_score}")
print(f"LDA Coherence-Score: {lda_coherence_score}")

LSA Coherence-Score: 0.6072140441162283
LDA Coherence-Score: 0.43860700914416073


In [26]:
#Häufigste Themen mittels LSA:
n_topics = 18

if 'tfidf_matrix' in locals():
    lsa_model = TruncatedSVD(n_components=n_topics, random_state=42)
    lsa_topic_matrix = lsa_model.fit_transform(tfidf_matrix)
    
#Häufigste Themen mittels LDA:
    lda_model = LDA(n_components=n_topics, random_state=42)
    lda_topic_matrix = lda_model.fit_transform(tfidf_matrix)

else:
    print("Fehler")

#Berechnung des Coherence-Scores für LSA: 
lsa_topic_words = []
for topic_idx, topic in enumerate(lsa_model.components_):
    lsa_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

dictionary = gensim.corpora.Dictionary([text.split() for text in df['cleaned_reviewText']])
corpus = [dictionary.doc2bow(text.split()) for text in df['cleaned_reviewText']]

lsa_coherence_model = CoherenceModel(topics=lsa_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lsa_coherence_score = lsa_coherence_model.get_coherence()

#Berechnung des Coherence-Scores für LDA: 
lda_topic_words = []
for topic_idx, topic in enumerate(lda_model.components_):
    lda_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

lda_coherence_model = CoherenceModel(topics=lda_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lda_coherence_score = lda_coherence_model.get_coherence()

#Ausgabe der berechneten Scores für LSA & LDA:
print(f"LSA Coherence-Score: {lsa_coherence_score}")
print(f"LDA Coherence-Score: {lda_coherence_score}")

LSA Coherence-Score: 0.5651366734664416
LDA Coherence-Score: 0.40078863012475013


In [27]:
#Häufigste Themen mittels LSA:
n_topics = 19

if 'tfidf_matrix' in locals():
    lsa_model = TruncatedSVD(n_components=n_topics, random_state=42)
    lsa_topic_matrix = lsa_model.fit_transform(tfidf_matrix)
    
#Häufigste Themen mittels LDA:
    lda_model = LDA(n_components=n_topics, random_state=42)
    lda_topic_matrix = lda_model.fit_transform(tfidf_matrix)

else:
    print("Fehler")

#Berechnung des Coherence-Scores für LSA: 
lsa_topic_words = []
for topic_idx, topic in enumerate(lsa_model.components_):
    lsa_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

dictionary = gensim.corpora.Dictionary([text.split() for text in df['cleaned_reviewText']])
corpus = [dictionary.doc2bow(text.split()) for text in df['cleaned_reviewText']]

lsa_coherence_model = CoherenceModel(topics=lsa_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lsa_coherence_score = lsa_coherence_model.get_coherence()

#Berechnung des Coherence-Scores für LDA: 
lda_topic_words = []
for topic_idx, topic in enumerate(lda_model.components_):
    lda_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

lda_coherence_model = CoherenceModel(topics=lda_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lda_coherence_score = lda_coherence_model.get_coherence()

#Ausgabe der berechneten Scores für LSA & LDA:
print(f"LSA Coherence-Score: {lsa_coherence_score}")
print(f"LDA Coherence-Score: {lda_coherence_score}")

LSA Coherence-Score: 0.5549158277418408
LDA Coherence-Score: 0.39990911823655356


In [28]:
#Häufigste Themen mittels LSA:
n_topics = 20

if 'tfidf_matrix' in locals():
    lsa_model = TruncatedSVD(n_components=n_topics, random_state=42)
    lsa_topic_matrix = lsa_model.fit_transform(tfidf_matrix)
    
#Häufigste Themen mittels LDA:
    lda_model = LDA(n_components=n_topics, random_state=42)
    lda_topic_matrix = lda_model.fit_transform(tfidf_matrix)

else:
    print("Fehler")

#Berechnung des Coherence-Scores für LSA: 
lsa_topic_words = []
for topic_idx, topic in enumerate(lsa_model.components_):
    lsa_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

dictionary = gensim.corpora.Dictionary([text.split() for text in df['cleaned_reviewText']])
corpus = [dictionary.doc2bow(text.split()) for text in df['cleaned_reviewText']]

lsa_coherence_model = CoherenceModel(topics=lsa_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lsa_coherence_score = lsa_coherence_model.get_coherence()

#Berechnung des Coherence-Scores für LDA: 
lda_topic_words = []
for topic_idx, topic in enumerate(lda_model.components_):
    lda_topic_words.append([tfidf_vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-10 - 1:-1]])

lda_coherence_model = CoherenceModel(topics=lda_topic_words, texts=[text.split() for text in df['cleaned_reviewText']], dictionary=dictionary, coherence='c_v')
lda_coherence_score = lda_coherence_model.get_coherence()

#Ausgabe der berechneten Scores für LSA & LDA:
print(f"LSA Coherence-Score: {lsa_coherence_score}")
print(f"LDA Coherence-Score: {lda_coherence_score}")

LSA Coherence-Score: 0.5547395176102348
LDA Coherence-Score: 0.43644073872704603


In [31]:
#Häufigste Themen mittels LSA:
n_topics = 6

if 'tfidf_matrix' in locals():
    lsa_model = TruncatedSVD(n_components=n_topics, random_state=42)
    lsa_topic_matrix = lsa_model.fit_transform(tfidf_matrix)
    
    print("LSA Topics:")
    print_topics(lsa_model, vectorizer)
    
#Häufigste Themen mittels LDA:
    lda_model = LDA(n_components=n_topics, random_state=42)
    lda_topic_matrix = lda_model.fit_transform(tfidf_matrix)

    print("LDA Topics:")
    print_topics(lda_model, vectorizer)
else:
    print("Fehler")

LSA Topics:
Topic 0:
nan tablet price sd good micro stuff time speed lag
Topic 1:
work great buy sd samsung memory no galaxy use product
Topic 2:
sd micro product card recommend transfer samsung galaxy adapter great
Topic 3:
good no gb price fast speed ve month sd issue
Topic 4:
no issue problem video month ve camera transfer hd record
Topic 5:
tablet samsung memory tab gb expand no purchase galaxy happy
LDA Topics:
Topic 0:
ve work card problem buy no gopro tablet hero month
Topic 1:
price no great work issue memory good sd tablet storage
Topic 2:
nan speak feature car key entire rt microsdxc hold drive
Topic 3:
work samsung galaxy great speed use not format buy space
Topic 4:
great work product sd camera micro want adapter happy recommend
Topic 5:
work fast buy memory video far smartphone good stuff sd


In [32]:
#Funktion zum Zuweisen der Themen zu den Dokumenten:
def assign_topics(topic_matrix, n_top_documents=5):
    assignments = {}
    for topic_idx, topic in enumerate(topic_matrix.T):
        assignments[f'Topic {topic_idx}'] = []
        top_document_indices = topic.argsort()[:-n_top_documents - 1:-1]
        for doc_index in top_document_indices:
            assignments[f'Topic {topic_idx}'].append(df['cleaned_reviewText'].iloc[doc_index])
    return assignments

#Funktion zum Erstellen eines DataFrames:
def create_topic_dataframe(model, vectorizer, n_top_words=10):
    words = vectorizer.get_feature_names_out()
    topics = []
    for topic_idx, topic in enumerate(model.components_):
        topic_words = [words[i] for i in topic.argsort()[:-n_top_words - 1:-1] if i < len(words)]
        topics.append(f"Topic {topic_idx + 1}: " + ", ".join(topic_words))
    return pd.DataFrame(topics, columns=["Top Words"])

if 'lsa_topic_matrix' in locals() and 'lda_topic_matrix' in locals():
    #Dokumentzuweisungen für LSA:
    print("Dokumentenzuweisungen für LSA:")
    lsa_assignments = assign_topics(lsa_topic_matrix)
    lsa_assignments_df = pd.DataFrame(dict([(k, pd.Series(v)) for k, v in lsa_assignments.items()]))
    display(lsa_assignments_df)

    #Dokumentzuweisungen für LDA:
    print("Dokumentenzuweisungen für LDA:")
    lda_assignments = assign_topics(lda_topic_matrix)
    lda_assignments_df = pd.DataFrame(dict([(k, pd.Series(v)) for k, v in lda_assignments.items()]))
    display(lda_assignments_df)

    #DataFrames für die Themen:
    lsa_topics_df = create_topic_dataframe(lsa_model, vectorizer)
    lda_topics_df = create_topic_dataframe(lda_model, vectorizer)

    print("LSA Topics:")
    display(lsa_topics_df)

    print("LDA Topics:")
    display(lda_topics_df)
else:
    print("Fehler")

Dokumentenzuweisungen für LSA:


Unnamed: 0,Topic 0,Topic 1,Topic 2,Topic 3,Topic 4,Topic 5
0,,great quality product purchase gopro hero wo...,micro sd work great describe beware micro sd a...,good price,no issue,purchase expand memory tablet work tablet ca...
1,,purchase use samsung camera work great picture...,great quality product purchase gopro hero wo...,good value money class rate fast speed good ...,buy surface pro ve month ve no problem fast st...,instal samsung tablet tab definitely perfor...
2,,work great galaxy s not need format pop work g...,ultra gb micro sd xc perfect card,ultra gb memory show no sign exactly need go...,item ve month record hd video gopro hero problem,purchase use samsung tablet work perfectly sto...
3,,buy new samsung galaxy tab work great allow ...,read write speed well samsung sd previously he...,good product get tired take picture memory goo...,store record video information dash camera n...,expand samsung tab tablet gb gb work fast no...
4,,purchase use samsung tablet work perfectly sto...,gb micro sd class reliable sd amazing transf...,class speed way fly gb usable space think good,month galaxy s no issue plenty fast transfer c...,second gb microsdhc card buy instal samsun...


Dokumentenzuweisungen für LDA:


Unnamed: 0,Topic 0,Topic 1,Topic 2,Topic 3,Topic 4,Topic 5
0,purchase version expand memory gb android tabl...,review counting need justification buy loo...,,work expect high transfer speed nice extra...,month work finei sd encrypt little bit long tr...,come nice little adapter microsdhc use normal ...
1,not micro sd work not buy install work fine fi...,fast microsd compare class speed look nice w...,,initially insert samsung galaxy s recognize no...,year go strong boost memory tablet store movie...,read write speed well samsung sd previously he...
2,job fairly cheap worth return exchange defecti...,skip beat file transfer speedy corruption issu...,,second gb microsdhc card buy instal samsun...,company trust offer good product low pricecome...,standard micro sd recommend purchase dad expan...
3,expand samsung tab tablet gb gb work fast no...,product supply describe gb micro sdhc class ...,,load home movie samsung galaxy tab copy proc...,work fine instal new ms surface protoo bad win...,purchase new nokia lumia smart recently purc...
4,ve order sd card good overall shipping excelle...,get gb fill keep get insufficient disk space...,,amazed speed sd smart hd video no long jump bu...,arrive quickly perfect condition work camera...,work expect spring high capacity think bit c...


LSA Topics:


Unnamed: 0,Top Words
0,"Topic 1: nan, tablet, price, sd, good, micro, ..."
1,"Topic 2: work, great, buy, sd, samsung, memory..."
2,"Topic 3: sd, micro, product, card, recommend, ..."
3,"Topic 4: good, no, gb, price, fast, speed, ve,..."
4,"Topic 5: no, issue, problem, video, month, ve,..."
5,"Topic 6: tablet, samsung, memory, tab, gb, exp..."


LDA Topics:


Unnamed: 0,Top Words
0,"Topic 1: ve, work, card, problem, buy, no, gop..."
1,"Topic 2: price, no, great, work, issue, memory..."
2,"Topic 3: nan, speak, feature, car, key, entire..."
3,"Topic 4: work, samsung, galaxy, great, speed, ..."
4,"Topic 5: great, work, product, sd, camera, mic..."
5,"Topic 6: work, fast, buy, memory, video, far, ..."
