# LDA DEMO

In [4]:
# Import necessary libraries
import spacy
import gensim
from gensim import corpora
from pprint import pprint

# Load spaCy's English NLP model
nlp = spacy.load('en_core_web_sm')

# Sample documents for demonstration

In [5]:
documents = [
    "Death Stranding is a new strand-type game genre.",
    "Learning how to draw is a difficult task.",
    "Godot is a free open-source game engine.",
    "The Game Awards was a failure of an event.",
]

# Preprocess the documents

In [10]:
def preprocess(text):
    # Tokenize and lemmatize using spaCy
    doc = nlp(text)
    tokens = [token.lemma_ for token in doc if not token.is_stop and token.is_alpha]
    return tokens

# Apply preprocessing to all documents
processed_documents = [preprocess(doc) for doc in documents]

# Create a dictionary and corpus for LDA
dictionary = corpora.Dictionary(processed_documents)
corpus = [dictionary.doc2bow(doc) for doc in processed_documents]

# Build LDA model
lda_model_12 = gensim.models.LdaModel(corpus, num_topics=12, id2word=dictionary, passes=15)

# Print topics and their keywords

In [11]:
print('12 Topics')
pprint(lda_model_12.print_topics())

12 Topics
[(0,
  '0.050*"Godot" + 0.050*"learn" + 0.050*"event" + 0.050*"Game" + '
  '0.050*"Awards" + 0.050*"source" + 0.050*"open" + 0.050*"free" + '
  '0.050*"engine" + 0.050*"death"'),
 (1,
  '0.125*"game" + 0.125*"stranding" + 0.125*"genre" + 0.125*"new" + '
  '0.125*"strand" + 0.125*"death" + 0.125*"type" + 0.010*"Awards" + '
  '0.010*"open" + 0.010*"event"'),
 (2,
  '0.050*"Godot" + 0.050*"learn" + 0.050*"event" + 0.050*"Game" + '
  '0.050*"Awards" + 0.050*"source" + 0.050*"open" + 0.050*"free" + '
  '0.050*"engine" + 0.050*"death"'),
 (3,
  '0.191*"failure" + 0.191*"event" + 0.191*"Game" + 0.191*"Awards" + '
  '0.015*"Godot" + 0.015*"source" + 0.015*"open" + 0.015*"free" + '
  '0.015*"engine" + 0.015*"death"'),
 (4,
  '0.050*"Godot" + 0.050*"learn" + 0.050*"event" + 0.050*"Game" + '
  '0.050*"Awards" + 0.050*"source" + 0.050*"open" + 0.050*"free" + '
  '0.050*"engine" + 0.050*"death"'),
 (5,
  '0.141*"Godot" + 0.141*"source" + 0.141*"open" + 0.141*"free" + '
  '0.141*"engine" +

# Assign topics to documents

In [13]:
for i, doc in enumerate(processed_documents):
    print(f"Document {i+1} - Topic: {lda_model_12.get_document_topics(corpus[i])}")

Document 1 - Topic: [(0, 0.010416683), (1, 0.8854164), (2, 0.010416683), (3, 0.010416683), (4, 0.010416683), (5, 0.010416769), (6, 0.010416683), (7, 0.010416683), (8, 0.010416683), (9, 0.010416683), (10, 0.010416683), (11, 0.010416683)]
Document 2 - Topic: [(0, 0.016666684), (1, 0.016666684), (2, 0.016666684), (3, 0.016666684), (4, 0.016666684), (5, 0.016666684), (6, 0.016666684), (7, 0.016666684), (8, 0.8166665), (9, 0.016666684), (10, 0.016666684), (11, 0.016666684)]
Document 3 - Topic: [(0, 0.011904778), (1, 0.011904866), (2, 0.011904778), (3, 0.011904778), (4, 0.011904778), (5, 0.86904734), (6, 0.011904778), (7, 0.011904778), (8, 0.011904778), (9, 0.011904778), (10, 0.011904778), (11, 0.011904778)]
Document 4 - Topic: [(0, 0.016666684), (1, 0.016666684), (2, 0.016666684), (3, 0.8166665), (4, 0.016666684), (5, 0.016666684), (6, 0.016666684), (7, 0.016666684), (8, 0.016666684), (9, 0.016666684), (10, 0.016666684), (11, 0.016666684)]


# Mini Exercise

Instructions:

Use the provided Python code to perform topic modeling on a set of sample documents.
Modify the sample documents or add your own to see how the results change.
Experiment with the number of topics (parameter: num_topics) in the LDA model. Observe how different numbers of topics impact the result

Make a small insight on what you have observe when you change, increase, or decrease some parameters.(Short Essay)

I found that when you experiment with different parameters in the LDA model, such as the number of topics, you can observe various insights into the topic structure of the sample documents. By changing the number of topics, you can either identify more specific topics within the documents or more general topics. By examining the words that are most likely to belong to a particular topic, you can gain insights into the underlying themes present in the documents.s.