In [1]:
import pandas as pd
import numpy as np
import altair as alt
from preprocessing import *
import spacy
nlp = spacy.load("en_core_web_sm")
import gensim
import gensim.corpora as corpora
from gensim.corpora import Dictionary
from gensim.models import LdaModel

In [2]:
# load pre survey data for 2021 
df = pd.read_excel("../data/raw-data/new_comp_data.xlsx")

In [3]:
df

Unnamed: 0,online_wksp_recomm,future_topics,slack_recom,faci_comments,advanced_project,workshop_style,slack_communication
0,Everything was great. Loved the breakout group...,Planning,Not sure....I'm getting answers from Donna rig...,Well done on the session. Bruce was able to co...,"Advanced Project Planning Tools (Charter, WBS,...",Online,Somewhat likely
1,Content and powerpoint slides were great! Easy...,Change Management and Project Risk Management,Easy to use and share files,I enjoy the humour and the open friendliness. ...,Project Leadership and Teambuilding;Change Man...,Online,Somewhat unlikely
2,It was a good seminar that will get better as ...,Have a hard time seeing how a more detailed wo...,None at this time,,Change Management;Advanced Project Planning To...,Online,Somewhat likely
3,I appreciated the fast pace of the workshop. ...,"Change Management, Communication",A lot like Teams! Easy to use.,"Bruce was a solid facilitator - humourous, con...",Project Leadership and Teambuilding;,Online,Somewhat unlikely
4,"Have the presentation 90 minutes, not 60 minut...",planning and closing project tools and change ...,,"I really liked some of the lines he used, for ...",Project Leadership and Teambuilding;Project Ri...,In person,Very likely
...,...,...,...,...,...,...,...
186,,,,,,,
187,Longer sessions,,,,,,
188,I would of loved to do the practice exercises ...,,,,,,
189,My struggle was the recommended practice exerc...,,,,,,


In [93]:
w_recom = pd.DataFrame()
w_recom= df[["online_wksp_recomm"]]

w_recom = w_recom.dropna(subset=['online_wksp_recomm'])
w_recom["pp_text"] = w_recom["online_wksp_recomm"].apply(preprocess)

w_recom["pp_text"] = [
    preprocess_spacy(spacy_text) for spacy_text in nlp.pipe(w_recom["pp_text"])
]
w_recom

Unnamed: 0,online_wksp_recomm,pp_text
0,Everything was great. Loved the breakout group...,great love breakout group appreciate pro group...
1,Content and powerpoint slides were great! Easy...,content powerpoint slide great easy follow clu...
2,It was a good seminar that will get better as ...,good seminar well need little tweaking comment
3,I appreciated the fast pace of the workshop. ...,appreciate fast pace workshop information clea...
4,"Have the presentation 90 minutes, not 60 minut...",presentation minute minute try cram informaito...
...,...,...
184,Content from the workshop is relevant and the ...,content workshop relevant template easy use ab...
187,Longer sessions,long session
188,I would of loved to do the practice exercises ...,love practice exercise busy work capacity
189,My struggle was the recommended practice exerc...,struggle recommend practice exercise time limi...


In [94]:
# Create a vocabulary for the lda model and
# convert our corpus into document-term matrix for Lda
preprocessed_corpus = [doc.split() for doc in w_recom["pp_text"].tolist()]
dictionary = corpora.Dictionary(preprocessed_corpus)
# Optional
# dictionary.filter_extremes(no_below=3, no_above=0.3, keep_n=100000)
len(dictionary)

doc_term_matrix = [dictionary.doc2bow(doc) for doc in preprocessed_corpus]
# print(doc_term_matrix[0])
d = [(dictionary[idx], freq) for (idx, freq) in doc_term_matrix[0]]
pd.DataFrame(d, columns=["word", "frequency"])


from gensim.models import CoherenceModel

K = [3, 4, 5, 6, 7, 8, 9, 10]

coherence_scores = []

for num_topics in K:
    lda = LdaModel(
        corpus=doc_term_matrix,
        id2word=dictionary,
        num_topics=num_topics,
        random_state=42,
        passes=10,
    )
    coherence_model_lda = CoherenceModel(
        model=lda, texts=preprocessed_corpus, dictionary=dictionary, coherence="c_v"
    )
    coherence_scores.append(coherence_model_lda.get_coherence())
    
coherence_df = pd.DataFrame(coherence_scores, index=K, columns=["Coherence score"])
coherence_df

Unnamed: 0,Coherence score
3,0.33252
4,0.357993
5,0.344084
6,0.356937
7,0.387454
8,0.371364
9,0.361812
10,0.359841


In [95]:
lda7 = LdaModel(
    corpus=doc_term_matrix,
    id2word=dictionary,
    num_topics=7,
    random_state=42,
    passes=50,
)
topic_table_1 = pd.DataFrame(lda7.print_topics(), columns=["Topic id", "Topic words"])
topic_table_1

Unnamed: 0,Topic id,Topic words
0,0,"0.016*""time"" + 0.014*""scenario"" + 0.014*""sampl..."
1,1,"0.038*""think"" + 0.027*""project"" + 0.026*""risk""..."
2,2,"0.053*""content"" + 0.050*""workshop"" + 0.035*""gr..."
3,3,"0.048*""breakout"" + 0.045*""room"" + 0.022*""examp..."
4,4,"0.032*""feel"" + 0.028*""like"" + 0.024*""time"" + 0..."
5,5,"0.037*""slide"" + 0.030*""people"" + 0.020*""sessio..."
6,6,"0.065*""time"" + 0.027*""breakout"" + 0.026*""sessi..."


In [96]:
lda7.print_topics()

[(0,
  '0.016*"time" + 0.014*"scenario" + 0.014*"sample" + 0.014*"minute" + 0.014*"information" + 0.013*"organization" + 0.012*"project" + 0.010*"content" + 0.010*"feel" + 0.009*"training"'),
 (1,
  '0.038*"think" + 0.027*"project" + 0.026*"risk" + 0.025*"slide" + 0.024*"management" + 0.020*"good" + 0.017*"great" + 0.017*"content" + 0.014*"level" + 0.014*"group"'),
 (2,
  '0.053*"content" + 0.050*"workshop" + 0.035*"great" + 0.020*"project" + 0.020*"helpful" + 0.018*"work" + 0.015*"use" + 0.015*"time" + 0.013*"good" + 0.013*"template"'),
 (3,
  '0.048*"breakout" + 0.045*"room" + 0.022*"example" + 0.020*"time" + 0.014*"work" + 0.014*"real" + 0.014*"break" + 0.014*"tool" + 0.013*"help" + 0.012*"think"'),
 (4,
  '0.032*"feel" + 0.028*"like" + 0.024*"time" + 0.023*"example" + 0.017*"learn" + 0.015*"think" + 0.015*"work" + 0.014*"relevant" + 0.014*"useful" + 0.010*"have"'),
 (5,
  '0.037*"slide" + 0.030*"people" + 0.020*"session" + 0.018*"workshop" + 0.017*"think" + 0.014*"time" + 0.014*"br

In [98]:
topic_table_1['Topics'] = ['Need sample content to follow, especially on risk management. Need to break down content into slides with more explanation for new comers to PM.',
                          'Great real life scenarios shared by Bruce, would like more sample scenarios', 
                          'Would love more examples or template projects using tools like asana or monday',
                           'Maybe have more discussions in breakout rooms about what tools are working for the audience',
                          'Overall well prepared and relevant examples but link to non-profit was unclear',
                          'Content on the slides easy to follow but would be nice to have a copy of it or PDF available',
                          'Need more time in the breakout room and for the activity as the rapport is slow in the beginning and most were unsure about how to proceed with the acitivity. Recommends better explanation of activity before breaking into rooms.']

In [99]:
topic_table_1 = topic_table_1.drop(columns=['Topic words', 'Topic id'])
topic_table_1
topic_table_1.to_csv('../results/tables/online_wksp_rec.csv', index=False)

#########################################################################################

In [17]:
ft = pd.DataFrame()
ft= df[["future_topics"]]

ft = ft.dropna(subset=['future_topics'])
ft["pp_text"] = ft["future_topics"].apply(preprocess)

ft["pp_text"] = [
    preprocess_spacy(spacy_text) for spacy_text in nlp.pipe(ft["pp_text"])
]
ft

Unnamed: 0,future_topics,pp_text
0,Planning,plan
1,Change Management and Project Risk Management,change management project risk management
2,Have a hard time seeing how a more detailed wo...,hard time see detailed workshop look work rand...
3,"Change Management, Communication",change management communication
4,planning and closing project tools and change ...,planning closing project tool change management
...,...,...
63,It would be great to get more in depth on all ...,great depth list topic change management great...
64,Project Communication planning.,project communication planning
65,Why this Non-Profits workshop is different to ...,non profits workshop different regular worksho...
66,"More depth using different pm tools, the mater...",depth different tool material great good optio...


In [19]:
# Create a vocabulary for the lda model and
# convert our corpus into document-term matrix for Lda
preprocessed_corpus = [doc.split() for doc in ft["pp_text"].tolist()]
dictionary = corpora.Dictionary(preprocessed_corpus)
# Optional
# dictionary.filter_extremes(no_below=3, no_above=0.3, keep_n=100000)
len(dictionary)

doc_term_matrix = [dictionary.doc2bow(doc) for doc in preprocessed_corpus]
# print(doc_term_matrix[0])
d = [(dictionary[idx], freq) for (idx, freq) in doc_term_matrix[0]]
pd.DataFrame(d, columns=["word", "frequency"])


from gensim.models import CoherenceModel

K = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]

coherence_scores = []

for num_topics in K:
    lda = LdaModel(
        corpus=doc_term_matrix,
        id2word=dictionary,
        num_topics=num_topics,
        random_state=42,
        passes=10,
    )
    coherence_model_lda = CoherenceModel(
        model=lda, texts=preprocessed_corpus, dictionary=dictionary, coherence="c_v"
    )
    coherence_scores.append(coherence_model_lda.get_coherence())
    
coherence_df = pd.DataFrame(coherence_scores, index=K, columns=["Coherence score"])
coherence_df

Unnamed: 0,Coherence score
3,0.456864
4,0.439396
5,0.419979
6,0.412137
7,0.38671
8,0.428928
9,0.483799
10,0.424605
11,0.404669
12,0.4411


In [20]:
lda = LdaModel(
    corpus=doc_term_matrix,
    id2word=dictionary,
    num_topics=9,
    random_state=42,
    passes=50,
)
topic_table = pd.DataFrame(lda.print_topics(), columns=["Topic id", "Topic words"])
topic_table

Unnamed: 0,Topic id,Topic words
0,0,"0.074*""great"" + 0.039*""depth"" + 0.039*""topic"" ..."
1,1,"0.193*""project"" + 0.085*""tool"" + 0.025*""like"" ..."
2,2,"0.172*""planning"" + 0.068*""project"" + 0.046*""ev..."
3,3,"0.212*""communication"" + 0.044*""tools"" + 0.038*..."
4,4,"0.092*""project"" + 0.090*""planning"" + 0.073*""in..."
5,5,"0.330*""management"" + 0.186*""change"" + 0.149*""r..."
6,6,"0.043*""project"" + 0.035*""budget"" + 0.035*""topi..."
7,7,"0.045*""interest"" + 0.045*""high"" + 0.045*""leade..."
8,8,"0.083*""workshop"" + 0.043*""different"" + 0.043*""..."


In [22]:
lda.print_topics()

[(0,
  '0.074*"great" + 0.039*"depth" + 0.039*"topic" + 0.039*"change" + 0.039*"option" + 0.039*"organization" + 0.039*"list" + 0.039*"develop" + 0.039*"keep" + 0.039*"grow"'),
 (1,
  '0.193*"project" + 0.085*"tool" + 0.025*"like" + 0.025*"planning" + 0.025*"agile" + 0.020*"risk" + 0.013*"libre" + 0.013*"material" + 0.013*"methodology" + 0.013*"select"'),
 (2,
  '0.172*"planning" + 0.068*"project" + 0.046*"event" + 0.046*"workforce" + 0.005*"communication" + 0.005*"comment" + 0.005*"wbs" + 0.005*"close" + 0.005*"initation" + 0.005*"tool"'),
 (3,
  '0.212*"communication" + 0.044*"tools" + 0.038*"project" + 0.023*"closing" + 0.023*"like" + 0.023*"expand" + 0.023*"complex" + 0.023*"exercise" + 0.023*"believe" + 0.023*"pace"'),
 (4,
  '0.092*"project" + 0.090*"planning" + 0.073*"initiation" + 0.055*"work" + 0.037*"depth" + 0.037*"tool" + 0.022*"management" + 0.020*"agile" + 0.020*"feel" + 0.020*"case"'),
 (5,
  '0.330*"management" + 0.186*"change" + 0.149*"risk" + 0.100*"project" + 0.021*"

In [25]:
topic_table1 = pd.DataFrame()
topic_table1['Topics'] = ['More depth about project initiation, planning', 
                        'Change management and project risk management', 
                        'Project communication planning',
                        'Budget, financial planning (events, administrative costs)', 
                        'Other PM tools like MS project, project libre etc',
                        'Return on Investment analysis, or analysis of past projects',
                        'business writing and leadership',
                        'Difference between non-profit PM vs. regular PM']

In [26]:
topic_table1.to_csv('../results/tables/future_topics.csv', index=False)

##############################################################################################

In [27]:
sr = pd.DataFrame()
sr = df[["slack_recom"]]

sr = sr.dropna(subset=['slack_recom'])
sr["pp_text"] = sr["slack_recom"].apply(preprocess)

sr["pp_text"] = [
    preprocess_spacy(spacy_text) for spacy_text in nlp.pipe(sr["pp_text"])
]
sr

Unnamed: 0,slack_recom,pp_text
0,Not sure....I'm getting answers from Donna rig...,sure get answer donna perfect
1,Easy to use and share files,easy use share file
2,None at this time,time
3,A lot like Teams! Easy to use.,lot like team easy use
4,,
...,...,...
72,Any tool like Miro would be more useful,tool like miro useful
73,formulating small groups on the different head...,formulate small group different heading discus...
75,Nil,nil
78,Maybe do an exercise at the beginning involvin...,exercise beginning involve slack use know pres...


In [28]:
# Create a vocabulary for the lda model and
# convert our corpus into document-term matrix for Lda
preprocessed_corpus = [doc.split() for doc in sr["pp_text"].tolist()]
dictionary = corpora.Dictionary(preprocessed_corpus)
# Optional
# dictionary.filter_extremes(no_below=3, no_above=0.3, keep_n=100000)
len(dictionary)

doc_term_matrix = [dictionary.doc2bow(doc) for doc in preprocessed_corpus]
# print(doc_term_matrix[0])
d = [(dictionary[idx], freq) for (idx, freq) in doc_term_matrix[0]]
pd.DataFrame(d, columns=["word", "frequency"])


from gensim.models import CoherenceModel

K = [3, 4, 5, 6, 7, 8, 9, 10]

coherence_scores = []

for num_topics in K:
    lda = LdaModel(
        corpus=doc_term_matrix,
        id2word=dictionary,
        num_topics=num_topics,
        random_state=42,
        passes=10,
    )
    coherence_model_lda = CoherenceModel(
        model=lda, texts=preprocessed_corpus, dictionary=dictionary, coherence="c_v"
    )
    coherence_scores.append(coherence_model_lda.get_coherence())
    
coherence_df = pd.DataFrame(coherence_scores, index=K, columns=["Coherence score"])
coherence_df

Unnamed: 0,Coherence score
3,0.355649
4,0.419909
5,0.459364
6,0.392952
7,0.404107
8,0.466596
9,0.461996
10,0.472551


In [30]:
lda = LdaModel(
    corpus=doc_term_matrix,
    id2word=dictionary,
    num_topics=5,
    random_state=42,
    passes=50,
)
topic_table = pd.DataFrame(lda.print_topics(), columns=["Topic id", "Topic words"])
topic_table

Unnamed: 0,Topic id,Topic words
0,0,"0.064*""slack"" + 0.046*""good"" + 0.029*""people"" ..."
1,1,"0.063*""slack"" + 0.037*""think"" + 0.037*""work"" +..."
2,2,"0.027*""session"" + 0.027*""conversation"" + 0.019..."
3,3,"0.049*""work"" + 0.037*""use"" + 0.031*""zoom"" + 0...."
4,4,"0.057*""slack"" + 0.057*""time"" + 0.024*""session""..."


In [31]:
lda.print_topics()

[(0,
  '0.064*"slack" + 0.046*"good" + 0.029*"people" + 0.020*"lot" + 0.020*"engagement" + 0.020*"sure" + 0.020*"new" + 0.020*"document" + 0.020*"email" + 0.020*"microsoft"'),
 (1,
  '0.063*"slack" + 0.037*"think" + 0.037*"work" + 0.028*"participant" + 0.028*"time" + 0.028*"great" + 0.019*"use" + 0.019*"workshop" + 0.019*"print" + 0.019*"platform"'),
 (2,
  '0.027*"session" + 0.027*"conversation" + 0.019*"share" + 0.019*"chat" + 0.018*"time" + 0.018*"material" + 0.018*"day" + 0.018*"go" + 0.018*"print" + 0.018*"slack"'),
 (3,
  '0.049*"work" + 0.037*"use" + 0.031*"zoom" + 0.031*"tool" + 0.031*"different" + 0.031*"slack" + 0.025*"group" + 0.025*"easy" + 0.019*"like" + 0.019*"team"'),
 (4,
  '0.057*"slack" + 0.057*"time" + 0.024*"session" + 0.024*"necessary" + 0.024*"week" + 0.024*"prepare" + 0.024*"feel" + 0.024*"use" + 0.013*"lot" + 0.013*"like"')]

In [33]:
topic_table2 = pd.DataFrame()
topic_table2['Topics'] = ['Slack and emails are good/fine tools of communication',
                         'maybe do an exercise that involves slack communication to learn how to use it effectively',
                         'Invited to many slack channels so hard to keep up',
                         'slack is fine butfor zoom maybe create a process to encourage dialogue/conversation in the chat',
                         'post documents, handout any pre-reading to better prepare for the workshops']

In [34]:
topic_table2.to_csv("../results/tables/slack_Recm.csv", index=False)

##########################################################################################################

In [35]:
fc = pd.DataFrame()
fc = df[["faci_comments"]]

fc = fc.dropna(subset=['faci_comments'])
fc["pp_text"] = fc["faci_comments"].apply(preprocess)

fc["pp_text"] = [
    preprocess_spacy(spacy_text) for spacy_text in nlp.pipe(fc["pp_text"])
]
fc

Unnamed: 0,faci_comments,pp_text
0,Well done on the session. Bruce was able to co...,session bruce able convey thought use relevant...
1,I enjoy the humour and the open friendliness. ...,enjoy humour open friendliness welcome
2,,
3,"Bruce was a solid facilitator - humourous, con...",bruce solid facilitator humourous confident cl...
4,"I really liked some of the lines he used, for ...",like line example outline project start need
...,...,...
75,Thank you!,thank
78,Not on the facilitators but overall. The NGO ...,facilitator overall ngo sector leader manager ...
79,Diversifying the pool of facilitators will en...,diversify pool facilitator enrich quality work...
80,Instructors were very engaging and fun. Love t...,instructor engaging fun love commentary bill b...


In [36]:
# Create a vocabulary for the lda model and
# convert our corpus into document-term matrix for Lda
preprocessed_corpus = [doc.split() for doc in fc["pp_text"].tolist()]
dictionary = corpora.Dictionary(preprocessed_corpus)
# Optional
# dictionary.filter_extremes(no_below=3, no_above=0.3, keep_n=100000)
len(dictionary)

doc_term_matrix = [dictionary.doc2bow(doc) for doc in preprocessed_corpus]
# print(doc_term_matrix[0])
d = [(dictionary[idx], freq) for (idx, freq) in doc_term_matrix[0]]
pd.DataFrame(d, columns=["word", "frequency"])


from gensim.models import CoherenceModel

K = [3, 4, 5, 6, 7, 8, 9, 10]

coherence_scores = []

for num_topics in K:
    lda = LdaModel(
        corpus=doc_term_matrix,
        id2word=dictionary,
        num_topics=num_topics,
        random_state=42,
        passes=10,
    )
    coherence_model_lda = CoherenceModel(
        model=lda, texts=preprocessed_corpus, dictionary=dictionary, coherence="c_v"
    )
    coherence_scores.append(coherence_model_lda.get_coherence())
    
coherence_df = pd.DataFrame(coherence_scores, index=K, columns=["Coherence score"])
coherence_df

Unnamed: 0,Coherence score
3,0.512649
4,0.475277
5,0.411102
6,0.39375
7,0.41771
8,0.382787
9,0.460691
10,0.498621


In [37]:
lda = LdaModel(
    corpus=doc_term_matrix,
    id2word=dictionary,
    num_topics=3,
    random_state=42,
    passes=50,
)
topic_table = pd.DataFrame(lda.print_topics(), columns=["Topic id", "Topic words"])
topic_table

Unnamed: 0,Topic id,Topic words
0,0,"0.030*""comment"" + 0.016*""workshop"" + 0.016*""ex..."
1,1,"0.035*""good"" + 0.024*""knowledgeable"" + 0.024*""..."
2,2,"0.030*""great"" + 0.024*""facilitator"" + 0.024*""w..."


In [38]:
lda.print_topics()

[(0,
  '0.030*"comment" + 0.016*"workshop" + 0.016*"example" + 0.016*"good" + 0.012*"like" + 0.011*"keep" + 0.011*"fantastic" + 0.011*"bit" + 0.011*"quality" + 0.011*"approach"'),
 (1,
  '0.035*"good" + 0.024*"knowledgeable" + 0.024*"time" + 0.019*"bruce" + 0.018*"facilitator" + 0.018*"feel" + 0.013*"question" + 0.013*"session" + 0.013*"know" + 0.013*"pronoun"'),
 (2,
  '0.030*"great" + 0.024*"facilitator" + 0.024*"workshop" + 0.023*"thank" + 0.020*"experience" + 0.017*"think" + 0.017*"project" + 0.014*"example" + 0.014*"session" + 0.011*"appreciate"')]

In [40]:
table_topic3 = pd.DataFrame()
table_topic3['Topics'] = ['Great examples shared by Bruce, more examples and project sharing would help',
                         'Facilitator was very knowledgable, appreaciate the practice tricks and tips',
                         'Current facilitators are great, interest in inviting other facilitators to share/lead sections of learning to hear from other voices and experiences in the room ']

In [42]:
table_topic3.to_csv("../results/tables/fac_com.csv", index=False)

###################################################################################

In [43]:
ap = pd.DataFrame()
ap = df[["advanced_project"]]

ap = ap.dropna(subset=['advanced_project'])
ap["pp_text"] = ap["advanced_project"].apply(preprocess)

ap["pp_text"] = [
    preprocess_spacy(spacy_text) for spacy_text in nlp.pipe(ap["pp_text"])
]
ap

Unnamed: 0,advanced_project,pp_text
0,"Advanced Project Planning Tools (Charter, WBS,...",advanced project planning tools charter wbs et...
1,Project Leadership and Teambuilding;Change Man...,project leadership teambuildingchange management
2,Change Management;Advanced Project Planning To...,change managementadvanced project planning too...
3,Project Leadership and Teambuilding;,project leadership teambuilding
4,Project Leadership and Teambuilding;Project Ri...,project leadership teambuildingproject risk ma...
5,"Advanced Project Planning Tools (Charter, WBS,...",advanced project planning tools charter wbs et...
6,"Advanced Project Planning Tools (Charter, WBS,...",advanced project planning tools charter wbs et...
7,"Advanced Project Planning Tools (Charter, WBS,...",advanced project planning tools charter wbs etc
8,Change Management;Project Leadership and Teamb...,change managementproject leadership teambuilding
9,Change Management;Project Risk Management;,change managementproject risk management


In [44]:
# Create a vocabulary for the lda model and
# convert our corpus into document-term matrix for Lda
preprocessed_corpus = [doc.split() for doc in ap["pp_text"].tolist()]
dictionary = corpora.Dictionary(preprocessed_corpus)
# Optional
# dictionary.filter_extremes(no_below=3, no_above=0.3, keep_n=100000)
len(dictionary)

doc_term_matrix = [dictionary.doc2bow(doc) for doc in preprocessed_corpus]
# print(doc_term_matrix[0])
d = [(dictionary[idx], freq) for (idx, freq) in doc_term_matrix[0]]
pd.DataFrame(d, columns=["word", "frequency"])


from gensim.models import CoherenceModel

K = [3, 4, 5, 6, 7, 8, 9, 10]

coherence_scores = []

for num_topics in K:
    lda = LdaModel(
        corpus=doc_term_matrix,
        id2word=dictionary,
        num_topics=num_topics,
        random_state=42,
        passes=10,
    )
    coherence_model_lda = CoherenceModel(
        model=lda, texts=preprocessed_corpus, dictionary=dictionary, coherence="c_v"
    )
    coherence_scores.append(coherence_model_lda.get_coherence())
    
coherence_df = pd.DataFrame(coherence_scores, index=K, columns=["Coherence score"])
coherence_df

Unnamed: 0,Coherence score
3,0.493655
4,0.478783
5,0.477575
6,0.463217
7,0.459066
8,0.463672
9,0.478481
10,0.452858


In [45]:
lda = LdaModel(
    corpus=doc_term_matrix,
    id2word=dictionary,
    num_topics=5,
    random_state=42,
    passes=50,
)
topic_table = pd.DataFrame(lda.print_topics(), columns=["Topic id", "Topic words"])
topic_table

Unnamed: 0,Topic id,Topic words
0,0,"0.131*""leadership"" + 0.131*""management"" + 0.13..."
1,1,"0.103*""risk"" + 0.103*""management"" + 0.094*""pro..."
2,2,"0.104*""leadership"" + 0.102*""project"" + 0.090*""..."
3,3,"0.038*""managementproject"" + 0.038*""change"" + 0..."
4,4,"0.128*""project"" + 0.070*""managementproject"" + ..."


In [46]:
lda.print_topics()

[(0,
  '0.131*"leadership" + 0.131*"management" + 0.131*"teambuildingchange" + 0.130*"project" + 0.022*"managementproject" + 0.022*"change" + 0.022*"risk" + 0.022*"teambuilding" + 0.022*"wbs" + 0.022*"charter"'),
 (1,
  '0.103*"risk" + 0.103*"management" + 0.094*"project" + 0.083*"teambuildingproject" + 0.065*"leadership" + 0.057*"change" + 0.039*"etc.)project" + 0.037*"tools" + 0.037*"charter" + 0.037*"planning"'),
 (2,
  '0.104*"leadership" + 0.102*"project" + 0.090*"advanced" + 0.085*"wbs" + 0.085*"planning" + 0.085*"charter" + 0.085*"tools" + 0.069*"teambuilding" + 0.062*"etc.)project" + 0.047*"managementproject"'),
 (3,
  '0.038*"managementproject" + 0.038*"change" + 0.038*"leadership" + 0.038*"project" + 0.038*"management" + 0.038*"risk" + 0.038*"teambuilding" + 0.038*"charter" + 0.038*"planning" + 0.038*"wbs"'),
 (4,
  '0.128*"project" + 0.070*"managementproject" + 0.070*"leadership" + 0.070*"risk" + 0.070*"management" + 0.070*"tools" + 0.070*"charter" + 0.070*"planning" + 0.070

In [48]:
table_topic4 = pd.DataFrame()
table_topic4['Topic'] = ['Advanced Project Planning Tools (Charter, WBS, etc.)',
                         'Advanced Project Planning Tools (Charter, WBS, etc.)',
                         'Advanced Project Planning Tools (Charter, WBS, etc.)',
                         'Advanced Project Planning Tools (Charter, WBS, etc.)',
                         'Advanced Project Planning Tools (Charter, WBS, etc.)',
                         'Project Leadership and Teambuilding',
                         'Project Leadership and Teambuilding',
                         'Project Leadership and Teambuilding',
                         'Project Leadership and Teambuilding',
                         'Project Leadership and Teambuilding',
                         'Project Leadership and Teambuilding',
                         'Project Leadership and Teambuilding',
                         'Project Leadership and Teambuilding',
                         'Project Risk Management',
                         'Project Risk Management',
                         'Project Risk Management',
                         'Project Risk Management',
                         'Project Risk Management',
                         'Change Management',
                         'Change Management',
                         'Change Management',
                         'Change Management',
                         'Change Management',
                         'Change Management',
                         'Intro to platforms and software that support project management systems'
                         ]

In [50]:
bar = alt.Chart(table_topic4, title="Advanced topics of interest"
               ).transform_joinaggregate(
    total='count()',
    groupby=['Topic']
).transform_calculate(
    percent=alt.datum.total/11
).mark_bar().encode(
    x= alt.X('percent:Q', title = "Response percerntage (%)", axis=alt.Axis(format='%')),
    y=alt.Y('Topic', sort='x', title="")
    ).properties(
    width=400,
    height=150
    )

text = alt.Chart(table_topic4, title="Advanced topics of interest"
               ).transform_joinaggregate(
    total='count()',
    groupby=['Topic']
).transform_calculate(
    percent=alt.datum.total/11
).mark_text(
        align='right',
    ).encode(
        y = alt.Y('Topic', sort = 'x'),
        x = alt.X('percent:Q'),
        text=alt.Text('percent:Q',format='.1%')
).properties(
    width=400,
    height=150
    )

q1_plot = alt.layer(bar, text, data=table_topic4)

# Show the plot|
q1_plot

#################

In [84]:
x = df[['workshop_style']].dropna()
x['workshop_style'].unique()

array(['Online', 'In person'], dtype=object)

In [85]:
bar = alt.Chart(x, title="Preference for workshop style"
               ).transform_joinaggregate(
    total='count()',
    groupby=['workshop_style']
).transform_calculate(
    percent=alt.datum.total/13
).mark_bar().encode(
    x= alt.X('percent:Q', title = "Response percerntage (%)", axis=alt.Axis(format='%')),
    y=alt.Y('workshop_style', sort='x', title="", axis=alt.Axis(orient='left'))
    ).properties(
    width=400,
    height=150
    )

text = alt.Chart(x, title="Preference for workshop style"
               ).transform_joinaggregate(
    total='count()',
    groupby=['workshop_style']
).transform_calculate(
    percent=alt.datum.total/13
).mark_text(
        align='left', 
        dx = 5,
        baseline='middle'
    ).encode(
        y = alt.Y('workshop_style', sort = 'x', axis=alt.Axis(orient='left')),
        x = alt.X('percent:Q'),
        text=alt.Text('percent:Q',format='.1%')
).properties(
    width=500,
    height=100
    )

q2_plot = alt.layer(bar, text, data=x)

# Show the plot|
q2_plot

##############################################################################

In [89]:
y = df[['slack_communication']].dropna()
y['slack_communication'].unique()

array(['Somewhat likely', 'Somewhat unlikely', 'Very likely',
       'Very unlikely', 'Neither likely nor unlikely',
       'Lack of confidence with Slack'], dtype=object)

In [90]:
bar = alt.Chart(y, title="Preference to join/continue on slack"
               ).transform_joinaggregate(
    total='count()',
    groupby=['slack_communication']
).transform_calculate(
    percent=alt.datum.total/48
).mark_bar().encode(
    x= alt.X('percent:Q', title = "Response percerntage (%)", axis=alt.Axis(format='%')),
    y=alt.Y('slack_communication', sort='x', title="", axis=alt.Axis(orient='left'))
    ).properties(
    width=400,
    height=150
    )

text = alt.Chart(y, title="Preference to join/continue on slack"
               ).transform_joinaggregate(
    total='count()',
    groupby=['slack_communication']
).transform_calculate(
    percent=alt.datum.total/48
).mark_text(
        align='left', 
        dx = 5,
        baseline='middle'
    ).encode(
        y = alt.Y('slack_communication', sort = 'x', axis=alt.Axis(orient='left')),
        x = alt.X('percent:Q'),
        text=alt.Text('percent:Q',format='.1%')
).properties(
    width=500,
    height=100
    )

q3_plot = alt.layer(bar, text, data=y)

# Show the plot|
q3_plot