In [2]:
import pandas as pd
from bertopic import BERTopic

In [None]:
!conda-forge install bertopic
#!conda-forge install hdbscan

In [3]:
df = pd.read_csv('data/question_answers.csv')

In [15]:
import re

def replace_words_in_comments(df, replacements, column_name='Q1'):
    """
    Replaces specified words or combinations of words in the 'comment' column of a DataFrame.

    Parameters:
    - df: pandas.DataFrame with a 'comment' column.
    - replacements: A dictionary where keys are words or phrases to replace (accepts regex),
                    and values are the replacement words.

    Returns:
    - A pandas.DataFrame with the replacements made.
    """
    # Ensure the 'comment' column is a string to avoid errors with replacement
    df[column_name] = df[column_name].astype(str)
    
    # Apply replacements
    for target, replacement in replacements.items():
        df[column_name] = df[column_name].apply(lambda x: re.sub(target, replacement, x, flags=re.IGNORECASE))

    return df

# Example usage
replacements = {
    'AI Singapore': 'my organization', 'ai singapore': 'my organization' 
    # Add more replacements as needed
}

# Edxecute the function
df = replace_words_in_comments(df, replacements, column_name='Q1')
df = replace_words_in_comments(df, replacements, column_name='Q2')

In [16]:
df

Unnamed: 0,Q1,Q2,Camaraderie,Credibility,Fairness,Respect,Pride in work,topic
0,"As the Quality Assurance Manager at my organization, I appreciate the collaborative work culture and the supportive team environment. The company encourages innovation and provides opportunities for personal and professional growth.","While overall I enjoy my time at my organization, there is always room for improvement. One area that could be better is the work-life balance. As a Quality Assurance Manager, the workload can be demanding at times, and it would be beneficial to have more measures in place to ensure a healthy work-life balance for employees.",1.0,4.0,4.0,4.0,4.0,0
1,"As the Product Manager of my organization, I really appreciate the collaborative work culture that exists here. The team is supportive, inclusive, and encourages open communication. We celebrate diversity and individuals are given the autonomy to contribute their unique skills and ideas.","In terms of improvements, I believe that my organization can further enhance employee well-being by focusing on work-life balance initiatives. While the company values innovation and productivity, it's equally important to provide avenues for relaxation and personal time to avoid burnout.",1.0,4.0,4.0,5.0,4.0,0
2,"I enjoy the collaborative environment at my organization, where team members work together to solve problems and support each other in achieving our goals. The company also provides opportunities for continuous learning and personal growth, which I value greatly.","As the Team Leader of Customer Support, one aspect that I think should be improved is the sense of isolation I sometimes feel. Due to the nature of my role, I often have to work independently and may not have as much interaction with other teams or departments. It would be beneficial to have more cross-team collaboration and communication to foster a stronger sense of belonging and connection within the company.",1.0,4.0,4.0,4.0,5.0,0
3,I appreciate the collaborative and innovative work environment at my organization. The teams here are always open to sharing ideas and working together to solve challenges.,"To enhance the workplace, I believe there could be more opportunities for skill development and career growth. Providing training programs and mentorship opportunities would greatly benefit employees and help them excel in their roles.",1.0,4.0,4.0,4.0,4.0,-1
4,I like the collaborative and innovative work culture at my organization. We have a strong team spirit and everyone is always willing to support and help each other.,"In order to make the workplace better, I believe we can improve the communication channels between different teams and departments. This would facilitate better information sharing and coordination, leading to more efficient and effective work processes.",1.0,4.0,4.0,4.0,5.0,-1
...,...,...,...,...,...,...,...,...
8509,I appreciate the opportunities for professional growth and learning at my organization. The company provides various training programs and resources for employees to enhance their skills and knowledge.,"However, I feel excluded at times due to a lack of collaboration within the team and limited involvement in decision-making processes. It would be better if there were more inclusive communication channels and opportunities for all employees to contribute and participate in discussions.",1.0,,,,,1
8510,I appreciate the collaborative and supportive work culture at my organization. There is a strong sense of teamwork and everyone is encouraged to share their ideas and expertise.,I believe our workplace could benefit from more opportunities for professional growth and development. It would be great to have more training programs or mentorship initiatives to help employees enhance their skills and advance in their careers.,1.0,,,,,-1
8511,"As a Customer Service Representative at my organization, I really appreciate the collaborative work culture. Everyone is encouraged to share their ideas and opinions, which fosters a sense of teamwork and innovation.","While my organization has a great work culture, I feel that there is room for improvement in terms of communication within the organization. Sometimes, important information or updates take longer than expected to reach everyone, which can lead to confusion and delays in our work processes.",1.0,,,,,-1
8512,"I appreciate the collaborative and innovative work culture at my organization. The team members are highly skilled, and there are ample opportunities for learning and growth.","While my organization is a great workplace, there is room for improvement in terms of communication and transparency. It would be beneficial to have more open and transparent communication channels and processes to ensure everyone is well-informed about the organization's goals and decisions.",1.0,,,,,0


In [18]:
# Importing libraries
from bertopic.representation import KeyBERTInspired
from umap import UMAP
from hdbscan import HDBSCAN


representation_model_3 = KeyBERTInspired(nr_repr_docs=10)
umap_model_3 = UMAP(n_neighbors=25, n_components=60, metric='cosine', low_memory=False) # Decreasing n_components
hdbscan_model_3 = HDBSCAN(min_cluster_size=10, metric='euclidean', prediction_data=True) 


# 12 topics
topic_model_3 = BERTopic(language="english", calculate_probabilities=True, 
                       verbose=True, 
                       umap_model=umap_model_3, 
                       hdbscan_model=hdbscan_model_3, 
                       nr_topics = 15, 
                       representation_model=representation_model_3
                       )

topics, probs = topic_model_3.fit_transform(df['Q1'])


# Topic Model 1 
freq = topic_model_3.get_topic_info(); freq.head(50)

2024-03-12 07:46:13,788 - BERTopic - Embedding - Transforming documents to embeddings.


Batches:   0%|          | 0/267 [00:00<?, ?it/s]

2024-03-12 07:46:50,462 - BERTopic - Embedding - Completed ✓
2024-03-12 07:46:50,463 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-03-12 07:46:59,564 - BERTopic - Dimensionality - Completed ✓
2024-03-12 07:46:59,565 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-03-12 07:47:03,755 - BERTopic - Cluster - Completed ✓
2024-03-12 07:47:03,755 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-03-12 07:47:09,948 - BERTopic - Representation - Completed ✓
2024-03-12 07:47:09,949 - BERTopic - Topic reduction - Reducing number of topics
2024-03-12 07:47:11,134 - BERTopic - Topic reduction - Reduced number of topics from 137 to 15


Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,1926,-1_collaboration_collaborative_teamwork_organization,"[collaboration, collaborative, teamwork, organization, colleagues, workplace, innovation, company, innovative, employees]","[I appreciate the collaborative and innovative environment at my organization. The team members are supportive and always willing to share their knowledge and expertise. The company also provides ample opportunities for learning and professional growth. , I enjoy the collaborative and innovative work environment at my organization. The team members are supportive and there is a strong emphasis on learning and development. I also appreciate the opportunities to work on cutting-edge technologi..."
1,0,4629,0_collaboration_collaborative_teamwork_colleagues,"[collaboration, collaborative, teamwork, colleagues, organization, team, supportive, appreciate, employees, members]","[I appreciate the collaborative and innovative work culture at my organization. The team members are supportive and always willing to help each other. , I appreciate the supportive and collaborative work environment at my organization. The team members are always willing to help and share knowledge, which fosters a strong sense of teamwork. , I appreciate the collaborative and supportive work environment at my organization. The team members are always willing to help each other and share kn..."
2,1,865,1_company_opportunities_industry_training,"[company, opportunities, industry, training, innovation, tech, advancements, organization, development, marketing]","[I appreciate the opportunities for growth and learning in my workplace. my organization provides various resources and training programs to enhance my skills and knowledge. , I appreciate the opportunities for learning and growth in my workplace. The company provides various training programs and resources to enhance my skills and knowledge. , As the IT Manager of my organization, what I like about my workplace is the strong focus on innovation and cutting-edge technologies. We have a team ..."
3,2,301,2_ai_innovation_tech_innovative,"[ai, innovation, tech, innovative, technologies, collaborative, projects, technology, organization, development]","[I appreciate the talented and dedicated team at my organization who are passionate about pushing the boundaries of AI technology and driving innovation in Singapore., I like the collaborative and innovative work culture at my organization. We have a diverse team of talented individuals who are dedicated to pushing the boundaries of AI technology., I like the collaborative and innovative work environment at my organization. We have the opportunity to work with talented individuals who are pa..."
4,3,170,3_analyst_company_junior_developer,"[analyst, company, junior, developer, role, development, organization, workplace, opportunities, professionals]","[As a Junior Developer at my organization, I like the opportunities I have to learn and grow in my role. The company provides a supportive environment where I can enhance my skills and knowledge. , I appreciate the opportunities for learning and growth at my organization. The company provides various training programs and resources to enhance my skills as a Junior Developer. , As a Junior Analyst at my organization, I appreciate the opportunities for growth and learning in my workplace. The ..."
5,4,131,4_intern_tech_industry_assistant,"[intern, tech, industry, assistant, organization, innovation, executive, company, opportunities, training]","[As an IT intern at my organization, I appreciate the opportunity to work with cutting-edge technology and gain valuable experience in the field. The company provides a conducive and supportive environment for learning and growth. , As a Sales Intern at my organization, I appreciate the opportunities to learn and grow in the tech industry. The company provides a supportive and collaborative work environment, which allows me to enhance my skills and contribute effectively to the team. , I app..."
6,5,94,5_analyst_collaborative_collaborate_workplace,"[analyst, collaborative, collaborate, workplace, colleagues, organization, company, technologies, projects, analytics]","[As a Systems Analyst at my organization, I enjoy the collaborative work environment and the opportunity to work with cutting-edge technologies. The company encourages knowledge sharing and learning, which is highly beneficial for personal and professional growth. , As a Data Analyst at my organization, I appreciate the supportive work culture that encourages continuous learning and growth. The company fosters a collaborative environment where team members are always willing to help and shar..."
7,6,90,6_intern_finance_financial_opportunities,"[intern, finance, financial, opportunities, tech, industry, innovation, company, innovative, develop]","[As the Finance Intern of my organization, I appreciate the opportunity to gain practical experience in the finance field within a technology company. I also value the strong emphasis on technology and innovation that my organization promotes. , As a Finance Intern at my organization, I appreciate the opportunity to gain hands-on experience in the tech industry. The company provides a supportive and inclusive work environment where I can learn and grow. , I appreciate the opportunity to gain..."
8,7,78,7_creativity_designer_collaborative_design,"[creativity, designer, collaborative, design, collaborate, designs, company, organization, innovation, creative]","[I enjoy the opportunity to work with cutting-edge technology and collaborate with talented individuals from diverse backgrounds. The innovative and supportive work culture at my organization allows me to constantly push creative boundaries and improve my skills as a graphic designer. , As a Graphic Designer at my organization, I appreciate the collaborative work environment and the opportunity to work with cutting-edge technology. , As a Graphic Designer at my organization, I appreciate the..."
9,8,65,8_answer1_answer__,"[answer1, answer, , , , , , , , ]","[<answer1>, <answer1>, <answer1>]"


Now I need to create a dictionary mapping the topics to the actual topic

In [26]:
# Extracting the required data to form a dictionary mapping topic numbers to a succinct 2-word summary of the topic name
topic_summary = {
    "-1": "Unclassified",
    "0": "collaboration & teamwork",
    "1": "career training & opportunities",
    "2": "ai innovation",
    "3": "growth & development",
    "4": "intern learning",
    "5": "analytics",
    "6": "finance",
    "7": "creative innovation",
    "8": "answer_error", 
    "9": "compliance and ethics",
    "10": "cto",
    "11": "workplace sharing",
    "12": "marketing promoting",
    "13": "worklife flexibility"
}


In [36]:
# Replace the numbers in the 'Q1_topics' column with the formatted strings
df['Q1_topics_named'] = df['Q1_topics'].apply(lambda x: f"Q1_{topic_summary[str(x)]}_{x}")


In [22]:
df['Q1_topics'] = topics

In [37]:
df

Unnamed: 0,Q1,Q2,Camaraderie,Credibility,Fairness,Respect,Pride in work,Q1_topics,Q1_topics_named,Q2_topics
0,"As the Quality Assurance Manager at my organization, I appreciate the collaborative work culture and the supportive team environment. The company encourages innovation and provides opportunities for personal and professional growth.","While overall I enjoy my time at my organization, there is always room for improvement. One area that could be better is the work-life balance. As a Quality Assurance Manager, the workload can be demanding at times, and it would be beneficial to have more measures in place to ensure a healthy work-life balance for employees.",1.0,4.0,4.0,4.0,4.0,0,Q1_collaboration & teamwork_0,0
1,"As the Product Manager of my organization, I really appreciate the collaborative work culture that exists here. The team is supportive, inclusive, and encourages open communication. We celebrate diversity and individuals are given the autonomy to contribute their unique skills and ideas.","In terms of improvements, I believe that my organization can further enhance employee well-being by focusing on work-life balance initiatives. While the company values innovation and productivity, it's equally important to provide avenues for relaxation and personal time to avoid burnout.",1.0,4.0,4.0,5.0,4.0,0,Q1_collaboration & teamwork_0,0
2,"I enjoy the collaborative environment at my organization, where team members work together to solve problems and support each other in achieving our goals. The company also provides opportunities for continuous learning and personal growth, which I value greatly.","As the Team Leader of Customer Support, one aspect that I think should be improved is the sense of isolation I sometimes feel. Due to the nature of my role, I often have to work independently and may not have as much interaction with other teams or departments. It would be beneficial to have more cross-team collaboration and communication to foster a stronger sense of belonging and connection within the company.",1.0,4.0,4.0,4.0,5.0,0,Q1_collaboration & teamwork_0,3
3,I appreciate the collaborative and innovative work environment at my organization. The teams here are always open to sharing ideas and working together to solve challenges.,"To enhance the workplace, I believe there could be more opportunities for skill development and career growth. Providing training programs and mentorship opportunities would greatly benefit employees and help them excel in their roles.",1.0,4.0,4.0,4.0,4.0,0,Q1_collaboration & teamwork_0,1
4,I like the collaborative and innovative work culture at my organization. We have a strong team spirit and everyone is always willing to support and help each other.,"In order to make the workplace better, I believe we can improve the communication channels between different teams and departments. This would facilitate better information sharing and coordination, leading to more efficient and effective work processes.",1.0,4.0,4.0,4.0,5.0,0,Q1_collaboration & teamwork_0,-1
...,...,...,...,...,...,...,...,...,...,...
8509,I appreciate the opportunities for professional growth and learning at my organization. The company provides various training programs and resources for employees to enhance their skills and knowledge.,"However, I feel excluded at times due to a lack of collaboration within the team and limited involvement in decision-making processes. It would be better if there were more inclusive communication channels and opportunities for all employees to contribute and participate in discussions.",1.0,,,,,1,Q1_career training & opportunities_1,3
8510,I appreciate the collaborative and supportive work culture at my organization. There is a strong sense of teamwork and everyone is encouraged to share their ideas and expertise.,I believe our workplace could benefit from more opportunities for professional growth and development. It would be great to have more training programs or mentorship initiatives to help employees enhance their skills and advance in their careers.,1.0,,,,,0,Q1_collaboration & teamwork_0,1
8511,"As a Customer Service Representative at my organization, I really appreciate the collaborative work culture. Everyone is encouraged to share their ideas and opinions, which fosters a sense of teamwork and innovation.","While my organization has a great work culture, I feel that there is room for improvement in terms of communication within the organization. Sometimes, important information or updates take longer than expected to reach everyone, which can lead to confusion and delays in our work processes.",1.0,,,,,0,Q1_collaboration & teamwork_0,2
8512,"I appreciate the collaborative and innovative work culture at my organization. The team members are highly skilled, and there are ample opportunities for learning and growth.","While my organization is a great workplace, there is room for improvement in terms of communication and transparency. It would be beneficial to have more open and transparent communication channels and processes to ensure everyone is well-informed about the organization's goals and decisions.",1.0,,,,,0,Q1_collaboration & teamwork_0,-1


### Q2

In [31]:
# Importing libraries
from bertopic.representation import KeyBERTInspired
from umap import UMAP
from hdbscan import HDBSCAN


representation_model_3 = KeyBERTInspired(nr_repr_docs=10)
umap_model_3 = UMAP(n_neighbors=25, n_components=60, metric='cosine', low_memory=False) # Decreasing n_components
hdbscan_model_3 = HDBSCAN(min_cluster_size=10, metric='euclidean', prediction_data=True) 


# 12 topics
topic_model_3 = BERTopic(language="english", calculate_probabilities=True, 
                       verbose=True, 
                       umap_model=umap_model_3, 
                       hdbscan_model=hdbscan_model_3, 
                       nr_topics = 15, 
                       representation_model=representation_model_3
                       )

topics, probs = topic_model_3.fit_transform(df['Q2'])


# Topic Model 1 
freq = topic_model_3.get_topic_info(); freq.head(50)

2024-03-12 08:03:25,318 - BERTopic - Embedding - Transforming documents to embeddings.


Batches:   0%|          | 0/267 [00:00<?, ?it/s]

2024-03-12 08:04:14,362 - BERTopic - Embedding - Completed ✓
2024-03-12 08:04:14,363 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-03-12 08:04:26,750 - BERTopic - Dimensionality - Completed ✓
2024-03-12 08:04:26,752 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-03-12 08:04:28,922 - BERTopic - Cluster - Completed ✓
2024-03-12 08:04:28,922 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-03-12 08:04:34,835 - BERTopic - Representation - Completed ✓
2024-03-12 08:04:34,835 - BERTopic - Topic reduction - Reducing number of topics
2024-03-12 08:04:36,852 - BERTopic - Topic reduction - Reduced number of topics from 70 to 15


Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,2576,-1_communication_collaboration_improvement_productivity,"[communication, collaboration, improvement, productivity, organization, coordination, improve, improved, workplace, transparency]","[While I feel liberated in my role at my organization, there is always room for improvement. One area that could be better is communication between different teams and departments. Sometimes, important information or updates can get lost or delayed, which can hinder the efficiency of the overall workflow. Enhancing communication channels and ensuring effective cross-team collaboration would greatly benefit the workplace., Although I feel secure about my time at my organization, I believe the..."
1,0,1882,0_worklife_workload_balance_workplace,"[worklife, workload, balance, workplace, organization, management, improvement, manage, maintain, employees]","[Although my organization is a great place to work, there is room for improvement in terms of work-life balance. Sometimes, the workload can be overwhelming, and it would be beneficial to have more flexible working hours or additional support to ensure a better balance between work and personal life., While my organization is overall a great workplace, I believe that there is room for improvement in terms of work-life balance. Sometimes, the workload can be overwhelming, and it would be bene..."
2,1,1666,1_advancement_improvement_improve_training,"[advancement, improvement, improve, training, opportunities, development, mentorship, beneficial, growth, careers]","[In terms of improvement, I believe there could be more opportunities for professional development and growth. It would be beneficial for the company to invest in training and workshops to further enhance the skills and knowledge of our employees., I believe that there should be more opportunities for professional development and growth. It would be beneficial for employees if the company could provide more training programs and resources to enhance their skills and knowledge., There could b..."
3,2,1329,2_communication_improvement_communicated_productivity,"[communication, improvement, communicated, productivity, collaboration, organization, improved, coordination, workplace, feedback]","[Although my organization is a great workplace overall, I believe there is room for improvement in terms of communication and transparency. Clearer communication channels and more frequent updates on company-wide initiatives would further enhance the work environment., Although my organization is a great place to work, there is room for improvement in terms of communication and transparency. Sometimes, important information or updates are not effectively communicated to all departments, lead..."
4,3,254,3_diversity_diverse_inclusive_initiatives,"[diversity, diverse, inclusive, initiatives, organization, opportunities, improvement, inclusion, collaboration, workplace]","[While my organization is an excellent workplace overall, I believe there is room for improvement in terms of diversity and inclusion. It would be great to see more efforts in promoting a diverse workforce and creating an inclusive environment where individuals from different backgrounds feel valued and included., However, I feel alienated in my time at my organization. I believe that there could be better efforts in fostering a more inclusive and supportive work environment. It would be ben..."
5,4,169,4_collaboration_innovation_teambuilding_productivity,"[collaboration, innovation, teambuilding, productivity, organization, teamwork, improvement, knowledge, workplace, colleagues]","[While the work environment is generally positive, I believe that there could be more opportunities for collaboration and knowledge sharing among different teams within the company., In terms of improvement, I believe there should be more opportunities for cross-functional collaboration and knowledge sharing among different teams. This would help foster innovation and enhance overall productivity in the workplace., I believe that there should be better communication and collaboration among d..."
6,5,166,5_interns_intern_mentorship_improvement,"[interns, intern, mentorship, improvement, training, opportunities, organization, enhance, beneficial, roles]","[I believe that there can be more opportunities for professional growth and development in my workplace. It would be better if my organization could provide more training programs or mentorship opportunities to help interns like me enhance our skills and advance in our careers., Although my organization provides a great work environment, it could be better if there were more opportunities for career growth and professional development. It would be beneficial to have structured programs or me..."
7,6,125,6_innovation_innovative_creativity_improvement,"[innovation, innovative, creativity, improvement, organization, workplace, diversity, benefit, approaches, ideas]","[I feel that the company's work culture can be quite narrow-minded. There is a lack of diversity in terms of ideas and perspectives, which hinders creativity and innovation. I believe that fostering a more inclusive and open-minded culture would greatly benefit the company., Although there are many positive aspects of my workplace, one area that could be better is the level of innovation and experimentation. As a tech company, it is essential for us to constantly explore new ideas and techno..."
8,7,75,7_mentorship_advancement_development_career,"[mentorship, advancement, development, career, junior, opportunities, training, skills, improvement, developers]","[In terms of improvement, I believe there should be more opportunities for growth and career development. As a Junior Accountant, I feel the need for more training and advancement opportunities to further enhance my skills and progress in my career., In terms of areas for improvement, I believe there should be more opportunities for growth and career development. As a junior analyst, I would like to have more mentorship and guidance to enhance my skills and contribute more effectively to the..."
9,8,75,8_workplace_teamwork_colleagues_organization,"[workplace, teamwork, colleagues, organization, collaborative, company, team, work, innovation, supportive]","[I feel empowered about my time at my organization. What do you like about your workplace? I appreciate the collaborative and innovative work culture at my organization. There is a strong emphasis on teamwork and knowledge sharing, which allows for continuous learning and growth. What about your workplace should be better? While the work culture at my organization is generally positive, I believe that there is still room for improvement in terms of work-life balance. As an intern, I someti..."


In [34]:
df['Q2_topics'] = topics

In [35]:
df

Unnamed: 0,Q1,Q2,Camaraderie,Credibility,Fairness,Respect,Pride in work,Q1_topics,Q1_topics_named,Q2_topics
0,"As the Quality Assurance Manager at my organization, I appreciate the collaborative work culture and the supportive team environment. The company encourages innovation and provides opportunities for personal and professional growth.","While overall I enjoy my time at my organization, there is always room for improvement. One area that could be better is the work-life balance. As a Quality Assurance Manager, the workload can be demanding at times, and it would be beneficial to have more measures in place to ensure a healthy work-life balance for employees.",1.0,4.0,4.0,4.0,4.0,0,Q1_topicname_collaboration & teamwork_0,0
1,"As the Product Manager of my organization, I really appreciate the collaborative work culture that exists here. The team is supportive, inclusive, and encourages open communication. We celebrate diversity and individuals are given the autonomy to contribute their unique skills and ideas.","In terms of improvements, I believe that my organization can further enhance employee well-being by focusing on work-life balance initiatives. While the company values innovation and productivity, it's equally important to provide avenues for relaxation and personal time to avoid burnout.",1.0,4.0,4.0,5.0,4.0,0,Q1_topicname_collaboration & teamwork_0,0
2,"I enjoy the collaborative environment at my organization, where team members work together to solve problems and support each other in achieving our goals. The company also provides opportunities for continuous learning and personal growth, which I value greatly.","As the Team Leader of Customer Support, one aspect that I think should be improved is the sense of isolation I sometimes feel. Due to the nature of my role, I often have to work independently and may not have as much interaction with other teams or departments. It would be beneficial to have more cross-team collaboration and communication to foster a stronger sense of belonging and connection within the company.",1.0,4.0,4.0,4.0,5.0,0,Q1_topicname_collaboration & teamwork_0,3
3,I appreciate the collaborative and innovative work environment at my organization. The teams here are always open to sharing ideas and working together to solve challenges.,"To enhance the workplace, I believe there could be more opportunities for skill development and career growth. Providing training programs and mentorship opportunities would greatly benefit employees and help them excel in their roles.",1.0,4.0,4.0,4.0,4.0,0,Q1_topicname_collaboration & teamwork_0,1
4,I like the collaborative and innovative work culture at my organization. We have a strong team spirit and everyone is always willing to support and help each other.,"In order to make the workplace better, I believe we can improve the communication channels between different teams and departments. This would facilitate better information sharing and coordination, leading to more efficient and effective work processes.",1.0,4.0,4.0,4.0,5.0,0,Q1_topicname_collaboration & teamwork_0,-1
...,...,...,...,...,...,...,...,...,...,...
8509,I appreciate the opportunities for professional growth and learning at my organization. The company provides various training programs and resources for employees to enhance their skills and knowledge.,"However, I feel excluded at times due to a lack of collaboration within the team and limited involvement in decision-making processes. It would be better if there were more inclusive communication channels and opportunities for all employees to contribute and participate in discussions.",1.0,,,,,1,Q1_topicname_career training & opportunities_1,3
8510,I appreciate the collaborative and supportive work culture at my organization. There is a strong sense of teamwork and everyone is encouraged to share their ideas and expertise.,I believe our workplace could benefit from more opportunities for professional growth and development. It would be great to have more training programs or mentorship initiatives to help employees enhance their skills and advance in their careers.,1.0,,,,,0,Q1_topicname_collaboration & teamwork_0,1
8511,"As a Customer Service Representative at my organization, I really appreciate the collaborative work culture. Everyone is encouraged to share their ideas and opinions, which fosters a sense of teamwork and innovation.","While my organization has a great work culture, I feel that there is room for improvement in terms of communication within the organization. Sometimes, important information or updates take longer than expected to reach everyone, which can lead to confusion and delays in our work processes.",1.0,,,,,0,Q1_topicname_collaboration & teamwork_0,2
8512,"I appreciate the collaborative and innovative work culture at my organization. The team members are highly skilled, and there are ample opportunities for learning and growth.","While my organization is a great workplace, there is room for improvement in terms of communication and transparency. It would be beneficial to have more open and transparent communication channels and processes to ensure everyone is well-informed about the organization's goals and decisions.",1.0,,,,,0,Q1_topicname_collaboration & teamwork_0,-1


In [33]:
freq[['Topic','Name']]

Unnamed: 0,Topic,Name
0,-1,-1_communication_collaboration_improvement_productivity
1,0,0_worklife_workload_balance_workplace
2,1,1_advancement_improvement_improve_training
3,2,2_communication_improvement_communicated_productivity
4,3,3_diversity_diverse_inclusive_initiatives
5,4,4_collaboration_innovation_teambuilding_productivity
6,5,5_interns_intern_mentorship_improvement
7,6,6_innovation_innovative_creativity_improvement
8,7,7_mentorship_advancement_development_career
9,8,8_workplace_teamwork_colleagues_organization


In [38]:
# Creating a new dictionary for the provided names with a 2-3 word summary for each topic
Q2_topic_labels = {
    "-1": "unclassified",
    "0": "worklife balance",
    "1": "advancement training",
    "2": "communication productivity",
    "3": "diversity inclusive",
    "4": "innovation teamwork",
    "5": "intern mentorship",
    "6": "innovative creativity",
    "7": "mentorship development",
    "8": "workplace teamwork",
    "9": "nan",
    "10": "productivity improvement",
    "11": "trust collaboration",
    "12": "compliance practices",
    "13": "finance accounting"
}

# Replace the numbers in the 'Q1_topics' column with the formatted strings
df['Q2_topics_named'] = df['Q2_topics'].apply(lambda x: f"Q2_{topic_summary[str(x)]}_{x}")


In [39]:
df

Unnamed: 0,Q1,Q2,Camaraderie,Credibility,Fairness,Respect,Pride in work,Q1_topics,Q1_topics_named,Q2_topics,Q2_topics_named
0,"As the Quality Assurance Manager at my organization, I appreciate the collaborative work culture and the supportive team environment. The company encourages innovation and provides opportunities for personal and professional growth.","While overall I enjoy my time at my organization, there is always room for improvement. One area that could be better is the work-life balance. As a Quality Assurance Manager, the workload can be demanding at times, and it would be beneficial to have more measures in place to ensure a healthy work-life balance for employees.",1.0,4.0,4.0,4.0,4.0,0,Q1_collaboration & teamwork_0,0,Q2_collaboration & teamwork_0
1,"As the Product Manager of my organization, I really appreciate the collaborative work culture that exists here. The team is supportive, inclusive, and encourages open communication. We celebrate diversity and individuals are given the autonomy to contribute their unique skills and ideas.","In terms of improvements, I believe that my organization can further enhance employee well-being by focusing on work-life balance initiatives. While the company values innovation and productivity, it's equally important to provide avenues for relaxation and personal time to avoid burnout.",1.0,4.0,4.0,5.0,4.0,0,Q1_collaboration & teamwork_0,0,Q2_collaboration & teamwork_0
2,"I enjoy the collaborative environment at my organization, where team members work together to solve problems and support each other in achieving our goals. The company also provides opportunities for continuous learning and personal growth, which I value greatly.","As the Team Leader of Customer Support, one aspect that I think should be improved is the sense of isolation I sometimes feel. Due to the nature of my role, I often have to work independently and may not have as much interaction with other teams or departments. It would be beneficial to have more cross-team collaboration and communication to foster a stronger sense of belonging and connection within the company.",1.0,4.0,4.0,4.0,5.0,0,Q1_collaboration & teamwork_0,3,Q2_growth & development_3
3,I appreciate the collaborative and innovative work environment at my organization. The teams here are always open to sharing ideas and working together to solve challenges.,"To enhance the workplace, I believe there could be more opportunities for skill development and career growth. Providing training programs and mentorship opportunities would greatly benefit employees and help them excel in their roles.",1.0,4.0,4.0,4.0,4.0,0,Q1_collaboration & teamwork_0,1,Q2_career training & opportunities_1
4,I like the collaborative and innovative work culture at my organization. We have a strong team spirit and everyone is always willing to support and help each other.,"In order to make the workplace better, I believe we can improve the communication channels between different teams and departments. This would facilitate better information sharing and coordination, leading to more efficient and effective work processes.",1.0,4.0,4.0,4.0,5.0,0,Q1_collaboration & teamwork_0,-1,Q2_Unclassified_-1
...,...,...,...,...,...,...,...,...,...,...,...
8509,I appreciate the opportunities for professional growth and learning at my organization. The company provides various training programs and resources for employees to enhance their skills and knowledge.,"However, I feel excluded at times due to a lack of collaboration within the team and limited involvement in decision-making processes. It would be better if there were more inclusive communication channels and opportunities for all employees to contribute and participate in discussions.",1.0,,,,,1,Q1_career training & opportunities_1,3,Q2_growth & development_3
8510,I appreciate the collaborative and supportive work culture at my organization. There is a strong sense of teamwork and everyone is encouraged to share their ideas and expertise.,I believe our workplace could benefit from more opportunities for professional growth and development. It would be great to have more training programs or mentorship initiatives to help employees enhance their skills and advance in their careers.,1.0,,,,,0,Q1_collaboration & teamwork_0,1,Q2_career training & opportunities_1
8511,"As a Customer Service Representative at my organization, I really appreciate the collaborative work culture. Everyone is encouraged to share their ideas and opinions, which fosters a sense of teamwork and innovation.","While my organization has a great work culture, I feel that there is room for improvement in terms of communication within the organization. Sometimes, important information or updates take longer than expected to reach everyone, which can lead to confusion and delays in our work processes.",1.0,,,,,0,Q1_collaboration & teamwork_0,2,Q2_ai innovation_2
8512,"I appreciate the collaborative and innovative work culture at my organization. The team members are highly skilled, and there are ample opportunities for learning and growth.","While my organization is a great workplace, there is room for improvement in terms of communication and transparency. It would be beneficial to have more open and transparent communication channels and processes to ensure everyone is well-informed about the organization's goals and decisions.",1.0,,,,,0,Q1_collaboration & teamwork_0,-1,Q2_Unclassified_-1


## Replacing NaN values and values outside of 1-5 with average in each column

In [40]:
import pandas as pd
import numpy as np

def process_camaraderie_column(df, column_name):
    # Replace values outside the 1-5 range with NaN
    df[column_name] = df[column_name].apply(lambda x: x if x in range(1, 6) else np.nan)
    
    # Calculate the mean of the column, excluding NaNs
    mean_value = df[column_name].mean()
    
    # Replace NaNs with the calculated mean
    df[column_name].fillna(mean_value, inplace=True)
    
    return df

In [43]:
df = process_camaraderie_column(df, 'Camaraderie')
df = process_camaraderie_column(df, 'Credibility')
df = process_camaraderie_column(df, 'Fairness')
df = process_camaraderie_column(df, 'Respect')
df = process_camaraderie_column(df, 'Pride in work')

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[column_name].fillna(mean_value, inplace=True)


In [44]:
df

Unnamed: 0,Q1,Q2,Camaraderie,Credibility,Fairness,Respect,Pride in work,Q1_topics,Q1_topics_named,Q2_topics,Q2_topics_named
0,"As the Quality Assurance Manager at my organization, I appreciate the collaborative work culture and the supportive team environment. The company encourages innovation and provides opportunities for personal and professional growth.","While overall I enjoy my time at my organization, there is always room for improvement. One area that could be better is the work-life balance. As a Quality Assurance Manager, the workload can be demanding at times, and it would be beneficial to have more measures in place to ensure a healthy work-life balance for employees.",1.0,4.000000,4.000000,4.000000,4.000000,0,Q1_collaboration & teamwork_0,0,Q2_collaboration & teamwork_0
1,"As the Product Manager of my organization, I really appreciate the collaborative work culture that exists here. The team is supportive, inclusive, and encourages open communication. We celebrate diversity and individuals are given the autonomy to contribute their unique skills and ideas.","In terms of improvements, I believe that my organization can further enhance employee well-being by focusing on work-life balance initiatives. While the company values innovation and productivity, it's equally important to provide avenues for relaxation and personal time to avoid burnout.",1.0,4.000000,4.000000,5.000000,4.000000,0,Q1_collaboration & teamwork_0,0,Q2_collaboration & teamwork_0
2,"I enjoy the collaborative environment at my organization, where team members work together to solve problems and support each other in achieving our goals. The company also provides opportunities for continuous learning and personal growth, which I value greatly.","As the Team Leader of Customer Support, one aspect that I think should be improved is the sense of isolation I sometimes feel. Due to the nature of my role, I often have to work independently and may not have as much interaction with other teams or departments. It would be beneficial to have more cross-team collaboration and communication to foster a stronger sense of belonging and connection within the company.",1.0,4.000000,4.000000,4.000000,5.000000,0,Q1_collaboration & teamwork_0,3,Q2_growth & development_3
3,I appreciate the collaborative and innovative work environment at my organization. The teams here are always open to sharing ideas and working together to solve challenges.,"To enhance the workplace, I believe there could be more opportunities for skill development and career growth. Providing training programs and mentorship opportunities would greatly benefit employees and help them excel in their roles.",1.0,4.000000,4.000000,4.000000,4.000000,0,Q1_collaboration & teamwork_0,1,Q2_career training & opportunities_1
4,I like the collaborative and innovative work culture at my organization. We have a strong team spirit and everyone is always willing to support and help each other.,"In order to make the workplace better, I believe we can improve the communication channels between different teams and departments. This would facilitate better information sharing and coordination, leading to more efficient and effective work processes.",1.0,4.000000,4.000000,4.000000,5.000000,0,Q1_collaboration & teamwork_0,-1,Q2_Unclassified_-1
...,...,...,...,...,...,...,...,...,...,...,...
8509,I appreciate the opportunities for professional growth and learning at my organization. The company provides various training programs and resources for employees to enhance their skills and knowledge.,"However, I feel excluded at times due to a lack of collaboration within the team and limited involvement in decision-making processes. It would be better if there were more inclusive communication channels and opportunities for all employees to contribute and participate in discussions.",1.0,3.966651,3.554106,3.912199,4.293099,1,Q1_career training & opportunities_1,3,Q2_growth & development_3
8510,I appreciate the collaborative and supportive work culture at my organization. There is a strong sense of teamwork and everyone is encouraged to share their ideas and expertise.,I believe our workplace could benefit from more opportunities for professional growth and development. It would be great to have more training programs or mentorship initiatives to help employees enhance their skills and advance in their careers.,1.0,3.966651,3.554106,3.912199,4.293099,0,Q1_collaboration & teamwork_0,1,Q2_career training & opportunities_1
8511,"As a Customer Service Representative at my organization, I really appreciate the collaborative work culture. Everyone is encouraged to share their ideas and opinions, which fosters a sense of teamwork and innovation.","While my organization has a great work culture, I feel that there is room for improvement in terms of communication within the organization. Sometimes, important information or updates take longer than expected to reach everyone, which can lead to confusion and delays in our work processes.",1.0,3.966651,3.554106,3.912199,4.293099,0,Q1_collaboration & teamwork_0,2,Q2_ai innovation_2
8512,"I appreciate the collaborative and innovative work culture at my organization. The team members are highly skilled, and there are ample opportunities for learning and growth.","While my organization is a great workplace, there is room for improvement in terms of communication and transparency. It would be beneficial to have more open and transparent communication channels and processes to ensure everyone is well-informed about the organization's goals and decisions.",1.0,3.966651,3.554106,3.912199,4.293099,0,Q1_collaboration & teamwork_0,-1,Q2_Unclassified_-1


In [46]:
# Rounding all the values 
columns_to_round = ['Camaraderie', 'Credibility', 'Fairness', 'Respect', 'Pride in work']

for column in columns_to_round:
    df[column] = df[column].round().astype(int)

In [47]:
df

Unnamed: 0,Q1,Q2,Camaraderie,Credibility,Fairness,Respect,Pride in work,Q1_topics,Q1_topics_named,Q2_topics,Q2_topics_named
0,"As the Quality Assurance Manager at my organization, I appreciate the collaborative work culture and the supportive team environment. The company encourages innovation and provides opportunities for personal and professional growth.","While overall I enjoy my time at my organization, there is always room for improvement. One area that could be better is the work-life balance. As a Quality Assurance Manager, the workload can be demanding at times, and it would be beneficial to have more measures in place to ensure a healthy work-life balance for employees.",1,4,4,4,4,0,Q1_collaboration & teamwork_0,0,Q2_collaboration & teamwork_0
1,"As the Product Manager of my organization, I really appreciate the collaborative work culture that exists here. The team is supportive, inclusive, and encourages open communication. We celebrate diversity and individuals are given the autonomy to contribute their unique skills and ideas.","In terms of improvements, I believe that my organization can further enhance employee well-being by focusing on work-life balance initiatives. While the company values innovation and productivity, it's equally important to provide avenues for relaxation and personal time to avoid burnout.",1,4,4,5,4,0,Q1_collaboration & teamwork_0,0,Q2_collaboration & teamwork_0
2,"I enjoy the collaborative environment at my organization, where team members work together to solve problems and support each other in achieving our goals. The company also provides opportunities for continuous learning and personal growth, which I value greatly.","As the Team Leader of Customer Support, one aspect that I think should be improved is the sense of isolation I sometimes feel. Due to the nature of my role, I often have to work independently and may not have as much interaction with other teams or departments. It would be beneficial to have more cross-team collaboration and communication to foster a stronger sense of belonging and connection within the company.",1,4,4,4,5,0,Q1_collaboration & teamwork_0,3,Q2_growth & development_3
3,I appreciate the collaborative and innovative work environment at my organization. The teams here are always open to sharing ideas and working together to solve challenges.,"To enhance the workplace, I believe there could be more opportunities for skill development and career growth. Providing training programs and mentorship opportunities would greatly benefit employees and help them excel in their roles.",1,4,4,4,4,0,Q1_collaboration & teamwork_0,1,Q2_career training & opportunities_1
4,I like the collaborative and innovative work culture at my organization. We have a strong team spirit and everyone is always willing to support and help each other.,"In order to make the workplace better, I believe we can improve the communication channels between different teams and departments. This would facilitate better information sharing and coordination, leading to more efficient and effective work processes.",1,4,4,4,5,0,Q1_collaboration & teamwork_0,-1,Q2_Unclassified_-1
...,...,...,...,...,...,...,...,...,...,...,...
8509,I appreciate the opportunities for professional growth and learning at my organization. The company provides various training programs and resources for employees to enhance their skills and knowledge.,"However, I feel excluded at times due to a lack of collaboration within the team and limited involvement in decision-making processes. It would be better if there were more inclusive communication channels and opportunities for all employees to contribute and participate in discussions.",1,4,4,4,4,1,Q1_career training & opportunities_1,3,Q2_growth & development_3
8510,I appreciate the collaborative and supportive work culture at my organization. There is a strong sense of teamwork and everyone is encouraged to share their ideas and expertise.,I believe our workplace could benefit from more opportunities for professional growth and development. It would be great to have more training programs or mentorship initiatives to help employees enhance their skills and advance in their careers.,1,4,4,4,4,0,Q1_collaboration & teamwork_0,1,Q2_career training & opportunities_1
8511,"As a Customer Service Representative at my organization, I really appreciate the collaborative work culture. Everyone is encouraged to share their ideas and opinions, which fosters a sense of teamwork and innovation.","While my organization has a great work culture, I feel that there is room for improvement in terms of communication within the organization. Sometimes, important information or updates take longer than expected to reach everyone, which can lead to confusion and delays in our work processes.",1,4,4,4,4,0,Q1_collaboration & teamwork_0,2,Q2_ai innovation_2
8512,"I appreciate the collaborative and innovative work culture at my organization. The team members are highly skilled, and there are ample opportunities for learning and growth.","While my organization is a great workplace, there is room for improvement in terms of communication and transparency. It would be beneficial to have more open and transparent communication channels and processes to ensure everyone is well-informed about the organization's goals and decisions.",1,4,4,4,4,0,Q1_collaboration & teamwork_0,-1,Q2_Unclassified_-1


## Creating the new dataframe of aggregated values for each topic

The goal here is to understand how topics of interest match up against the 5 aggregated metrics.

I will:

    1. Concatenate the unique topics from Q1_topics_named and Q2_topics_named.
    2. Group by the topic name.
    3. Calculate the mean for each of the aspects (Camaraderie, Credibility, Fairness, Respect, Pride in work).

In [54]:
def aggregate_mean_values_with_count(df):
    # Initialize an empty DataFrame for the aggregated values
    aggregated_df = pd.DataFrame()

    # Combine both topic columns into one Series to process all at once
    all_topics = pd.concat([df['Q1_topics_named'], df['Q2_topics_named']]).unique()

    # Loop through each unique topic
    for topic in all_topics:
        # Filter rows for the current topic from both Q1 and Q2
        topic_rows_q1 = df[df['Q1_topics_named'] == topic]
        topic_rows_q2 = df[df['Q2_topics_named'] == topic]
        topic_rows = pd.concat([topic_rows_q1, topic_rows_q2])

        # Calculating mean values for the filtered rows
        mean_values = topic_rows[['Camaraderie', 'Credibility', 'Fairness', 'Respect', 'Pride in work']].mean()

        # Counting the occurrences of the current topic
        topic_count = len(topic_rows)

        # Creating a DataFrame for the current topic's mean values and count
        current_topic_df = pd.DataFrame([mean_values])
        current_topic_df['Topic'] = topic
        current_topic_df['Count'] = topic_count

        # Appending the current topic's DataFrame to the aggregated DataFrame
        aggregated_df = pd.concat([aggregated_df, current_topic_df], ignore_index=True)

    # Reordering columns to have 'Topic' and 'Count' as the first columns
    cols = ['Topic', 'Count'] + [col for col in aggregated_df.columns if col not in ['Topic', 'Count']]
    aggregated_df = aggregated_df[cols]

    # Sorting the DataFrame by 'Count' in descending order
    aggregated_df = aggregated_df.sort_values(by='Count', ascending=False).reset_index(drop=True)

    return aggregated_df

In [55]:
# Apply function
aggregated_values_df = aggregate_mean_values_with_count(df)

In [56]:
aggregated_values_df

Unnamed: 0,Topic,Count,Camaraderie,Credibility,Fairness,Respect,Pride in work
0,Q1_collaboration & teamwork_0,4629,1.043854,3.998056,3.818751,3.989631,4.13221
1,Q2_Unclassified_-1,2576,1.037267,3.965839,3.786879,3.927795,4.175078
2,Q1_Unclassified_-1,1926,1.032191,3.981828,3.771547,3.941848,4.15784
3,Q2_collaboration & teamwork_0,1882,1.03932,4.002657,3.781084,3.986716,4.168438
4,Q2_career training & opportunities_1,1666,1.052821,3.997599,3.7497,3.987395,4.067827
5,Q2_ai innovation_2,1329,1.031603,3.980436,3.82769,3.96614,4.173062
6,Q1_career training & opportunities_1,865,1.023121,3.954913,3.685549,3.871676,4.175723
7,Q1_ai innovation_2,301,1.046512,3.953488,3.694352,3.890365,4.252492
8,Q2_growth & development_3,254,1.043307,3.976378,3.728346,3.834646,4.161417
9,Q1_growth & development_3,170,1.111765,3.894118,3.694118,3.805882,4.047059


### Exporting

In [57]:
# Export the df to csv
aggregated_values_df.to_csv('data/topics_and_aggregated_metrics.csv', index=False)


In [8]:
pd.set_option('display.max_colwidth', 400)