In [18]:
import pandas as pd
from bertopic import BERTopic

# Load your model and data
topic_model = BERTopic.load("MODEL_REDUCED_OUTLIERS")
df = pd.read_csv("FINAL_REDUCED_DATA.csv")

# IMPORTANT: Make sure 'original_documents' is a list of your texts.
# For example, if your text is in a column named 'text_content':
original_documents = df['clean_text'].tolist() # <-- Adjust 'text_content' to your column name

# Get basic topic info
topic_info_df = topic_model.get_topic_info()

# --- FIX IS HERE ---
# 1. Get the dictionary of *INDICES* for representative docs. No arguments needed.
rep_docs_indices_dict = topic_model.get_representative_docs()

# Prepare lists to hold the new data
top_keywords_list = []
ctidf_scores_list = []
rep_docs_list = []

# --- LOOP LOGIC IS UPDATED BELOW ---
for topic_id in topic_info_df["Topic"]:
    if topic_id == -1:
        top_keywords_list.append("N/A (Outliers)")
        ctidf_scores_list.append("N/A")
        rep_docs_list.append("N/A")
        continue

    # Get keywords and scores (this part is unchanged)
    words_scores = topic_model.get_topic(topic_id)
    if words_scores:
        top_words = [word for word, score in words_scores[:10]]
        top_scores = [f"{score:.4f}" for _, score in words_scores[:10]]
        top_keywords_list.append(", ".join(top_words))
        ctidf_scores_list.append(", ".join(top_scores))
    else:
        top_keywords_list.append("No words found")
        ctidf_scores_list.append("")

    # 2. Use the indices to get the representative docs from your original list
    if topic_id in rep_docs_indices_dict:
        # Get the index of the top representative document
        top_doc_index = rep_docs_indices_dict[topic_id][0]
        # Use the index to retrieve the actual document text
        rep_docs_list.append(original_documents[top_doc_index])
    else:
        rep_docs_list.append("None available")

# Add the new lists as columns to the DataFrame
topic_info_df['Top_10_Keywords'] = top_keywords_list
topic_info_df['CTIDF_Scores'] = ctidf_scores_list
topic_info_df['Representative_Doc'] = rep_docs_list

# Select and reorder columns
final_summary_df = topic_info_df[[
    'Topic', 'Count', 'Top_10_Keywords', 'CTIDF_Scores', 'Representative_Doc'
]].copy()

# Display the final result
final_summary_df

Unnamed: 0,Topic,Count,Top_10_Keywords,CTIDF_Scores,Representative_Doc
0,0,54046,"dentist, dental, teeth, tooth, my, and, the, o...","0.0124, 0.0115, 0.0086, 0.0064, 0.0062, 0.0060...",None available
1,1,9419,"pharmacy, prescription, prescriptions, pharmac...","0.0257, 0.0194, 0.0113, 0.0090, 0.0090, 0.0087...",None available
2,2,8735,"insurance, bill, billing, pay, they, paid, not...","0.0146, 0.0134, 0.0115, 0.0089, 0.0081, 0.0070...",None available
3,3,4191,"covid, test, testing, results, tested, rapid, ...","0.0439, 0.0264, 0.0150, 0.0119, 0.0107, 0.0078...",None available
4,4,7180,"staff, friendly, very, helpful, professional, ...","0.0502, 0.0353, 0.0214, 0.0171, 0.0171, 0.0147...",None available
...,...,...,...,...,...
158,158,418,"atmosphere, environment, great, friendly, nice...","0.2002, 0.0387, 0.0345, 0.0332, 0.0233, 0.0201...",None available
159,159,606,"addiction, life, program, recovery, detox, reh...","0.0157, 0.0116, 0.0113, 0.0105, 0.0104, 0.0104...",None available
160,160,654,"shoulder, exercises, therapy, physical, injury...","0.0355, 0.0200, 0.0174, 0.0135, 0.0123, 0.0114...",None available
161,161,555,"clean, prices, good, expensive, nice, selectio...","0.1473, 0.0615, 0.0286, 0.0247, 0.0232, 0.0229...",None available


In [19]:
#delete representative doc column
final_summary_df = final_summary_df.drop(columns = ['Representative_Doc'])
final_summary_df

Unnamed: 0,Topic,Count,Top_10_Keywords,CTIDF_Scores
0,0,54046,"dentist, dental, teeth, tooth, my, and, the, o...","0.0124, 0.0115, 0.0086, 0.0064, 0.0062, 0.0060..."
1,1,9419,"pharmacy, prescription, prescriptions, pharmac...","0.0257, 0.0194, 0.0113, 0.0090, 0.0090, 0.0087..."
2,2,8735,"insurance, bill, billing, pay, they, paid, not...","0.0146, 0.0134, 0.0115, 0.0089, 0.0081, 0.0070..."
3,3,4191,"covid, test, testing, results, tested, rapid, ...","0.0439, 0.0264, 0.0150, 0.0119, 0.0107, 0.0078..."
4,4,7180,"staff, friendly, very, helpful, professional, ...","0.0502, 0.0353, 0.0214, 0.0171, 0.0171, 0.0147..."
...,...,...,...,...
158,158,418,"atmosphere, environment, great, friendly, nice...","0.2002, 0.0387, 0.0345, 0.0332, 0.0233, 0.0201..."
159,159,606,"addiction, life, program, recovery, detox, reh...","0.0157, 0.0116, 0.0113, 0.0105, 0.0104, 0.0104..."
160,160,654,"shoulder, exercises, therapy, physical, injury...","0.0355, 0.0200, 0.0174, 0.0135, 0.0123, 0.0114..."
161,161,555,"clean, prices, good, expensive, nice, selectio...","0.1473, 0.0615, 0.0286, 0.0247, 0.0232, 0.0229..."


In [20]:
final_summary_df.to_csv("INSPECTED_TOPIC.csv", index = False)

## ** GABUNGIN SENTIMENT + TOPIC DATA BUAT VISUALIZATION**

In [3]:
import pandas as pd

df = pd.read_csv("C:/Users/eliza/Documents/Semester V/Sentiment Analysis/CITY/MODEL BUILDING/Interface/FINAL_REDUCED_DATA.csv")
df

Unnamed: 0,user_id,username,time,rating,original_text,translated_text,final_with_stopword,final_no_stopword,ori_wc,final_wc,...,category_16,category_17,category_18,category_19,category_20,category_21,cont_expanded,clean_text,wc,TOPIC_ID
0,1.140000e+20,Sajida Dar,1.583980e+12,5.0,"Great people,great staff and great facility to...","Great people,great staff and great facility to...",great people great staff and great facility to...,great people great staff great facility go thu...,9,10,...,,,,,,,"Great people,great staff and great facility to...",great peoplegreat staff and great facility to go,8,46
1,1.030000e+20,Tamara Walker,1.604380e+12,2.0,Security guard asked me to put a mask on my 13...,Security guard asked me to put a mask on my 13...,security guard ask me to put a mask on my 13 m...,security guard ask put mask 13 month old baby ...,65,68,...,,,,,,,Security guard asked me to put a mask on my 13...,security guard asked me to put a mask on my mo...,66,31
2,1.150000e+20,Christina Veres,1.560490e+12,4.0,One of the better Cub foods that I have shoppe...,One of the better Cub foods that I have shoppe...,one of the good cub food that i have shop at e...,one good cub food shop everything need right e...,39,39,...,,,,,,,One of the better Cub foods that I have shoppe...,one of the better cub foods that i have shoppe...,39,16
3,1.110000e+20,Keegan Leahy,1.581380e+12,1.0,Front of house has amazing staff.. Owner howev...,Front of house has amazing staff.. Owner howev...,front of house have amazing staff owner howeve...,front house amazing staff owner however lot gr...,51,51,...,,,,,,,Front of house has amazing staff.. Owner howev...,front of house has amazing staff owner however...,51,7
4,1.110000e+20,Kyle Ebert,1.592440e+12,2.0,Came in early\nNo one was there except one per...,Came in early\nNo one was there except one per...,come in early no one be there except one perso...,come early no one except one person need simpl...,60,62,...,,,,,,,Came in early\nNo one was there except one per...,came in early no one was there except one pers...,62,21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305846,1.020000e+20,Kevin Portee,1.577890e+12,5.0,I feel like a new person on every visit,I feel like a new person on every visit,i feel like a new person on every visit,feel like new person every visit,9,9,...,,,,,,,I feel like a new person on every visit,i feel like a new person on every visit,9,100
305847,1.140000e+20,James Rose,1.522870e+12,5.0,The aqua therapy I have been receiving there h...,The aqua therapy I have been receiving there h...,the aqua therapy i have be receive there have ...,aqua therapy receive helpful,12,12,...,,,,,,,The aqua therapy I have been receiving there h...,the aqua therapy i have been receiving there h...,12,71
305848,1.020000e+20,Maxwell Rosa,1.540950e+12,5.0,Had baby #1 last year in June. Staff was amaz...,Had baby #1 last year in June. Staff was amaz...,have baby 1 last year in june staff be amaze s...,baby 1 last year june staff amaze shoutout mid...,30,30,...,,,,,,,Had baby #1 last year in June. Staff was amaz...,had baby last year in june staff was amazing s...,28,48
305849,1.030000e+20,Yolonda Taylor,1.536770e+12,1.0,I gave the staff my authorization number for m...,I gave the staff my authorization number for m...,i give the staff my authorization number for m...,give staff authorization number botox injectio...,116,119,...,,,,,,,I gave the staff my authorization number for m...,i gave the staff my authorization number for m...,115,2


In [2]:
ti = pd.read_csv("C:/Users/eliza/Documents/Semester V/Sentiment Analysis/CITY/MODEL BUILDING/Interface/INSPECTED_TOPIC.csv")
ti

Unnamed: 0,topic_id,name,general_topic,count,top10keywords,ctfidf_score
0,0,Dental Service Experience,General & Specialized Medical Care,54046,"dentist, dental, teeth, tooth, my, and, the, o...","0.0124, 0.0115, 0.0086, 0.0064, 0.0062, 0.0060..."
1,1,Medicine Prescription Filling (Timeliness & Re...,Pharmacy & Medication,9419,"pharmacy, prescription, prescriptions, pharmac...","0.0257, 0.0194, 0.0113, 0.0090, 0.0090, 0.0087..."
2,2,Treatment Insurance & Billing Procedure (Insur...,Insurance & Billing,8735,"insurance, bill, billing, pay, they, paid, not...","0.0146, 0.0134, 0.0115, 0.0089, 0.0081, 0.0070..."
3,3,"Covid Vaccination, Safety Protocols, Test, Pr...",Public Health & Safety,4191,"covid, test, testing, results, tested, rapid, ...","0.0439, 0.0264, 0.0150, 0.0119, 0.0107, 0.0078..."
4,4,"Attitude, Competency & Knowledge, Efficiency",Staff & Service Quality,7180,"staff, friendly, very, helpful, professional, ...","0.0502, 0.0353, 0.0214, 0.0171, 0.0171, 0.0147..."
...,...,...,...,...,...,...
158,158,Atmosphere & Environment,Atmosphere & Environment,418,"atmosphere, environment, great, friendly, nice...","0.2002, 0.0387, 0.0345, 0.0332, 0.0233, 0.0201..."
159,159,Rehabilitation Treatment,"Wellness, Fitness & Alternative Therapies",606,"addiction, life, program, recovery, detox, reh...","0.0157, 0.0116, 0.0113, 0.0105, 0.0104, 0.0104..."
160,160,"Shoulder, Neck & Back Treatment",General & Specialized Medical Care,654,"shoulder, exercises, therapy, physical, injury...","0.0355, 0.0200, 0.0174, 0.0135, 0.0123, 0.0114..."
161,161,Price and Item Selection,Shopping Experience,555,"clean, prices, good, expensive, nice, selectio...","0.1473, 0.0615, 0.0286, 0.0247, 0.0232, 0.0229..."


In [4]:
# Assuming your topic info DataFrame is named 'ti' and your main data is 'df'

# Select only the columns you need from the topic info DataFrame
topic_info_to_merge = ti[['topic_id', 'name', 'general_topic']]

# Perform a left merge
# This keeps every row from your original 'df'
# and adds the 'name' and 'general_topic' where the IDs match.
df_merged = pd.merge(
    df,
    topic_info_to_merge,
    left_on='TOPIC_ID',    # Column from the left DataFrame (df)
    right_on='topic_id',   # Column from the right DataFrame (ti)
    how='left'             # Type of merge
)

# The result 'df_merged' will now have the new 'name' and 'general_topic' columns.
# You might have a redundant 'topic_id' column which you can drop if you want:
df_merged = df_merged.drop(columns=['topic_id'])

# Display the first few rows of the new merged DataFrame
df_merged

Unnamed: 0,user_id,username,time,rating,original_text,translated_text,final_with_stopword,final_no_stopword,ori_wc,final_wc,...,category_18,category_19,category_20,category_21,cont_expanded,clean_text,wc,TOPIC_ID,name,general_topic
0,1.140000e+20,Sajida Dar,1.583980e+12,5.0,"Great people,great staff and great facility to...","Great people,great staff and great facility to...",great people great staff and great facility to...,great people great staff great facility go thu...,9,10,...,,,,,"Great people,great staff and great facility to...",great peoplegreat staff and great facility to go,8,46,Place & Facility,Physical Environment & Cleanliness
1,1.030000e+20,Tamara Walker,1.604380e+12,2.0,Security guard asked me to put a mask on my 13...,Security guard asked me to put a mask on my 13...,security guard ask me to put a mask on my 13 m...,security guard ask put mask 13 month old baby ...,65,68,...,,,,,Security guard asked me to put a mask on my 13...,security guard asked me to put a mask on my mo...,66,31,Mask Usage & Policies,Public Health & Safety
2,1.150000e+20,Christina Veres,1.560490e+12,4.0,One of the better Cub foods that I have shoppe...,One of the better Cub foods that I have shoppe...,one of the good cub food that i have shop at e...,one good cub food shop everything need right e...,39,39,...,,,,,One of the better Cub foods that I have shoppe...,one of the better cub foods that i have shoppe...,39,16,Price and Item Selection,Shopping Experience
3,1.110000e+20,Keegan Leahy,1.581380e+12,1.0,Front of house has amazing staff.. Owner howev...,Front of house has amazing staff.. Owner howev...,front of house have amazing staff owner howeve...,front house amazing staff owner however lot gr...,51,51,...,,,,,Front of house has amazing staff.. Owner howev...,front of house has amazing staff owner however...,51,7,"Attitude, Competency & Knowledge, Efficiency",Staff & Service Quality
4,1.110000e+20,Kyle Ebert,1.592440e+12,2.0,Came in early\nNo one was there except one per...,Came in early\nNo one was there except one per...,come in early no one be there except one perso...,come early no one except one person need simpl...,60,62,...,,,,,Came in early\nNo one was there except one per...,came in early no one was there except one pers...,62,21,Test and Screening,"Diagnostic, Imaging & Lab Services"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305846,1.020000e+20,Kevin Portee,1.577890e+12,5.0,I feel like a new person on every visit,I feel like a new person on every visit,i feel like a new person on every visit,feel like new person every visit,9,9,...,,,,,I feel like a new person on every visit,i feel like a new person on every visit,9,100,Enjoyable,Overall Facility Experience
305847,1.140000e+20,James Rose,1.522870e+12,5.0,The aqua therapy I have been receiving there h...,The aqua therapy I have been receiving there h...,the aqua therapy i have be receive there have ...,aqua therapy receive helpful,12,12,...,,,,,The aqua therapy I have been receiving there h...,the aqua therapy i have been receiving there h...,12,71,Accupunture Needling Experience,"Wellness, Fitness & Alternative Therapies"
305848,1.020000e+20,Maxwell Rosa,1.540950e+12,5.0,Had baby #1 last year in June. Staff was amaz...,Had baby #1 last year in June. Staff was amaz...,have baby 1 last year in june staff be amaze s...,baby 1 last year june staff amaze shoutout mid...,30,30,...,,,,,Had baby #1 last year in June. Staff was amaz...,had baby last year in june staff was amazing s...,28,48,Pregnancy & Delivery Experience,General & Specialized Medical Care
305849,1.030000e+20,Yolonda Taylor,1.536770e+12,1.0,I gave the staff my authorization number for m...,I gave the staff my authorization number for m...,i give the staff my authorization number for m...,give staff authorization number botox injectio...,116,119,...,,,,,I gave the staff my authorization number for m...,i gave the staff my authorization number for m...,115,2,Treatment Insurance & Billing Procedure (Insur...,Insurance & Billing


In [5]:
# Define the full path where you want to save the file
output_path = r"C:\Users\eliza\Documents\Semester V\Sentiment Analysis\CITY\MODEL BUILDING\Interface\BIZ_VIZ.csv"

# Save the merged DataFrame to the specified path
# index=False prevents pandas from writing the DataFrame index as a column
df_merged.to_csv(output_path, index=False)

print(f"File successfully saved to: {output_path}")

File successfully saved to: C:\Users\eliza\Documents\Semester V\Sentiment Analysis\CITY\MODEL BUILDING\Interface\BIZ_VIZ.csv
