In [None]:
import webbrowser
from pathlib import Path

import pandas as pd
from bertopic import BERTopic

# --- Configuration -----------------------------------------------------------
# NOTE(review): this absolute path is specific to the original author's
# machine; change PROJECT_ROOT to your local checkout before running.
PROJECT_ROOT = Path(r"C:\Users\KIIT\Documents\StockOracle-AI-Powered-Stock-Prediction-and-Forecasting-System")
DATA_PATH = PROJECT_ROOT / "data" / "refined_textual_data.csv"

# --- Load the sentiment dataset ----------------------------------------------
df = pd.read_csv(DATA_PATH)

# Convert 'date' column to datetime format
df["date"] = pd.to_datetime(df["date"])

# Drop rows with missing processed text ONCE, then take both the documents and
# their timestamps from the same filtered rows. Taking the timestamps from the
# unfiltered frame would leave them longer than `documents` whenever a NaN was
# dropped, and topics_over_time needs exactly one timestamp per document.
docs_df = df.dropna(subset=["processed"])
documents = docs_df["processed"].tolist()
timestamps = docs_df["date"].tolist()

# --- Fit the topic model -----------------------------------------------------
# NOTE(review): no random seed is set here, so topic ids/counts may differ
# between runs — confirm whether reproducibility is required.
topic_model = BERTopic(verbose=True)

# Fit and transform the data: one topic id (and probability) per document
topics, probs = topic_model.fit_transform(documents)

# Display topic representation
topic_info = topic_model.get_topic_info()
print(topic_info.head())

# --- Generate visualizations -------------------------------------------------
fig_cluster = topic_model.visualize_topics()
fig_barchart = topic_model.visualize_barchart(top_n_topics=10)

# Topics Over Time plot: timestamps are aligned 1:1 with `documents`
topics_over_time = topic_model.topics_over_time(documents, timestamps, nr_bins=20)
fig_time = topic_model.visualize_topics_over_time(topics_over_time)




2025-05-06 02:16:10,152 - BERTopic - Embedding - Transforming documents to embeddings.


Batches:   0%|          | 0/393 [00:00<?, ?it/s]

2025-05-06 02:18:50,126 - BERTopic - Embedding - Completed ✓
2025-05-06 02:18:50,128 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2025-05-06 02:20:19,141 - BERTopic - Dimensionality - Completed ✓
2025-05-06 02:20:19,144 - BERTopic - Cluster - Start clustering the reduced embeddings
2025-05-06 02:20:20,203 - BERTopic - Cluster - Completed ✓
2025-05-06 02:20:20,234 - BERTopic - Representation - Extracting topics from clusters using representation models.
2025-05-06 02:20:21,030 - BERTopic - Representation - Completed ✓


   Topic  Count                              Name  \
0     -1    235  -1_nvda_confident_closed_feeling   
1      0    232  0_googl_optimistic_higher_strong   
2      1    224  1_nvda_disappointed_trading_amid   
3      2    218   2_nflx_disappointed_to_dropping   
4      3    214   3_csco_optimistic_closed_strong   

                                      Representation  \
0  [nvda, confident, closed, feeling, higher, sur...   
1  [googl, optimistic, higher, strong, surging, f...   
2  [nvda, disappointed, trading, amid, are, heavy...   
3  [nflx, disappointed, to, dropping, heavy, toda...   
4  [csco, optimistic, closed, strong, surging, fe...   

                                 Representative_Docs  
0  [strong day for nvda! closed higher at 139.90,...  
1  [strong day for googl! closed higher at 119.25...  
2  [nvda struggled today, dropping to 26.48. inve...  
3  [nflx struggled today, dropping to 503.06. inv...  
4  [strong day for csco! closed higher at 48.15, ...  


20it [00:09,  2.00it/s]


In [None]:
# Save the visualization
# Single source of truth for the report location (used for writing AND opening).
# NOTE(review): machine-specific absolute path; adjust for your local checkout.
output_html = Path(r"C:\Users\KIIT\Documents\StockOracle-AI-Powered-Stock-Prediction-and-Forecasting-System\visualizations\Topic_Visualization.html")

# Create the target folder if needed so the write cannot fail on a fresh checkout.
output_html.parent.mkdir(parents=True, exist_ok=True)

# (heading, figure) pairs, written to the report in this order.
report_sections = [
    ("Topic Clusters", fig_cluster),
    ("Topic Bar Chart", fig_barchart),
    ("Topics Over Time", fig_time),
]

with output_html.open("w", encoding="utf-8") as f:
    for position, (heading, figure) in enumerate(report_sections):
        f.write(f"<h1>{heading}</h1>")
        # plotly.js only needs to be loaded once per page: the first fragment
        # pulls it from the CDN, later fragments reuse the already-loaded copy.
        plotlyjs_source = "cdn" if position == 0 else False
        f.write(figure.to_html(full_html=False, include_plotlyjs=plotlyjs_source))

print("Completed! Visualization saved as 'Topic_Visualization.html'")

# (Optional) Open the saved file directly; a file:// URI is passed instead of
# a bare filesystem path so the browser gets an unambiguous location.
webbrowser.open(output_html.resolve().as_uri())

Completed! Visualization saved as 'Topic_Visualization.html'


True