In [None]:
# Import modules from external Python files
from data_preprocessing import load_and_preprocess_data
from visualizations import plot_daily_publication, plot_hourly_publication, plot_heatmap
from sentiment_analysis import calculate_textblob_sentiment, calculate_vader_sentiment
from publisher_analysis import get_top_publishers, plot_top_publishers
from topic_modeling import perform_topic_modeling


In [None]:
# Location of the raw analyst-ratings CSV that the loading cell passes to
# load_and_preprocess_data.
# The default is the original absolute Windows path, which only resolves on a
# machine with that exact directory layout. Set the RAW_ANALYST_RATINGS_CSV
# environment variable to point the notebook at the dataset elsewhere without
# editing this cell; when it is unset the behaviour is unchanged.
import os

file_path = os.environ.get(
    "RAW_ANALYST_RATINGS_CSV",
    r"D:\Kifya_training\Week 1\Technical  Content\Data\raw_analyst_ratings.csv",
)


In [None]:
# Load the CSV at `file_path` through the project's preprocessing helper and
# bind the result to `data`, which every later cell reads.
# NOTE(review): later cells use the 'date_only', 'hour', 'headline' and
# 'publisher' columns; presumably 'date_only' and 'hour' are derived inside
# load_and_preprocess_data — confirm in data_preprocessing.py.
data = load_and_preprocess_data(file_path)

# Keep head() as the cell's last expression so the notebook renders the
# preview of the first rows.
data.head()


In [None]:
# Daily publication frequency: bucket the rows by their 'date_only' value,
# then count how many rows landed in each bucket
rows_by_date = data.groupby('date_only')
daily_publication = rows_by_date.size()

# Draw the per-date counts with the project's plotting helper
plot_daily_publication(daily_publication)


In [None]:
# Hourly publication frequency: bucket the rows by their 'hour' value,
# then count how many rows landed in each bucket
rows_by_hour = data.groupby('hour')
hourly_publication = rows_by_hour.size()

# Draw the per-hour counts with the project's plotting helper
plot_hourly_publication(hourly_publication)


In [None]:
# Plot a heatmap of how many data points fall on each day/hour combination.
# The whole frame is passed through; which columns are aggregated is decided
# inside visualizations.plot_heatmap (presumably the day and hour fields —
# confirm there).
plot_heatmap(data)


In [None]:
# Score the 'headline' text with the TextBlob-based helper and rebind `data`
# to whatever the helper returns
data = calculate_textblob_sentiment(data, 'headline')

# Preview the headlines next to their TextBlob scores
textblob_preview_columns = ['headline', 'textblob_sentiment']
data[textblob_preview_columns].head()


In [None]:
# Score the 'headline' text with the VADER-based helper and rebind `data`
# to whatever the helper returns
data = calculate_vader_sentiment(data, 'headline')

# Preview the headlines next to their VADER scores
vader_preview_columns = ['headline', 'vader_sentiment']
data[vader_preview_columns].head()


In [None]:
# Ask the publisher-analysis helper for the top publishers, keyed on the
# 'publisher' column. How many publishers come back, and in what structure,
# is decided inside publisher_analysis.get_top_publishers — not visible here.
top_publishers = get_top_publishers(data, 'publisher')

# Plot the helper's result as returned, without further processing
plot_top_publishers(top_publishers)


In [None]:
# Run the project's (NMF-based) topic-modeling helper over the headlines,
# requesting five topics
topics = perform_topic_modeling(data, 'headline', n_topics=5)

# Print one line per topic, numbered from 1, listing its associated words
for topic_number, topic_words in enumerate(topics, start=1):
    joined_words = ', '.join(topic_words)
    print(f"Topic {topic_number}: {joined_words}")
