In [None]:
# Import necessary libraries
import sys
import os
import pandas as pd

# Make the project's local 'src' directory importable.
# os.path.abspath resolves 'src' against the current working directory, so
# this relies on the notebook being run from the directory that contains 'src'.
src_path = os.path.abspath('src')
# Insert at position 0 so this directory is searched before other sys.path entries.
sys.path.insert(0, src_path)

# Import the DescriptiveStats class
from descriptive_stats import DescriptiveStats

# Input locations, given relative to the current working directory
analyst_data_path = 'data/raw_analyst_ratings.csv/raw_analyst_ratings.csv'
yfinance_data_folder = 'data/yfinance_data'

# Read the raw analyst ratings and wrap them in a DescriptiveStats helper
df_analyst = pd.read_csv(analyst_data_path)
stats_analyst = DescriptiveStats(df_analyst)

# Each report below prints its heading first, then computes and renders the
# result, so the heading is visible even if the analysis itself fails.

# Overall summary statistics
print("Summary Statistics for Analyst Data:")
analyst_summary = stats_analyst.get_summary_statistics()
display(analyst_summary)

# Text-length analysis (assumes the data has a 'headline' column)
print("\nTextual Length Analysis for Analyst Data:")
analyst_headline_lengths = stats_analyst.analyze_textual_lengths('headline')
display(analyst_headline_lengths)

# Articles per publisher (assumes the data has a 'publisher' column)
print("\nArticle Count per Publisher for Analyst Data:")
analyst_publisher_counts = stats_analyst.count_articles_per_publisher('publisher')
display(analyst_publisher_counts)

# Publication-date analysis (assumes the data has a 'publication_date' column)
print("\nPublication Date Analysis for Analyst Data:")
analyst_date_analysis = stats_analyst.analyze_publication_dates('publication_date')
display(analyst_date_analysis)

# Load multiple CSV files from the yfinance_data folder
def load_multiple_csv(folder_path):
    """Read every ``.csv`` file in *folder_path* into a single DataFrame.

    Files are read in sorted filename order so that the row order of the
    result is reproducible; ``os.listdir`` returns entries in arbitrary order.
    Sub-directories are skipped even when their name ends in ``.csv``.

    Args:
        folder_path: Directory containing the CSV files to combine.

    Returns:
        One DataFrame holding the rows of all files, concatenated with a
        fresh ``0..n-1`` index.

    Raises:
        ValueError: If the folder contains no ``.csv`` files.
    """
    csv_paths = sorted(
        path
        for path in (
            os.path.join(folder_path, name)
            for name in os.listdir(folder_path)
            if name.endswith('.csv')
        )
        # A directory named "*.csv" cannot be parsed by read_csv; skip it.
        if os.path.isfile(path)
    )
    if not csv_paths:
        # Fail with a message naming the folder instead of pandas' generic
        # "No objects to concatenate".
        raise ValueError(f"No CSV files found in {folder_path!r}")
    frames = [pd.read_csv(path) for path in csv_paths]
    return pd.concat(frames, ignore_index=True)

# Combine every CSV in the yfinance_data folder and wrap the result in a
# DescriptiveStats helper
df_yfinance = load_multiple_csv(yfinance_data_folder)
stats_yfinance = DescriptiveStats(df_yfinance)

# Each report is a (heading, deferred analysis) pair. The analysis is wrapped
# in a lambda so it runs only after its heading has been printed.
# NOTE(review): the 'headline', 'publisher' and 'publication_date' columns are
# assumed to exist in the yfinance data - confirm against the actual CSV schema.
yfinance_reports = (
    ("\nSummary Statistics for yfinance Data:",
     lambda: stats_yfinance.get_summary_statistics()),
    ("\nTextual Length Analysis for yfinance Data:",
     lambda: stats_yfinance.analyze_textual_lengths('headline')),
    ("\nArticle Count per Publisher for yfinance Data:",
     lambda: stats_yfinance.count_articles_per_publisher('publisher')),
    ("\nPublication Date Analysis for yfinance Data:",
     lambda: stats_yfinance.analyze_publication_dates('publication_date')),
)

for report_heading, run_report in yfinance_reports:
    print(report_heading)
    display(run_report())
