In [None]:
#pip install -r requirements.txt

In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
from datetime import datetime, timedelta
import re
import nltk
# Fetch the NLTK resources named 'punkt' and 'stopwords'.
# NOTE(review): this runs every time the cell/script executes — confirm that is
# acceptable for the deployment target (it may need network access).
nltk.download('punkt')
nltk.download('stopwords')

In [11]:
# Page-level Streamlit settings: wide layout with the sidebar opened on load.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="FED Sentiment Analysis Dashboard",
    page_icon="📊",
)

# Working folders used by the data-loading code below (relative to the CWD).
OUTPUT_DIR = "dashboard_data"      # exported dashboard CSVs
PROCESSED_DIR = "processed_data"   # pre-processed documents
ANALYSIS_DIR = "analysis_data"     # sentiment analysis results

# Create each folder up front; exist_ok makes re-running this cell harmless.
for directory in (OUTPUT_DIR, PROCESSED_DIR, ANALYSIS_DIR):
    os.makedirs(directory, exist_ok=True)

In [13]:
# Custom CSS for the dashboard: header styles plus the chart and insight boxes.
# The stylesheet is kept in one string and injected as raw HTML, which is why
# unsafe_allow_html must be enabled on the markdown call.
custom_css = """
<style>
.main-header {
    font-size: 2.5rem;
    color: #1E3A8A;
    text-align: center;
    margin-bottom: 1rem;
}
.sub-header {
    font-size: 1.5rem;
    color: #1E3A8A;
    margin-top: 2rem;
    margin-bottom: 1rem;
}
.chart-container {
    background-color: #f8f9fa;
    padding: 1rem;
    border-radius: 5px;
    margin-bottom: 2rem;
}
.insight-box {
    background-color: #e9f7ef;
    padding: 1rem;
    border-left: 5px solid #27ae60;
    margin-bottom: 1rem;
}
</style>
"""
st.markdown(custom_css, unsafe_allow_html=True)

DeltaGenerator()

In [18]:
def _load_cached_outputs(main_file):
    """Read the dashboard CSVs that a previous run exported to OUTPUT_DIR.

    Args:
        main_file: Path of the merged sentiment/returns CSV (caller has already
            verified that it exists).

    Returns:
        Tuple ``(df, summary_df, corr_matrices)``. ``summary_df`` is an empty
        DataFrame when the summary CSV is absent. ``corr_matrices`` maps the
        ``<name>`` part of every ``correlation_<name>.csv`` file to its frame.
    """
    df = pd.read_csv(main_file)
    df['announcement_date'] = pd.to_datetime(df['announcement_date'])

    summary_file = os.path.join(OUTPUT_DIR, "sentiment_summary_stats.csv")
    if os.path.exists(summary_file):
        summary_df = pd.read_csv(summary_file)
    else:
        summary_df = pd.DataFrame()

    prefix, suffix = "correlation_", ".csv"
    corr_matrices = {}
    # os.listdir order is arbitrary; sort so the dict order is reproducible.
    for file in sorted(os.listdir(OUTPUT_DIR)):
        if file.startswith(prefix) and file.endswith(suffix):
            # Slice off exactly one prefix/suffix. str.replace would strip every
            # occurrence and corrupt names that contain either substring.
            name = file[len(prefix):-len(suffix)]
            corr_matrices[name] = pd.read_csv(os.path.join(OUTPUT_DIR, file), index_col=0)

    return df, summary_df, corr_matrices


def _build_outputs_from_raw():
    """Build the merged dataset from the raw/analysis CSVs and export it to OUTPUT_DIR.

    Relies on ``create_merged_dataset``, ``create_correlation_matrices`` and
    ``create_summary_stats``, which are defined elsewhere in this notebook.

    Returns:
        Tuple ``(merged_df, summary_df, corr_matrices)`` as produced by those
        helpers; the same objects are also written to CSV for later runs.
    """
    sentiment_df = pd.read_csv(os.path.join(ANALYSIS_DIR, "sentiment_analysis.csv"))
    sentiment_df['date'] = pd.to_datetime(sentiment_df['date'])

    returns_df = pd.read_csv(os.path.join("raw_data", "new_combined_dates.csv"))
    returns_df['announcement_date'] = pd.to_datetime(returns_df['announcement_date'])

    merged_df = create_merged_dataset(returns_df, sentiment_df)
    corr_matrices = create_correlation_matrices(merged_df)
    summary_df = create_summary_stats(merged_df, corr_matrices)

    # Persist everything so the next call can take the cached-output path.
    merged_df.to_csv(os.path.join(OUTPUT_DIR, "fed_sentiment_returns.csv"), index=False)
    summary_df.to_csv(os.path.join(OUTPUT_DIR, "sentiment_summary_stats.csv"), index=False)
    for name, matrix in corr_matrices.items():
        matrix.to_csv(os.path.join(OUTPUT_DIR, f"correlation_{name}.csv"))

    return merged_df, summary_df, corr_matrices


def _make_sample_data():
    """Generate seeded random demonstration data with the dashboard's column layout.

    Returns:
        Tuple ``(df, summary_df, corr_matrices)`` where ``summary_df`` is empty
        and ``corr_matrices`` is ``{'all': <empty DataFrame>}``.
    """
    # A private legacy generator yields the same values as
    # np.random.seed(42) followed by np.random.* calls, without reseeding the
    # process-wide NumPy RNG as a side effect of loading data.
    rng = np.random.RandomState(42)

    # NOTE(review): the 'M' (month-end) alias is deprecated in recent pandas
    # releases in favour of 'ME' — confirm against the pinned pandas version.
    dates = pd.date_range(start='2020-01-01', end='2023-01-01', freq='2M')
    n_rows = len(dates)

    # The draw order below determines the generated values; keep it stable.
    sample_data = {
        'meeting_id': range(1, n_rows + 1),
        'announcement_date': dates,
        'document_type': rng.choice(['statement', 'intermeeting'], size=n_rows),
        'full_net_sentiment': rng.normal(0.1, 0.3, n_rows),
        'full_ml_sentiment_score': rng.normal(0.2, 0.4, n_rows),
        'llm_overall_bullishness': rng.choice([-1, 0, 1], size=n_rows, p=[0.3, 0.4, 0.3]),
        'monetary_policy_net_sentiment': rng.normal(0.05, 0.3, n_rows),
        'economic_conditions_net_sentiment': rng.normal(0.1, 0.4, n_rows),
        'forward_guidance_net_sentiment': rng.normal(0.15, 0.3, n_rows),
        'balance_sheet_net_sentiment': rng.normal(0.0, 0.3, n_rows)
    }

    # One random return column per ticker and per day offset in [-10, +10],
    # named '<ticker>_T-10' ... '<ticker>_T0' ... '<ticker>_T+10'.
    tickers = ['^GSPC', '^IXIC', '^DJI', 'XLF', 'XLE']
    for ticker in tickers:
        for day in range(-10, 11):
            day_col = f'T{day}' if day < 0 else 'T0' if day == 0 else f'T+{day}'
            column_name = f'{ticker}_{day_col}'
            sample_data[column_name] = rng.normal(0.0, 0.01, n_rows)

    df = pd.DataFrame(sample_data)

    summary_df = pd.DataFrame()
    corr_matrices = {'all': pd.DataFrame()}

    return df, summary_df, corr_matrices


# @st.cache_data
def load_data():
    """Load all necessary data files.

    Tries three sources in order:

    1. Previously exported CSVs in ``OUTPUT_DIR``.
    2. The raw returns and sentiment-analysis CSVs, which are merged, summarised
       and then exported to ``OUTPUT_DIR``.
    3. Randomly generated sample data (a Streamlit warning is shown).

    Returns:
        Tuple ``(df, summary_df, corr_matrices)``: the main DataFrame, a summary
        DataFrame (possibly empty) and a dict of correlation DataFrames. If any
        exception occurs it is reported through ``st.error`` and
        ``(empty DataFrame, empty DataFrame, {})`` is returned instead.
    """
    try:
        main_file = os.path.join(OUTPUT_DIR, "fed_sentiment_returns.csv")
        if os.path.exists(main_file):
            return _load_cached_outputs(main_file)

        # All three files must exist before rebuilding. The processed-documents
        # file is only checked for existence here; it is not read in this function.
        raw_inputs = [
            os.path.join(PROCESSED_DIR, "processed_fed_documents.csv"),
            os.path.join(ANALYSIS_DIR, "sentiment_analysis.csv"),
            os.path.join("raw_data", "new_combined_dates.csv"),
        ]
        if all(os.path.exists(path) for path in raw_inputs):
            return _build_outputs_from_raw()

        # If no data found, create dummy data for demonstration
        st.warning("No processed data found. Loading sample data for demonstration.")
        return _make_sample_data()

    except Exception as e:
        # Best-effort: surface the problem in the UI and hand back empty results
        # so the rest of the dashboard can still render.
        st.error(f"Error loading data: {e}")
        return pd.DataFrame(), pd.DataFrame(), {}