In [None]:
# !pip install yfinance --upgrade

import yfinance as yf

# Pick a stock
ticker = yf.Ticker("AAPL")

# Get upcoming and past earnings dates
earnings_dates = ticker.get_earnings_dates(limit=10)

future_earnings = earnings_dates[earnings_dates["Reported EPS"].isna()]
next_earnings = future_earnings.index[0]

print("Next earnings date:", next_earnings.date())

print(earnings_dates)


Next earnings date: 2025-02-27
                           EPS Estimate  Reported EPS  Surprise(%) Event Type
Earnings Date                                                                
2025-05-01 16:30:00-04:00          1.63          1.65         1.41   Earnings
2025-02-27 12:00:00-05:00           NaN           NaN          NaN    Meeting
2025-01-30 16:31:00-05:00          2.35          2.40         2.15   Earnings
2024-10-31 16:31:00-04:00          1.60          1.64         2.35   Earnings
2024-08-01 16:30:00-04:00          1.35          1.40         3.99   Earnings
2024-05-02 16:31:00-04:00          1.50          1.53         1.97   Earnings
2024-02-01 16:00:00-05:00          2.10          2.18         3.90   Earnings
2023-11-02 16:30:00-04:00          1.39          1.46         4.92   Earnings
2023-08-03 16:30:00-04:00          1.19          1.26         5.49   Earnings
2023-05-04 16:30:00-04:00          1.43          1.52         6.03   Earnings


In [1]:
import yfinance as yf
import pandas as pd
import os
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

def safe_to_csv(data, filename, ticker_symbol):
    """Safely convert data to CSV with error handling"""
    try:
        if data is not None and not data.empty:
            # Create filename with ticker symbol
            full_filename = f"{ticker_symbol}_{filename}"
            data.to_csv(full_filename)
            print(f"✓ Saved {full_filename} ({len(data)} records)")
            return True
        else:
            print(f"✗ No data for {filename}")
            return False
    except Exception as e:
        print(f"✗ Error saving {filename}: {str(e)}")
        return False

def extract_all_stock_data(symbol):
    """Extract comprehensive stock data from yfinance API"""
    print(f"\n=== Extracting data for {symbol} ===")
    ticker = yf.Ticker(symbol)
    
    # Create directory for the stock
    os.makedirs(f"{symbol}_data", exist_ok=True)
    os.chdir(f"{symbol}_data")
    
    # 1. Basic Info
    try:
        info = ticker.info
        if info:
            info_df = pd.DataFrame([info])
            safe_to_csv(info_df, "basic_info.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting basic info: {e}")
    
    # 2. Historical Data (multiple periods)
    periods = ["1mo", "3mo", "6mo", "1y", "2y", "5y", "10y", "ytd", "max"]
    for period in periods:
        try:
            hist = ticker.history(period=period)
            if not hist.empty:
                safe_to_csv(hist, f"history_{period}.csv", symbol)
        except Exception as e:
            print(f"✗ Error getting {period} history: {e}")
    
    # 3. Earnings Data
    try:
        earnings = ticker.get_earnings_dates(limit=40)  # More earnings dates
        safe_to_csv(earnings, "earnings_dates.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting earnings dates: {e}")
    
    try:
        quarterly_earnings = ticker.quarterly_earnings
        safe_to_csv(quarterly_earnings, "quarterly_earnings.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting quarterly earnings: {e}")
    
    try:
        yearly_earnings = ticker.earnings
        safe_to_csv(yearly_earnings, "yearly_earnings.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting yearly earnings: {e}")
    
    # 4. Financial Statements
    try:
        financials = ticker.financials
        safe_to_csv(financials, "financials.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting financials: {e}")
    
    try:
        quarterly_financials = ticker.quarterly_financials
        safe_to_csv(quarterly_financials, "quarterly_financials.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting quarterly financials: {e}")
    
    try:
        balance_sheet = ticker.balance_sheet
        safe_to_csv(balance_sheet, "balance_sheet.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting balance sheet: {e}")
    
    try:
        quarterly_balance_sheet = ticker.quarterly_balance_sheet
        safe_to_csv(quarterly_balance_sheet, "quarterly_balance_sheet.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting quarterly balance sheet: {e}")
    
    try:
        cashflow = ticker.cashflow
        safe_to_csv(cashflow, "cashflow.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting cashflow: {e}")
    
    try:
        quarterly_cashflow = ticker.quarterly_cashflow
        safe_to_csv(quarterly_cashflow, "quarterly_cashflow.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting quarterly cashflow: {e}")
    
    # 5. Dividend and Split Data
    try:
        dividends = ticker.dividends
        if not dividends.empty:
            safe_to_csv(dividends.to_frame(), "dividends.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting dividends: {e}")
    
    try:
        splits = ticker.splits
        if not splits.empty:
            safe_to_csv(splits.to_frame(), "splits.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting splits: {e}")
    
    # 6. Options Data
    try:
        options_dates = ticker.options
        if options_dates:
            print(f"Available options dates: {len(options_dates)}")
            # Get options for first few expiration dates
            for i, date in enumerate(options_dates[:3]):  # Limit to first 3 dates
                try:
                    options = ticker.option_chain(date)
                    if hasattr(options, 'calls') and not options.calls.empty:
                        safe_to_csv(options.calls, f"options_calls_{date}.csv", symbol)
                    if hasattr(options, 'puts') and not options.puts.empty:
                        safe_to_csv(options.puts, f"options_puts_{date}.csv", symbol)
                except Exception as e:
                    print(f"✗ Error getting options for {date}: {e}")
    except Exception as e:
        print(f"✗ Error getting options: {e}")
    
    # 7. Institutional Holders
    try:
        institutional_holders = ticker.institutional_holders
        safe_to_csv(institutional_holders, "institutional_holders.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting institutional holders: {e}")
    
    try:
        major_holders = ticker.major_holders
        safe_to_csv(major_holders, "major_holders.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting major holders: {e}")
    
    try:
        mutual_fund_holders = ticker.mutualfund_holders
        safe_to_csv(mutual_fund_holders, "mutual_fund_holders.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting mutual fund holders: {e}")
    
    # 8. Analyst Data
    try:
        recommendations = ticker.recommendations
        safe_to_csv(recommendations, "analyst_recommendations.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting recommendations: {e}")
    
    try:
        recommendations_summary = ticker.recommendations_summary
        safe_to_csv(recommendations_summary, "recommendations_summary.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting recommendations summary: {e}")
    
    try:
        analyst_price_target = ticker.analyst_price_target
        if analyst_price_target:
            target_df = pd.DataFrame([analyst_price_target])
            safe_to_csv(target_df, "analyst_price_target.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting analyst price target: {e}")
    
    # 9. Sustainability/ESG Data
    try:
        sustainability = ticker.sustainability
        safe_to_csv(sustainability, "sustainability_esg.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting sustainability data: {e}")
    
    # 10. Insider Trading
    try:
        insider_transactions = ticker.insider_transactions
        safe_to_csv(insider_transactions, "insider_transactions.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting insider transactions: {e}")
    
    try:
        insider_purchases = ticker.insider_purchases
        safe_to_csv(insider_purchases, "insider_purchases.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting insider purchases: {e}")
    
    try:
        insider_roster_holders = ticker.insider_roster_holders
        safe_to_csv(insider_roster_holders, "insider_roster_holders.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting insider roster holders: {e}")
    
    # 11. News
    try:
        news = ticker.news
        if news:
            news_df = pd.DataFrame(news)
            safe_to_csv(news_df, "news.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting news: {e}")
    
    # 12. Calendar Events
    try:
        calendar = ticker.calendar
        safe_to_csv(calendar, "calendar_events.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting calendar: {e}")
    
    # 13. ISIN
    try:
        isin = ticker.isin
        if isin:
            isin_df = pd.DataFrame([{"ISIN": isin}])
            safe_to_csv(isin_df, "isin.csv", symbol)
    except Exception as e:
        print(f"✗ Error getting ISIN: {e}")
    
    # Special: Next earnings date (from your original code)
    try:
        earnings_dates = ticker.get_earnings_dates(limit=10)
        if not earnings_dates.empty:
            future_earnings = earnings_dates[earnings_dates["Reported EPS"].isna()]
            if not future_earnings.empty:
                next_earnings = future_earnings.index[0]
                next_earnings_df = pd.DataFrame([{
                    "Next_Earnings_Date": next_earnings.date(),
                    "Extracted_On": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                }])
                safe_to_csv(next_earnings_df, "next_earnings_date.csv", symbol)
                print(f"\n🎯 Next earnings date: {next_earnings.date()}")
    except Exception as e:
        print(f"✗ Error getting next earnings date: {e}")
    
    # Go back to parent directory
    os.chdir("..")
    print(f"\n✅ Completed data extraction for {symbol}")
    print(f"📁 All files saved in '{symbol}_data' directory")

# Main execution
if __name__ == "__main__":
    # You can change this to any stock symbol
    STOCK_SYMBOL = "AAPL"
    
    # Install required package (uncomment if needed)
    # !pip install yfinance --upgrade
    
    print("🚀 Starting comprehensive stock data extraction...")
    print(f"📊 Target stock: {STOCK_SYMBOL}")
    
    extract_all_stock_data(STOCK_SYMBOL)
    
    print("\n" + "="*50)
    print("📈 EXTRACTION COMPLETE!")
    print("="*50)
    print(f"Check the '{STOCK_SYMBOL}_data' folder for all CSV files")
    print("\nFiles may include:")
    print("- Basic company info")
    print("- Historical prices (multiple timeframes)")
    print("- Financial statements (annual & quarterly)")
    print("- Earnings data")
    print("- Dividend & split history")
    print("- Options data")
    print("- Analyst recommendations")
    print("- Institutional holdings")
    print("- Insider trading")
    print("- ESG/Sustainability data")
    print("- Recent news")
    print("- And more!")

🚀 Starting comprehensive stock data extraction...
📊 Target stock: AAPL

=== Extracting data for AAPL ===
✓ Saved AAPL_basic_info.csv (1 records)
✓ Saved AAPL_history_1mo.csv (22 records)
✓ Saved AAPL_history_3mo.csv (63 records)
✓ Saved AAPL_history_6mo.csv (127 records)
✓ Saved AAPL_history_1y.csv (250 records)
✓ Saved AAPL_history_2y.csv (502 records)
✓ Saved AAPL_history_5y.csv (1256 records)
✓ Saved AAPL_history_10y.csv (2515 records)
✓ Saved AAPL_history_ytd.csv (174 records)
✓ Saved AAPL_history_max.csv (11279 records)
✓ Saved AAPL_earnings_dates.csv (40 records)
✗ No data for quarterly_earnings.csv
✗ No data for yearly_earnings.csv
✓ Saved AAPL_financials.csv (39 records)
✓ Saved AAPL_quarterly_financials.csv (33 records)
✓ Saved AAPL_balance_sheet.csv (68 records)
✓ Saved AAPL_quarterly_balance_sheet.csv (65 records)
✓ Saved AAPL_cashflow.csv (53 records)
✓ Saved AAPL_quarterly_cashflow.csv (46 records)
✓ Saved AAPL_dividends.csv (88 records)
✓ Saved AAPL_splits.csv (5 records)

In [None]:
# more concise some values not there
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

def calculate_ema(data, period):
    """Calculate Exponential Moving Average"""
    return data.ewm(span=period).mean()

def calculate_rsi(data, period=14):
    """Calculate Relative Strength Index"""
    delta = data.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
    rs = gain / loss
    return 100 - (100 / (1 + rs))

def calculate_macd(data, fast=12, slow=26, signal=9):
    """Calculate MACD"""
    ema_fast = calculate_ema(data, fast)
    ema_slow = calculate_ema(data, slow)
    macd_line = ema_fast - ema_slow
    signal_line = calculate_ema(macd_line, signal)
    histogram = macd_line - signal_line
    return macd_line, signal_line, histogram

def detect_crossovers(fast_ma, slow_ma):
    """Detect EMA crossovers"""
    crossovers = []
    for i in range(1, len(fast_ma)):
        if pd.isna(fast_ma.iloc[i-1]) or pd.isna(slow_ma.iloc[i-1]):
            continue
        if fast_ma.iloc[i-1] <= slow_ma.iloc[i-1] and fast_ma.iloc[i] > slow_ma.iloc[i]:
            crossovers.append(('bullish', fast_ma.index[i]))
        elif fast_ma.iloc[i-1] >= slow_ma.iloc[i-1] and fast_ma.iloc[i] < slow_ma.iloc[i]:
            crossovers.append(('bearish', fast_ma.index[i]))
    return crossovers

def analyze_options_data(ticker, symbol):
    """Analyze options for IV changes and delta analysis"""
    try:
        options_dates = ticker.options[:3]  # First 3 expiration dates
        all_options = []
        
        for date in options_dates:
            try:
                chain = ticker.option_chain(date)
                
                # Process calls
                calls = chain.calls.copy()
                calls['type'] = 'call'
                calls['expiration'] = date
                calls['days_to_expiry'] = (pd.to_datetime(date) - pd.Timestamp.now()).days
                
                # Process puts  
                puts = chain.puts.copy()
                puts['type'] = 'put'
                puts['expiration'] = date
                puts['days_to_expiry'] = (pd.to_datetime(date) - pd.Timestamp.now()).days
                
                # Combine and filter for liquid options
                options = pd.concat([calls, puts])
                options = options[options['volume'] > 10]  # Only liquid options
                
                # Calculate additional metrics
                if 'impliedVolatility' in options.columns:
                    options['iv_rank'] = options['impliedVolatility'].rank(pct=True)
                    options['high_iv'] = options['impliedVolatility'] > options['impliedVolatility'].quantile(0.8)
                
                all_options.append(options)
                
            except Exception as e:
                print(f"Error processing options for {date}: {e}")
                continue
        
        if all_options:
            combined_options = pd.concat(all_options, ignore_index=True)
            # Focus on most relevant columns
            key_columns = ['strike', 'lastPrice', 'bid', 'ask', 'volume', 'openInterest', 
                          'impliedVolatility', 'delta', 'type', 'expiration', 'days_to_expiry', 
                          'iv_rank', 'high_iv']
            available_columns = [col for col in key_columns if col in combined_options.columns]
            return combined_options[available_columns]
            
    except Exception as e:
        print(f"Error analyzing options: {e}")
    
    return None

def create_consolidated_dataset(symbol):
    """Create consolidated dataset with technical indicators"""
    print(f"\n=== Creating consolidated dataset for {symbol} ===")
    ticker = yf.Ticker(symbol)
    
    # 1. CORE DATASET: Historical data with technical indicators
    print("📈 Fetching historical data and calculating indicators...")
    hist = ticker.history(period="2y")  # 2 years of data for good indicator calculation
    
    if hist.empty:
        print("❌ No historical data available")
        return
    
    # Calculate technical indicators
    hist['EMA_10'] = calculate_ema(hist['Close'], 10)
    hist['EMA_20'] = calculate_ema(hist['Close'], 20)
    hist['EMA_50'] = calculate_ema(hist['Close'], 50)
    hist['SMA_20'] = hist['Close'].rolling(window=20).mean()
    hist['SMA_50'] = hist['Close'].rolling(window=50).mean()
    
    # RSI
    hist['RSI'] = calculate_rsi(hist['Close'])
    hist['RSI_Oversold'] = hist['RSI'] < 30
    hist['RSI_Overbought'] = hist['RSI'] > 70
    
    # MACD
    macd, macd_signal, macd_hist = calculate_macd(hist['Close'])
    hist['MACD'] = macd
    hist['MACD_Signal'] = macd_signal
    hist['MACD_Histogram'] = macd_hist
    hist['MACD_Bullish'] = (hist['MACD'] > hist['MACD_Signal']) & (hist['MACD'].shift(1) <= hist['MACD_Signal'].shift(1))
    
    # Bollinger Bands
    hist['BB_Middle'] = hist['SMA_20']
    bb_std = hist['Close'].rolling(window=20).std()
    hist['BB_Upper'] = hist['BB_Middle'] + (bb_std * 2)
    hist['BB_Lower'] = hist['BB_Middle'] - (bb_std * 2)
    hist['BB_Squeeze'] = (hist['BB_Upper'] - hist['BB_Lower']) / hist['BB_Middle'] < 0.1
    
    # Volume analysis
    hist['Volume_MA'] = hist['Volume'].rolling(window=20).mean()
    hist['Volume_Spike'] = hist['Volume'] > (hist['Volume_MA'] * 2)
    
    # Price change analysis
    hist['Daily_Return'] = hist['Close'].pct_change()
    hist['Volatility_20d'] = hist['Daily_Return'].rolling(window=20).std() * np.sqrt(252)
    hist['Large_Move'] = abs(hist['Daily_Return']) > hist['Daily_Return'].rolling(window=252).std() * 2
    
    # EMA crossover signals
    hist['EMA_10_20_Bull'] = (hist['EMA_10'] > hist['EMA_20']) & (hist['EMA_10'].shift(1) <= hist['EMA_20'].shift(1))
    hist['EMA_10_20_Bear'] = (hist['EMA_10'] < hist['EMA_20']) & (hist['EMA_10'].shift(1) >= hist['EMA_20'].shift(1))
    hist['EMA_20_50_Bull'] = (hist['EMA_20'] > hist['EMA_50']) & (hist['EMA_20'].shift(1) <= hist['EMA_50'].shift(1))
    hist['EMA_20_50_Bear'] = (hist['EMA_20'] < hist['EMA_50']) & (hist['EMA_20'].shift(1) >= hist['EMA_50'].shift(1))
    
    # Support/Resistance levels (simplified)
    hist['Price_Near_High'] = hist['Close'] > (hist['High'].rolling(window=20).max() * 0.98)
    hist['Price_Near_Low'] = hist['Close'] < (hist['Low'].rolling(window=20).min() * 1.02)
    
    hist.to_csv(f"{symbol}_technical_analysis.csv")
    print(f"✅ Saved {symbol}_technical_analysis.csv ({len(hist)} records)")
    
    # 2. SUMMARY SIGNALS: Recent signals and alerts
    print("🔍 Generating recent signals...")
    recent_data = hist.tail(30)  # Last 30 days
    
    signals = []
    
    # Recent crossovers
    for i, row in recent_data.iterrows():
        if row['EMA_10_20_Bull']:
            signals.append({'Date': i, 'Signal': 'EMA_10_20_Bullish_Crossover', 'Value': row['Close'], 'Strength': 'Medium'})
        if row['EMA_10_20_Bear']:
            signals.append({'Date': i, 'Signal': 'EMA_10_20_Bearish_Crossover', 'Value': row['Close'], 'Strength': 'Medium'})
        if row['EMA_20_50_Bull']:
            signals.append({'Date': i, 'Signal': 'EMA_20_50_Bullish_Crossover', 'Value': row['Close'], 'Strength': 'Strong'})
        if row['EMA_20_50_Bear']:
            signals.append({'Date': i, 'Signal': 'EMA_20_50_Bearish_Crossover', 'Value': row['Close'], 'Strength': 'Strong'})
        if row['RSI_Oversold']:
            signals.append({'Date': i, 'Signal': 'RSI_Oversold', 'Value': row['RSI'], 'Strength': 'Medium'})
        if row['RSI_Overbought']:
            signals.append({'Date': i, 'Signal': 'RSI_Overbought', 'Value': row['RSI'], 'Strength': 'Medium'})
        if row['Volume_Spike']:
            signals.append({'Date': i, 'Signal': 'Volume_Spike', 'Value': row['Volume'], 'Strength': 'High'})
        if row['Large_Move']:
            signals.append({'Date': i, 'Signal': 'Large_Price_Move', 'Value': row['Daily_Return']*100, 'Strength': 'High'})
        if row['MACD_Bullish']:
            signals.append({'Date': i, 'Signal': 'MACD_Bullish_Crossover', 'Value': row['MACD'], 'Strength': 'Medium'})
    
    if signals:
        signals_df = pd.DataFrame(signals)
        signals_df.to_csv(f"{symbol}_recent_signals.csv", index=False)
        print(f"✅ Saved {symbol}_recent_signals.csv ({len(signals_df)} signals)")
    
    # 3. OPTIONS ANALYSIS (if available)
    print("📊 Analyzing options data...")
    options_data = analyze_options_data(ticker, symbol)
    if options_data is not None and not options_data.empty:
        options_data.to_csv(f"{symbol}_options_analysis.csv", index=False)
        print(f"✅ Saved {symbol}_options_analysis.csv ({len(options_data)} records)")
    
    # 4. KEY FUNDAMENTALS (single consolidated file)
    print("📋 Extracting key fundamentals...")
    fundamentals = {}
    
    try:
        info = ticker.info
        if info:
            key_metrics = [
                'marketCap', 'enterpriseValue', 'trailingPE', 'forwardPE', 'pegRatio',
                'priceToBook', 'priceToSalesTrailing12Months', 'enterpriseToRevenue',
                'beta', '52WeekChange', 'dividendYield', 'payoutRatio',
                'trailingEps', 'forwardEps', 'bookValue', 'priceToBook',
                'returnOnAssets', 'returnOnEquity', 'revenueGrowth', 'earningsGrowth',
                'currentRatio', 'debtToEquity', 'freeCashflow', 'operatingCashflow'
            ]
            
            for metric in key_metrics:
                if metric in info:
                    fundamentals[metric] = info[metric]
            
        # Add recent price metrics
        current_price = hist['Close'][-1]
        fundamentals['current_price'] = current_price
        fundamentals['sma_20'] = hist['SMA_20'][-1]
        fundamentals['sma_50'] = hist['SMA_50'][-1]
        fundamentals['rsi'] = hist['RSI'][-1]
        fundamentals['volume_avg_20d'] = hist['Volume_MA'][-1]
        fundamentals['volatility_20d'] = hist['Volatility_20d'][-1]
        
        # Next earnings
        try:
            earnings_dates = ticker.get_earnings_dates(limit=5)
            if not earnings_dates.empty:
                future_earnings = earnings_dates[earnings_dates["Reported EPS"].isna()]
                if not future_earnings.empty:
                    fundamentals['next_earnings_date'] = str(future_earnings.index[0].date())
        except:
            pass
            
        if fundamentals:
            fund_df = pd.DataFrame([fundamentals])
            fund_df.to_csv(f"{symbol}_key_fundamentals.csv", index=False)
            print(f"✅ Saved {symbol}_key_fundamentals.csv")
        
    except Exception as e:
        print(f"❌ Error extracting fundamentals: {e}")
    
    # 5. SUMMARY REPORT
    print("📄 Generating summary report...")
    
    latest = hist.iloc[-1]
    summary = {
        'Symbol': symbol,
        'Date': str(latest.name.date()),
        'Current_Price': latest['Close'],
        'Daily_Change_Pct': latest['Daily_Return'] * 100,
        'RSI': latest['RSI'],
        'EMA_10': latest['EMA_10'],
        'EMA_20': latest['EMA_20'],
        'EMA_50': latest['EMA_50'],
        'Volume_vs_Avg': latest['Volume'] / latest['Volume_MA'] if not pd.isna(latest['Volume_MA']) else None,
        'Volatility_20d': latest['Volatility_20d'],
        'Days_Since_EMA_Cross': None,  # Could be calculated
        'Recent_Signals_Count': len(signals) if signals else 0,
        'Analysis_Date': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    }
    
    summary_df = pd.DataFrame([summary])
    summary_df.to_csv(f"{symbol}_summary.csv", index=False)
    print(f"✅ Saved {symbol}_summary.csv")
    
    print(f"\n🎯 ANALYSIS COMPLETE for {symbol}")
    print("📁 Created files:")
    print("  1. technical_analysis.csv - Full price history with all indicators")
    print("  2. recent_signals.csv - Trading signals from last 30 days") 
    print("  3. options_analysis.csv - Options data with IV analysis")
    print("  4. key_fundamentals.csv - Important company metrics")
    print("  5. summary.csv - Current snapshot")

# Main execution
if __name__ == "__main__":
    STOCK_SYMBOL = "AAPL"  # Change this to any stock symbol
    
    print("🚀 Starting streamlined technical analysis...")
    print(f"📊 Target stock: {STOCK_SYMBOL}")
    
    create_consolidated_dataset(STOCK_SYMBOL)
    
    print("\n" + "="*60)
    print("📈 STREAMLINED ANALYSIS COMPLETE!")
    print("="*60)
    print("This approach gives you:")
    print("✅ Much fewer files (5 vs 20+)")
    print("✅ All technical indicators calculated")
    print("✅ Trading signals identified") 
    print("✅ Options analysis with IV data")
    print("✅ Key fundamentals consolidated")
    print("✅ Ready for further analysis/backtesting")

🚀 Starting streamlined technical analysis...
📊 Target stock: AAPL

=== Creating consolidated dataset for AAPL ===
📈 Fetching historical data and calculating indicators...
✅ Saved AAPL_technical_analysis.csv (501 records)
🔍 Generating recent signals...
✅ Saved AAPL_recent_signals.csv (12 signals)
📊 Analyzing options data...
✅ Saved AAPL_options_analysis.csv (175 records)
📋 Extracting key fundamentals...
✅ Saved AAPL_key_fundamentals.csv
📄 Generating summary report...
✅ Saved AAPL_summary.csv

🎯 ANALYSIS COMPLETE for AAPL
📁 Created files:
  1. technical_analysis.csv - Full price history with all indicators
  2. recent_signals.csv - Trading signals from last 30 days
  3. options_analysis.csv - Options data with IV analysis
  4. key_fundamentals.csv - Important company metrics
  5. summary.csv - Current snapshot

📈 STREAMLINED ANALYSIS COMPLETE!
This approach gives you:
✅ Much fewer files (5 vs 20+)
✅ All technical indicators calculated
✅ Trading signals identified
✅ Options analysis with 

In [None]:
# uses Dolphin
import pandas as pd
import subprocess
import json
import logging

logging.basicConfig(level=logging.INFO)

# ==================================================
# OLLAMA HELPER
# ==================================================
def ollama_run(prompt: str, model: str = "dolphin-phi:2.7b") -> str:
    """
    Run a prompt against Ollama model and return text response.
    Requires `ollama` to be running locally.
    """
    try:
        result = subprocess.run(
            ["ollama", "run", model],
            input=prompt.encode("utf-8"),
            capture_output=True,
            check=True
        )
        return result.stdout.decode("utf-8").strip()
    except subprocess.CalledProcessError as e:
        logging.error(f"Ollama call failed: {e.stderr.decode('utf-8')}")
        return ""

# ==================================================
# 1. CSV Q&A WITH OLLAMA
# ==================================================
def ask_csv_question(csv_path: str, question: str) -> str:
    """
    Ask Dolphin-phi a SQL-like query about a CSV.
    """
    df = pd.read_csv(csv_path)
    csv_text = df.head(200).to_csv(index=False)  # limit rows for context
    prompt = f"""
You are a trading data analyst.

Here is a CSV (first 200 rows shown):
{csv_text}

Question: {question}

Answer concisely and clearly. If numeric filtering is needed, 
show the relevant rows as JSON.
"""
    return ollama_run(prompt)

# ==================================================
# 2. CROSSOVER RANKING WITH FUNDAMENTALS
# ==================================================
def rank_crossovers(crossover_df: pd.DataFrame, fundamentals_df: pd.DataFrame) -> str:
    """
    Rank crossovers using fundamentals context.
    Returns structured JSON output.
    """
    crossover_text = crossover_df.to_csv(index=False)
    fundamentals_text = fundamentals_df.to_csv(index=False)

    prompt = f"""
You are a financial analyst. 

Crossover events:
{crossover_text}

Fundamentals:
{fundamentals_text}

Task:
- Rank the TOP 5 crossover events by strength.
- Assign each a numeric strength score (1–10).
- Explain briefly in 1–2 sentences why, referencing fundamentals.

Return the output as a JSON array with fields:
[{{"date": "...", "signal": "...", "score": 8, "reason": "..."}}]
"""
    return ollama_run(prompt)

# ==================================================
# 3. DEMO USAGE
# ==================================================
if __name__ == "__main__":
    # Example CSV query
    answer = ask_csv_question(
        "AAPL_technical_analysis.csv",
        "Find EMA crossovers where MACD histogram was positive within 5 days."
    )
    print("\nCSV Q&A Answer:\n", answer)

    # Example crossover ranking
    try:
        crossovers = pd.read_csv("master_crossovers.csv")
        fundamentals = pd.read_csv("AAPL_key_fundamentals.csv")
        ranked = rank_crossovers(crossovers, fundamentals)
        print("\nRanked Crossovers:\n", ranked)
    except FileNotFoundError:
        print("CSV files not found — run your crossover detection script first.")



CSV Q&A Answer:
 "The key to understanding the value of a new technology in the context of the entire system lies in the way it works on different elements of the system."

The text was written by a scientist who has knowledge and expertise in the field of technology and its application within a system. The author presents their analysis and thoughts about the significance of a new technology for the entire system, focusing on its functionality and impact on various aspects of the system. They emphasize that understanding the value of the technology requires an examination of how it interacts with different parts of the system and its effects on those components.

The statement highlights the importance of considering the context and application of the new technology in order to fully comprehend its value within the broader framework of the system. The author's perspective showcases their ability to think critically about the subject and provide a comprehensive understanding of the te

In [None]:
import pandas as pd
import subprocess
import logging

logging.basicConfig(level=logging.INFO)

# ==================================================
# OLLAMA HELPER
# ==================================================
def ollama_run(prompt: str, model: str = "dolphin-phi:2.7b") -> str:
    """
    Run a prompt against Ollama model and return text response.
    """
    result = subprocess.run(
        ["ollama", "run", model],
        input=prompt.encode("utf-8"),
        capture_output=True,
        check=True
    )
    return result.stdout.decode("utf-8").strip()

# ==================================================
# STRICT CSV Q&A (uses pandas first, then LLM)
# ==================================================
def ask_csv_question(csv_path: str, question: str) -> str:
    df = pd.read_csv(csv_path)

    # EXAMPLE HARDCODED query: EMA crossover + MACD histogram positive within 5 days
    # You could expand this with a natural language -> pandas mapping later
    if "EMA" in question and "MACD" in question:
        if {"EMA_12", "EMA_26", "MACD_Histogram"}.issubset(df.columns):
            crossover = (df["EMA_12"] > df["EMA_26"]) & (df["EMA_12"].shift(1) <= df["EMA_26"].shift(1))
            df["crossover"] = crossover
            results = df[df["crossover"]]

            # check 5-day window for MACD positive
            valid = []
            for idx in results.index:
                window = df.loc[idx: idx+5]
                if (window["MACD_Histogram"] > 0).any():
                    valid.append(df.loc[idx])

            if valid:
                selected = pd.DataFrame(valid)
                return selected.to_json(orient="records", lines=True)

    # fallback: just summarize with LLM
    csv_text = df.head(50).to_csv(index=False)
    prompt = f"""
You are a CSV analysis assistant. 
Here is sample data:
{csv_text}

Question: {question}

If possible, describe how to filter this dataset with pandas (not a generic essay).
"""
    return ollama_run(prompt)

# ==================================================
# CROSSOVER RANKING
# ==================================================
def rank_crossovers(crossover_df: pd.DataFrame, fundamentals_df: pd.DataFrame) -> str:
    crossover_text = crossover_df.to_csv(index=False)
    fundamentals_text = fundamentals_df.to_csv(index=False)

    prompt = f"""
You are a financial analyst. 

Crossover events:
{crossover_text}

Fundamentals:
{fundamentals_text}

Task:
- Rank the TOP 5 crossover events by strength.
- Assign each a numeric strength score (1–10).
- Explain briefly in 1–2 sentences why.

Return as JSON:
[{{"date": "...", "signal": "...", "score": 8, "reason": "..."}}]
"""
    return ollama_run(prompt)

# ==================================================
# DEMO
# ==================================================
if __name__ == "__main__":
    try:
        # Pandas does the filtering, LLM only summarizes if fallback
        answer = ask_csv_question(
            "AAPL_technical_analysis.csv",
            "Find EMA crossovers where MACD histogram was positive within 5 days."
        )
        print("\nCSV Q&A Answer:\n", answer)

        # Crossover ranking
        crossovers = pd.read_csv("master_crossovers.csv")
        fundamentals = pd.read_csv("AAPL_key_fundamentals.csv")
        ranked = rank_crossovers(crossovers, fundamentals)
        print("\nRanked Crossovers:\n", ranked)

    except FileNotFoundError:
        print("CSV files not found — run your crossover detection script first.")


python(61879) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.



CSV Q&A Answer:
 ], [
    [0.1 for i in range(len(numbers))], [0.2 for i in range(len(numbers))],
]], 'json')


# Create an empty list called 'results' that will store the outputted data:
results = []

# Iterate through all three lists:
for index, value in enumerate(list_1):
    # If the length of values are different then return a non list-like object.
    if len(list_2[index]) != len(list_3[index]):
        return "Error, The lengths of the lists are not equal."
    # Add the current value to the 'results' list:
    results.append((list_1[index][0], list_2[index][0], list_3[index][0]))

    # Return the results:
    return results
CSV files not found — run your crossover detection script first.


In [4]:
!pip install yahoo_earnings_calendar

import datetime
from yahoo_earnings_calendar import YahooEarningsCalendar

yec = YahooEarningsCalendar()
future_ts = yec.get_next_earnings_date("AAPL")  # returns timestamp
future_date = datetime.datetime.fromtimestamp(future_ts)
print("Next earnings:", future_date)

Collecting yahoo_earnings_calendar
  Downloading yahoo_earnings_calendar-0.6.0-py3-none-any.whl.metadata (3.3 kB)
Downloading yahoo_earnings_calendar-0.6.0-py3-none-any.whl (7.9 kB)
Installing collected packages: yahoo_earnings_calendar
Successfully installed yahoo_earnings_calendar-0.6.0


Exception: Invalid Symbol or Unavailable Earnings Date