<a href="https://colab.research.google.com/github/gkjrtech/initial-setup/blob/main/Stock_AI_NEWS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install yfinance newspaper3k transformers torch plotly pandas scikit-learn lxml_html_clean

Collecting lxml_html_clean
  Downloading lxml_html_clean-0.4.3-py3-none-any.whl.metadata (2.3 kB)
Downloading lxml_html_clean-0.4.3-py3-none-any.whl (14 kB)
Installing collected packages: lxml_html_clean
Successfully installed lxml_html_clean-0.4.3


In [7]:
#@title üöÄ Fully Integrated & Error-Proof Market Hub
ticker = "RIVN" #@param {type:"string"}
timeframe = "3mo" #@param ["1mo", "3mo", "6mo", "1y", "2y"]
interval = "1d" #@param ["1d", "60m"]
show_advanced_indicators = True #@param {type:"boolean"}

import yfinance as yf
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from newspaper import Article
from transformers import pipeline
import warnings

warnings.filterwarnings("ignore")

def run_market_hub():
    print(f"üìä Analyzing {ticker.upper()}...")

    # --- 1. DATA FETCH ---
    try:
        df = yf.download(ticker, period=timeframe, interval=interval, progress=False)
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(0)
        data = df.dropna().copy()

        # Predictive Stats (RSI)
        delta = data['Close'].diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
        rs = gain / (loss + 1e-9) # avoid division by zero
        data['RSI'] = 100 - (100 / (1 + rs))

    except Exception as e:
        print(f"‚ùå Data Error: {e}"); return

    # --- 2. MULTI-LAYER CHART ---
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.08,
                       subplot_titles=(f'{ticker} Analysis', 'RSI (Overbought/Oversold)'),
                       row_heights=[0.7, 0.3])

    # Price Chart
    fig.add_trace(go.Candlestick(x=data.index, open=data['Open'], high=data['High'],
                                 low=data['Low'], close=data['Close'], name='Price'), row=1, col=1)

    # 20-Day Moving Average
    data['MA20'] = data['Close'].rolling(window=20).mean()
    fig.add_trace(go.Scatter(x=data.index, y=data['MA20'], name='20D MA', line=dict(color='orange')), row=1, col=1)

    # RSI
    fig.add_trace(go.Scatter(x=data.index, y=data['RSI'], name='RSI', line=dict(color='magenta')), row=2, col=1)
    fig.add_hline(y=70, line_dash="dash", line_color="red", row=2, col=1)
    fig.add_hline(y=30, line_dash="dash", line_color="green", row=2, col=1)

    fig.update_layout(template="plotly_dark", xaxis_rangeslider_visible=False, height=700)
    fig.show()

    # --- 3. AI NEWS (WITH TOKEN PROTECTION) ---
    print(f"\nüì∞ Finding latest news...")
    try:
        raw_news = yf.Ticker(ticker).news
        if not raw_news:
            print("No news found.")
            return

        # Fetch URL securely
        latest = raw_news[0]
        url = latest.get('link') or latest.get('content', {}).get('canonicalUrl', {}).get('url')

        article = Article(url)
        article.download()
        article.parse()

        # FIX: Manual Truncation to avoid "Index out of range"
        # 1024 tokens is roughly 3000-4000 characters.
        safe_text = article.text[:3500]

        print("ü§ñ AI is summarizing...")
        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

        # ADDED: truncation=True to prevent the crash you saw
        summary = summarizer(safe_text, max_length=150, min_length=60,
                             do_sample=False, truncation=True)

        print(f"\n{'='*60}\nTITLE: {article.title}\n{'-'*60}")
        print(f"AI SUMMARY (~100 words):\n{summary[0]['summary_text']}\n{'='*60}")

    except Exception as e:
        print(f"AI News Error: {e}")

run_market_hub()

üìä Analyzing RIVN...



üì∞ Finding latest news...
ü§ñ AI is summarizing...


Device set to use cpu
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.



TITLE: Rivian and Shake Shack upgraded, Lennar gets several downgrades
------------------------------------------------------------
AI SUMMARY (~100 words):
JP Morgan lifts its rating on Shake Shack from underweight to neutral. Bank of America, RBC and Evercore ISI all lowering their ratings on that stock to underperform. Lennar getting hit with several downgrades downgrades on Wall Street. The moves come after Lennar's fourth quarter results, the company delivered lower than expected earnings.


In [9]:
#@title üèõÔ∏è Integrated Quantitative Intelligence Hub v4.0
#@markdown Fill in the parameters and hit play. This engine calculates weighted confidence scores.
ticker = "IBIT" #@param {type:"string"}
timeframe = "6mo" #@param ["1mo", "3mo", "6mo", "1y", "2y"]
interval = "1d" #@param ["1d", "60m"]

import yfinance as yf
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from newspaper import Article
from transformers import pipeline, AutoTokenizer
import warnings

warnings.filterwarnings("ignore")

def calculate_indicators(data):
    # Trend: EMA 9/21 & MACD
    data['EMA9'] = data['Close'].ewm(span=9, adjust=False).mean()
    data['EMA21'] = data['Close'].ewm(span=21, adjust=False).mean()
    data['MACD'] = data['Close'].ewm(span=12, adjust=False).mean() - data['Close'].ewm(span=26, adjust=False).mean()
    data['Signal'] = data['MACD'].ewm(span=9, adjust=False).mean()

    # Momentum: RSI
    delta = data['Close'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / (loss + 1e-9)
    data['RSI'] = 100 - (100 / (1 + rs))

    # Volatility: Bollinger Bands
    data['MA20'] = data['Close'].rolling(window=20).mean()
    data['STD20'] = data['Close'].rolling(window=20).std()
    data['Upper'] = data['MA20'] + (data['STD20'] * 2)
    data['Lower'] = data['MA20'] - (data['STD20'] * 2)

    # Volume: OBV
    data['OBV'] = (np.sign(data['Close'].diff()) * data['Volume']).fillna(0).cumsum()

    return data

def analyze_and_score(data):
    last = data.iloc[-1]
    prev = data.iloc[-2]
    scores = []

    # Formula Component 1: RSI (Weight 10)
    rsi_val = last['RSI']
    if rsi_val < 30:
        rsi_sig, rsi_conf = "BUY (Oversold)", 10
    elif rsi_val > 70:
        rsi_sig, rsi_conf = "SELL (Overbought)", 10
    else:
        rsi_sig, rsi_conf = "Neutral", 5
    scores.append(("RSI", rsi_sig, rsi_conf, 10))

    # Formula Component 2: MACD Crossover (Weight 15)
    # Check if a cross happened in the last 48 hours for max confidence
    macd_bull = last['MACD'] > last['Signal'] and prev['MACD'] <= prev['Signal']
    macd_bear = last['MACD'] < last['Signal'] and prev['MACD'] >= prev['Signal']

    if macd_bull:
        macd_sig, macd_conf = "BUY (Bull Cross)", 15
    elif macd_bear:
        macd_sig, macd_conf = "SELL (Bear Cross)", 15
    else:
        # Holding the trend gets partial credit
        macd_sig = "Bullish Hold" if last['MACD'] > last['Signal'] else "Bearish Hold"
        macd_conf = 8
    scores.append(("MACD", macd_sig, macd_conf, 15))

    # Formula Component 3: EMA 9/21 Cross (Weight 15)
    ema_bull = last['EMA9'] > last['EMA21']
    ema_cross = (last['EMA9'] > last['EMA21'] and prev['EMA9'] <= prev['EMA21']) or \
                (last['EMA9'] < last['EMA21'] and prev['EMA9'] >= prev['EMA21'])

    ema_sig = "BUY" if ema_bull else "SELL"
    ema_conf = 15 if ema_cross else 10
    scores.append(("EMA Cross", ema_sig, ema_conf, 15))

    # Final Weighted Calculation
    current_total = sum([s[2] for s in scores])
    max_possible = sum([s[3] for s in scores])
    confidence_pct = (current_total / max_possible) * 100

    overall_bias = "BULLISH" if last['Close'] > last['MA20'] else "BEARISH"

    return scores, confidence_pct, overall_bias

def run_hub():
    print(f"üì° Accessing Market Data for {ticker.upper()}...")
    df = yf.download(ticker, period=timeframe, interval=interval, progress=False)
    if df.empty:
        print("‚ùå No data found."); return
    if isinstance(df.columns, pd.MultiIndex): df.columns = df.columns.get_level_values(0)
    data = calculate_indicators(df.dropna().copy())

    # Create Professional Layout
    fig = make_subplots(rows=3, cols=1, shared_xaxes=True, vertical_spacing=0.05,
                       subplot_titles=('Price & Bollinger Bands', 'On-Balance Volume (OBV)', 'RSI Momentum'),
                       row_heights=[0.5, 0.25, 0.25])

    # Subplot 1: Price
    fig.add_trace(go.Candlestick(x=data.index, open=data['Open'], high=data['High'], low=data['Low'], close=data['Close'], name='Price'), row=1, col=1)
    fig.add_trace(go.Scatter(x=data.index, y=data['Upper'], line=dict(color='rgba(255,255,255,0.2)', dash='dot'), showlegend=False), row=1, col=1)
    fig.add_trace(go.Scatter(x=data.index, y=data['Lower'], line=dict(color='rgba(255,255,255,0.2)', dash='dot'), fill='tonexty', fillcolor='rgba(173,216,230,0.05)', name='Bollinger'), row=1, col=1)

    # Subplot 2: OBV
    fig.add_trace(go.Scatter(x=data.index, y=data['OBV'], name='OBV', line=dict(color='yellow')), row=2, col=1)

    # Subplot 3: RSI
    fig.add_trace(go.Scatter(x=data.index, y=data['RSI'], name='RSI', line=dict(color='magenta')), row=3, col=1)
    fig.add_hline(y=70, line_dash="dash", line_color="red", annotation_text="Overbought", row=3, col=1)
    fig.add_hline(y=30, line_dash="dash", line_color="green", annotation_text="Oversold", row=3, col=1)

    fig.update_layout(template="plotly_dark", xaxis_rangeslider_visible=False, height=900, title=f"Intelligence Dashboard: {ticker.upper()}")
    fig.show()

    # Scorecard Deep Dive
    scores, final_conf, bias = analyze_and_score(data)
    print(f"\n{'='*65}\nüî¨ QUANTITATIVE CONFIDENCE SCORECARD: {ticker.upper()}\n{'='*65}")
    print(f"{'Statistic':<15} | {'Signal':<20} | {'Score Weight'}")
    print(f"{'-'*15}-|-{'-'*20}-|-{'-'*12}")
    for s in scores:
        print(f"{s[0]:<15} | {s[1]:<20} | {s[2]}/{s[3]}")

    print(f"\nOVERALL BIAS: {bias}")
    print(f"AGGREGATE CONFIDENCE SCORE: {final_conf:.1f}%")
    print(f"{'‚≠ê HIGH CONVICTION' if final_conf > 75 else '‚ö†Ô∏è MODERATE CONVICTION' if final_conf > 50 else '‚ùå LOW CONVICTION'}")
    print("="*65)

    # AI Integration
    try:
        news = yf.Ticker(ticker).news
        if news:
            url = news[0].get('link') or news[0].get('content', {}).get('canonicalUrl', {}).get('url')
            art = Article(url); art.download(); art.parse()
            tok = AutoTokenizer.from_pretrained("facebook/bart-large-cnn", model_max_length=1024)
            sumz = pipeline("summarization", model="facebook/bart-large-cnn", tokenizer=tok)
            summary = sumz(art.text[:3000], max_length=140, min_length=70, truncation=True)
            print(f"\nüì∞ RECENT NEWS SUMMARY: {art.title}\n{summary[0]['summary_text']}")
    except Exception as e: print(f"\nNews processing skipped or failed: {e}")

run_hub()

üì° Accessing Market Data for IBIT...



üî¨ QUANTITATIVE CONFIDENCE SCORECARD: IBIT
Statistic       | Signal               | Score Weight
----------------|----------------------|-------------
RSI             | Neutral              | 5/10
MACD            | Bullish Hold         | 8/15
EMA Cross       | SELL                 | 10/15

OVERALL BIAS: BEARISH
AGGREGATE CONFIDENCE SCORE: 57.5%
‚ö†Ô∏è MODERATE CONVICTION


Device set to use cpu



üì∞ RECENT NEWS SUMMARY: BC-Most Active Stocks
Nasdaq composite, NYSE and NYSE American listed most active stocks. Canopy Growth Corp. was the most popular stock. Tesla was the least popular stock, followed by Apple and Amazon. Apple was the only stock to post a positive change in the last 24 hours, with a 0.3 percent rise. Amazon.com was down 0.2 percent, while Netflix was up 0.4 percent.
