In [28]:
from google.cloud import bigquery
import pandas as pd

# Initialize the BigQuery client
client = bigquery.Client()

# SQL Query to clean and aggregate the data
query = """
WITH cleaned_data AS (
  SELECT
    ticker,
    DATE(date) AS date,
    Stock_Category,
    Aggregated_Score,
    Daily_Percent_Difference,
    Next_Daily_Percent_Difference,
    AI_Score,
    `Sentiment Score`,
    Health_Score
  FROM
    `trendsense.combined_data.step_3_predictive_1`
  WHERE
    Aggregated_Score IS NOT NULL AND Aggregated_Score != 0
    -- AND Daily_Percent_Difference IS NOT NULL AND Daily_Percent_Difference != 0
    -- AND Next_Daily_Percent_Difference IS NOT NULL AND Next_Daily_Percent_Difference != 0
    AND AI_Score IS NOT NULL AND AI_Score != 0
    AND `Sentiment Score` IS NOT NULL AND `Sentiment Score` != 0
    AND Health_Score IS NOT NULL AND Health_Score != 0
)

SELECT
  ticker,
  date,
  Stock_Category,
  AVG(Aggregated_Score) AS Avg_Aggregated_Score,
  AVG(Daily_Percent_Difference) AS Avg_Daily_Percent_Difference,
  AVG(Next_Daily_Percent_Difference) AS Avg_Next_Daily_Percent_Difference,
  AVG(AI_Score) AS Avg_AI_Score,
  AVG(`Sentiment Score`) AS Avg_Sentiment_Score,
  AVG(Health_Score) AS Avg_Health_Score
FROM
  cleaned_data
GROUP BY
  ticker,
  date,
  Stock_Category
ORDER BY
  ticker,
  date;
"""

# Run the query and convert results to DataFrame
query_job = client.query(query)
df = query_job.to_dataframe()

# Count number of rows per date
df['date_count'] = df.groupby('date')['date'].transform('count')

# Filter out rows where there are fewer than 5 rows for a given date
df = df[df['date_count'] >= 5]

# Drop the helper column
df = df.drop(columns=['date_count'])

# Sort the DataFrame by ticker and date to ensure correct calculations
df = df.sort_values(['ticker', 'date'])

# Calculate 7-day rolling average for each ticker separately
df['Rolling_7day_Avg'] = df.groupby('ticker')['Avg_Aggregated_Score'].transform(
    lambda x: x.rolling(window=7, min_periods=1).mean()
)

# Add daily ranking based on Rolling_7day_Avg (1 is highest)
df['Rolling_Avg_Rank'] = df.groupby('date')['Rolling_7day_Avg'].rank(
    method='min',     # Use minimum rank for ties
    ascending=False,  # Higher values get lower rank numbers
)

# Calculate percent difference from previous day for each ticker
df['Pct_Change_From_Previous'] = df.groupby('ticker')['Avg_Aggregated_Score'].pct_change() * 100

# Add daily ranking based on percent change (1 is highest percent increase)
df['Pct_Change_Rank'] = df.groupby('date')['Pct_Change_From_Previous'].rank(
    method='min',     # Use minimum rank for ties
    ascending=False,  # Higher values get lower rank numbers
)

# Calculate the average of both rankings
df['Composite_Rank'] = (df['Rolling_Avg_Rank'] + df['Pct_Change_Rank']) / 2

# Create temporary dataframe with top 10 averages per day
daily_top_10_avg_next = (
    df.groupby('date')
    .apply(lambda x: x.nsmallest(10, 'Composite_Rank')['Avg_Next_Daily_Percent_Difference'].mean())
    .reset_index()
    .rename(columns={0: 'Top_10_Next_Day_Avg'})
)

daily_top_10_avg_today = (
    df.groupby('date')
    .apply(lambda x: x.nsmallest(10, 'Composite_Rank')['Avg_Daily_Percent_Difference'].mean())
    .reset_index()
    .rename(columns={0: 'Top_10_Today_Day_Avg'})
)

# Merge the daily averages back to the main dataframe
df = df.merge(daily_top_10_avg_next, on='date', how='left')
df = df.merge(daily_top_10_avg_today, on='date', how='left')


# Ensure data is sorted by date
df = df.sort_values('date')

# Aggregate by date: take the average of Top_10_Next_Day_Avg per day
daily_cumulative = (
    df.groupby('date')['Top_10_Next_Day_Avg']
    .mean()  # Take the average instead of sum
    .cumsum()  # Compute cumulative sum of daily averages
    .reset_index()
    .rename(columns={'Top_10_Next_Day_Avg': 'Cumulative_Top_10_Score'})
)

# Merge back to main dataframe
df = df.merge(daily_cumulative, on='date', how='left')


# Save the DataFrame to a CSV file
csv_file_path = 'aggregated_data_with_top_10_analysis.csv'
df.to_csv(csv_file_path, index=False)

print(f"Data has been exported to {csv_file_path}")

# Display some summary statistics
print("\nSample of top 10 performance:")
sample_days = df['date'].unique()[:3]  # Get first 3 unique dates
for day in sample_days:
    print(f"\nDate: {day}")
    print(df[df['date'] == day].nsmallest(10, 'Composite_Rank')[
        ['ticker', 'Composite_Rank', 'Avg_Next_Daily_Percent_Difference', 'Top_10_Next_Day_Avg']
    ])

# Calculate overall average performance of top 10 strategy
overall_avg = df['Top_10_Next_Day_Avg'].mean()
print(f"\nOverall average next-day performance of top 10 strategy: {overall_avg:.2f}%")

Data has been exported to aggregated_data_with_top_10_analysis.csv

Sample of top 10 performance:

Date: 2024-12-17
  ticker  Composite_Rank  Avg_Next_Daily_Percent_Difference  \
0   AAPL             NaN                          -2.142178   
1     MU             NaN                          -4.327806   
2     ZG             NaN                           1.878184   
3    TDS             NaN                          -2.015413   
4   PLTR             NaN                          -3.871484   
5    SMR             NaN                         -11.976041   
6    HAL             NaN                          -3.824939   
7   META             NaN                          -3.591954   
8   ACHR             NaN                          -1.540158   
9   NVDA             NaN                          -1.135053   

   Top_10_Next_Day_Avg  
0             -4.29219  
1             -4.29219  
2             -4.29219  
3             -4.29219  
4             -4.29219  
5             -4.29219  
6             -

  .apply(lambda x: x.nsmallest(10, 'Composite_Rank')['Avg_Next_Daily_Percent_Difference'].mean())
  .apply(lambda x: x.nsmallest(10, 'Composite_Rank')['Avg_Daily_Percent_Difference'].mean())


In [36]:
# Rev 2

import openai
import pandas as pd
import re

# Initialize the OpenAI client (replace 'your_api_key' with your actual key)
client = openai.OpenAI(api_key="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")

def extract_numeric_score(response):
    """Extracts the first valid float from the OpenAI response."""
    match = re.search(r"-?\d+(\.\d+)?", response)
    if match:
        return float(match.group(0))
    return None

def get_financial_impact(title, ticker):
    """Analyzes financial market impact from the perspective of a specific ticker."""
    try:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are an expert financial analyst specializing in stock market movements."},
                {"role": "user", "content": f"""
                Evaluate the financial market impact of this headline from the perspective of **{ticker}**. Assign a **precise** numerical score between -10 and 10.

                **GUIDELINES:**
                - **Perspective Matters**: The score should reflect how this affects **{ticker}**, not just the industry as a whole.
                - **Competitive Awareness**: If the news benefits a competitor, it may negatively impact **{ticker}**.
                - **Market Sentiment**: Weigh factors like stock movements, investor reactions, regulatory concerns, or sector trends.
                - **STRICT FORMAT**: The response must contain only a **single** numerical value (e.g., -7.5, 3.2, 0). No explanations or extra text.

                **News Headline:** "{title}"

                **ONLY OUTPUT A SINGLE NUMBER:**"""}
            ],
            max_tokens=10,
            temperature=0.5
        )

        raw_response = completion.choices[0].message.content.strip()
        
        score = extract_numeric_score(raw_response)
        if score is not None and -10 <= score <= 10:
            return score
        else:
            print(f"Invalid numeric response for '{title}' (Ticker: {ticker}): {raw_response}")
            return None

    except Exception as e:
        print(f"Error processing title: {title} (Ticker: {ticker})\nError: {e}")
        return None

# Load dataset
df = pd.read_csv("financial_news.csv")

# Ensure the dataset contains the required columns
if "title" in df.columns and "ticker" in df.columns:
    df["financial_impact_score"] = df.apply(lambda row: get_financial_impact(row["title"], row["ticker"]), axis=1)

# Save the updated dataset
df.to_csv("financial_news_with_scoresCL4.csv", index=False)

print("Data saved to financial_news_with_scores.csv")














Data saved to financial_news_with_scores.csv


In [None]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
from google.cloud import bigquery
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Define BigQuery dataset and table
PROJECT_ID = "trendsense"
DATASET_ID = "stock_data"
TABLE_ID = "stock_data_history"

# Define the list of stock tickers
TICKERS = [
    'AAPL', 'GOOGL', 'MSFT', 'ASTS', 'PTON', 'GSAT', 'PLTR', 'SMR', 'ACHR',
    'BWXT', 'ARBK', 'AMD', 'NVDA', 'GME', 'MU', 'TSLA', 'NFLX', 'ZG',
    'AVGO', 'SMCI', 'GLW', 'HAL', 'LMT', 'AMZN', 'CRM', 'NOW', 'CHTR', 'TDS', 'META', 'RGTI','QUBT',
    'LX', 'OKLO', 'PSIX', 'QFIN', 'RTX', 'TWLO'
]

def extract_stock_close(request):
    """Cloud Function to fetch current day stock data and save to BigQuery."""
    try:
        # Define today's date and previous business day
        today = datetime.today()
        
        # Adjust for weekends and market holidays
        start_date = today - timedelta(days=3)
        end_date = today
        
        logger.info(f"Fetching stock data from {start_date} to {end_date}")
        
        # Fetch stock data using a date range to ensure data availability
        try:
            stock_data = yf.download(TICKERS, start=start_date, end=end_date, group_by='ticker', threads=True)
        except Exception as download_error:
            logger.error(f"Failed to download stock data: {download_error}")
            return f"Failed to download stock data: {download_error}"

        # Check if data was returned
        if stock_data.empty:
            logger.warning(f"No data available for date range {start_date} to {end_date}")
            return f"No data available for date range {start_date} to {end_date}"

        # Create an empty list to store reformatted data
        formatted_data = []

        # Process each ticker to extract relevant information
        for ticker in TICKERS:
            try:
                if ticker in stock_data.columns.get_level_values(0):  # Ensure ticker exists in data
                    # Select the most recent day's data
                    ticker_data = stock_data[ticker].iloc[-1]
                    
                    # Ensure we have valid data for the current day
                    if pd.notna(ticker_data['Close']):
                        # Calculate percent difference from the previous close
                        try:
                            previous_close = stock_data[ticker].iloc[-2]['Close']
                            current_close = ticker_data['Close']
                            percent_difference = ((current_close - previous_close) / previous_close)
                        except (IndexError, TypeError):
                            previous_close = None
                            percent_difference = None

                        # Append today's data
                        formatted_data.append({
                            "Date": today.strftime('%Y-%m-%d'),
                            "Ticker": ticker,
                            "Close": ticker_data['Close'],
                            "Volume": ticker_data['Volume'],
                            "High": ticker_data['High'],
                            "Low": ticker_data['Low'],
                            "Open": ticker_data['Open'],
                            "Percent_Difference": percent_difference
                        })
            except Exception as ticker_error:
                logger.error(f"Error processing {ticker}: {ticker_error}")
                continue

        # Convert the list of dictionaries to a DataFrame
        reformatted_data = pd.DataFrame(formatted_data)

        # Check if reformatted data is empty
        if reformatted_data.empty:
            logger.warning(f"No valid stock data available for {today}")
            return f"No valid stock data available for {today}"

        # Save to BigQuery
        try:
            client = bigquery.Client(project=PROJECT_ID)
            table_ref = f"{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"
            
            # Define job configuration
            job_config = bigquery.LoadJobConfig(
                write_disposition=bigquery.WriteDisposition.WRITE_APPEND,  # Append data if table exists
                autodetect=True  # Automatically detect schema
            )

            # Load data to BigQuery
            job = client.load_table_from_dataframe(
                reformatted_data,
                table_ref,
                job_config=job_config
            )
            
            # Wait for job to complete and log any errors
            job.result()
            
            logger.info(f"Stock data for {today} successfully saved to {table_ref}")
            return f"Stock data for {today} successfully saved to {table_ref}"

        except Exception as bigquery_error:
            logger.error(f"BigQuery upload failed: {bigquery_error}")
            return f"BigQuery upload failed: {bigquery_error}"

    except Exception as general_error:
        logger.error(f"Unexpected error in extract_stock_close: {general_error}")
        return f"Unexpected error: {general_error}"

# Note: If this is a Google Cloud Function, you might need to add a trigger
# such as a HTTP trigger or a scheduled cloud function trigger









In [51]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Define stock tickers
TICKERS = [
    'AAPL', 'GOOGL', 'MSFT', 'ASTS', 'PTON', 'GSAT', 'PLTR', 'SMR', 'ACHR',
    'BWXT', 'ARBK', 'AMD', 'NVDA', 'GME', 'MU', 'TSLA', 'NFLX', 'ZG',
    'AVGO', 'SMCI', 'GLW', 'HAL', 'LMT', 'AMZN', 'CRM', 'NOW', 'CHTR', 'TDS', 'META', 'RGTI', 'QUBT',
    'LX', 'OKLO', 'PSIX', 'QFIN', 'RTX', 'TWLO'
]

# NASDAQ Composite Index Ticker
NASDAQ_TICKER = "^IXIC"

def extract_stock_close():
    """Fetch stock data for normal tickers (past 3 days) and NASDAQ (^IXIC) (since Dec 1, 2024)."""
    try:
        today = datetime.today()
        start_date = today - timedelta(days=3)  # Normal tickers (last 3 days)
        nasdaq_start_date = datetime(2024, 12, 1)  # Fixed year for NASDAQ

        logger.info(f"Fetching stock data from {start_date} to {today}")
        logger.info(f"Fetching NASDAQ (^IXIC) data from {nasdaq_start_date} to {today}")

        # Fetch stock data for normal tickers
        try:
            stock_data = yf.download(TICKERS, start=start_date, end=today, group_by='ticker', threads=5)
            logger.info(f"Stock data downloaded: {len(stock_data)} rows")
        except Exception as download_error:
            logger.error(f"Failed to download stock data: {download_error}")
            return
        
        # Fetch NASDAQ Composite Index (^IXIC) data
        try:
            nasdaq_data = yf.download(NASDAQ_TICKER, start=nasdaq_start_date, end=today)
            logger.info(f"NASDAQ data downloaded: {nasdaq_data.shape} rows and columns")
        except Exception as nasdaq_error:
            logger.error(f"Failed to download NASDAQ data: {nasdaq_error}")
            return

        # Debug: Print column names before fixing
        print("\nNASDAQ Column Names BEFORE FIX:", nasdaq_data.columns)

        # 🛠 FIX: Flatten NASDAQ MultiIndex Columns
        if isinstance(nasdaq_data.columns, pd.MultiIndex):
            nasdaq_data.columns = [col[0] for col in nasdaq_data.columns]
        nasdaq_data = nasdaq_data.reset_index()  # Convert Date index into a column

        # Detect correct column names dynamically
        column_map = {
            "Close": None,
            "Open": None,
            "High": None,
            "Low": None,
            "Volume": None
        }

        for col in nasdaq_data.columns:
            for key in column_map.keys():
                if key in col:
                    column_map[key] = col

        # Rename columns using detected names
        nasdaq_data = nasdaq_data.rename(columns=column_map)

        # Debug: Print final column names after renaming
        print("\nNASDAQ Column Names FINAL:", nasdaq_data.columns)

        formatted_data = []

        # Process normal tickers
        for ticker in TICKERS:
            try:
                if ticker in stock_data.columns.get_level_values(0):
                    ticker_data = stock_data[ticker].iloc[-1]  # Get latest data
                    
                    if pd.notna(ticker_data['Close']):
                        # Find previous close safely
                        previous_close = stock_data[ticker]['Close'].shift(1).iloc[-1]
                        percent_difference = None
                        
                        if pd.notna(previous_close):
                            percent_difference = ((ticker_data['Close'] - previous_close) / previous_close)

                        formatted_data.append({
                            "Date": today.strftime('%Y-%m-%d'),
                            "Ticker": ticker,
                            "Close": ticker_data['Close'],
                            "Volume": ticker_data['Volume'],
                            "High": ticker_data['High'],
                            "Low": ticker_data['Low'],
                            "Open": ticker_data['Open'],
                            "Percent_Difference": percent_difference
                        })
            except Exception as ticker_error:
                logger.error(f"Error processing {ticker}: {ticker_error}")
                continue

        # Process NASDAQ Data
        for _, row in nasdaq_data.iterrows():
            try:
                # Compute percent difference safely
                previous_close = nasdaq_data["Close"].shift(1).iloc[-1] if len(nasdaq_data) > 1 else None
                percent_difference = None

                if pd.notna(previous_close):
                    percent_difference = ((row["Close"] - previous_close) / previous_close)

                formatted_data.append({
                    "Date": row["Date"].strftime('%Y-%m-%d'),
                    "Ticker": NASDAQ_TICKER,
                    "Close": row["Close"],
                    "Volume": row["Volume"] if "Volume" in nasdaq_data.columns else None,
                    "High": row["High"] if "High" in nasdaq_data.columns else None,
                    "Low": row["Low"] if "Low" in nasdaq_data.columns else None,
                    "Open": row["Open"] if "Open" in nasdaq_data.columns else None,
                    "Percent_Difference": percent_difference
                })
            except Exception as nasdaq_error:
                logger.error(f"Error processing NASDAQ (^IXIC): {nasdaq_error}")
                continue

        # Convert to DataFrame
        reformatted_data = pd.DataFrame(formatted_data)

        # Save to CSV for debugging
        reformatted_data.to_csv("output.csv", index=False)
        logger.info("Saved output.csv for verification.")

    except Exception as general_error:
        logger.error(f"Unexpected error: {general_error}")

# Run the function locally
extract_stock_close()







INFO:__main__:Fetching stock data from 2025-01-31 17:02:50.095980 to 2025-02-03 17:02:50.095980
INFO:__main__:Fetching NASDAQ (^IXIC) data from 2024-12-01 00:00:00 to 2025-02-03 17:02:50.095980
[*********************100%***********************]  37 of 37 completed
INFO:__main__:Stock data downloaded: 2 rows
[*********************100%***********************]  1 of 1 completed
INFO:__main__:NASDAQ data downloaded: (41, 6) rows and columns
INFO:__main__:Saved output.csv for verification.



NASDAQ Column Names BEFORE FIX: MultiIndex([('Adj Close', '^IXIC'),
            (    'Close', '^IXIC'),
            (     'High', '^IXIC'),
            (      'Low', '^IXIC'),
            (     'Open', '^IXIC'),
            (   'Volume', '^IXIC')],
           names=['Price', 'Ticker'])

NASDAQ Column Names FINAL: Index(['Date', 'Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume'], dtype='object')


: 

In [43]:
# Yahoo Extract with date restriction 

import nltk
import os

# Explicitly set the nltk_data path
nltk_data_path = r"C:\Users\BryceDaniel\OneDrive - Lincoln Telephone Company\MSBA\GitHub\TrendSense\Market News\Market_News_Yahoo_Extract_Function\nltk_data"
nltk.data.path.append(nltk_data_path)

# Ensure 'punkt' is downloaded into the correct folder
nltk.download('punkt', download_dir=nltk_data_path)

import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
from textblob import TextBlob


def calculate_sentiment(text):
    try:
        analysis = TextBlob(text)
        return analysis.sentiment.polarity
    except Exception as e:
        print(f"[ERROR] Sentiment analysis failed: {e}")
        return 0

def label_sentiment(score):
    if score > 0.35:
        return "Bullish"
    elif 0.15 < score <= 0.35:
        return "Somewhat-Bullish"
    elif -0.15 <= score <= 0.15:
        return "Neutral"
    elif -0.35 <= score < -0.15:
        return "Somewhat-Bearish"
    else:
        return "Bearish"

def get_market_news(tickers, days_back=2):
    all_news = []
    today = datetime.now().date()
    cutoff_date = today - timedelta(days=days_back)

    for ticker in tickers:
        stock = yf.Ticker(ticker)
        try:
            news = stock.news
            for item in news:
                try:
                    # Extract publish timestamp and date
                    publish_timestamp = item.get('providerPublishTime', 0)
                    publish_date = datetime.fromtimestamp(publish_timestamp).date()

                    # Only process news within the desired date range
                    if publish_date >= cutoff_date:
                        title = item.get('title', '')
                        sentiment_score = calculate_sentiment(title)
                        sentiment_label = label_sentiment(sentiment_score)

                        news_item = {
                            'ticker': ticker,
                            'title': title,
                            'summary': title,  # Replicate title in the summary column
                            'publisher': item.get('publisher', ''),
                            'link': item.get('link', ''),
                            'publish_date': datetime.fromtimestamp(publish_timestamp),
                            'type': item.get('type', ''),
                            'related_tickers': ', '.join(item.get('relatedTickers', [])),
                            'source': 'yahoo',
                            'overall_sentiment_score': sentiment_score,
                            'overall_sentiment_label': sentiment_label,
                        }
                        all_news.append(news_item)
                except Exception as news_item_error:
                    print(f"[ERROR] Error processing news item: {news_item_error}")
        except Exception as e:
            print(f"[ERROR] Error retrieving news for {ticker}: {str(e)}")
    return pd.DataFrame(all_news)

def save_to_csv(df, filename):
    try:
        df.to_csv(filename, index=False)
        print(f"[INFO] Data successfully saved to {filename}")
    except Exception as e:
        print(f"[ERROR] Failed to save data to CSV: {e}")

def fetch_and_save_market_news():
    indices = ['^IXIC', '^DJI', '^RUT', '^GSPC']
    market_news = get_market_news(tickers=indices)
    if not market_news.empty:
        market_news['category'] = 'General'

    tech_stocks = [
        'AAPL', 'GOOGL', 'MSFT', 'ASTS', 'PTON', 'GSAT', 'PLTR', 'SMR', 'ACHR',
        'BWXT', 'ARBK', 'AMD', 'NVDA', 'BTC', 'GME', 'MU', 'TSLA', 'NFLX', 'ZG',
        'AVGO', 'SMCI', 'GLW', 'HAL', 'LMT', 'AMZN', 'CRM', 'NOW', 'CHTR', 'TDS', 'META','RGTI','QUBT',
        'LX', 'OKLO', 'PSIX', 'QFIN', 'RTX', 'TWLO'
    ]
    tech_news = get_market_news(tickers=tech_stocks)
    if not tech_news.empty:
        tech_news['category'] = 'Tech'

    combined_news = pd.concat([market_news, tech_news], ignore_index=True)

    if not combined_news.empty:
        save_to_csv(combined_news, "market_news.csv")
    else:
        print("[INFO] No news data to save.")

if __name__ == "__main__":
    fetch_and_save_market_news()


[nltk_data] Downloading package punkt to C:\Users\BryceDaniel\OneDrive
[nltk_data]     - Lincoln Telephone
[nltk_data]     Company\MSBA\GitHub\TrendSense\Market
[nltk_data]     News\Market_News_Yahoo_Extract_Function\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


[INFO] Data successfully saved to market_news.csv


In [1]:
!pip install pandas_gbq





Collecting pandas_gbq
  Downloading pandas_gbq-0.27.0-py2.py3-none-any.whl.metadata (3.3 kB)
Collecting pydata-google-auth>=1.5.0 (from pandas_gbq)
  Downloading pydata_google_auth-1.9.1-py2.py3-none-any.whl.metadata (2.8 kB)
Collecting google-auth-oauthlib>=0.7.0 (from pandas_gbq)
  Downloading google_auth_oauthlib-1.2.1-py2.py3-none-any.whl.metadata (2.7 kB)
Collecting requests-oauthlib>=0.7.0 (from google-auth-oauthlib>=0.7.0->pandas_gbq)
  Downloading requests_oauthlib-2.0.0-py2.py3-none-any.whl.metadata (11 kB)
Collecting oauthlib>=3.0.0 (from requests-oauthlib>=0.7.0->google-auth-oauthlib>=0.7.0->pandas_gbq)
  Downloading oauthlib-3.2.2-py3-none-any.whl.metadata (7.5 kB)
Downloading pandas_gbq-0.27.0-py2.py3-none-any.whl (37 kB)
Downloading google_auth_oauthlib-1.2.1-py2.py3-none-any.whl (24 kB)
Downloading pydata_google_auth-1.9.1-py2.py3-none-any.whl (15 kB)
Downloading requests_oauthlib-2.0.0-py2.py3-none-any.whl (24 kB)
Downloading oauthlib-3.2.2-py3-none-any.whl (151 kB)
Ins


[notice] A new release of pip is available: 24.2 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [36]:
import requests
import yfinance as yf

class StockPriceTargetRetriever:
    def __init__(self, api_key=None):
        """
        Initialize the Stock Price Target Retriever
        
        :param api_key: API key for paid services (optional)
        """
        self.api_key = api_key
    
    def get_yahoo_finance_target(self, symbol):
        """
        Retrieve price targets and recommendations from Yahoo Finance
        
        :param symbol: Stock ticker symbol
        :return: Dictionary with recommendations and price targets
        """
        try:
            # Fetch the stock information
            stock = yf.Ticker(symbol)
            
            # Fetch analyst recommendations
            recommendations = stock.recommendations
            
            # Fetch analyst price targets
            info = stock.info
            
            # Extract price target information from stock info
            price_targets = {
                'current_price': info.get('currentPrice'),
                'target_high_price': info.get('targetHighPrice'),
                'target_low_price': info.get('targetLowPrice'),
                'target_mean_price': info.get('targetMeanPrice'),
                'target_median_price': info.get('targetMedianPrice')
            }
            
            return {
                'recommendations': recommendations,
                'price_targets': price_targets
            }
        except Exception as e:
            print(f"Error fetching Yahoo Finance data: {e}")
            return None
    
    def get_alpha_vantage_overview(self, symbol):
        """
        Retrieve stock overview from Alpha Vantage
        
        :param symbol: Stock ticker symbol
        :return: Dictionary of stock overview data
        """
        if not self.api_key:
            raise ValueError("Alpha Vantage requires an API key")
        
        url = f'https://www.alphavantage.co/query?function=OVERVIEW&symbol={symbol}&apikey={self.api_key}'
        try:
            response = requests.get(url)
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            print(f"Error fetching data from Alpha Vantage: {e}")
            return None
    
    def get_financial_modeling_prep_target(self, symbol):
        """
        Retrieve price targets from Financial Modeling Prep
        
        :param symbol: Stock ticker symbol
        :return: List of price target data
        """
        if not self.api_key:
            raise ValueError("Financial Modeling Prep requires an API key")
        
        url = f'https://financialmodelingprep.com/api/v3/price-target?symbol={symbol}&apikey={self.api_key}'
        try:
            response = requests.get(url)
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            print(f"Error fetching data from Financial Modeling Prep: {e}")
            return None

def main():
    # Initialize the retriever
    retriever = StockPriceTargetRetriever()
    
    # Retrieve price targets for Apple (AAPL)
    symbol = 'ASTS'
    
    # Yahoo Finance (completely free)
    yahoo_targets = retriever.get_yahoo_finance_target(symbol)
    
    # Print results with error handling
    if yahoo_targets:
        print("Yahoo Finance Targets:")
        print("Recommendations:")
        print(yahoo_targets.get('recommendations', 'No recommendations available'))
        print("\nPrice Targets:")
        price_targets = yahoo_targets.get('price_targets', {})
        for key, value in price_targets.items():
            print(f"{key.replace('_', ' ').title()}: {value}")
    else:
        print("Failed to retrieve stock information.")

if __name__ == '__main__':
    main()

# Important Notes:
# 1. This script requires yfinance library
# 2. Install dependencies: pip install yfinance requests
# 3. Be aware of potential rate limits or changes in Yahoo Finance's structure

# Troubleshooting:
# - Ensure you have the latest version of yfinance
# - Some stock symbols might not have complete information
# - Network connectivity can affect data retrieval
   

Yahoo Finance Targets:
Recommendations:
  period  strongBuy  buy  hold  sell  strongSell
0     0m          2    3     0     0           0
1    -1m          2    3     0     0           0
2    -2m          2    3     0     0           0
3    -3m          2    3     0     0           0

Price Targets:
Current Price: 25.645
Target High Price: 53.0
Target Low Price: 15.0
Target Mean Price: 35.94
Target Median Price: 36.0


Email sent successfully.


In [25]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
from newspaper import Article
import nltk

# Ensure the required NLTK data is downloaded
nltk.download('punkt')

def fetch_article_summary(link):
    """
    Fetch and summarize the article content from a URL.
    """
    try:
        # Use Newspaper3k with headers
        article = Article(link)
        article.download()
        article.parse()
        article.nlp()
        return article.summary
    except Exception as e:
        print(f"[ERROR] Newspaper3k failed for {link}: {str(e)}. Falling back to BeautifulSoup.")

        # Fallback to BeautifulSoup
        try:
            response = requests.get(link, headers={'User-Agent': 'Mozilla/5.0'})
            response.raise_for_status()
            soup = BeautifulSoup(response.content, "html.parser")
            paragraphs = soup.find_all("p")
            content = " ".join([p.get_text() for p in paragraphs])
            return content[:500] + "..." if len(content) > 500 else content
        except Exception as bs_error:
            print(f"[ERROR] BeautifulSoup also failed for {link}: {str(bs_error)}")
            return "No summary available."




def get_market_news(tickers):
    """
    Fetch market news for the current day, capturing all available fields and generating summaries.
    Only processes news items with 'type' set to 'story'.
    """
    all_news = []
    today = datetime.now().date()
    one_day_ago = today - timedelta(days=1)

    for ticker in tickers:
        stock = yf.Ticker(ticker)

        try:
            news = stock.news
            for item in news:
                try:
                    publish_timestamp = item.get('providerPublishTime', 0)
                    publish_date = datetime.fromtimestamp(publish_timestamp).date()

                    # Filter news to include only today's and yesterday's articles
                    if publish_date >= one_day_ago:
                        # Only summarize articles with type 'story'
                        if item.get('type', '').lower() == 'story':
                            link = item.get('link', '')
                            summary = fetch_article_summary(link) if link else "No summary available."

                            news_item = {
                                'ticker': ticker,
                                'title': item.get('title', ''),
                                'publisher': item.get('publisher', ''),
                                'link': link,
                                'publish_date': datetime.fromtimestamp(publish_timestamp),
                                'summary': summary,  # Include the generated summary
                                'type': item.get('type', ''),  # Original type from Yahoo API
                                'related_tickers': ', '.join(item.get('relatedTickers', [])),  # Comma-separated related tickers
                            }
                            all_news.append(news_item)
                        else:
                            print(f"[INFO] Skipping non-story type: {item.get('type', '')}")
                except Exception as news_item_error:
                    print(f"[ERROR] Error processing news item: {news_item_error}")

        except Exception as e:
            print(f"[ERROR] Error retrieving news for {ticker}: {str(e)}")

    print(f"[INFO] Fetched {len(all_news)} news articles.")
    return pd.DataFrame(all_news)


def save_to_csv(df, output_dir="market_news"):
    """
    Save processed news data to a CSV file locally.
    """
    try:
        if df.empty:
            print("[INFO] No news data to save.")
            return None

        os.makedirs(output_dir, exist_ok=True)
        filename = f"market_news_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
        filepath = os.path.join(output_dir, filename)
        df.to_csv(filepath, index=False, encoding="utf-8")
        print(f"[INFO] News data saved locally at: {filepath}")
        return filepath
    except Exception as e:
        print(f"[ERROR] Failed to save CSV: {str(e)}")
        return None


def main():
    """
    Main function for fetching and saving market news locally.
    """
    try:
        # Fetch general market news
        indices = ['^IXIC', '^DJI', '^RUT', '^GSPC']
        market_news = get_market_news(tickers=indices)
        if not market_news.empty:
            market_news['category'] = 'General'  # Add category for general market

        # Fetch tech stock news
        tech_stocks = ['AAPL', 'GOOGL', 'MSFT']
        tech_news = get_market_news(tickers=tech_stocks)
        if not tech_news.empty:
            tech_news['category'] = 'Tech'  # Add category for tech stocks

        # Combine news
        combined_news = pd.concat([market_news, tech_news], ignore_index=True)

        # Save to CSV locally
        if not combined_news.empty:
            save_to_csv(combined_news)
        else:
            print("[INFO] No news data to save.")
    except Exception as e:
        print(f"[ERROR] Error in main function: {e}")


if __name__ == "__main__":
    main()

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\BryceDaniel\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


[INFO] Skipping non-story type: VIDEO
[INFO] Skipping non-story type: VIDEO
[ERROR] Newspaper3k failed for https://finance.yahoo.com/news/p-500-closes-record-high-213627406.html: 
**********************************************************************
  Resource [93mpunkt_tab[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt_tab')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt_tab/english/[0m

  Searched in:
    - 'C:\\Users\\BryceDaniel/nltk_data'
    - 'c:\\Users\\BryceDaniel\\AppData\\Local\\Programs\\Python\\Python312\\nltk_data'
    - 'c:\\Users\\BryceDaniel\\AppData\\Local\\Programs\\Python\\Python312\\share\\nltk_data'
    - 'c:\\Users\\BryceDaniel\\AppData\\Local\\Programs\\Python\\Python312\\lib\\nltk_data'
    - 'C:\\Users\\BryceDaniel\\AppData\\Roaming\\nltk_data'
    - 'C:\\nltk_data'
    - 'D:\\nltk_data'
    - 'E:\\nltk_data'
    - 'c:\\U