In [None]:
### Yahoo Daily News

import yfinance as yf
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from datetime import datetime, timedelta
from google.cloud import bigquery
import json

_VADER_ANALYZER = None  # shared analyzer instance, created on first use


def _get_vader_analyzer():
    """Return the shared SentimentIntensityAnalyzer, creating it on first use."""
    global _VADER_ANALYZER
    if _VADER_ANALYZER is None:
        _VADER_ANALYZER = SentimentIntensityAnalyzer()
    return _VADER_ANALYZER


def analyze_sentiment_vader(text):
    """
    Analyze sentiment of a given text using VADER and return both category and score.

    Args:
        text: The text to score (the callers in this file pass a news title).

    Returns:
        A dict with two keys:
            "category": "Positive" (compound score > 0.05), "Negative"
                (compound score < -0.05), "Neutral" (anything in between, or
                blank text) or "Unknown" (text missing / not a string, or the
                analyzer raised).
            "score": the VADER compound score, or 0.0 when none was computed.
    """
    # A missing or non-string title cannot be scored; report it the same way
    # an analyzer failure is reported instead of going through an exception.
    if not isinstance(text, str):
        return {"category": "Unknown", "score": 0.0}

    # Blank text is treated as neutral without invoking the analyzer.
    if not text.strip():
        return {"category": "Neutral", "score": 0.0}

    try:
        # Reuse one analyzer across calls rather than rebuilding it for
        # every headline.
        score = _get_vader_analyzer().polarity_scores(text)['compound']
    except Exception as e:
        print(f"Sentiment Analysis Error: {e}")
        return {"category": "Unknown", "score": 0.0}

    if score > 0.05:
        category = "Positive"
    elif score < -0.05:
        category = "Negative"
    else:
        category = "Neutral"

    return {"category": category, "score": score}

def get_market_news_with_sentiment(tickers):
    """
    Fetch market news for the current day and analyze sentiment, including sentiment score.

    For every ticker the items exposed by ``yf.Ticker(ticker).news`` are
    read, items published before yesterday (local time) are discarded, and the
    title of each remaining item is scored with ``analyze_sentiment_vader``.

    Args:
        tickers: Iterable of ticker symbols.

    Returns:
        A pandas DataFrame with one row per kept news item and the columns
        ticker, title, publisher, link, publish_date, sentiment_category,
        sentiment_score, type and related_tickers. The frame is empty (and has
        no columns) when nothing was kept.

    NOTE(review): items are expected to be dicts carrying a
    'providerPublishTime' epoch timestamp; an item without that key defaults
    to timestamp 0 and is therefore filtered out. Confirm the installed
    yfinance version still returns items in this shape.
    """
    all_news = []
    # "Today" is interpreted loosely: anything from yesterday onwards is kept.
    one_day_ago = datetime.now().date() - timedelta(days=1)

    for ticker in tickers:
        try:
            # Creating the Ticker is inside the try as well, so a failure for
            # one symbol never aborts the remaining symbols.
            news = yf.Ticker(ticker).news or []
        except Exception as e:
            print(f"Error retrieving news for {ticker}: {str(e)}")
            continue

        for item in news:
            try:
                # Convert the timestamp once and reuse it for filter and row.
                published_at = datetime.fromtimestamp(item.get('providerPublishTime', 0))
                if published_at.date() < one_day_ago:
                    continue

                title = item.get('title', '')
                sentiment_result = analyze_sentiment_vader(title)
                all_news.append({
                    'ticker': ticker,
                    'title': title,
                    'publisher': item.get('publisher', ''),
                    'link': item.get('link', ''),
                    'publish_date': published_at,
                    'sentiment_category': sentiment_result['category'],
                    'sentiment_score': sentiment_result['score'],
                    'type': item.get('type', ''),
                    # "or []" also covers a key that is present but None,
                    # which would otherwise make join() raise and drop the item.
                    'related_tickers': ', '.join(item.get('relatedTickers') or []),
                })
            except Exception as news_item_error:
                # A malformed item is skipped; the rest are still processed.
                print(f"Error processing news item: {news_item_error}")

    return pd.DataFrame(all_news)

def save_to_bigquery(df, project_id, dataset_id, table_id):
    """
    Save DataFrame to BigQuery using schema auto-detection.

    Rows are appended to ``{project_id}.{dataset_id}.{table_id}``. The
    caller's DataFrame is not modified.

    Args:
        df: DataFrame to load; must contain a 'publish_date' column.
        project_id: Google Cloud project id.
        dataset_id: BigQuery dataset id.
        table_id: BigQuery table id.

    Returns:
        True when the load job completed, False when there was nothing to
        save or the load failed (the error is printed).

    Raises:
        Whatever ``bigquery.Client(...)`` raises; client creation is outside
        the error handling, as before, so the caller sees that failure.
    """
    table_ref = f"{project_id}.{dataset_id}.{table_id}"

    # Nothing to load: return before creating a client at all.
    if df is None or df.empty:
        print("DataFrame is empty. No data to save.")
        return False

    client = bigquery.Client(project=project_id)

    try:
        # Work on a copy so the dtype conversion below does not leak into the
        # frame owned by the caller.
        payload = df.copy()
        payload['publish_date'] = pd.to_datetime(payload['publish_date'])

        # Configure the job to auto-detect schema and append data
        job_config = bigquery.LoadJobConfig(
            write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
            autodetect=True,  # Enable schema auto-detection
        )

        job = client.load_table_from_dataframe(
            payload,
            table_ref,
            job_config=job_config,
        )

        # Block until the load job has finished (raises on job failure).
        job.result()

        print(f"Successfully saved {len(payload)} rows to {table_ref}")
        return True

    except Exception as e:
        # Print enough context to diagnose schema / dtype mismatches.
        print(f"BigQuery Save Error: {str(e)}")
        print(f"DataFrame Columns: {df.columns}")
        print(f"DataFrame Sample:\n{df.head()}")
        return False

def main(request=None):
    """
    Enhanced main function for fetching and saving market news.

    Fetches news for a list of index symbols (tagged 'General') and a list of
    stock symbols (tagged 'Tech'), combines both sets and appends them to the
    BigQuery table trendsense.market_data.market_news_yahoo.

    Args:
        request: Unused. NOTE(review): presumably the HTTP request object
            handed in by the hosting platform - confirm against the deployment.

    Returns:
        A (message, status_code) tuple: 200 when the rows were saved, 204 when
        there was nothing to save, 500 when saving failed or an unexpected
        error occurred.
    """
    # Google Cloud configuration
    project_id = "trendsense"
    dataset_id = "market_data"
    table_id = "market_news_yahoo"

    try:
        # Fetch general market news for today
        indices = ['^IXIC', '^DJI', '^RUT', '^GSPC']
        market_news = get_market_news_with_sentiment(tickers=indices)
        if not market_news.empty:
            market_news['category'] = 'General'  # Add category for general market

        # Fetch tech stock news for today
        # NOTE(review): 'AMZ' may be a typo for 'AMZN' - confirm the intended symbol.
        tech_stocks = ['AAPL', 'GOOGL', 'MSFT', 'ASTS', 'PTON', 'GSAT', 'PLTR', 'SMR', 'ACHR', 'BWXT', 'ARBK', 'AMD', 'NVDA', 'BTC', 'GME', 'MU', 'TSLA', 'NFLX', 'ZG', 'AVGO', 'SMCI','GLW', 'HAL', 'LMT', 'AMZ', 'CRM', 'NOW', 'CHTR', 'TDS', 'META']
        tech_news = get_market_news_with_sentiment(tickers=tech_stocks)
        if not tech_news.empty:
            tech_news['category'] = 'Tech'  # Add category for tech stocks

        # Combine news
        combined_news = pd.concat([market_news, tech_news], ignore_index=True)

        if combined_news.empty:
            return "No news to save.", 204

        # save_to_bigquery reports failure through its return value rather
        # than by raising, so the result has to be checked explicitly;
        # otherwise a failed load would still be answered with 200.
        saved = save_to_bigquery(combined_news, project_id, dataset_id, table_id)
        if not saved:
            return "Failed to save data to BigQuery.", 500

        return "Data successfully saved to BigQuery.", 200
    except Exception as e:
        print(f"Error in main function: {e}")
        return f"Internal Server Error: {e}", 500

# Optional: for local testing. When this code is executed directly, run the
# job once; main() is called with its default request=None and the returned
# (message, status) tuple is discarded.
if __name__ == "__main__":
    main()

       

In [7]:
## Yahoo history push to BigQuery

import yfinance as yf
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from datetime import datetime, timedelta
from google.cloud import bigquery


_VADER_ANALYZER = None  # shared analyzer instance, created on first use


def _get_vader_analyzer():
    """Return the shared SentimentIntensityAnalyzer, creating it on first use."""
    global _VADER_ANALYZER
    if _VADER_ANALYZER is None:
        _VADER_ANALYZER = SentimentIntensityAnalyzer()
    return _VADER_ANALYZER


def analyze_sentiment_vader(text):
    """
    Analyze sentiment of a given text using VADER and return both category and score.

    Args:
        text: The text to score (the caller in this cell passes a news title,
            which is None when the item has no 'title' key).

    Returns:
        A dict with two keys:
            "category": "Positive" (compound score > 0.05), "Negative"
                (compound score < -0.05), "Neutral" (anything in between, or
                blank text) or "Unknown" (text missing / not a string).
            "score": the VADER compound score, or 0.0 when none was computed.
    """
    # The caller reads the title with item.get('title'), so None can arrive
    # here. Report it as "Unknown" instead of letting the analyzer raise,
    # which would abort the rest of that ticker's items in the caller.
    if not isinstance(text, str):
        return {"category": "Unknown", "score": 0.0}

    # Blank text is treated as neutral without invoking the analyzer.
    if not text.strip():
        return {"category": "Neutral", "score": 0.0}

    # Reuse one analyzer across calls rather than rebuilding it per headline.
    score = _get_vader_analyzer().polarity_scores(text)['compound']
    if score > 0.05:
        category = "Positive"
    elif score < -0.05:
        category = "Negative"
    else:
        category = "Neutral"

    return {"category": category, "score": score}


def get_market_news_with_sentiment(tickers, start_date):
    """
    Fetch market news starting from a specific date and analyze sentiment.

    For every ticker the items exposed by ``yf.Ticker(ticker).news`` are
    read, items published before ``start_date`` are discarded (items published
    on ``start_date`` itself are kept), and the title of each remaining item
    is scored with ``analyze_sentiment_vader``.

    Args:
        tickers: Iterable of ticker symbols.
        start_date: Earliest publication date to keep. A ``datetime.date``;
            a ``datetime.datetime`` is also accepted and reduced to its date.

    Returns:
        A pandas DataFrame with one row per kept news item and the columns
        ticker, title, publisher, link, publish_date, sentiment_category,
        sentiment_score, type and related_tickers. The frame is empty (and has
        no columns) when nothing was kept.

    NOTE(review): only the items that ``.news`` returns can be kept, so the
    date filter cannot reach further back than that feed does - confirm that
    it covers the intended history window.
    """
    all_news = []

    # Comparing a date with a datetime raises TypeError, so normalise first.
    if isinstance(start_date, datetime):
        start_date = start_date.date()

    for ticker in tickers:
        try:
            news = yf.Ticker(ticker).news or []
        except Exception as e:
            print(f"Error retrieving news for {ticker}: {str(e)}")
            continue

        for item in news:
            # Handle failures per item so that one malformed entry does not
            # discard the remaining items of the same ticker.
            try:
                publish_date = datetime.fromtimestamp(item.get('providerPublishTime', 0))
                if publish_date.date() < start_date:
                    continue

                title = item.get('title')
                sentiment_result = analyze_sentiment_vader(title)
                all_news.append({
                    'ticker': ticker,
                    'title': title,
                    'publisher': item.get('publisher'),
                    'link': item.get('link'),
                    'publish_date': publish_date,
                    'sentiment_category': sentiment_result['category'],
                    'sentiment_score': sentiment_result['score'],
                    'type': item.get('type'),
                    # "or []" also covers a key that is present but None.
                    'related_tickers': ', '.join(item.get('relatedTickers') or []),
                })
            except Exception as news_item_error:
                print(f"Error processing news item for {ticker}: {news_item_error}")

    return pd.DataFrame(all_news)


def save_to_bigquery(df, project_id, dataset_id, table_id):
    """
    Save DataFrame to BigQuery, automatically inferring schema and creating the table if necessary.

    Rows are appended to ``{project_id}.{dataset_id}.{table_id}``.

    Args:
        df: DataFrame to load.
        project_id: Google Cloud project id.
        dataset_id: BigQuery dataset id.
        table_id: BigQuery table id.

    Returns:
        True when the load job completed, False when there was nothing to
        save or an error occurred (the error is printed). Earlier versions
        returned None in every case, so callers that ignore the result are
        unaffected.

    Raises:
        Whatever ``bigquery.Client(...)`` raises; client creation is outside
        the error handling, as before.
    """
    table_ref = f"{project_id}.{dataset_id}.{table_id}"

    # Nothing to load: return before creating a client at all.
    if df is None or df.empty:
        print("DataFrame is empty. No data to save.")
        return False

    client = bigquery.Client(project=project_id)

    try:
        # Imported here because it is only needed for the existence check.
        from google.cloud.exceptions import NotFound

        # Only a genuine "not found" means the table is missing. Any other
        # failure (permissions, connectivity, ...) is reported by the outer
        # handler instead of being mislabelled as a missing table.
        try:
            client.get_table(table_ref)
        except NotFound:
            print(f"Table {table_ref} does not exist. It will be created automatically.")

        # Write DataFrame to BigQuery
        job_config = bigquery.LoadJobConfig(write_disposition="WRITE_APPEND")
        job = client.load_table_from_dataframe(df, table_ref, job_config=job_config)
        job.result()  # Wait for the job to complete
        print(f"Data successfully written to {table_ref}.")
        return True
    except Exception as e:
        print(f"Error saving to BigQuery: {str(e)}")
        return False


def main():
    """
    Entry point for the script.

    Collects news for the index symbols (category 'General') and the stock
    symbols (category 'Tech') published within the last 90 days, prints the
    row counts, and appends the combined rows to the historical table
    trendsense.market_data.market_news_yahoo_hist. Any exception is printed
    rather than propagated; nothing is returned.
    """
    # Google Cloud configuration
    project_id = "trendsense"
    dataset_id = "market_data"
    table_id_hist = "market_news_yahoo_hist"  # Historical table

    # Symbols for the two news groups
    indices = ['^IXIC', '^DJI', '^RUT', '^GSPC']
    tech_stocks = ['AAPL', 'GOOGL', 'MSFT', 'ASTS', 'PTON', 'GSAT', 'PLTR', 'SMR', 'ACHR', 'BWXT', 'ARBK', 'AMD', 'NVDA', 'BTC', 'GME', 'MU', 'TSLA', 'NFLX', 'ZG', 'AVGO', 'SMCI', 'GLW', 'HAL', 'LMT', 'AMZ', 'CRM', 'NOW', 'CHTR', 'TDS', 'META']

    try:
        # Oldest publication date to keep: 90 days before now
        ninety_days_back = datetime.now() - timedelta(days=90)
        start_date = ninety_days_back.date()

        # General market news
        market_news = get_market_news_with_sentiment(tickers=indices, start_date=start_date)
        print(f"General market news fetched: {len(market_news)} rows")
        if not market_news.empty:
            market_news['category'] = 'General'

        # Tech stock news
        tech_news = get_market_news_with_sentiment(tickers=tech_stocks, start_date=start_date)
        print(f"Tech stock news fetched: {len(tech_news)} rows")
        if not tech_news.empty:
            tech_news['category'] = 'Tech'

        # Stack both groups into a single frame with a fresh index
        frames = [market_news, tech_news]
        combined_news = pd.concat(frames, ignore_index=True)
        print(f"Total rows to save: {len(combined_news)}")

        # Guard clause: nothing to write
        if combined_news.empty:
            print("No news to save.")
            return

        save_to_bigquery(combined_news, project_id, dataset_id, table_id_hist)

    except Exception as e:
        print(f"Error in main function: {e}")


# Run the historical load once when this code is executed directly.
if __name__ == "__main__":
    main()




General market news fetched: 32 rows
Tech stock news fetched: 230 rows
Total rows to save: 262
Data successfully written to trendsense.market_data.market_news_yahoo_hist.


In [1]:
import nltk
# Diagnostic only: print the list of directories in nltk.data.path.
print(nltk.data.path)


['C:\\Users\\BryceDaniel/nltk_data', 'c:\\Users\\BryceDaniel\\AppData\\Local\\Programs\\Python\\Python312\\nltk_data', 'c:\\Users\\BryceDaniel\\AppData\\Local\\Programs\\Python\\Python312\\share\\nltk_data', 'c:\\Users\\BryceDaniel\\AppData\\Local\\Programs\\Python\\Python312\\lib\\nltk_data', 'C:\\Users\\BryceDaniel\\AppData\\Roaming\\nltk_data', 'C:\\nltk_data', 'D:\\nltk_data', 'E:\\nltk_data']


In [None]:
from google.cloud import bigquery

def transfer_market_news_to_hist(request):
    """
    Google Cloud Function that moves every row of market_news_yahoo into
    market_news_yahoo_hist.

    The function reads the whole source table into a DataFrame, appends that
    frame to the destination table, and then deletes all rows from the source
    table.

    Parameters:
        request (flask.Request): The HTTP request object (not used in this implementation).

    Returns:
        A (message, status_code) tuple: 200 after a completed transfer, 204
        when the source table holds no rows, 500 when any step raised.

    NOTE(review): the three steps are separate jobs. Rows written to the
    source table after the SELECT but before the DELETE would be removed
    without having been copied, and a failure of the DELETE leaves rows that
    would be copied again on the next run. Confirm that the writer and this
    function cannot overlap.
    """
    project_id = "trendsense"
    dataset_id = "market_data"
    source_table_id = "market_news_yahoo"
    destination_table_id = "market_news_yahoo_hist"

    # Fully qualified table names
    source_table = f"{project_id}.{dataset_id}.{source_table_id}"
    destination_table = f"{project_id}.{dataset_id}.{destination_table_id}"

    client = bigquery.Client(project=project_id)

    try:
        # Step 1: pull the complete source table
        print(f"Querying data from {source_table}...")
        select_job = client.query(f"SELECT * FROM `{source_table}`")
        source_data = select_job.to_dataframe()

        if source_data.empty:
            print("No data found in the source table.")
            return "No data found.", 204

        row_count = len(source_data)
        print(f"Fetched {row_count} rows from {source_table}.")

        # Step 2: append the rows to the destination table
        print(f"Inserting data into {destination_table}...")
        append_config = bigquery.LoadJobConfig(write_disposition="WRITE_APPEND")
        load_job = client.load_table_from_dataframe(
            source_data,
            destination_table,
            job_config=append_config,
        )
        load_job.result()  # block until the load has finished
        print(f"Successfully inserted {row_count} rows into {destination_table}.")

        # Step 3: empty the source table
        print("Deleting transferred data from the source table...")
        purge_job = client.query(f"DELETE FROM `{source_table}` WHERE TRUE")
        purge_job.result()
        print(f"Cleared data from {source_table}.")

    except Exception as e:
        print(f"Error transferring data: {str(e)}")
        return f"Error transferring data: {str(e)}", 500

    return "Data successfully transferred.", 200
