In [None]:
import requests
import pandas as pd
import time
from datetime import datetime
from nltk.sentiment import SentimentIntensityAnalyzer
from textblob import TextBlob
from transformers import pipeline

# Replace with your NewsAPI key
api_key = 'afc3fe9ac08745439bf521cb5b974fbc'

# Initialize sentiment analysis tools
vader_analyzer = SentimentIntensityAnalyzer()
bert_sentiment = pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")

# List of tickers to search news for
tickers = [
    'AAPL', 'GOOGL', 'MSFT', 'ASTS', 'PTON', 'GSAT', 'PLTR', 'SMR', 'ACHR',
    'BWXT', 'ARBK', 'AMD', 'NVDA', 'GME', 'MU', 'TSLA', 'NFLX', 'ZG',
    'AVGO', 'SMCI', 'GLW', 'HAL', 'LMT', 'AMZN', 'CRM', 'NOW', 'CHTR', 'TDS', 'META'
]

# Get today's date in ISO format
today = datetime.utcnow().strftime('%Y-%m-%d')

# Functions for sentiment analysis
def vader_sentiment(text):
    if text:
        return vader_analyzer.polarity_scores(text)['compound']
    return 0

def textblob_sentiment(text):
    if text:
        return TextBlob(text).sentiment.polarity
    return 0

def bert_sentiment_analysis(text):
    if text:
        result = bert_sentiment(text)[0]
        return result['label'], result['score']  # Returns sentiment label and confidence
    return "NEUTRAL", 0.0

def bert_to_vader_scale(label, confidence):
    label_to_score = {
        "1 star": -1.0,
        "2 stars": -0.5,
        "3 stars": 0.0,
        "4 stars": 0.5,
        "5 stars": 1.0
    }
    return label_to_score.get(label, 0.0) * confidence

# Function to fetch market news for the current day
def get_market_news(ticker):
    url = (
        f'https://newsapi.org/v2/everything?q={ticker}&from={today}&to={today}&sortBy=publishedAt&apiKey={api_key}'
    )
    response = requests.get(url)
    if response.status_code == 200:
        return response.json().get('articles', [])
    elif response.status_code == 429:
        print(f"Rate limit exceeded for {ticker}, retrying after delay...")
        time.sleep(5)
        return []
    else:
        print(f"Error fetching data for {ticker}: {response.status_code}")
        return []

# Save data in the required schema
def save_to_csv(news_data, filename="news_data_today.csv"):
    df = pd.DataFrame(news_data)
    df.to_csv(filename, index=False)
    print(f"Data saved to {filename}")

# Fetch and process news for all tickers
all_news = []
for ticker in tickers:
    print(f"Fetching news for {ticker}...")
    articles = get_market_news(ticker)
    
    for article in articles:
        title = article.get('title', '')
        summary = article.get('description', '')
        
        # Sentiment analysis
        headline_vader_sentiment = vader_sentiment(title)
        summary_textblob_sentiment = textblob_sentiment(summary)
        summary_vader_sentiment = vader_sentiment(summary)
        summary_bert_sentiment, bert_confidence = bert_sentiment_analysis(summary)
        summary_bert_vader_scaled = bert_to_vader_scale(summary_bert_sentiment, bert_confidence)
        
        # Article schema
        news_entry = {
            'ticker': ticker,
            'title': title,
            'headline_vader_sentiment': headline_vader_sentiment,
            'summary': summary,
            'summary_textblob_sentiment': summary_textblob_sentiment,
            'summary_vader_sentiment': summary_vader_sentiment,
            'summary_bert_sentiment': summary_bert_sentiment,
            'bert_confidence': bert_confidence,
            'summary_bert_vader_scaled': summary_bert_vader_scaled,
            'publisher': article.get('source', {}).get('name', ''),
            'link': article.get('url', ''),
            'publish_date': article.get('publishedAt', ''),
            'type': 'general',  # Default value
            'related_tickers': '',  # Default empty
            'source': 'NewsAPI',  # Identify source
        }
        all_news.append(news_entry)
    
    # Avoid rate limiting
    time.sleep(1)

# Save the formatted data to a CSV file
if all_news:
    save_to_csv(all_news)
else:
    print("No news data available.")








In [None]:
import os
import logging
from typing import Optional, Tuple, Dict, Any
from datetime import datetime, timedelta

import pandas as pd
from google.cloud import bigquery
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from transformers import pipeline


class NewsDataProcessorError(Exception):
    """Custom exception for NewsDataProcessor errors."""
    pass


class NewsDataProcessor:
    def __init__(self, project_id: str, dataset_id: str, logger: Optional[logging.Logger] = None):
        self._validate_input_parameters(project_id, dataset_id)
        self.logger = logger or self._setup_logger()
        try:
            self.client = bigquery.Client(project=project_id)
        except Exception as e:
            self.logger.error(f"Failed to initialize BigQuery client: {e}")
            raise NewsDataProcessorError(f"BigQuery client initialization failed: {e}")
        self.project_id = project_id
        self.dataset_id = dataset_id
        self.vader_analyzer = SentimentIntensityAnalyzer()
        self._bert_pipeline = None

    def _validate_input_parameters(self, project_id: str, dataset_id: str):
        if not project_id or not isinstance(project_id, str):
            raise NewsDataProcessorError("Invalid project_id. Must be a non-empty string.")
        if not dataset_id or not isinstance(dataset_id, str):
            raise NewsDataProcessorError("Invalid dataset_id. Must be a non-empty string.")

    def _setup_logger(self) -> logging.Logger:
        logger = logging.getLogger(self.__class__.__name__)
        logger.setLevel(logging.DEBUG)  # Set to DEBUG for detailed logs
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.DEBUG)
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)
        return logger

    @property
    def bert_pipeline(self):
        if self._bert_pipeline is None:
            self._bert_pipeline = pipeline("sentiment-analysis")
        return self._bert_pipeline

    def calculate_vader_sentiment(self, text: Optional[str]) -> float:
        if not text or not isinstance(text, str):
            return 0.0
        try:
            sentiment = self.vader_analyzer.polarity_scores(text)
            return sentiment.get("compound", 0.0)
        except Exception as e:
            self.logger.warning(f"VADER sentiment analysis failed: {e}")
            return 0.0

    def calculate_bert_sentiment(self, text: Optional[str]) -> Tuple[float, float]:
        if not text or not isinstance(text, str):
            return 0.0, 0.0
        try:
            result = self.bert_pipeline(text)[0]
            # Map BERT sentiment to a range similar to VADER (-1 to 1)
            if result["label"] == "POSITIVE":
                # Scale positive sentiment from 0-1 to 0-1
                sentiment_score = (result["score"] * 2) - 1
            else:
                # Scale negative sentiment from 0-1 to -1-0
                sentiment_score = -((result["score"] * 2) - 1)
            
            confidence = result["score"]
            return sentiment_score, confidence
        except Exception as e:
            self.logger.warning(f"BERT sentiment analysis failed: {e}")
            return 0.0, 0.0

    def ensure_table_exists(self, table_id: str):
        table_ref = f"{self.project_id}.{self.dataset_id}.{table_id}"
        try:
            self.client.get_table(table_ref)
            self.logger.info(f"Table {table_ref} already exists.")
        except Exception:
            self.logger.info(f"Table {table_ref} does not exist. Creating it...")
            schema = [
                    bigquery.SchemaField("ticker", "STRING"),
                    bigquery.SchemaField("title", "STRING"),
                    bigquery.SchemaField("summary", "STRING"),
                    bigquery.SchemaField("publisher", "STRING"),
                    bigquery.SchemaField("link", "STRING"),
                    bigquery.SchemaField("publish_date", "TIMESTAMP"),  # Use TIMESTAMP for ISO 8601 datetime
                    bigquery.SchemaField("type", "STRING"),
                    bigquery.SchemaField("related_tickers", "STRING"),
                    bigquery.SchemaField("source", "STRING"),
                    bigquery.SchemaField("lexical_diversity", "FLOAT"),
                    bigquery.SchemaField("reliability_score", "FLOAT"),
                    bigquery.SchemaField("textblob_sentiment", "FLOAT"),
                    bigquery.SchemaField("vader_sentiment", "FLOAT"),
                    bigquery.SchemaField("bert_sentiment", "FLOAT"),
                    bigquery.SchemaField("bert_confidence", "FLOAT"),
                    bigquery.SchemaField("word_count", "INTEGER"),
                    bigquery.SchemaField("headline_sentiment", "FLOAT"),
            ]
            table = bigquery.Table(table_ref, schema=schema)
            try:
                self.client.create_table(table)
                self.logger.info(f"Table {table_ref} created successfully.")
            except Exception as e:
                self.logger.error(f"Failed to create table {table_ref}: {e}")
                raise NewsDataProcessorError(f"Table creation failed: {e}")
    def filter_existing_data(self, new_data: pd.DataFrame, target_table: str) -> pd.DataFrame:
        """
        Filter out rows that already exist in the target table based on publish_date.

        Args:
            new_data (pd.DataFrame): Incoming new data to be checked for duplicates.
            target_table (str): Fully qualified BigQuery table reference (e.g., `trendsense.market_data.Market_News_History_2`).

        Returns:
            pd.DataFrame: Filtered dataframe with only new rows.
        """
        if new_data.empty:
            self.logger.info("No new data provided for filtering.")
            return new_data

        try:
            # Convert publish_date to ISO string for JSON serialization
            new_data['publish_date'] = pd.to_datetime(new_data['publish_date']).dt.strftime('%Y-%m-%dT%H:%M:%S')

            # Query to fetch existing publish dates
            existing_dates_query = f"""
            SELECT DISTINCT FORMAT_TIMESTAMP('%Y-%m-%dT%H:%M:%S', publish_date) AS publish_date
            FROM `{target_table}`
            WHERE FORMAT_TIMESTAMP('%Y-%m-%dT%H:%M:%S', publish_date) IN UNNEST(@publish_dates)
            """

            # Prepare query parameters
            job_config = bigquery.QueryJobConfig(
                query_parameters=[
                    bigquery.ArrayQueryParameter('publish_dates', 'STRING', new_data['publish_date'].tolist())
                ]
            )

            # Execute query
            query_job = self.client.query(existing_dates_query, job_config=job_config)
            existing_dates = [row['publish_date'] for row in query_job]

            # Filter out rows with existing publish dates
            filtered_data = new_data[~new_data['publish_date'].isin(existing_dates)]
            self.logger.info(f"Total new rows after filtering: {len(filtered_data)} (from {len(new_data)} original rows)")

            return filtered_data

        except Exception as e:
            self.logger.error(f"Error filtering existing data: {e}")
            return new_data

 

    def process_and_move_data(self, source_table_id: str, target_table_id: str, batch_size: int = 1000) -> Dict[str, Any]:
        source_table = f"{self.project_id}.{self.dataset_id}.{source_table_id}"
        target_table = f"{self.project_id}.{self.dataset_id}.{target_table_id}"

        try:
            # Query data from the source table excluding unwanted columns
            self.logger.info(f"Querying source table: {source_table}")
            query = f"""
            SELECT 
                ticker, 
                title, 
                summary, 
                publisher, 
                link, 
                publish_date, 
                type, 
                related_tickers, 
                source, 
                lexical_diversity, 
                reliability_score, 
                summary_sentiment
            FROM `{source_table}`
            LIMIT {batch_size}
            """
            new_data = self.client.query(query).to_dataframe()
            self.logger.info(f"Rows retrieved from source table: {len(new_data)}")

            if new_data.empty:
                self.logger.info("No new data to process.")
                return {"status": "success", "message": "No new data", "rows_processed": 0}

            # Rename summary_sentiment to textblob_sentiment
            self.logger.info("Renaming columns...")
            new_data.rename(columns={"summary_sentiment": "textblob_sentiment"}, inplace=True)

            # Ensure publish_date is in datetime format
            new_data['publish_date'] = pd.to_datetime(new_data['publish_date'])

            # Filter out existing rows
            self.logger.info("Filtering existing rows...")
            new_data = self.filter_existing_data(new_data, target_table_id)
            self.logger.info(f"Rows remaining after filtering: {len(new_data)}")

            if new_data.empty:
                self.logger.info("No new unique rows to process after filtering.")
                return {"status": "success", "message": "No new unique rows", "rows_processed": 0}

            # Word Count Calculation
            self.logger.info("Calculating word count for summaries...")
            new_data["word_count"] = new_data["summary"].fillna("").apply(lambda x: len(str(x).split()))

            # Headline Sentiment using VADER
            self.logger.info("Performing VADER sentiment analysis on headlines...")
            new_data["headline_sentiment"] = new_data["title"].apply(self.calculate_vader_sentiment)

            # Existing Sentiment Analyses
            self.logger.info("Performing VADER sentiment analysis on summaries...")
            new_data["vader_sentiment"] = new_data["summary"].apply(self.calculate_vader_sentiment)
            bert_results = new_data["summary"].apply(self.calculate_bert_sentiment).tolist()

            # Validate BERT results
            if len(bert_results) != len(new_data):
                self.logger.error(f"BERT results length mismatch: {len(bert_results)} results for {len(new_data)} rows.")
                raise ValueError("BERT results length mismatch with DataFrame rows.")

            # Unpack BERT results into separate columns
            bert_sentiments, bert_confidences = zip(*bert_results)
            new_data["bert_sentiment"] = bert_sentiments
            new_data["bert_confidence"] = bert_confidences

            # Load data into the target table
            self.logger.info("Loading data into BigQuery...")
            job_config = bigquery.LoadJobConfig(write_disposition=bigquery.WriteDisposition.WRITE_APPEND)
            job = self.client.load_table_from_dataframe(new_data, target_table, job_config=job_config)
            job.result()  # Wait for the job to complete

            success_msg = f"Data successfully moved to {target_table}. Rows added: {len(new_data)}"
            self.logger.info(success_msg)

            return {"status": "success", "message": success_msg, "rows_processed": len(new_data)}

        except Exception as e:
            error_msg = f"Error processing data: {e}"
            self.logger.error(error_msg)
            return {"status": "error", "message": error_msg, "rows_processed": 0}


def move_market_news_data(request):
    """
    Google Cloud Function entry point to process and move market news data.
    """
    # Load configuration from environment variables
    project_id = os.getenv('GCP_PROJECT_ID', 'trendsense')
    dataset_id = os.getenv('BQ_DATASET_ID', 'market_data')
    source_table_id = os.getenv('SOURCE_TABLE_ID', 'Market_News_History_New')
    target_table_id = os.getenv('TARGET_TABLE_ID', 'Market_News_History_2')

    # Set up logging
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    try:
        # Initialize the processor and ensure the target table exists
        processor = NewsDataProcessor(project_id, dataset_id)
        processor.ensure_table_exists(target_table_id)

        # Process and move data
        result = processor.process_and_move_data(source_table_id, target_table_id)

        # Return the result in a response
        return {
            'statusCode': 200 if result['status'] == 'success' else 500,
            'body': result
        }

    except Exception as e:
        logging.error(f"Failed to process market news data: {e}")
        return {
            'statusCode': 500,
            'body': {
                'status': 'error',
                'message': str(e)
            }
        }


In [None]:
import functions_framework
import requests
import pandas as pd
import time
from datetime import datetime, timedelta
from textblob import TextBlob
from google.cloud import bigquery
import pytz


# Replace with your NewsAPI key
api_key = 'afc3fe9ac08745439bf521cb5b974fbc'

# BigQuery configuration
project_id = "trendsense"
dataset_id = "market_data"
table_id = "News_News_Extract"

# List of tickers to search news for
tickers = [
    'AAPL', 'GOOGL', 'MSFT', 'ASTS', 'PTON', 'GSAT', 'PLTR', 'SMR', 'ACHR',
    'BWXT', 'ARBK', 'AMD', 'NVDA', 'GME', 'MU', 'TSLA', 'NFLX', 'ZG',
    'AVGO', 'SMCI', 'GLW', 'HAL', 'LMT', 'AMZN', 'CRM', 'NOW', 'CHTR', 'TDS', 'META'
]

# Get yesterday's date in ISO format
yesterday = datetime.utcnow() - timedelta(days=1)
yesterday_str = yesterday.strftime('%Y-%m-%d')

# Function for TextBlob sentiment analysis
def textblob_sentiment(text):
    if text:
        return TextBlob(text).sentiment.polarity  # Sentiment polarity from -1 to 1
    return 0

# Function to fetch market news for a specific date
def get_market_news(ticker, date):
    url = (
        f'https://newsapi.org/v2/everything?q={ticker}&from={date}&to={date}&sortBy=publishedAt&apiKey={api_key}'
    )
    response = requests.get(url)
    if response.status_code == 200:
        return response.json().get('articles', [])
    elif response.status_code == 429:
        print(f"Rate limit exceeded for {ticker}, retrying after delay...")
        time.sleep(5)
        return []
    else:
        print(f"Error fetching data for {ticker}: {response.status_code}")
        return []

# Function to save data to BigQuery
def save_to_bigquery(data, project_id, dataset_id, table_id):
    from google.cloud import bigquery
    
    client = bigquery.Client()
    table_ref = f"{project_id}.{dataset_id}.{table_id}"
    
    # Define schema if table doesn't exist
    schema = [
        bigquery.SchemaField("ticker", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("title", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("summary", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("summary_textblob_sentiment", "FLOAT", mode="NULLABLE"),
        bigquery.SchemaField("publisher", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("link", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("publish_date", "TIMESTAMP", mode="NULLABLE"),
        bigquery.SchemaField("source", "STRING", mode="NULLABLE"),
    ]
    
    # Check if the table exists
    try:
        client.get_table(table_ref)
    except Exception:
        print(f"Table {table_ref} does not exist. Creating it...")
        table = bigquery.Table(table_ref, schema=schema)
        client.create_table(table)
        print(f"Table {table_ref} created.")
    
    # Convert publish_date to datetime and then to MST
    data['publish_date'] = pd.to_datetime(data['publish_date'], errors='coerce')
    utc = pytz.utc
    mst = pytz.timezone('US/Mountain')
    data['publish_date'] = data['publish_date'].apply(
        lambda x: x.astimezone(mst) if pd.notnull(x) else None
    )
    
    # Ensure numeric values for sentiment
    data['summary_textblob_sentiment'] = pd.to_numeric(data['summary_textblob_sentiment'], errors='coerce')

    # Log data types to verify
    print("DataFrame dtypes before uploading:")
    print(data.dtypes)

    # Load data into BigQuery
    job = client.load_table_from_dataframe(data, table_ref)
    job.result()  # Wait for the load job to complete
    print(f"Data successfully saved to BigQuery table: {table_ref}")
    
# Cloud Function Entry Point
@functions_framework.http
def main(request):
    all_news = []
    for ticker in tickers:
        print(f"Fetching news for {ticker} from {yesterday_str}...")
        articles = get_market_news(ticker, yesterday_str)

        for article in articles:
            title = article.get('title', '')
            summary = article.get('description', '')

            # Sentiment analysis using TextBlob
            summary_textblob_sentiment = textblob_sentiment(summary)

            # Article schema
            news_entry = {
                'ticker': ticker,
                'title': title,
                'summary': summary,
                'summary_textblob_sentiment': summary_textblob_sentiment,
                'publisher': article.get('source', {}).get('name', ''),
                'link': article.get('url', ''),
                'publish_date': article.get('publishedAt', ''),
                'source': 'NewsAPI',  # Identify source
            }
            all_news.append(news_entry)

        # Avoid rate limiting
        time.sleep(1)

    # Convert data to a DataFrame
    df = pd.DataFrame(all_news)

    if not df.empty:
        # Save to BigQuery
        save_to_bigquery(df, project_id, dataset_id, table_id)
        return {
            "status": "success",
            "message": f"Data saved to BigQuery table: {project_id}.{dataset_id}.{table_id}",
            "total_articles": len(all_news),
        }
    else:
        return {
            "status": "success",
            "message": "No news articles found for yesterday.",
            "total_articles": 0,
        }



Fetching rating for AAPL...
Fetching rating for GOOGL...
Fetching rating for MSFT...
Fetching rating for ASTS...
Fetching rating for PTON...
Fetching rating for GSAT...
Fetching rating for PLTR...
Fetching rating for SMR...
Fetching rating for ACHR...
Fetching rating for BWXT...
Fetching rating for ARBK...
Fetching rating for AMD...
Fetching rating for NVDA...
Fetching rating for GME...
Fetching rating for MU...
Fetching rating for TSLA...
Fetching rating for NFLX...
Fetching rating for ZG...
Fetching rating for AVGO...
Fetching rating for SMCI...
Fetching rating for GLW...
Fetching rating for HAL...
Fetching rating for LMT...
Fetching rating for AMZN...
Fetching rating for CRM...
Fetching rating for NOW...
Fetching rating for CHTR...
Fetching rating for TDS...
Fetching rating for META...


PermissionError: [Errno 13] Permission denied: 'stock_ratings.csv'

In [30]:
# Install necessary libraries (uncomment if needed)
# !pip install textblob vaderSentiment transformers flair

# Import libraries
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from transformers import pipeline
from flair.models import TextClassifier
from flair.data import Sentence

# Input text
text = (
    "The company is making significant strides in the quantum computing field "
    "with its latest AI-powered calibration breakthrough. In collaboration with Quantum Machines, "
    "Rigetti successfully applied artificial intelligence to automate the calibration of a 9-qubit "
    "Novera Quantum Processing Unit (QPU)."
)

# TextBlob Sentiment Analysis
def analyze_textblob(text):
    blob = TextBlob(text)
    return blob.sentiment.polarity

# VADER Sentiment Analysis
def analyze_vader(text):
    analyzer = SentimentIntensityAnalyzer()
    scores = analyzer.polarity_scores(text)
    return scores["compound"]

# BERT Sentiment Analysis
def analyze_bert(text):
    sentiment_pipeline = pipeline("sentiment-analysis")
    result = sentiment_pipeline(text)[0]
    return result

# Flair Sentiment Analysis
def analyze_flair(text):
    classifier = TextClassifier.load("sentiment")
    sentence = Sentence(text)
    classifier.predict(sentence)
    sentiment_score = sentence.labels[0]
    return sentiment_score

# Run all analyses
results = {
    "TextBlob": analyze_textblob(text),
    "VADER": analyze_vader(text),
    "BERT": analyze_bert(text),
    "Flair": analyze_flair(text),
}

# Print Results
print("Sentiment Analysis Results:")
for method, result in results.items():
    print(f"{method}: {result}")


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Device set to use cpu


2024-12-11 10:21:53,724 https://nlp.informatik.hu-berlin.de/resources/models/sentiment-curated-distilbert/sentiment-en-mix-distillbert_4.pt not found in cache, downloading to C:\Users\Bryce\AppData\Local\Temp\tmpzt4pdo0y


100%|██████████| 253M/253M [02:38<00:00, 1.67MB/s]   

2024-12-11 10:24:33,105 copying C:\Users\Bryce\AppData\Local\Temp\tmpzt4pdo0y to cache at C:\Users\Bryce\.flair\models\sentiment-en-mix-distillbert_4.pt





2024-12-11 10:24:33,666 removing temp file C:\Users\Bryce\AppData\Local\Temp\tmpzt4pdo0y
Sentiment Analysis Results:
TextBlob: 0.25625
VADER: 0.7964
BERT: {'label': 'POSITIVE', 'score': 0.9996393918991089}
Flair: Sentence[44]: "The company is making significant strides in the quantum computing field with its latest AI-powered calibration breakthrough. In collaboration with Quantum Machines, Rigetti successfully applied artificial intelligence to automate the calibration of a 9-qubit Novera Quantum Processing Unit (QPU)." → POSITIVE (0.9998)


In [29]:
!pip install flair


Collecting flair
  Downloading flair-0.14.0-py3-none-any.whl.metadata (12 kB)
Collecting boto3>=1.20.27 (from flair)
  Downloading boto3-1.35.78-py3-none-any.whl.metadata (6.7 kB)
Collecting conllu<5.0.0,>=4.0 (from flair)
  Downloading conllu-4.5.3-py2.py3-none-any.whl.metadata (19 kB)
Collecting deprecated>=1.2.13 (from flair)
  Downloading Deprecated-1.2.15-py2.py3-none-any.whl.metadata (5.5 kB)
Collecting ftfy>=6.1.0 (from flair)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting gdown>=4.4.0 (from flair)
  Downloading gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Collecting langdetect>=1.0.9 (from flair)
  Downloading langdetect-1.0.9.tar.gz (981 kB)
     ---------------------------------------- 0.0/981.5 kB ? eta -:--:--
     ------------------------------ ------- 786.4/981.5 kB 8.5 MB/s eta 0:00:01
     -------------------------------------- 981.5/981.5 kB 2.1 MB/s eta 0:00:00
  Installing build dependencies: started
  Installing build dependencies: finished


[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [26]:
import os
import requests
from datetime import datetime
from google.cloud import bigquery
import json

# Configuration - use environment variables or defaults for local testing
API_KEY = os.environ.get('FINANCIAL_MODELING_PREP_API_KEY', 'KhbgwU29WSYBQlGkdkYjAomzvDQRVE0')
PROJECT_ID = os.environ.get('GOOGLE_CLOUD_PROJECT', 'trendsense')
DATASET_ID = os.environ.get('BIGQUERY_DATASET_ID', 'stock_data')
TABLE_ID = os.environ.get('BIGQUERY_TABLE_ID', 'stock_data_ratings')

# List of stock symbols
SYMBOLS = [
    'AAPL', 'GOOGL', 'MSFT', 'ASTS', 'PTON', 'GSAT', 'PLTR', 'SMR', 'ACHR',
    'BWXT', 'ARBK', 'AMD', 'NVDA', 'GME', 'MU', 'TSLA', 'NFLX', 'ZG',
    'AVGO', 'SMCI', 'GLW', 'HAL', 'LMT', 'AMZN', 'CRM', 'NOW', 'CHTR', 'TDS', 'META'
]

def get_company_rating(symbol, api_key):
    """
    Fetch company rating for a given stock symbol from Financial Modeling Prep API
    
    Args:
        symbol (str): Stock symbol to fetch rating for
        api_key (str): API key for Financial Modeling Prep
    
    Returns:
        dict or None: Parsed rating data or None if fetch fails
    """
    if not api_key or api_key == 'KhbgwU29WSYBQlGkdkYjAomzvDQRVE0':
        print(f"Invalid API key. Please provide a valid Financial Modeling Prep API key.")
        return None

    url = f"https://financialmodelingprep.com/api/v3/rating/{symbol}?apikey={api_key}"
    
    try:
        response = requests.get(url, timeout=10)
        
        # Log detailed error information for debugging
        if response.status_code != 200:
            print(f"Rating fetch failed for {symbol}. Status: {response.status_code}")
            print(f"Response content: {response.text}")
            return None
        
        data = response.json()
        
        if not data:
            print(f"No rating data available for {symbol}")
            return None
        
        # Extract the first result
        rating_data = data[0]
        return {
            'symbol': symbol,
            'fetch_timestamp': datetime.utcnow().isoformat(),
            'date': rating_data.get('date', 'No date available'),
            'overall_rating': rating_data.get('rating'),
            'recommendation': rating_data.get('ratingRecommendation'),
            'rating_score': rating_data.get('ratingScore', 0.0),
            'dcf_score': rating_data.get('ratingDetailsDCFScore', 0.0),
            'dcf_recommendation': rating_data.get('ratingDetailsDCFRecommendation'),
            'roe_score': rating_data.get('ratingDetailsROEScore', 0.0),
            'roe_recommendation': rating_data.get('ratingDetailsROERecommendation'),
            'roa_score': rating_data.get('ratingDetailsROAScore', 0.0),
            'roa_recommendation': rating_data.get('ratingDetailsROARecommendation'),
            'pe_score': rating_data.get('ratingDetailsPEScore', 0.0),
            'pe_recommendation': rating_data.get('ratingDetailsPERecommendation'),
            'pb_score': rating_data.get('ratingDetailsPBScore', 0.0),
            'pb_recommendation': rating_data.get('ratingDetailsPBRecommendation')
        }
    
    except requests.RequestException as e:
        print(f"Network error for {symbol}: {e}")
        return None
    except ValueError as e:
        print(f"JSON parsing error for {symbol}: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error for {symbol}: {e}")
        return None

def fetch_and_save_stock_ratings(api_key, project_id, dataset_id, table_id):
    """
    Fetch stock ratings and optionally save to BigQuery or local file
    
    Args:
        api_key (str): Financial Modeling Prep API key
        project_id (str): Google Cloud Project ID
        dataset_id (str): BigQuery dataset ID
        table_id (str): BigQuery table ID
    
    Returns:
        list: List of fetched stock ratings
    """
    # Validate configuration
    if not api_key or api_key == 'KhbgwU29WSYBQlGkdkYjAomzvDQRVE0':
        print("ERROR: Financial Modeling Prep API key is not configured.")
        return []

    # Collect ratings
    ratings_to_save = []
    error_symbols = []

    # Fetch ratings for each symbol
    for symbol in SYMBOLS:
        try:
            rating = get_company_rating(symbol, api_key)
            
            if rating:
                ratings_to_save.append(rating)
            else:
                error_symbols.append(symbol)
        
        except Exception as e:
            print(f"Error processing {symbol}: {e}")
            error_symbols.append(symbol)

    # Save results locally (JSON)
    if ratings_to_save:
        # Save to local JSON file
        output_file = 'stock_ratings_output.json'
        with open(output_file, 'w') as f:
            json.dump(ratings_to_save, f, indent=2)
        print(f"Saved {len(ratings_to_save)} stock ratings to {output_file}")

        # Optionally save to BigQuery if credentials are set up
        try:
            # Only attempt BigQuery if running in a Google Cloud environment
            client = bigquery.Client(project=project_id)
            
            # Prepare the table reference
            dataset_ref = client.dataset(dataset_id)
            table_ref = dataset_ref.table(table_id)
            
            # Insert rows into BigQuery
            errors = client.insert_rows_json(table_ref, ratings_to_save)
            
            if errors:
                print(f"Partial failure inserting rows. Symbols with errors: {error_symbols}")
            else:
                print(f"Successfully inserted {len(ratings_to_save)} stocks to BigQuery")
        
        except Exception as e:
            print(f"BigQuery insertion error: {e}")
            print("Continuing with local file save...")

    else:
        print('No ratings could be retrieved')

    return ratings_to_save

def main():
    """
    Main function to run the stock ratings fetch
    """
    # You can replace 'YOUR_API_KEY_HERE' with your actual API key
    ratings = fetch_and_save_stock_ratings(
        api_key=API_KEY, 
        project_id=PROJECT_ID, 
        dataset_id=DATASET_ID, 
        table_id=TABLE_ID
    )
    
    # Print out basic info about retrieved ratings
    print("\nRetrieved Ratings Summary:")
    for rating in ratings:
        print(f"{rating['symbol']}: {rating.get('recommendation', 'No recommendation')}")

if __name__ == '__main__':
    main()

ERROR: Financial Modeling Prep API key is not configured.

Retrieved Ratings Summary:
