## GETTING PRICE DATA WITH YFINANCE

In [2]:
import yfinance as yf
import mysql.connector
import pandas as pd
import numpy as np

def safe_float(value):
    """Return ``value`` as a float, or ``None`` when pandas treats it as missing.

    Missing values (``None``, ``NaN``, ``NaT``, ...) are detected with
    ``pd.isnull`` and returned as ``None``.
    """
    return None if pd.isnull(value) else float(value)

def main(ticker_symbol='NVDA'):
    """Download a ticker's price history and upsert it into MySQL.

    Fetches ``history(period="max")`` from yfinance, converts the index to
    UTC, creates the ``<ticker>_data`` table if it does not exist, and writes
    one row per date with ``INSERT ... ON DUPLICATE KEY UPDATE`` so re-running
    the cell refreshes existing rows instead of failing on the primary key.

    Connection settings are read from the environment variables
    ``STOCKS_DB_HOST``, ``STOCKS_DB_USER``, ``STOCKS_DB_NAME`` and
    ``STOCKS_DB_PASSWORD``. The password has no default: when the variable is
    unset it is prompted for, so it never has to be stored in the notebook.

    Args:
        ticker_symbol: Ticker to import. Defaults to ``'NVDA'``.

    Returns:
        None. Prints a status line when finished.
    """
    # Imported locally so this cell stays runnable on its own.
    import getpass
    import os
    import re

    # Database connection parameters (credentials are not kept in source)
    server = os.environ.get('STOCKS_DB_HOST', '192.168.0.17')
    username = os.environ.get('STOCKS_DB_USER', 'carterdossias')
    database = os.environ.get('STOCKS_DB_NAME', 'Stocks_DB')
    password = os.environ.get('STOCKS_DB_PASSWORD')
    if password is None:
        password = getpass.getpass(f"MySQL password for {username}@{server}: ")

    # Fetch historical stock data before opening the database connection so
    # the connection is not held open during the download.
    ticker = yf.Ticker(ticker_symbol)
    hist = ticker.history(period="max")
    if hist.empty:
        # An empty frame has no datetime index to convert and nothing to insert.
        print(f"No price history returned for {ticker_symbol}; nothing was imported.")
        return

    # Normalise the index to UTC to bypass DST-related issues
    if hist.index.tz is None:
        hist.index = hist.index.tz_localize('UTC')
    else:
        hist.index = hist.index.tz_convert('UTC')

    # Table name based on the ticker symbol (e.g., NVDA_data). Identifiers
    # cannot be bound as query parameters, so restrict the name to
    # [0-9A-Za-z_] and backtick-quote it before splicing it into the SQL.
    table_name = re.sub(r'[^0-9A-Za-z_]', '_', f"{ticker_symbol}_data")
    quoted_table = f"`{table_name}`"

    # SQL to create table if it doesn't exist
    create_table_query = f"""
    CREATE TABLE IF NOT EXISTS {quoted_table} (
        date DATE PRIMARY KEY,
        open FLOAT,
        high FLOAT,
        low FLOAT,
        close FLOAT,
        volume BIGINT,
        dividends FLOAT,
        splits FLOAT
    )
    """

    # SQL to insert data into the table with upsert functionality
    insert_query = f"""
    INSERT INTO {quoted_table} (date, open, high, low, close, volume, dividends, splits)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
    ON DUPLICATE KEY UPDATE
        open = VALUES(open),
        high = VALUES(high),
        low = VALUES(low),
        close = VALUES(close),
        volume = VALUES(volume),
        dividends = VALUES(dividends),
        splits = VALUES(splits)
    """

    # Build every parameter tuple up front so the rows can be sent in batches.
    rows = []
    for index, row in hist.iterrows():
        volume = row.get('Volume')
        rows.append((
            index.date(),
            safe_float(row.get('Open')),
            safe_float(row.get('High')),
            safe_float(row.get('Low')),
            safe_float(row.get('Close')),
            None if pd.isnull(volume) else int(volume),
            safe_float(row.get('Dividends')),
            safe_float(row.get('Stock Splits')),
        ))

    # Rows per executemany call; keeps each statement to a bounded size.
    batch_size = 1000

    conn = mysql.connector.connect(
        host=server,
        user=username,
        password=password,
        database=database
    )
    # try/finally guarantees the cursor and connection are released even when
    # a statement fails part-way through.
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(create_table_query)
            conn.commit()

            for start in range(0, len(rows), batch_size):
                cursor.executemany(insert_query, rows[start:start + batch_size])

            # Single commit: either every batch is stored or none is.
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()

    print("Data imported successfully into the MySQL database.")

# Call main() only when this code runs as the top-level module (not on import).
if __name__ == "__main__":
    main()

Data imported successfully into the MySQL database.


## GETTING NEWS WITH FINNHUB API

In [3]:
import datetime
import os
import time

import pandas as pd
import requests

# Finnhub API key and endpoint details.
# The key is read from the environment so it is never stored in the notebook.
API_KEY = os.environ.get("FINNHUB_API_KEY")
if not API_KEY:
    raise RuntimeError("Set the FINNHUB_API_KEY environment variable before running this cell.")
BASE_URL = "https://finnhub.io/api/v1/company-news"
symbol = "NVDA"

# Define the start date (adjust as needed) and end date (today)
start_date = datetime.date(2020, 1, 1)  # Example start date
end_date = datetime.date.today()

all_news = []
current_start = start_date

# Maximum number of retries for a 429 error or a failed request
MAX_RETRIES = 5
# Seconds to wait for Finnhub before treating a request as failed; without a
# timeout a stalled connection would hang the cell indefinitely.
REQUEST_TIMEOUT = 30

# (from, to) windows whose news could not be fetched, reported after the loop
failed_ranges = []

while current_start <= end_date:
    # Define a weekly window: from current_start to current_start + 6 days,
    # clipped so the last window does not run past end_date
    current_end = min(current_start + datetime.timedelta(days=6), end_date)

    # Format the dates as YYYY-MM-DD strings
    params = {
        "symbol": symbol,
        "from": current_start.strftime("%Y-%m-%d"),
        "to": current_end.strftime("%Y-%m-%d"),
        "token": API_KEY
    }

    print(f"Fetching news for {params['from']} to {params['to']}")

    retries = 0
    success = False  # True once the window is finished (data stored or given up on)
    fetched = False  # True only when the window returned HTTP 200
    while not success and retries < MAX_RETRIES:
        try:
            response = requests.get(BASE_URL, params=params, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Transient network failure: back off and retry instead of aborting
            # the whole crawl. Only the exception type is printed because the
            # message can contain the request URL, which includes the token.
            retries += 1
            wait_time = 2 ** retries  # exponential backoff
            print(f"Request failed ({type(exc).__name__}). Retrying in {wait_time} seconds (attempt {retries}/{MAX_RETRIES})")
            time.sleep(wait_time)
            continue

        if response.status_code == 200:
            news_items = response.json()
            if news_items:
                all_news.extend(news_items)
            success = True
            fetched = True
        elif response.status_code == 429:
            # Rate limit error: wait longer and then retry
            retries += 1
            wait_time = 2 ** retries  # exponential backoff
            print(f"Rate limit reached. Retrying in {wait_time} seconds (attempt {retries}/{MAX_RETRIES})")
            time.sleep(wait_time)
        else:
            print(f"Error: {response.status_code} for range {params['from']} to {params['to']}")
            success = True  # exit retry loop on non-429 errors

    if not fetched:
        # Record the gap so missing weeks are visible instead of silently dropped.
        failed_ranges.append((params['from'], params['to']))

    # Move to the next week
    current_start = current_end + datetime.timedelta(days=1)
    # Sleep briefly to help avoid rapid-fire requests
    time.sleep(1)

if failed_ranges:
    print(f"Warning: news could not be fetched for {len(failed_ranges)} date range(s): {failed_ranges}")

# Convert the collected news data to a DataFrame
# (all_news is the list extended with the JSON items of each successful response).
df = pd.DataFrame(all_news)

# Helper function to safely convert Unix timestamp to "YYYY-MM-DD"
def safe_convert(ts):
    """Convert a Unix timestamp in seconds to a "YYYY-MM-DD" date string in UTC.

    The date is computed in UTC rather than the machine's local time zone, so
    the same timestamp yields the same date wherever the notebook is run.

    Args:
        ts: Anything ``int()`` accepts (int, float, numeric string).

    Returns:
        The formatted date, or "" when ``ts`` is missing, non-numeric,
        non-positive, or outside the range ``datetime`` can represent.
    """
    try:
        ts_val = int(ts)
    except (TypeError, ValueError, OverflowError):
        # None / NaN / inf / non-numeric text
        return ""

    if ts_val <= 0:
        return ""

    try:
        moment = datetime.datetime.fromtimestamp(ts_val, tz=datetime.timezone.utc)
    except (OverflowError, OSError, ValueError):
        # Timestamp too large for the platform or for datetime's year range
        return ""
    return moment.strftime("%Y-%m-%d")

# If the 'datetime' column exists, convert the Unix timestamp to "YYYY-MM-DD"
if not df.empty and 'datetime' in df.columns:
    df['datetime'] = df['datetime'].apply(safe_convert)

# Derive the file name from `symbol` so the CSV is always labelled with the
# ticker that was actually fetched.
csv_filename = f"{symbol}_all_news_weekly.csv"
df.to_csv(csv_filename, index=False)
print(f"All news data saved to {csv_filename}")

Fetching news for 2020-01-01 to 2020-01-07
Fetching news for 2020-01-08 to 2020-01-14
Fetching news for 2020-01-15 to 2020-01-21
Fetching news for 2020-01-22 to 2020-01-28
Fetching news for 2020-01-29 to 2020-02-04
Fetching news for 2020-02-05 to 2020-02-11
Fetching news for 2020-02-12 to 2020-02-18
Fetching news for 2020-02-19 to 2020-02-25
Fetching news for 2020-02-26 to 2020-03-03
Fetching news for 2020-03-04 to 2020-03-10
Fetching news for 2020-03-11 to 2020-03-17
Fetching news for 2020-03-18 to 2020-03-24
Fetching news for 2020-03-25 to 2020-03-31
Fetching news for 2020-04-01 to 2020-04-07
Fetching news for 2020-04-08 to 2020-04-14
Fetching news for 2020-04-15 to 2020-04-21
Fetching news for 2020-04-22 to 2020-04-28
Fetching news for 2020-04-29 to 2020-05-05
Fetching news for 2020-05-06 to 2020-05-12
Fetching news for 2020-05-13 to 2020-05-19
Fetching news for 2020-05-20 to 2020-05-26
Fetching news for 2020-05-27 to 2020-06-02
Fetching news for 2020-06-03 to 2020-06-09
Fetching ne