# Stock Data Collection

In [5]:
import yfinance as yf

# Spot-check the download for ORCL over a short window and print the frame
orcl_data = yf.download(tickers="ORCL", start="2024-01-23", end="2024-03-11")
print(orcl_data)
# Spot-check the download for IBM (the frame is stored but not displayed)
ibm_data = yf.download(tickers="IBM", start="2023-03-08", end="2024-03-08")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Price            Close        High         Low        Open    Volume
Ticker            ORCL        ORCL        ORCL        ORCL      ORCL
Date                                                                
2024-01-23  110.635109  111.476025  108.389359  109.111563   9185900
2024-01-24  113.088615  114.186756  112.119091  112.208126  11702700
2024-01-25  113.771240  114.938632  113.128184  113.662415   8877200
2024-01-26  113.415092  114.246121  113.049043  113.415092   5541800
2024-01-29  112.534599  113.365620  111.802508  112.969900   7014400
2024-01-30  112.940224  113.177657  111.426565  112.148769   8231900
2024-01-31  110.506500  113.049040  110.476822  112.593956   7386700
2024-02-01  114.295586  114.542914  111.347431  111.703585   7882200
2024-02-02  114.552803  115.245321  113.929537  114.582481   6499400
2024-02-05  115.146385  115.472861  113.543692  114.295574   8084500
2024-02-06  114.068047  115.700411  113.751466  115.492655   4637500
2024-02-07  116.016983  116.333564




In [1]:
#Import Required Libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd


In [90]:

# Browser-like User-Agent that is sent along with the request below.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'}
url = 'https://www.cnbc.com/quotes/AMZN?qsearchterm=amazon'

# Send the headers with the request, and set a timeout so a stalled
# connection cannot hang the cell (requests does not time out by default).
page = requests.get(url, headers=headers, timeout=10)
print(page.status_code)

200


In [91]:
# Parse the downloaded HTML with Python's built-in parser, then print the
# page <title> as a quick check that the expected page came back.
soup = BeautifulSoup(page.text, features='html.parser')
print(soup.title.get_text())

AMZN: Amazon.com Inc - Stock Price, Quote and News - CNBC


In [92]:
# Look up the quote strip by its exact class string. find() returns None when
# no <div> carries exactly this class attribute, so fail with a clear message
# instead of an AttributeError on `.text`.
quote_strip = soup.find('div', {'class': 'QuoteStrip-dataContainer QuoteStrip-extendedHours'})
if quote_strip is None:
    raise ValueError(
        "Quote strip container not found: the page markup differs from what this selector expects."
    )
data = quote_strip.text
data

'Close225.94+5.28 (+2.39%)Volume38,754,30552 week range149.91 - 233.00'

In [95]:
import re

title = soup.title.text

# Regular expression patterns
pattern = r"^(.*?)-"                        # title text up to the first '-'
price_pattern = r"Close([\d\.]+)"           # number that directly follows "Close"
change_pattern = r"([+-][\d\.]+)"           # first signed number in the strip
percentage_pattern = r"\(([+-][\d\.%]+)\)"  # signed percentage inside parentheses

# Extract data
name = re.search(pattern, title).group(1)
price = re.search(price_pattern, data).group(1)
change = re.search(change_pattern, data).group(1)
percentage = re.search(percentage_pattern, data).group(1)

# Concatenate results
result = f"Price: {price}, Change: {change}, Percentage: {percentage}"
print(result)
# Print the company text extracted into `name` above, so the cell does not
# depend on any variable defined outside it besides `soup` and `data`.
print(f"Company: {name}")


Price: 225.94, Change: +5.28, Percentage: +2.39%
Company: AMZN: Amazon.com Inc 


In [8]:
import os
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
from config import RAW_DATA_PATH

def _flatten_price_columns(frame):
    """Return ``frame`` with single-level column labels.

    When the columns are a ``pd.MultiIndex`` (for example a Price/Ticker pair
    of levels), only the first level is kept so that every ticker's frame
    exposes the same plain labels. Frames whose columns are already flat are
    returned unchanged.
    """
    if isinstance(frame.columns, pd.MultiIndex):
        frame = frame.copy()
        frame.columns = frame.columns.get_level_values(0)
    return frame


def _lookup_company_name(stock):
    """Return the ``longName`` from ``yf.Ticker(stock).info``, or ``'N/A'`` on failure."""
    try:
        # Use 'longName' for the full company name
        return yf.Ticker(stock).info.get('longName', 'N/A')
    except Exception as e:
        print(f"Error fetching company info for {stock}: {e}")
        return 'N/A'


def fetch_stock_data(stocks, lookback_days=410):
    """Download price history for each symbol and save everything as one CSV.

    For every symbol, ``yf.download`` is called with ``start`` set to
    ``lookback_days`` days before today and ``end`` set to today's date.
    Each returned frame gets ``Symbol`` and ``Company`` columns, is reduced
    to a fixed set of columns, and all frames are stacked into one long table
    written to ``RAW_DATA_PATH/raw_collected_1year_data.csv``. Progress and
    failures are reported with ``print``.

    Args:
        stocks: Iterable of ticker symbols to fetch.
        lookback_days: Length of the requested window in calendar days.
            The default of 410 is longer than one calendar year, even though
            the output file name says "1year".

    Returns:
        None. The result is written to disk; nothing is written when no
        symbol produced data.
    """
    # Every per-stock frame is reduced to exactly these columns, in this order.
    columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Symbol', 'Company']

    # Read the clock once so both ends of the window come from the same instant.
    today = datetime.today()
    end_date = today.strftime('%Y-%m-%d')
    start_date = (today - timedelta(days=lookback_days)).strftime('%Y-%m-%d')

    all_data = []
    failed_stocks = []  # (symbol, reason) pairs for symbols that could not be collected

    # Loop through the stock symbols and fetch the data
    for stock in stocks:
        print(f"Fetching data for {stock}...")

        try:
            # Download historical stock data
            stock_data = yf.download(stock, start=start_date, end=end_date)

            # Check if data was returned (some symbols might fail to fetch)
            if stock_data.empty:
                print(f"No data found for {stock}. Skipping...")
                failed_stocks.append((stock, "No data found"))
                continue

            # Collapse (Price, Ticker) column pairs to plain price labels; otherwise
            # each ticker would contribute its own set of columns to the concat below.
            stock_data = _flatten_price_columns(stock_data)

            company_name = _lookup_company_name(stock)

            # Tag the rows, then turn the date index into a regular 'Date' column.
            stock_data = stock_data.assign(Symbol=stock, Company=company_name).reset_index()

            # Select the shared columns here so a frame with a missing column is
            # recorded as a failure for this symbol instead of aborting the run.
            all_data.append(stock_data[columns])

        except Exception as e:
            print(f"Error fetching data for {stock}: {e}")
            failed_stocks.append((stock, str(e)))
            continue

    # Combine the data for all stocks into one DataFrame
    if all_data:
        combined_data = pd.concat(all_data, ignore_index=True)

        # Ensure the output directory exists
        os.makedirs(RAW_DATA_PATH, exist_ok=True)

        # Define the output file path
        output_file_path = os.path.join(RAW_DATA_PATH, "raw_collected_1year_data.csv")

        # Save the data to a CSV file
        combined_data.to_csv(output_file_path, index=False)

        print(f"Data collection and saving to CSV completed at {output_file_path}")
    else:
        print("No data was collected for any stock.")

    # Print failed stocks (if any)
    if failed_stocks:
        print("\nFailed to fetch data for the following stocks:")
        for stock, error in failed_stocks:
            print(f"- {stock}: {error}")

# Ticker symbols to collect, in fetch order (whitespace-separated, ten per row)
stocks = """
    AAPL MSFT AMZN GOOGL TSLA NVDA META UNH MA LLY
    COST V JNJ PG WMT DIS HD BAC XOM CVX
    PFE ABBV KO PEP CSCO INTC MRK T VZ ADBE
    CRM NFLX PYPL ORCL IBM QCOM AMD TXN NKE MCD
    SBUX GS MS C BA CAT GE HON LMT MMM
    UPS FDX AMT PLD SPG NOW ZM DOCU SNOW SQ
    ROKU SPOT UBER LYFT ABNB SHOP TWLO DDOG OKTA CRWD
    ZS NET MDB FSLY PLTR ASML BABA LULU TGT LOW
    TJX DG DLTR ROST SNAP TIXT CNQ MNSO D
""".split()

# Run the collection for every symbol listed above
fetch_stock_data(stocks)

[*********************100%***********************]  1 of 1 completed

Fetching data for AAPL...



[*********************100%***********************]  1 of 1 completed

Fetching data for MSFT...



[*********************100%***********************]  1 of 1 completed

Fetching data for AMZN...
Fetching data for GOOGL...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching data for TSLA...
Fetching data for NVDA...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching data for META...
Fetching data for UNH...



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching data for MA...



[*********************100%***********************]  1 of 1 completed

Fetching data for LLY...



[*********************100%***********************]  1 of 1 completed

Fetching data for COST...





Fetching data for V...


[*********************100%***********************]  1 of 1 completed


Fetching data for JNJ...


[*********************100%***********************]  1 of 1 completed


Fetching data for PG...


[*********************100%***********************]  1 of 1 completed


Fetching data for WMT...


[*********************100%***********************]  1 of 1 completed


Fetching data for DIS...


[*********************100%***********************]  1 of 1 completed


Fetching data for HD...


[*********************100%***********************]  1 of 1 completed


Fetching data for BAC...


[*********************100%***********************]  1 of 1 completed


Fetching data for XOM...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching data for CVX...





Fetching data for PFE...


[*********************100%***********************]  1 of 1 completed


Fetching data for ABBV...


[*********************100%***********************]  1 of 1 completed


Fetching data for KO...


[*********************100%***********************]  1 of 1 completed


Fetching data for PEP...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching data for CSCO...



[*********************100%***********************]  1 of 1 completed

Fetching data for INTC...





Fetching data for MRK...


[*********************100%***********************]  1 of 1 completed


Fetching data for T...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching data for VZ...





Fetching data for ADBE...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching data for CRM...



[*********************100%***********************]  1 of 1 completed

Fetching data for NFLX...



[*********************100%***********************]  1 of 1 completed

Fetching data for PYPL...



[*********************100%***********************]  1 of 1 completed

Fetching data for ORCL...





Fetching data for IBM...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching data for QCOM...



[*********************100%***********************]  1 of 1 completed

Fetching data for AMD...





Fetching data for TXN...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching data for NKE...





Fetching data for MCD...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching data for SBUX...



[*********************100%***********************]  1 of 1 completed

Fetching data for GS...



[*********************100%***********************]  1 of 1 completed

Fetching data for MS...



[*********************100%***********************]  1 of 1 completed

Fetching data for C...





Fetching data for BA...


[*********************100%***********************]  1 of 1 completed


Fetching data for CAT...


[*********************100%***********************]  1 of 1 completed


Fetching data for GE...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching data for HON...





Fetching data for LMT...


[*********************100%***********************]  1 of 1 completed


Fetching data for MMM...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching data for UPS...



[*********************100%***********************]  1 of 1 completed

Fetching data for FDX...



[*********************100%***********************]  1 of 1 completed

Fetching data for AMT...





Fetching data for PLD...


[*********************100%***********************]  1 of 1 completed


Fetching data for SPG...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching data for NOW...



[*********************100%***********************]  1 of 1 completed

Fetching data for ZM...





Fetching data for DOCU...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching data for SNOW...





Fetching data for SQ...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Error fetching company info for SQ: list index out of range
Fetching data for ROKU...



[*********************100%***********************]  1 of 1 completed

Fetching data for SPOT...



[*********************100%***********************]  1 of 1 completed

Fetching data for UBER...



[*********************100%***********************]  1 of 1 completed

Fetching data for LYFT...



[*********************100%***********************]  1 of 1 completed

Fetching data for ABNB...



[*********************100%***********************]  1 of 1 completed

Fetching data for SHOP...



[*********************100%***********************]  1 of 1 completed

Fetching data for TWLO...



[*********************100%***********************]  1 of 1 completed

Fetching data for DDOG...



[*********************100%***********************]  1 of 1 completed

Fetching data for OKTA...



[*********************100%***********************]  1 of 1 completed

Fetching data for CRWD...



[*********************100%***********************]  1 of 1 completed

Fetching data for ZS...



[*********************100%***********************]  1 of 1 completed

Fetching data for NET...



[*********************100%***********************]  1 of 1 completed

Fetching data for MDB...





Fetching data for FSLY...


[*********************100%***********************]  1 of 1 completed


Fetching data for PLTR...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching data for ASML...



[*********************100%***********************]  1 of 1 completed

Fetching data for BABA...



[*********************100%***********************]  1 of 1 completed

Fetching data for LULU...





Fetching data for TGT...


[*********************100%***********************]  1 of 1 completed


Fetching data for LOW...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching data for TJX...



[*********************100%***********************]  1 of 1 completed

Fetching data for DG...





Fetching data for DLTR...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching data for ROST...





Fetching data for SNAP...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching data for TIXT...





Fetching data for CNQ...


[*********************100%***********************]  1 of 1 completed


Fetching data for MNSO...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Fetching data for D...





Data collection and saving to CSV completed at data/raw_data\raw_collected_1year_data.csv
