In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
import datetime

In [3]:
def stock_screener(tickers, start_date, end_date, min_price=None, max_price=None, min_volume=None):
    """
    Fetch historical stock data for the given tickers and keep only the rows that pass the filters.

    Each ticker is downloaded on its own with ``yf.download`` and tagged with a
    'Ticker' column before all results are stacked into one DataFrame. Tickers
    whose download comes back empty (failed or no data in the range) are
    skipped, so they contribute neither rows nor dtype changes to the result.

    :param tickers: Iterable of stock tickers
    :param start_date: Start date in 'YYYY-MM-DD' format
    :param end_date: End date in 'YYYY-MM-DD' format
    :param min_price: Minimum closing price (inclusive); None disables the filter
    :param max_price: Maximum closing price (inclusive); None disables the filter
    :param min_volume: Minimum trading volume (inclusive); None disables the filter
    :return: Filtered DataFrame with stock data and a 'Ticker' column; an empty
             DataFrame when no ticker produced any rows
    """
    frames = []

    for ticker in tickers:
        data = yf.download(ticker, start=start_date, end=end_date)

        # A failed download yields no rows; skip it instead of concatenating
        # an empty frame into the result.
        if data is None or data.empty:
            continue

        # Work on a copy so the frame handed back by yfinance is not mutated.
        data = data.copy()

        # Some yfinance versions return (field, ticker) MultiIndex columns even
        # for a single ticker; keep only the field level so 'Close'/'Volume'
        # are plain column names and frames of different tickers line up.
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = data.columns.get_level_values(0)

        data['Ticker'] = ticker
        frames.append(data)

    # pd.concat raises ValueError on an empty list, so handle "nothing
    # downloaded" (no tickers given, or every download failed) explicitly.
    if not frames:
        return pd.DataFrame()

    df = pd.concat(frames)

    if min_price is not None:
        df = df[df['Close'] >= min_price]

    if max_price is not None:
        df = df[df['Close'] <= max_price]

    if min_volume is not None:
        df = df[df['Volume'] >= min_volume]

    return df




In [4]:
# Load the raw partner-headlines CSV into a DataFrame (the path is relative to
# the notebook's working directory).
news_df = pd.read_csv('data/training/in/news/archive/raw_partner_headlines.csv')

In [5]:
# Distinct values of the 'stock' column, in order of first appearance; these
# are later passed to stock_screener as the ticker list.
news_tickers = pd.unique(news_df['stock'])

In [7]:

# Screening configuration: date range plus optional filters
# (a filter left as None is not applied by stock_screener).
start_date = "2019-01-01"
end_date = "2020-06-01"
min_price = None  # Minimum closing price
max_price = None  # Maximum closing price
min_volume = None  # Minimum trading volume
n_tickers = 10  # Only the first n_tickers entries of news_tickers are fetched

# Fetch and filter data
price_df = stock_screener(news_tickers[:n_tickers], start_date, end_date, min_price, max_price, min_volume)

# Display the filtered data
print(price_df)

# Save to CSV. The file name is built from the configured date range so it
# cannot drift out of sync when start_date / end_date are edited.
price_df.to_csv(f"data/training/in/price/price_data_{start_date}_{end_date}.csv")

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['AAC']: YFTzMissingError('$%ticker%: possibly delisted; No timezone found')
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['AAMC']: YFChartError("%ticker%: Data doesn't exist for startDate = 1546318800, endDate = 1590984000")
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['AAN']: YFChartError("%ticker%: Data doesn't exist for startDate = 1546318800, endDate = 1590984000")
[*********************100%%**********************]  1 of 1 completed
[*********************100%%********

                 Open       High        Low      Close  Adj Close     Volume  \
Date                                                                           
2019-01-02  66.500000  66.570000  65.300003  65.690002  63.161682  2113300.0   
2019-01-03  65.529999  65.779999  62.000000  63.270000  60.834843  5383900.0   
2019-01-04  64.089996  65.949997  64.089996  65.459999  62.940548  3123700.0   
2019-01-07  65.639999  67.430000  65.610001  66.849998  64.277039  3235100.0   
2019-01-08  67.589996  68.209999  66.699997  67.830002  65.219322  1578100.0   
...               ...        ...        ...        ...        ...        ...   
2020-05-22  34.619999  35.093334  34.046665  34.466667  33.604305   274650.0   
2020-05-26  35.220001  37.033333  35.220001  36.520000  35.606270   309000.0   
2020-05-27  36.726665  37.606667  36.240002  36.653332  35.736271   429300.0   
2020-05-28  36.799999  37.113335  35.946667  36.146667  35.242279   308550.0   
2020-05-29  35.833332  36.480000  35.266

In [24]:
# Summary statistics of the downloaded prices. reset_index() turns the Date
# index into a regular column so describe() reports its range as well.
price_df.reset_index().describe()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
count,2485,2485.0,2485.0,2485.0,2485.0,2485.0,2485.0
mean,2019-09-15 06:29:24.507042304,30.951947,31.365491,30.539671,30.957952,29.911149,3877460.0
min,2019-01-02 00:00:00,1.48,1.7,1.46,1.5,1.454647,0.0
25%,2019-05-09 00:00:00,10.76,11.13,10.32,10.69,10.69,13800.0
50%,2019-09-16 00:00:00,28.540001,28.99,28.1,28.540001,28.117407,624500.0
75%,2020-01-23 00:00:00,45.439999,45.650002,45.209999,45.470001,42.423931,3270000.0
max,2020-05-29 00:00:00,89.970001,90.639999,89.839996,90.129997,87.400757,138061000.0
std,,23.534456,23.708568,23.372903,23.562305,22.575013,11844380.0
