In [1]:
import sys
import numpy as np
import yfinance as yf
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
import pandas as pd
import math
import matplotlib.pyplot as plt


#Read data and fill missing values
data = pd.read_csv("candles.csv", index_col=0)
data = data.ffill()
print(data.head())

                                 open        high         low       close  \
datetime                                                                    
2024-05-03 09:30:00-04:00  186.815002  187.229996  185.429993  186.880798   
2024-05-03 10:30:00-04:00  186.895004  187.869995  185.725006  185.773895   
2024-05-03 11:30:00-04:00  185.800003  186.789993  185.800003  186.335007   
2024-05-03 12:30:00-04:00  186.324997  186.800003  185.950104  186.270004   
2024-05-03 13:30:00-04:00  186.270004  186.658203  186.050003  186.414993   

                             volume  dividends  stock splits  
datetime                                                      
2024-05-03 09:30:00-04:00  12693085        0.0           0.0  
2024-05-03 10:30:00-04:00   6400873        0.0           0.0  
2024-05-03 11:30:00-04:00   3127625        0.0           0.0  
2024-05-03 12:30:00-04:00   2636132        0.0           0.0  
2024-05-03 13:30:00-04:00   2638140        0.0           0.0  


In [2]:
class DynamicStockSelector:
    def __init__(self, tickers, max_stocks=3):
        self.tickers = tickers
        self.max_stocks = max_stocks

    def fetch_stock_data(self, ticker):
        """Fetch and process data for a single NYSE stock."""
        try:
            df = yf.Ticker(ticker).history(period="1y", interval="1h")
            if df.empty or len(df) < 2:
                raise ValueError("Insufficient data")

            df = df.reset_index()
            df.columns = [c.lower() for c in df.columns]

            volatility = df['close'].pct_change().std()
            volume = df['volume'].mean()
            liquidity = df['close'].iloc[-1] * df['volume'].iloc[-1]
            trend = (df['close'].iloc[-1] - df['close'].iloc[0]) / df['close'].iloc[0]

            return {
                'ticker': ticker,
                'volatility': volatility,
                'volume': volume,
                'liquidity': liquidity,
                'trend': trend
            }

        except Exception as e:
            print(f"Error processing {ticker}: {e}")
            return None

    def apply_filters(self, df):
        """Apply filters to remove low-performing stocks."""
        min_volume = df['volume'].quantile(0.25)
        df = df[df['volume'] > min_volume]

        min_liquidity = df['liquidity'].quantile(0.25)
        df = df[df['liquidity'] > min_liquidity]

        vol_lower, vol_upper = df['volatility'].quantile([0.25, 0.75])
        df = df[(df['volatility'] > vol_lower) & (df['volatility'] < vol_upper)]

        return df

    def select_stocks(self):
        with ThreadPoolExecutor(max_workers=10) as executor:
            stock_data = list(filter(None, executor.map(self.fetch_stock_data, self.tickers)))

        df_stocks = pd.DataFrame(stock_data)
        if df_stocks.empty:
            print("No valid stock data collected.")
            return []

        df_stocks = self.apply_filters(df_stocks)

        # Normalize and score
        for column in ['volatility', 'volume', 'liquidity', 'trend']:
            df_stocks[column] = (df_stocks[column] - df_stocks[column].min()) / (
                df_stocks[column].max() - df_stocks[column].min() + 1e-9)

        df_stocks['score'] = (
            df_stocks['volatility'] * 0.3 +
            df_stocks['volume'] * 0.3 +
            df_stocks['liquidity'] * 0.2 +
            df_stocks['trend'] * 0.2
        )

        top_stocks = df_stocks.nlargest(self.max_stocks, 'score')
        print("Top selected stocks:")
        print(top_stocks[['ticker', 'score']])

    
        return top_stocks['ticker'].tolist()

def select_stocks(tickers=None, max_stocks=3):
    """
    Module-level function to select top-performing stocks.
    
    Args:
        tickers (list, optional): List of stock tickers to analyze. Defaults to popular tech stocks.
        max_stocks (int, optional): Maximum number of stocks to return. Defaults to 3.
        
    Returns:
        list: Ticker symbols of top-performing stocks
    """
    if tickers is None:
        tickers = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META', 'TSLA', 'JNJ', 'JPM', 'V', 'NVDA']
    
    selector = DynamicStockSelector(tickers=tickers, max_stocks=max_stocks)
    return selector.select_stocks()



In [3]:
def fetch_candles(ticker, interval, num_candles):
    interval_mapping = {
        '1m': '1d', '5m': '5d', '15m': '5d', '30m': '5d',
        '1h': '1y', '1d': '60d', '1wk': '1y', '1mo': '2y'
    }

    if interval not in interval_mapping:
        raise ValueError(f"Unsupported interval: {interval}")

    try:
        df = yf.Ticker(ticker).history(period=interval_mapping[interval], interval=interval)
        df = df.tail(num_candles).reset_index()
        df.columns = [col.lower() for col in df.columns]

        # filename = f"{ticker}_{interval}_{num_candles}_candles.csv"
        filename = f"candles.csv"
        df.to_csv(filename, index=False)
        print(f"Saved {len(df)} candles for {ticker} to {filename}")
        return df

    except Exception as e:
        print(f"Failed to fetch data for {ticker}: {e}")
        return None

if __name__ == "__main__":
    # --- USER SETTINGS ---
    interval = "1h"         # e.g., "1d", "1h", "15m"
    num_candles = 1738        # Number of candles

    # --- FETCH TOP STOCKS ---
    try:
        top_tickers = select_stocks()
    except Exception as e:
        print(f"Error calling select_stocks(): {e}")
        top_tickers = []

    if not top_tickers:
        print("No tickers returned from top_performer.select_stocks()")
    else:
        print(f"Fetching {num_candles} {interval} candles for:", ", ".join(top_tickers))
        for ticker in top_tickers:
            fetch_candles(ticker, interval, num_candles)


# data = pd.read_csv('candles.csv',
#                    parse_dates=['datetime'],
#                    index_col='datetime')
# # ensure timezone doesn’t break any numpy ops
# data.index = data.index.tz_convert(None)


$GOOGL: possibly delisted; no price data found  (period=1y)


Error processing GOOGL: Insufficient data


$AAPL: possibly delisted; no price data found  (period=1y)


Error processing AAPL: Insufficient data


$V: possibly delisted; no price data found  (period=1y)


Error processing V: Insufficient data


$AMZN: possibly delisted; no price data found  (period=1y)


Error processing AMZN: Insufficient data


$JNJ: possibly delisted; no price data found  (period=1y)


Error processing JNJ: Insufficient data


$JPM: possibly delisted; no price data found  (period=1y)


Error processing JPM: Insufficient data
Top selected stocks:
Empty DataFrame
Columns: [ticker, score]
Index: []
No tickers returned from top_performer.select_stocks()


In [4]:
#Data

print(data.head())

                                 open        high         low       close  \
datetime                                                                    
2024-05-03 09:30:00-04:00  186.815002  187.229996  185.429993  186.880798   
2024-05-03 10:30:00-04:00  186.895004  187.869995  185.725006  185.773895   
2024-05-03 11:30:00-04:00  185.800003  186.789993  185.800003  186.335007   
2024-05-03 12:30:00-04:00  186.324997  186.800003  185.950104  186.270004   
2024-05-03 13:30:00-04:00  186.270004  186.658203  186.050003  186.414993   

                             volume  dividends  stock splits  
datetime                                                      
2024-05-03 09:30:00-04:00  12693085        0.0           0.0  
2024-05-03 10:30:00-04:00   6400873        0.0           0.0  
2024-05-03 11:30:00-04:00   3127625        0.0           0.0  
2024-05-03 12:30:00-04:00   2636132        0.0           0.0  
2024-05-03 13:30:00-04:00   2638140        0.0           0.0  


In [5]:
# Backward Pass
def mse(actual, predicted):
    return np.mean((actual-predicted)**2)

def mse_grad(actual, predicted):
    return (predicted - actual)

In [6]:
import math

def standard_scale(df, columns):
    scaled_df = df[columns].copy()
    for column in columns:
        mean = scaled_df[column].mean()
        std = scaled_df[column].std()
        scaled_df[column] = (scaled_df[column] - mean) / std
    return scaled_df

# Apply scaling
# Define predictors and target
PREDICTORS = ["open", "high", "low"]
TARGET = "close"

# Scale our data to mean 0
data[PREDICTORS] = standard_scale(data, PREDICTORS)

# Split into train, valid, test sets
np.random.seed(0)
split_data = np.split(data, [int(.7*len(data)), int(.85*len(data))])
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = [[d[PREDICTORS].to_numpy(), d[[TARGET]].to_numpy()] for d in split_data]

  return bound(*args, **kwds)


In [7]:
def init_params(layer_conf):
    layers = []
    for i in range(1, len(layer_conf)):
        # np.random.seed(0)
        k = 1 / math.sqrt(layer_conf[i]["hidden"])
        
        i_weight = np.random.rand(layer_conf[i-1]["units"], layer_conf[i]["hidden"]) * 2 * k - k
        h_weight = np.random.rand(layer_conf[i]["hidden"], layer_conf[i]["hidden"]) * 2 * k - k
        h_bias = np.random.rand(1, layer_conf[i]["hidden"]) * 2 * k - k

        o_weight = np.random.rand(layer_conf[i]["hidden"], layer_conf[i]["output"]) * 2 * k - k
        o_bias = np.random.rand(1, layer_conf[i]["output"]) * 2 * k - k

        layers.append(
            [i_weight, h_weight, h_bias, o_weight, o_bias]
        )
    return layers


In [8]:
layer_conf = [
    {"type": "input", "units": 3}, # 3 is the number of features
    {"type": "rnn", "hidden": 3, "output": 1}, # make our 3 features into 4 and give 1 output
]

In [9]:
# Forward Pass
def forward(x, layers):
    outputs = []
    hiddens = []    
    for i in range(len(layers)):
        i_weight, h_weight, h_bias, o_weight, o_bias = layers[i] # Get all of the info
        hidden = np.zeros((x.shape[0], i_weight.shape[1])) # shape = (num inputs, num of hidden units)
        output = np.zeros((x.shape[0], o_weight.shape[1])) # shape = (num inputs, num of output units)

        for j in range(x.shape[0]): # Go through all of the inputs
            input_x = x[j,:][np.newaxis,:] @ i_weight # apply weights to x
            hidden_x = input_x + hidden[max(j-1,0),:][np.newaxis,:] @ h_weight + h_bias # gets current higgen state, apply weights, add biases and current input_x
            hidden_x = np.tanh(hidden_x) # activation function
            hidden[j,:] = hidden_x

            # output 
            output_x = hidden_x @ o_weight + o_bias
            output[j,:] = output_x

        hiddens.append(hidden)
        outputs.append(output)
    return hiddens, outputs[-1]

In [10]:
# Backward Pass
def backward(layers, x, lr, grad, hiddens):
    for i in range(len(layers)):
        i_weight, h_weight, h_bias, o_weight, o_bias = layers[i]  # Get layer parameters
        hidden = hiddens[i]  # Hidden states for current layer
        next_h_grad = None

        # Initialize gradients
        o_weight_grad = np.zeros_like(o_weight)
        o_bias_grad = np.zeros_like(o_bias)
        h_weight_grad = np.zeros_like(h_weight)
        h_bias_grad = np.zeros_like(h_bias)
        i_weight_grad = np.zeros_like(i_weight)

        for j in range(x.shape[0] - 1, -1, -1):  # Backprop through time
            out_grad = grad[j][np.newaxis, :]  # Shape (1, output_dim)

            # Output weight and bias gradient
            o_weight_grad += hidden[j][:, np.newaxis] @ out_grad
            o_bias_grad += out_grad

            # Propagate to hidden
            h_grad = out_grad @ o_weight.T

            if j < x.shape[0] - 1:
                # Backprop through next hidden state's gradient
                hh_grad = next_h_grad @ h_weight.T
                h_grad += hh_grad

            # Apply tanh derivative
            tanh_deriv = 1 - hidden[j][np.newaxis, :] ** 2
            h_grad = np.multiply(h_grad, tanh_deriv)

            next_h_grad = h_grad.copy()

            if j > 0:
                h_weight_grad += hidden[j - 1][:, np.newaxis] @ h_grad
                h_bias_grad += h_grad

            i_weight_grad += x[j][:, np.newaxis] @ h_grad

        # Normalize and apply gradients
        scale = lr / x.shape[0]
        i_weight -= i_weight_grad * scale
        h_weight -= h_weight_grad * scale
        h_bias -= h_bias_grad * scale
        o_weight -= o_weight_grad * scale
        o_bias -= o_bias_grad * scale

        layers[i] = [i_weight, h_weight, h_bias, o_weight, o_bias]

    return layers


In [None]:
epochs = 10
lr = 1e-5

layers = init_params(layer_conf)

for epoch in range(epochs):
    sequence_len = 100
    epoch_loss = 0
    seq_loss = 0
    for j in range(train_x.shape[0] - sequence_len):
        seq_x = train_x[j: (j + sequence_len),]
        seq_y = train_y[j: (j + sequence_len),]
        hiddens, outputs = forward(seq_x, layers)
        grad = mse_grad(seq_y, outputs)
        params = backward(layers, seq_x, lr, grad, hiddens)
        epoch_loss += mse(seq_y, outputs)

    # if epoch % 50 == 0:
    valid_loss = 0
    for j in range(valid_x.shape[0] - sequence_len):
        seq_x = valid_x[j: (j+sequence_len),]
        seq_y = valid_y[j: (j+sequence_len),]
        _, outputs = forward(seq_x, layers)
        valid_loss += mse(seq_y,outputs)
    print(f"Epoch: {epoch} train loss {epoch_loss / len(train_x)} valid loss {valid_loss / len(valid_x)}")


Epoch: 0 train loss 32980.218794044726 valid loss 30004.828127463476
Epoch: 1 train loss 30275.76348110781 valid loss 27869.360479112438
Epoch: 2 train loss 27722.274373011933 valid loss 25892.455839760285
Epoch: 3 train loss 25382.065408850576 valid loss 24068.048074624417
Epoch: 4 train loss 23239.81890440431 valid loss 22384.84817034795
Epoch: 5 train loss 21279.421436189365 valid loss 20831.750588530784
Epoch: 6 train loss 19485.698822247643 valid loss 19398.372005160792
Epoch: 7 train loss 17844.617707358957 valid loss 18075.123766216973
Epoch: 8 train loss 16343.270500269413 valid loss 16853.18656892281
Epoch: 9 train loss 14969.811975490507 valid loss 15724.459504180708
Epoch: 10 train loss 13713.384246810656 valid loss 14681.50403067447
Epoch: 11 train loss 12564.040927780507 valid loss 13717.48963852021
Epoch: 12 train loss 11512.674257298213 valid loss 12826.143317833463
Epoch: 13 train loss 10550.946572986346 valid loss 12001.703248710404
Epoch: 14 train loss 9671.2265641385

In [None]:
sequence_len = 100
j = valid_x.shape[0] - sequence_len
seq_x = valid_x[j: (j + sequence_len),]
seq_y = valid_y[j: (j + sequence_len),]
_, outputs = forward(seq_x, layers)

# Convert to 1D if needed
actual = seq_y.squeeze()
predicted = outputs.squeeze()

# Plot
plt.figure(figsize=(10, 5))
plt.plot(actual, label="Actual")
plt.plot(predicted, label="Predicted")
plt.title("Last Validation Sequence: Predicted vs Actual")
plt.xlabel("Time Step")
plt.ylabel("Value")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()