In [1]:
import numpy as np
# !pip install cupy-cuda12x

# import cupy as np
import pandas as pd

#Read data and fill missing values
data = pd.read_csv("clean_weather.csv", index_col=0)
data = data.ffill()

In [23]:
import yfinance as yf
import pandas as pd
import numpy as np
from concurrent.futures import ThreadPoolExecutor

class DynamicStockSelector:
    def __init__(self, tickers, max_stocks=3):
        self.tickers = tickers
        self.max_stocks = max_stocks

    def fetch_stock_data(self, ticker):
        """Fetch and process data for a single NYSE stock."""
        try:
            df = yf.Ticker(ticker).history(period="2d", interval="1h")
            if df.empty or len(df) < 2:
                raise ValueError("Insufficient data")

            df = df.reset_index()
            df.columns = [c.lower() for c in df.columns]

            volatility = df['close'].pct_change().std()
            volume = df['volume'].mean()
            liquidity = df['close'].iloc[-1] * df['volume'].iloc[-1]
            trend = (df['close'].iloc[-1] - df['close'].iloc[0]) / df['close'].iloc[0]

            return {
                'ticker': ticker,
                'volatility': volatility,
                'volume': volume,
                'liquidity': liquidity,
                'trend': trend
            }

        except Exception as e:
            print(f"Error processing {ticker}: {e}")
            return None

    def apply_filters(self, df):
        """Apply filters to remove low-performing stocks."""
        min_volume = df['volume'].quantile(0.25)
        df = df[df['volume'] > min_volume]

        min_liquidity = df['liquidity'].quantile(0.25)
        df = df[df['liquidity'] > min_liquidity]

        vol_lower, vol_upper = df['volatility'].quantile([0.25, 0.75])
        df = df[(df['volatility'] > vol_lower) & (df['volatility'] < vol_upper)]

        return df

    def select_stocks(self):
        with ThreadPoolExecutor(max_workers=10) as executor:
            stock_data = list(filter(None, executor.map(self.fetch_stock_data, self.tickers)))

        df_stocks = pd.DataFrame(stock_data)
        if df_stocks.empty:
            print("No valid stock data collected.")
            return []

        df_stocks = self.apply_filters(df_stocks)

        # Normalize and score
        for column in ['volatility', 'volume', 'liquidity', 'trend']:
            df_stocks[column] = (df_stocks[column] - df_stocks[column].min()) / (
                df_stocks[column].max() - df_stocks[column].min() + 1e-9)

        df_stocks['score'] = (
            df_stocks['volatility'] * 0.3 +
            df_stocks['volume'] * 0.3 +
            df_stocks['liquidity'] * 0.2 +
            df_stocks['trend'] * 0.2
        )

        top_stocks = df_stocks.nlargest(self.max_stocks, 'score')
        print("Top selected stocks:")
        print(top_stocks[['ticker', 'score']])

    
        return top_stocks['ticker'].tolist()

def select_stocks(tickers=None, max_stocks=3):
    """
    Module-level function to select top-performing stocks.
    
    Args:
        tickers (list, optional): List of stock tickers to analyze. Defaults to popular tech stocks.
        max_stocks (int, optional): Maximum number of stocks to return. Defaults to 3.
        
    Returns:
        list: Ticker symbols of top-performing stocks
    """
    if tickers is None:
        tickers = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META', 'TSLA', 'JNJ', 'JPM', 'V', 'NVDA']
    
    selector = DynamicStockSelector(tickers=tickers, max_stocks=max_stocks)
    return selector.select_stocks()


ModuleNotFoundError: No module named 'yfinance'

In [None]:
import yfinance as yf
import pandas as pd
import top_performer  

def fetch_candles(ticker, interval, num_candles):
    interval_mapping = {
        '1m': '1d', '5m': '5d', '15m': '5d', '30m': '5d',
        '1h': '7d', '1d': '60d', '1wk': '1y', '1mo': '2y'
    }

    if interval not in interval_mapping:
        raise ValueError(f"Unsupported interval: {interval}")

    try:
        df = yf.Ticker(ticker).history(period=interval_mapping[interval], interval=interval)
        df = df.tail(num_candles).reset_index()
        df.columns = [col.lower() for col in df.columns]

        filename = f"{ticker}_{interval}_{num_candles}_candles.csv"
        df.to_csv(filename, index=False)
        print(f"Saved {len(df)} candles for {ticker} to {filename}")
        return df

    except Exception as e:
        print(f"Failed to fetch data for {ticker}: {e}")
        return None

if __name__ == "__main__":
    # --- USER SETTINGS ---
    interval = "1h"         # e.g., "1d", "1h", "15m"
    num_candles = 48        # Number of candles

    # --- FETCH TOP STOCKS ---
    try:
        top_tickers = top_performer.select_stocks()
    except Exception as e:
        print(f"Error calling select_stocks(): {e}")
        top_tickers = []

    if not top_tickers:
        print("No tickers returned from top_performer.select_stocks()")
    else:
        print(f"Fetching {num_candles} {interval} candles for:", ", ".join(top_tickers))
        for ticker in top_tickers:
            fetch_candles(ticker, interval, num_candles)


In [2]:
#Data

print(data.head())

            tmax  tmin  rain  tmax_tomorrow
1970-01-01  60.0  35.0   0.0           52.0
1970-01-02  52.0  39.0   0.0           52.0
1970-01-03  52.0  35.0   0.0           53.0
1970-01-04  53.0  36.0   0.0           52.0
1970-01-05  52.0  35.0   0.0           50.0


In [3]:
# Manual Forward Pass (Dont need this anymore)

np.random.seed(0)

i_weight = np.random.rand(1,2)
h_weight = np.random.rand(2,2)
o_weight = np.random.rand(2,1)

temps = data["tmax"].tail(3).to_numpy()
temps

array([66., 70., 62.])

In [4]:
# Manual Forward Pass (Dont need this anymore)


x0 = temps[0].reshape(1,1)
x1 = temps[1].reshape(1,1)
x2 = temps[2].reshape(1,1)

x0

array([[66.]])

In [5]:
# Manual Forward Pass (Dont need this anymore)


#LAYER 1
xi_0 = x0 @ i_weight

xi_0

array([[36.22169126, 47.20249818]])

In [6]:
# Manual Forward Pass (Dont need this anymore)


xh_0 = np.maximum(0,xi_0) #relu

xh_0

array([[36.22169126, 47.20249818]])

In [7]:
# Manual Forward Pass (Dont need this anymore)


xo_0 = xh_0 @ o_weight
xo_0

array([[57.94406231]])

In [8]:
# Manual Forward Pass (Dont need this anymore)


#Layer 1
xi_1 = x1 @ i_weight # apply weights on input x1

xh = xh_0 @ h_weight # Update the hidden Layer
xh_1 = np.maximum(0, xh + xi_1) # RELU for the hidden layer

xo_1 = xh_1 @ o_weight

xo_1

array([[124.54916092]])

In [9]:
# Manual Forward Pass (Dont need this anymore)


#Layer 2
xi_2 = x2 @ i_weight # apply weights on input x1

xh = xh_1 @ h_weight # Update the hidden Layer
xh_2 = np.maximum(0, xh + xi_2) # RELU for the hidden layer

xo_2 = xh_2 @ o_weight

xo_2

array([[190.94853131]])

In [10]:
# Initialize variables

np.random.seed(0)

i_weight = np.random.rand(1,5) / 5 - .1
h_weight = np.random.rand(5,5) / 5 - .1
h_bias = np.random.rand(1,5) / 5 - .1
o_weight = np.random.rand(5,1) * 50
o_bias = np.random.rand(1,1)

In [11]:
# Forward Pass

outputs = np.zeros(3)
hiddens = np.zeros((3,5))
prev_hidden = None
sequence = data["tmax"].tail(3).to_numpy()

for i in range(3):
    x = sequence[i].reshape(1,1)
    xi = x @ i_weight

    if prev_hidden is None: # check if it is the first iteration
        xh = xi
    else:
        xh = xi + prev_hidden @ h_weight + h_bias
    
    xh = np.tanh(xh) # apply activation function to hidden layer
    prev_hidden = xh
    hiddens[i,] = xh # save the hidden layer for back propogation
    xo = xh @ o_weight + o_bias
    outputs[i] = xo.item()
        

In [12]:
outputs

array([74.31470595, 80.66149404, 77.67852446])

In [13]:
# Backward Pass
def mse(actual, predicted):
    return np.mean((actual-predicted)**2)

def mse_grad(actual, predicted):
    return (predicted - actual)

In [14]:
actuals = np.array([70,62,65])

loss_grad = mse_grad(actuals, outputs)
loss_grad

array([ 4.31470595, 18.66149404, 12.67852446])

In [15]:
next_hidden = None

o_weight_grad, o_bias_grad, h_weight_grad, h_bias_grad, i_weight_grad =  [0] * 5

for i in range(2, -1, -1):
    l_grad = loss_grad[i].reshape(1,1)

    o_weight_grad += hiddens[i][:,np.newaxis] @ l_grad
    o_bias_grad += np.mean(l_grad)

    o_grad = l_grad @ o_weight.T
    if next_hidden is None:
        h_grad = o_grad
    else:
        h_grad = o_grad + next_hidden @ h_weight.T

    tanh_deriv = 1 - hiddens[i,:][np.newaxis,:]
    h_grad = np.multiply(h_grad, tanh_deriv)

    next_hidden = h_grad

    if i > 0:
        h_weight_grad += hiddens[i-1,:][:,np.newaxis] @ h_grad
        h_bias_grad += np.mean(h_grad)

    i_weight_grad += sequence[i].reshape(1,1).T @ h_grad

In [16]:
i_weight_grad

array([[ 32352.67091083,    392.99655233,   9528.43418739,
         35514.29142052, 146401.10347147]])

In [17]:
import math

def standard_scale(df, columns):
    scaled_df = df[columns].copy()
    for column in columns:
        mean = scaled_df[column].mean()
        std = scaled_df[column].std()
        scaled_df[column] = (scaled_df[column] - mean) / std
    return scaled_df

# Apply scaling
# Define predictors and target
PREDICTORS = ["tmax", "tmin", "rain"]
TARGET = "tmax_tomorrow"

# Scale our data to mean 0
data[PREDICTORS] = standard_scale(data, PREDICTORS)

# Split into train, valid, test sets
np.random.seed(0)
split_data = np.split(data, [int(.7*len(data)), int(.85*len(data))])
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = [[d[PREDICTORS].to_numpy(), d[[TARGET]].to_numpy()] for d in split_data]

  return bound(*args, **kwds)


In [18]:
def init_params(layer_conf):
    layers = []
    for i in range(1, len(layer_conf)):
        np.random.seed(0)
        k = 1 / math.sqrt(layer_conf[i]["hidden"])
        
        i_weight = np.random.rand(layer_conf[i-1]["units"], layer_conf[i]["hidden"]) * 2 * k - k
        h_weight = np.random.rand(layer_conf[i]["hidden"], layer_conf[i]["hidden"]) * 2 * k - k
        h_bias = np.random.rand(1, layer_conf[i]["hidden"]) * 2 * k - k

        o_weight = np.random.rand(layer_conf[i]["hidden"], layer_conf[i]["output"]) * 2 * k - k
        o_bias = np.random.rand(1, layer_conf[i]["output"]) * 2 * k - k

        layers.append(
            [i_weight, h_weight, h_bias, o_weight, o_bias]
        )
    return layers


In [19]:
layer_conf = [
    {"type": "input", "units": 3}, # 3 is the number of features
    {"type": "rnn", "hidden": 3, "output": 1}, # make our 3 features into 4 and give 1 output
]

In [20]:
# Forward Pass
def forward(x, layers):
    outputs = []
    hiddens = []    
    for i in range(len(layers)):
        i_weight, h_weight, h_bias, o_weight, o_bias = layers[i] # Get all of the info
        hidden = np.zeros((x.shape[0], i_weight.shape[1])) # shape = (num inputs, num of hidden units)
        output = np.zeros((x.shape[0], o_weight.shape[1])) # shape = (num inputs, num of output units)

        for j in range(x.shape[0]): # Go through all of the inputs
            input_x = x[j,:][np.newaxis,:] @ i_weight # apply weights to x
            hidden_x = input_x + hidden[max(j-1,0),:][np.newaxis,:] @ h_weight + h_bias # gets current higgen state, apply weights, add biases and current input_x
            hidden_x = np.tanh(hidden_x) # activation function
            hidden[j,:] = hidden_x

            # output 
            output_x = hidden_x @ o_weight + o_bias
            output[j,:] = output_x

        hiddens.append(hidden)
        outputs.append(output)
    return hiddens, outputs[-1]

In [21]:
# Backward Pass
def backward(layers, x, lr, grad, hiddens):
    for i in range(len(layers)):
        i_weight, h_weight, h_bias, o_weight, o_bias = layers[i]  # Get layer parameters
        hidden = hiddens[i]  # Hidden states for current layer
        next_h_grad = None

        # Initialize gradients
        o_weight_grad = np.zeros_like(o_weight)
        o_bias_grad = np.zeros_like(o_bias)
        h_weight_grad = np.zeros_like(h_weight)
        h_bias_grad = np.zeros_like(h_bias)
        i_weight_grad = np.zeros_like(i_weight)

        for j in range(x.shape[0] - 1, -1, -1):  # Backprop through time
            out_grad = grad[j][np.newaxis, :]  # Shape (1, output_dim)

            # Output weight and bias gradient
            o_weight_grad += hidden[j][:, np.newaxis] @ out_grad
            o_bias_grad += out_grad

            # Propagate to hidden
            h_grad = out_grad @ o_weight.T

            if j < x.shape[0] - 1:
                # Backprop through next hidden state's gradient
                hh_grad = next_h_grad @ h_weight.T
                h_grad += hh_grad

            # Apply tanh derivative
            tanh_deriv = 1 - hidden[j][np.newaxis, :] ** 2
            h_grad = np.multiply(h_grad, tanh_deriv)

            next_h_grad = h_grad.copy()

            if j > 0:
                h_weight_grad += hidden[j - 1][:, np.newaxis] @ h_grad
                h_bias_grad += h_grad

            i_weight_grad += x[j][:, np.newaxis] @ h_grad

        # Normalize and apply gradients
        scale = lr / x.shape[0]
        i_weight -= i_weight_grad * scale
        h_weight -= h_weight_grad * scale
        h_bias -= h_bias_grad * scale
        o_weight -= o_weight_grad * scale
        o_bias -= o_bias_grad * scale

        layers[i] = [i_weight, h_weight, h_bias, o_weight, o_bias]

    return layers


In [22]:
epochs = 150
lr = 1e-5

layers = init_params(layer_conf)

for epoch in range(epochs):
    sequence_len = 7
    epoch_loss = 0
    seq_loss = 0
    for j in range(train_x.shape[0] - sequence_len):
        seq_x = train_x[j: (j + sequence_len),]
        seq_y = train_y[j: (j + sequence_len),]

        hiddens, outputs = forward(seq_x, layers)
        grad = mse_grad(seq_y, outputs)
        params = backward(layers, seq_x, lr, grad, hiddens)
        epoch_loss += mse(seq_y, outputs)

    # if epoch % 50 == 0:
    valid_loss = 0
    for j in range(valid_x.shape[0] - sequence_len):
        seq_x = valid_x[j: (j+sequence_len),]
        seq_y = valid_y[j: (j+sequence_len),]
        _, outputs = forward(seq_x, layers)
        valid_loss += mse(seq_y,outputs)
    print(f"Epoch: {epoch} train loss {epoch_loss / len(train_x)} valid loss {valid_loss / len(valid_x)}")

Epoch: 0 train loss 3433.71089821061 valid loss 2644.251124146831
Epoch: 1 train loss 1771.443825637194 valid loss 1358.8473333515356
Epoch: 2 train loss 894.5481972025864 valid loss 719.3313070693455
Epoch: 3 train loss 468.8375400812705 valid loss 401.17169571582457
Epoch: 4 train loss 263.29661326443454 valid loss 241.643701565443
Epoch: 5 train loss 164.29171328901373 valid loss 160.6026010843097
Epoch: 6 train loss 116.67961741772037 valid loss 118.71097744097042
Epoch: 7 train loss 93.84596915275891 valid loss 96.6091630463487
Epoch: 8 train loss 82.98127188970948 valid loss 84.70792008722387
Epoch: 9 train loss 77.93358105537212 valid loss 78.21579757013822
Epoch: 10 train loss 75.76313935757865 valid loss 74.72324814499964
Epoch: 11 train loss 75.09601168951343 valid loss 73.04256649500043
Epoch: 12 train loss 75.37166116231857 valid loss 72.71765087187686
Epoch: 13 train loss 76.65796969496003 valid loss 74.28906552108091
Epoch: 14 train loss 75.90095331411781 valid loss 74.67


KeyboardInterrupt

