# CS 267A - Final Project | Pyro

## Pyro Stock Modeling

In [None]:
!pip install --upgrade -q jax==0.1.57 jaxlib==0.1.37 numpyro
!pip install yfinance

import jax.numpy as np
from jax import random
import numpyro; numpyro.set_host_device_count(4)
import numpyro.distributions as dist
from numpyro.infer import MCMC, NUTS
assert numpyro.__version__.startswith('0.2.4')

from scipy.stats import norm



In [None]:
def calc_ar(data, predict_len = 1):
    """AR(1) model whose parameters are calculated in closed form, not learned.

    The coefficient psi is computed as sum(y[t] * y[t-1]) / sum(y[t-1] ** 2).
    The residuals y[t] - psi * y[t-1] are fitted with a normal distribution,
    and noise drawn from that distribution drives the forecast recursion
    y_next = epsilon + psi * y_prev, starting from the last training value.

    Parameters:
    data (np.array): 1-D array with training data
    predict_len (int): Number of time-steps for which future prediction is needed

    Returns:
    np.array: Array of length predict_len with the relative change of each
        forecast step with respect to the previous one (the first entry is
        relative to data[-1]). Note that these are growth rates, not the
        forecast values themselves.
    """

    series = np.array(data)
    lagged = series[:-1]
    current = series[1:]

    # Calculate psi
    # Source: Page 7 of https://www.math.utah.edu/~zhorvath/ar1.pdf
    psi = np.sum(current * lagged) / np.sum(lagged * lagged)

    # Find the distribution of epsilons (one residual per consecutive pair)
    residuals = current - psi * lagged
    (loc, scale) = norm.fit(residuals)

    # For prediction, sample some epsilons from the distribution.
    # NOTE: the PRNG key is fixed, so every call draws from the same random
    # stream; only loc and scale differ between calls.
    epsilons = numpyro.sample("epsilons", dist.Normal(loc=loc, scale=scale), rng_key=random.PRNGKey(2), sample_shape=(1, predict_len))[0]

    # Make the predictions; each step feeds on the previous forecast, so this
    # recursion has to stay sequential.
    predictions = []
    y_prev = series[-1]
    for i in range(predict_len):
        y_prev = epsilons[i] + y_prev * psi
        predictions.append(y_prev)

    # Relative change of every forecast step against the value that precedes
    # it: the last observation for step 0, the previous forecast afterwards.
    previous = [series[-1]] + predictions[:-1]
    growth = [(nxt - prev) / prev for nxt, prev in zip(predictions, previous)]
    return np.array(growth)
def profit(test,truth):
    """Value of a 100-unit stake placed on the top-ranked row at every step.

    Parameters:
    test (np.array): 2-D array of scores; within each column the row with the
        highest score is the one selected
    truth (np.array): 2-D array indexed the same way, holding the realised
        growth that is applied to the stake

    Returns:
    list: One entry per column of test, equal to 100 * (truth[pick, col] + 1)
    """
    n_steps = len(test[0])
    return [
        100 * (truth[np.argmax(test[:, step]), step] + 1)
        for step in range(n_steps)
    ]
def get_dataset(price_data, n ,growth_data, train_size = 500, test_size = 50):
    """Cut the n-th training window and the growth slice that follows it.

    Window n starts at column n * train_size, so consecutive windows advance
    by train_size columns.

    Parameters:
    price_data (np.array): 2-D array, sliced along its second axis for training
    n (int): Index of the window to extract
    growth_data (np.array): 2-D array, sliced along its second axis for evaluation
    train_size (int): Number of columns in the training slice
    test_size (int): Number of columns in the evaluation slice

    Returns:
    tuple: (train, grow), where train is price_data[:, start:start+train_size]
        and grow is the test_size columns of growth_data right after it.
    int: When the window would run past the end of price_data, the number of
        columns left after the window start is returned instead of a tuple;
        callers use the return type to detect that the data is exhausted.
    """
    start_point = n*train_size
    split_point = start_point + train_size
    final_point = split_point + test_size

    available = len(price_data[0])
    if final_point > available:
        return available - start_point

    train = price_data[:,start_point:split_point]
    grow = growth_data[:,split_point:final_point]
    return train,grow
def get_results(train,train_len = 50):
    """Run calc_ar on every row of train and stack the outputs.

    Parameters:
    train (np.array): 2-D array; each row is passed to calc_ar as one series
    train_len (int): Forwarded to calc_ar as predict_len, i.e. the number of
        steps computed per row

    Returns:
    np.array: One row of calc_ar output per row of train
    """
    per_row = [
        calc_ar(train[row], predict_len=train_len)
        for row in range(len(train))
    ]
    # Progress marker printed once all rows are done.
    print("is_it_calc?")
    return np.array(per_row)
def total_dataset(price_data,gt_data,train_size = 50):
    """Evaluate the AR strategy on every window that fits in the data.

    Windows are requested from get_dataset with increasing index until it
    signals exhaustion. For each window, the per-row outputs of get_results
    are scored with profit against the realised growth, and the realised
    growth is scored against itself as the best achievable result.

    Parameters:
    price_data (np.array): 2-D array handed to get_dataset as price data
    gt_data (np.array): 2-D array handed to get_dataset as growth data
    train_size (int): Despite its name, this is the number of steps evaluated
        per window: it is passed to get_dataset as test_size and to
        get_results as train_len. The training window length itself is
        get_dataset's default.

    Returns:
    tuple: (actual_total, best_total), two lists with one profit() result
        per processed window.
    """
    actual_total = []
    best_total = []
    count = 0
    while True:
        # Fetch the window once, with the same test_size that is used for
        # scoring, so the stop condition always matches the data consumed.
        dataset = get_dataset(price_data, count, gt_data, test_size=train_size)
        # get_dataset returns an int instead of a tuple once the window no
        # longer fits into the data.
        if isinstance(dataset, int):
            break
        print("count",count)
        train, growth = dataset
        predictions = get_results(train, train_len=train_size)
        actual_total.append(profit(predictions, growth))
        best_total.append(profit(growth, growth))
        count += 1
    return actual_total, best_total

In [None]:
# Run the windowed evaluation with the default sizes; first_round holds the
# (actual_total, best_total) pair returned by total_dataset.
first_round = total_dataset(price_data, gt_data)

count 0
is_it_calc?
count 1
is_it_calc?
count 2
is_it_calc?
count 3
is_it_calc?
count 4
is_it_calc?
count 5


In [None]:
# Re-run a single window (index 4) with get_dataset's and get_results'
# default sizes.
dataset = get_dataset(price_data,4,gt_data)
predictions = get_results(dataset[0])
# Income comparison for this window, currently disabled.
#actual_income = profit(predictions,dataset[1])
#best_income = profit(dataset[1],dataset[1])

is_it_calc?


In [None]:
# NOTE(review): presumably the strategy's total income minus the best
# achievable income -- confirm where these two figures come from.
103669.37569486132-118125.99133141339

-14456.615636552073