In [12]:
import numpy as np
import pandas as pd

import gymnasium as gym

import yfinance as yf

from matplotlib import pyplot as plt
import matplotlib.dates as mdates

In [13]:
start = '2021-01-01'
end = '2024-08-31'

In [14]:
#, "ABBV","ABEV", "AMD","GOOG", "KO", "DIS", "FORD", "BABA", "META", "WMT", "RCL", "GLMD", "TUP","JNJ", "SQSP", "WTI", "INTC", "MSFT", "MELI", "TSLA", "ISRG", "KHC", "C", "FIS", "TPR", "PGR", "CAT", "MRNA"
tickets = ["BBVA", "NVDA", "AMZN", "BABA"]
print(len(tickets), " stocks.")

4  stocks.


In [15]:
def get_federal_interest_rates(start, end):
    df = pd.read_csv("data/federal_interest_rates.csv")
    df.rename(columns={'DATE': 'date', 'DFF': 'federal_interest_rate'}, inplace=True)
    df.date = df.date.astype(str)
    return df

def deannualize(annual_rate, periods=365):
    return (1 + annual_rate) ** (1/periods) - 1

def get_risk_free_rate(start, end):
    rates = yf.download("^IRX", start, end)
    rates.reset_index(inplace=True, drop=False)
    rates = rates[["Date", "Close"]]
    rates.rename(columns={'Date': 'date', "Close":"free_rate"}, inplace=True)
    
    rates.date = rates.date.astype(str)
    #daily = annualized.apply(deannualize)
    return rates   

def calc_slope(x):
    slope = np.polyfit(range(len(x)), x, 1)[0]
    return slope

def get_signals(df, ticket, lags):
    cols = ["date", "price_"+ticket, ticket+'_volatility7', ticket+'_volatility30']
        
    N = len(cols)
    for i in range(len(lags)):
        cols.append(ticket+"_returns"+str(lags[i]))
        df[cols[i+N]] = df['Close'].shift(1).pct_change(periods=lags[i])

    N = len(cols)
    for i in range(len(lags)):
        cols.append(ticket+"_slope"+str(lags[i]))
        df[cols[i+N]] = df['Close'].shift(1).rolling(lags[i]).apply(calc_slope)
        
        
    df[ticket+'_volatility7'] = df['Close'].shift(1).pct_change().rolling(7).std()
    df[ticket+'_volatility30'] = df['Close'].shift(1).pct_change().rolling(30).std()
    df.rename(columns={"Close": "price_"+ticket}, inplace=True)
    return df[cols]

def get_data(ticket, start, end):
    dates = pd.DataFrame(pd.date_range(start=start, end=end), columns=["date"])
    dates.date = dates.date.astype(str)
    data = yf.download(ticket, start, end)
    data.reset_index(inplace=True, drop=False)
    data = data[["Date", "Close"]]
    data.rename(columns={'Date': 'date'}, inplace=True)
    data.date = data.date.astype(str)
    data = dates.merge(data, on="date", how="left")
    data = data.interpolate()
    return data

In [16]:
def get_stock_data(tickets, start, end):
    df_hist = pd.DataFrame(pd.date_range(start=start, end=end), columns=["date"])
    df_hist.date = df_hist.date.astype(str)
    fed = get_federal_interest_rates(start, end)
    rates = get_risk_free_rate(start, end)

    df_hist = df_hist.merge(fed, on="date", how="left")    
    df_hist = df_hist.merge(rates, on="date", how="left")    
    
    for ticket in tickets:
        print("Ticket:", ticket)
        data = get_data(ticket, start, end)        
        data = get_signals(data, ticket, [3, 7, 15, 30])
        df_hist = df_hist.merge(data, on="date", how="left")

    df_hist = df_hist.interpolate()
    return df_hist

In [17]:
data = get_stock_data(tickets, start, end)

[*********************100%%**********************]  1 of 1 completed
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rates.rename(columns={'Date': 'date', "Close":"free_rate"}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rates.date = rates.date.astype(str)
[*********************100%%**********************]  1 of 1 completed
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.rename(columns={'Date': 'date'}, inplace=True)
A value is trying t

Ticket: BBVA


[*********************100%%**********************]  1 of 1 completed

Ticket: NVDA



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.rename(columns={'Date': 'date'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.date = data.date.astype(str)
  data = data.interpolate()
[*********************100%%**********************]  1 of 1 completed

Ticket: AMZN



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.rename(columns={'Date': 'date'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.date = data.date.astype(str)
  data = data.interpolate()
[*********************100%%**********************]  1 of 1 completed

Ticket: BABA



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.rename(columns={'Date': 'date'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.date = data.date.astype(str)
  data = data.interpolate()
  df_hist = df_hist.interpolate()


In [18]:
data[data.date >= "2022-01-01"]

Unnamed: 0,date,federal_interest_rate,free_rate,price_BBVA,BBVA_volatility7,BBVA_volatility30,BBVA_returns3,BBVA_returns7,BBVA_returns15,BBVA_returns30,...,BABA_volatility7,BABA_volatility30,BABA_returns3,BABA_returns7,BABA_returns15,BABA_returns30,BABA_slope3,BABA_slope7,BABA_slope15,BABA_slope30
365,2022-01-01,0.07,0.039667,5.896667,0.004899,0.012587,0.001706,0.005567,0.029825,0.113852,...,0.043697,0.033703,0.034756,0.005481,-0.012141,-0.030207,3.350002,0.384286,-0.187226,-0.173611
366,2022-01-02,0.07,0.046333,5.923333,0.005047,0.011688,-0.002256,0.007116,0.068237,0.087946,...,0.043624,0.033708,0.064502,0.014410,-0.022768,-0.021967,-1.834999,0.686697,-0.043458,-0.165689
367,2022-01-03,0.08,0.053000,5.950000,0.005178,0.011529,0.005659,0.008656,0.067267,0.100991,...,0.043508,0.029931,-0.025531,0.023419,0.000974,0.070472,0.529999,0.911429,0.055869,-0.222396
368,2022-01-04,0.08,0.080000,6.070000,0.005294,0.011345,0.013629,0.010187,0.066308,0.090409,...,0.043348,0.029321,0.013385,0.032507,0.025675,0.039192,0.529999,1.021071,0.100410,-0.252436
369,2022-01-05,0.08,0.085000,5.970000,0.008000,0.011603,0.029395,0.035836,0.081996,0.096988,...,0.042784,0.028731,0.002011,0.041463,0.039652,-0.001336,-0.145000,0.651786,0.046071,-0.263028
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1334,2024-08-27,5.33,4.968000,10.620000,0.009808,0.016092,-0.005666,0.005731,0.080369,-0.036890,...,0.024414,0.016407,-0.042735,-0.025623,0.014350,0.058702,-1.216667,0.081667,0.218691,0.247066
1335,2024-08-28,5.33,4.955000,10.560000,0.009369,0.016182,0.004730,0.022137,0.090349,-0.025390,...,0.020405,0.016386,-0.032584,0.005059,0.006674,0.045258,-0.763336,-0.395476,0.148548,0.235766
1336,2024-08-29,5.33,4.965000,10.560000,0.009745,0.016200,0.000948,0.009560,0.065590,-0.027624,...,0.017080,0.016896,-0.040453,-0.046809,-0.018249,0.012719,-1.070000,-0.727619,0.036155,0.215895
1337,2024-08-30,5.33,4.968000,10.650000,0.008958,0.016128,0.002849,0.017341,0.067745,-0.034735,...,0.019386,0.017096,-0.009051,-0.023385,0.019504,0.038718,-0.215000,-0.851549,-0.082524,0.200335


In [19]:
data[data.date >= "2022-01-01"].to_csv("data/train_portfolio4.csv", index=False)

In [20]:
start = '2024-07-01'
end = '2024-11-25'

In [21]:
test = get_stock_data(tickets, start, end)

[*********************100%%**********************]  1 of 1 completed
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rates.rename(columns={'Date': 'date', "Close":"free_rate"}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rates.date = rates.date.astype(str)
[*********************100%%**********************]  1 of 1 completed
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.rename(columns={'Date': 'date'}, inplace=True)
A value is trying t

Ticket: BBVA
Ticket: NVDA
Ticket: AMZN



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.rename(columns={'Date': 'date'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.date = data.date.astype(str)
  data = data.interpolate()
[*********************100%%**********************]  1 of 1 completed
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.rename(columns={'Date': 'date'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[r

Ticket: BABA


  df_hist = df_hist.interpolate()


In [22]:
test[test.date >= "2024-09-01"].to_csv("data/test_portfolio4.csv", index=False)