---
title: "Crowds"
author: "[Jason Foster](mailto:jason.j.foster@gmail.com)"
date: last-modified
categories:
  - analysis
  - finance
  - python
draft: true
execute:
  freeze: false
editor: 
  mode: source
---

In [None]:
# factors_r = ["SP500"] # "SP500" does not contain dividends
# factors_d = ["DTB3"]

In [None]:
#| echo: false
# exec(open("posts/helper-levels.py").read()) # local
# exec(open("../helper-levels.py").read())
# width = 20 * 3

# Parse web

In [None]:
import yfscreen as yfs

In [None]:
# Build a yfscreen query that matches records whose "categoryname" equals
# "Tactical Allocation", then fetch the matching records.
filters = ["eq", ["categoryname", "Tactical Allocation"]]
query = yfs.create_query(filters)
# NOTE(review): "mutualfund" presumably selects the security type and 250 the
# number of results requested -- confirm against the yfscreen documentation.
payload = yfs.create_payload("mutualfund", query, 250)
# `data` is later sorted with `sort_values`, so it is expected to be a pandas
# DataFrame -- TODO confirm the return type of `get_data`.
data = yfs.get_data(payload)

In [None]:
# Sort columns mapped to their direction (True = ascending). Net assets sort
# descending; every later column is an ascending tie-breaker.
sort_spec = {
  "netAssets.raw": False,
  "netExpenseRatio.raw": True,
  "firstTradeDateMilliseconds": True,
  "longName": True,
  "symbol": True
}
sorted_df = data.sort_values(
  by = list(sort_spec),
  ascending = list(sort_spec.values()),
  kind = "stable"
)

# Keep the first (top-ranked) row for each distinct net-assets value.
# NOTE(review): presumably this collapses share classes of one fund, which
# would report identical net assets -- confirm.
is_first_seen = ~sorted_df["netAssets.raw"].duplicated()
tickers = sorted_df.loc[is_first_seen, "symbol"].tolist()

In [None]:
# Tickers used as the regressors (allocation building blocks) in the
# optimization further down; they are appended to the screened tickers.
allocations = [
  "IVV",
  "IDEV",
  "IUSB",
  "IEMG",
  "IJH",
  "IAGG",
  "IJR"
]
tickers = [*tickers, *allocations]

# Optimization

In [None]:
import json
import cvxpy as cp

In [None]:
def min_rss_optim(x, y):
  """Fit constrained least-squares weights of `y` on the columns of `x`.

  Minimizes the residual sum of squares of `y - x @ w` subject to the weights
  summing to one and each weight lying between zero and one.

  Args:
    x: 2-D array with one column per regressor; `x.shape[1]` weights are fitted.
    y: Target values aligned with the rows of `x`.

  Returns:
    The `value` of the weight variable after solving.
    NOTE(review): cvxpy presumably leaves this as None when no solution is
    found -- confirm, since callers append the result unchecked.
  """
  n_weights = x.shape[1]
  weights = cp.Variable(n_weights)

  residuals = y - x @ weights
  rss = cp.Minimize(cp.sum_squares(residuals))

  fully_invested = cp.sum(weights) == 1
  non_negative = weights >= 0
  at_most_one = weights <= 1

  cp.Problem(rss, [fully_invested, non_negative, at_most_one]).solve()

  return weights.value

In [None]:
#| echo: false
#| output: false # pause one second after five requests
# exec(open("posts/helper-prices-yfh.py").read()) # local
# Run the shared price helper in this namespace. The context manager closes
# the file handle as soon as the helper's source has been read.
with open("../helper-prices-yfh.py") as helper_file: # run then render
  exec(helper_file.read())

# NOTE(review): `returns_df` and `scale` are not defined in this file;
# presumably the helper executed above creates them -- confirm.
# Smooth returns with a rolling mean over `scale["overlap"]` rows; with
# min_periods = 1 the earliest rows average over whatever rows are available.
overlap_df = returns_df.rolling(scale["overlap"], min_periods = 1).mean()

# overlap_df = overlap_df.dropna()
# overlap_x_df = overlap_df[factors]
# Regressors: the allocation columns, in the order listed in `allocations`.
overlap_x_df = overlap_df[allocations]
# overlap_y_df = overlap_df.loc[:, ~overlap_df.columns.isin(factors + allocations)]
# Targets: every remaining (non-allocation) column.
overlap_y_df = overlap_df.loc[:, ~overlap_df.columns.isin(allocations)]
# overlap_z_df = overlap_df[allocations]

In [None]:
def pnl(x):
  """Return the compounded return of the periodic returns in `x`.

  Each return is turned into a growth factor (1 + r), the factors are
  multiplied with NaNs ignored, and one is subtracted from the product.
  """
  growth_factors = 1 + x
  return np.nanprod(growth_factors) - 1

In [None]:
# Compounded return of every column over the trailing `width` rows; with
# min_periods = 1 the earliest rows use whatever history is available.
performance_df = (
  returns_df
  .rolling(window = width, min_periods = 1)
  .apply(func = pnl, raw = False)
)

In [None]:
# For each processed window, fit the allocation weights that best reproduce
# each fund's smoothed returns and record the fund's performance at the
# window's last row. Outputs: `result_ls` (one DataFrame per window) and
# `index_ls` (the index label of each window's last row).
n_rows = overlap_df.shape[0]
result_ls = []
index_ls = []

# Funds to fit: every ticker that is not itself an allocation. This does not
# depend on the window, so it is built once rather than on every iteration.
fund_tickers = [ticker for ticker in tickers if ticker not in allocations]

# for i in range(width - 1, n_rows):
# Only the last row is processed here; the commented loop above instead starts
# at row `width - 1` and runs through every later row.
for i in range(n_rows - 1, n_rows):

  # Positions of the trailing window ending at row i, clipped at the first row.
  window_idx = range(max(i - width + 1, 0), i + 1)
  x_subset = overlap_x_df.iloc[window_idx]
  y_subset = overlap_y_df.iloc[window_idx]

  # Rows with no missing allocation value. The mask is the same for every
  # fund in this window, so it is computed once outside the inner loop.
  x_rows_ok = ~x_subset.isna().any(axis = 1)

  params_ls = []
  tickers_ls = []
  performance_ls = []

  for j in fund_tickers:

    # Keep rows where the allocations and this fund are all observed.
    rows_ok = x_rows_ok & ~y_subset[j].isna()
    x_complete = x_subset.loc[rows_ok]
    y_complete = y_subset.loc[rows_ok, j]

    # Funds with no complete row in the window are skipped entirely.
    if (x_complete.shape[0] > 0) and (y_complete.size > 0):

      params = min_rss_optim(x_complete.values, y_complete.values)
      params_ls.append(params)

      tickers_ls.append(j)

      performance_ls.append(performance_df[j].iloc[i])

  if params_ls:

    # One row per fitted fund: its weights (integer-labelled columns, in the
    # column order of `overlap_x_df`) followed by its performance.
    result = pd.DataFrame(params_ls, index = tickers_ls)
    result["performance"] = performance_ls

    result_ls.append(result)
    index_ls.append(overlap_x_df.index[i])

In [None]:
# json.dump([x.to_dict() for x in result_ls], open("result_ls.json", "w"))
# json.dump([x.isoformat() for x in index_ls], open("index_ls.json", "w"))

# Performance

In [None]:
# result_ls = [pd.DataFrame(x) for x in json.load(open("result_ls.json", "r"))]
# index_ls = [pd.Timestamp(x) for x in json.load(open("index_ls.json", "r"))]

In [None]:
def quantile_cut(x):
  """Label each value of `x` with its quartile, "Q1" holding the largest values.

  The values are negated before binning, so the highest values of `x` fall in
  the lowest bin and receive "Q1". Bin edges are the 0/25/50/75/100 percent
  quantiles of the negated values with NaNs ignored; NaN inputs stay NaN.

  NOTE(review): `pd.cut` presumably rejects repeated bin edges, which would
  happen when many values tie -- confirm if such inputs are possible.
  """
  negated = -x
  edges = np.nanquantile(negated, [0, 0.25, 0.5, 0.75, 1])
  quartile_labels = ["Q1", "Q2", "Q3", "Q4"]

  return pd.cut(
    negated,
    bins = edges,
    labels = quartile_labels,
    include_lowest = True
  )

In [None]:
n_rows = len(result_ls)
numeric_cols = allocations + ["performance"]
score_ls = []

# Summarize every fitted window: the mean weights and performance within each
# performance quartile, followed by one "Overall" row averaging all funds.
for i, weights_df in enumerate(result_ls):

  score_df = pd.DataFrame(weights_df)
  # Replace the integer weight labels with the allocation tickers.
  score_df.columns = numeric_cols

  score_df["date"] = index_ls[i]
  score_df["quantile"] = quantile_cut(score_df["performance"])

  # observed = True keeps only the quartiles that actually contain funds.
  grouped = score_df.groupby(["date", "quantile"], observed = True)
  score = grouped[numeric_cols].mean().reset_index()

  overall_means = score_df[numeric_cols].mean()

  overall_row = {"date": [index_ls[i]], "quantile": ["Overall"]}
  overall_row.update({col: [overall_means[col]] for col in numeric_cols})
  overall = pd.DataFrame(overall_row)

  score = pd.concat([score, overall], ignore_index = True)

  score_ls.append(score)

In [None]:
# Stack the per-window summaries into one long table and display it.
score_df = pd.concat(objs = score_ls, ignore_index = True)
print(score_df)

In [None]:
# score_df.to_json("score_df.json", date_format = "iso")