## Step 1: Export all the conversations from Discord
## Step 2: Find all the JSON responses

In [None]:
from os import listdir
from os.path import isfile, join

from os.path import expanduser

# Directory holding the exported Discord JSON archives. listdir() does not
# expand "~" on its own, so the home directory is resolved explicitly.
dirpath = expanduser('~/source/discord/archive/20241229')
# Every regular file directly inside the archive directory (no recursion).
jsonfiles = [join(dirpath, f) for f in listdir(dirpath) if isfile(join(dirpath, f))]

## Step 3: Get all symphony URLs from the JSON responses

In [7]:
import json
import pandas as pd
import numpy as np

def get_symphony_id(url):
    """Return the symphony id segment of a symphony URL.

    The id is the last ``/``-separated segment of ``url``, or the
    second-to-last one when ``url`` ends with ``/details``.
    """
    segments = url.split('/')
    position = -2 if url.endswith('/details') else -1
    return segments[position]

def get_symphonies(filename):
    """Collect Composer symphony links from one exported Discord JSON file.

    Every embed of every entry in ``data['messages']`` is inspected; embeds
    whose ``url`` contains ``app.composer.trade/symphony`` are kept.

    Args:
        filename: Path to a JSON export file.

    Returns:
        dict mapping symphony id to a dict with the keys ``title``, ``url``,
        ``timestamp``, ``id`` and ``author``. When the same id occurs more
        than once in the file, the last embed wins.
    """
    # JSON exports are read as UTF-8 rather than the platform default encoding.
    with open(filename, 'r', encoding='utf-8') as file:
        data = json.load(file)

    symphonies_dict = {}
    for message in data['messages']:
        message_author = message['author']['name']
        for embed in message.get('embeds') or []:
            url = embed.get('url')
            if not url or 'app.composer.trade/symphony' not in url:
                continue

            # Prefer the embed's own "Author" field. The fallback is always
            # the message author, so one embed's author never carries over
            # to the next embed of the same message.
            author_values = [
                field.get('value')
                for field in embed.get('fields') or []
                if field.get('name') == 'Author'
            ]
            author = author_values[0] if author_values else message_author

            sid = get_symphony_id(url)
            symphonies_dict[sid] = {
                'title': embed.get('title'),
                'url': url,
                'timestamp': embed.get('timestamp'),
                'id': sid,
                'author': author,
            }

    return symphonies_dict

def symphonies_to_df(symphonies_dict):
    """Turn the id -> symphony-info mapping into a DataFrame.

    One row is produced per mapping value, in iteration order. The columns
    are ``symphony_sid``, ``title``, ``url``, ``timestamp`` and ``name``
    (taken from the ``id``, ``title``, ``url``, ``timestamp`` and ``author``
    entries respectively).
    """
    records = list(symphonies_dict.values())

    def column(key):
        # One numpy array per output column, built from the same record order.
        return np.array([record[key] for record in records])

    return pd.DataFrame(
        data={
            "symphony_sid": column('id'),
            "title": column('title'),
            "url": column('url'),
            "timestamp": column('timestamp'),
            "name": column('author'),
        }
    )

# Merge the symphonies found in every export file into one mapping; an id
# seen again in a later file replaces the earlier entry.
symphonies_dict = {}
for target_file in jsonfiles:
    print("Processing file: {}".format(target_file))
    found_in_file = get_symphonies(target_file)
    symphonies_dict.update(found_in_file)


## Step 4: Export URL info into dataframe
## Step 5: Fetch all of them into memory

In [None]:
# Tabulate the collected symphonies: one row per entry of symphonies_dict.
df = symphonies_to_df(symphonies_dict)

In [None]:
import requests
import time

def fetch_symphony(id, retry=3):
  """Fetch one symphony's metadata, retrying on rate limits and errors.

  Args:
    id: Symphony id or symphony URL, passed through to ``_fetch_symphony``.
    retry: Maximum number of attempts; zero or less means no attempt.

  Returns:
    ``(ok, status_code, data)`` from the first attempt that is not rate
    limited (HTTP 429) and does not raise. When every attempt failed, the
    sentinel ``(False, 404, {})`` is returned.
  """
  for attempts_left in range(retry - 1, -1, -1):
    try:
      ok, status_code, data = _fetch_symphony(id)
    except Exception:
      # Network errors and undecodable bodies end up here. Catching
      # Exception instead of everything keeps Ctrl-C / kernel interrupts
      # able to stop the crawl.
      backoff = 1
    else:
      if status_code != 429:
        return ok, status_code, data
      # Rate limited: wait longer before the next attempt.
      backoff = 10
    print('Retry {}'.format(id))
    if attempts_left:
      time.sleep(backoff)
  return False, 404, {}

def _fetch_symphony(id):
  """Perform a single GET for a symphony's public metadata.

  Args:
    id: Symphony id, or a symphony URL (optionally ending in ``/details``)
      from which the id segment is taken.

  Returns:
    ``(ok, status_code, json_body)`` of the HTTP response.

  Raises:
    Whatever ``requests.get`` or ``response.json()`` raise, e.g. on a
    connection error, a timeout, or a body that is not JSON.
  """
  parts = id.split('/')
  symphony_id = parts[-2] if id.endswith('/details') else parts[-1]

  url = f"https://backtest-api.composer.trade/api/v1/public/symphonies/{symphony_id}"

  # Without a timeout a stalled connection would block the whole crawl.
  response = requests.get(url, timeout=30)
  return response.ok, response.status_code, response.json()

def response_to_dataframe(symphony_data_list):
  """Build a DataFrame with one row per symphony response dict.

  Columns are the union of all keys in first-seen order. A key missing from
  a response yields a missing value in that row, so rows stay aligned even
  when responses do not share the same key set.

  Args:
    symphony_data_list: Iterable of dicts (parsed symphony responses).

  Returns:
    A ``pandas.DataFrame``; empty when the input is empty.
  """
  return pd.DataFrame(list(symphony_data_list))

# Fetch the metadata of every symphony in df. Successful payloads go to
# response_list; failures are recorded as (index, sid, status_code).
symphony_sid_list = df['symphony_sid'].values
response_list, failure_list = [], []
for idx, sid in enumerate(symphony_sid_list):
  # Pause for a second every 20 symphonies and report progress.
  if idx % 20 == 0:
    print('Sleeping at index {} with failure {}'.format(idx, len(failure_list)))
    time.sleep(1)
  ok_status_code, status_code, json_data = fetch_symphony(sid)
  if not ok_status_code:
    failure_list.append((idx, sid, status_code))
    continue
  response_list.append(json_data)

df_response = response_to_dataframe(response_list)

In [None]:
# Fetch every sid that has neither a successful response nor a recorded
# failure yet (for example after the previous cell was interrupted).
# NOTE(review): sids already present in failure_list are skipped, so earlier
# failures are NOT retried here — confirm that this is the intended behavior.
response_sid_set = {res['symphony_sid'] for res in response_list}
failure_sid_list = {res[1] for res in failure_list}

for idx, sid in enumerate(symphony_sid_list):
  already_handled = sid in response_sid_set or sid in failure_sid_list
  if already_handled:
    continue
  # Pause for a second on every 20th index and report progress.
  if idx % 20 == 0:
    print('Sleeping at index {} with failure {}'.format(idx, len(failure_list)))
    time.sleep(1)
  ok_status_code, status_code, json_data = fetch_symphony(sid)
  if not ok_status_code:
    failure_list.append((idx, sid, status_code))
    continue
  response_list.append(json_data)

## Step 6: Export DF as OOS

In [4]:
import os
from datetime import date, datetime, timedelta

def write_json(json_data, filename):
    """Write ``json_data`` to ``filename`` as indented UTF-8 JSON.

    Missing parent directories are created first.

    Args:
        json_data: Any JSON-serializable object.
        filename: Destination path; may be a bare file name.
    """
    parent = os.path.dirname(filename)
    # dirname() is '' for a bare file name and os.makedirs('') would raise.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(json_data, f, ensure_ascii=False, indent=4)

def read_json(filename):
    """Load and return the JSON document stored at ``filename``.

    The file is decoded as UTF-8, matching what ``write_json`` produces.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(filename, encoding='utf-8') as f:
        return json.load(f)

def get_csv_name(filename, end_date=None):
    """Return the CSV path ``bin/SYMPHONIES-<end_date>/<filename>.csv``.

    The containing directory is created if it does not exist yet. When
    ``end_date`` is omitted, the end date from ``get_start_end_date()`` is
    used.
    """
    if end_date is None:
        end_date = get_start_end_date()[1]
    csv_path = "bin/SYMPHONIES-{}/{}.csv".format(end_date, filename)
    parent_dir = os.path.dirname(csv_path)
    os.makedirs(parent_dir, exist_ok=True)
    return csv_path

def get_start_end_date():
  """Return ``(start_date, end_date)`` as ``YYYY-MM-DD`` strings.

  The start date is the fixed value ``2000-01-01``; the end date is today's
  local date.
  """
  return '2000-01-01', date.today().strftime('%Y-%m-%d')

# Persist the symphony index and the fetched symphony metadata as CSV files.
# get_csv_name() is called without end_date, so both land in the directory
# named after today's date.
df.to_csv(get_csv_name('SYMPHONIES'))
# Rebuild the frame so it includes responses appended after the first pass.
df_response = response_to_dataframe(response_list)
df_response.to_csv(get_csv_name('OOS'))

In [134]:
# Reload the metadata CSV of a fixed, earlier export date instead of using
# the in-memory responses.
oos_name = get_csv_name('OOS', end_date = '2024-12-30')
df_response = pd.read_csv(oos_name)

In [None]:
# Health check
# Compare the set of fetched symphony ids with the set of ids that fetched
# symphonies were copied from.
sid = set(df_response['symphony_sid'])
copies_sid = set(df_response['copied_from_symphony_sid'].dropna())
print("All: {} and Children: {}".format(len(sid), len(copies_sid)))

# Split the copied-from ids into those that are themselves in the table and
# those that are not.
include_in_table = copies_sid.intersection(sid)
not_include_in_table = copies_sid - include_in_table
print("Include: {} and Not Include: {}".format(len(include_in_table), len(not_include_in_table)))

## Step 7: Run backtest on all the symphonies

In [None]:
# borrowed from https://github.com/howardr/polyphony/blob/main/src/composer.py
import json
import os
from datetime import date, datetime, timedelta
import requests


def fetch_backtest_raw(id, start_date, end_date, retry=3):
  """Run one symphony backtest, retrying on rate limits and errors.

  Args:
    id: Symphony id or symphony URL, passed through to
      ``_fetch_backtest_raw``.
    start_date: Backtest start date, forwarded unchanged.
    end_date: Backtest end date, forwarded unchanged.
    retry: Maximum number of attempts; zero or less means no attempt.

  Returns:
    ``(ok, status_code, data)`` from the first attempt that is not rate
    limited (HTTP 429) and does not raise. When every attempt failed, the
    sentinel ``(False, 404, {})`` is returned.
  """
  # Imported here because this cell's own imports do not include ``time``.
  import time

  for attempts_left in range(retry - 1, -1, -1):
    try:
      ok, status_code, data = _fetch_backtest_raw(id, start_date, end_date)
    except Exception:
      # Network errors and undecodable bodies end up here. Catching
      # Exception instead of everything keeps Ctrl-C / kernel interrupts
      # able to stop the run.
      backoff = 1
    else:
      if status_code != 429:
        return ok, status_code, data
      # Rate limited: wait longer before the next attempt.
      backoff = 10
    print('Retry {}'.format(id))
    if attempts_left:
      time.sleep(backoff)
  return False, 404, {}

def _fetch_backtest_raw(id, start_date, end_date):
  """POST a single backtest request for one symphony.

  Args:
    id: Symphony id, or a symphony URL (optionally ending in ``/details``)
      from which the id segment is taken.
    start_date: Sent as ``start_date`` in the request payload.
    end_date: Sent as ``end_date`` in the request payload.

  Returns:
    ``(ok, status_code, json_body)`` of the HTTP response.

  Raises:
    Whatever ``requests.post`` or ``response.json()`` raise, e.g. on a
    connection error, a timeout, or a body that is not JSON.
  """
  parts = id.split('/')
  symphony_id = parts[-2] if id.endswith('/details') else parts[-1]

  payload = {
    "capital": 100000,
    "apply_reg_fee": True,
    "apply_taf_fee": True,
    "backtest_version": "v2",
    "slippage_percent": 0.0005,
    "start_date": start_date,
    "end_date": end_date,
  }

  url = f"https://backtest-api.composer.trade/api/v2/public/symphonies/{symphony_id}/backtest"
  print("{} {} {} {}".format(symphony_id, start_date, end_date, url))

  # Without a timeout a stalled connection would block the whole run.
  response = requests.post(url, json=payload, timeout=60)
  return response.ok, response.status_code, response.json()

def get_backtest_and_symphony_name(jsond):
  """Unpack a raw backtest response into data frames and metadata.

  Args:
    jsond: Parsed backtest response. The keys read are ``legend``,
      ``last_market_days_holdings``, ``first_day``, ``last_market_day``,
      ``tdvm_weights``, ``dvm_capital`` and ``stats``.

  Returns:
    ``(df_allocations, df_return, stats, symphony_name, id)``. Both frames
    are indexed by every calendar day from ``first_day`` to
    ``last_market_day`` and start out filled with ``0.0``; ``id`` is the
    first key of ``legend``.
  """
  legend = jsond['legend']
  symphony_id = list(legend.keys())[0]
  symphony_name = legend[symphony_id]['name']

  # The allocation columns are taken from the last market day's holdings.
  tickers = list(jsond["last_market_days_holdings"])
  first_day = convert_trading_date(jsond["first_day"])
  last_day = convert_trading_date(jsond["last_market_day"])

  weights_by_ticker = jsond["tdvm_weights"]
  calendar = pd.date_range(start=first_day, end=last_day)
  df_allocations = pd.DataFrame(0.0, index=calendar, columns=tickers)

  # NOTE(review): a ticker that appears in tdvm_weights but not in the last
  # day's holdings is not among the pre-filled columns — confirm how the
  # .at assignment below is expected to treat it.
  for ticker in weights_by_ticker:
    for date_int, percent in weights_by_ticker[ticker].items():
      df_allocations.at[convert_trading_date(date_int), ticker] = percent

  capital_by_day = jsond['dvm_capital'][symphony_id]
  df_return = pd.DataFrame(0.0, index=calendar, columns=['capital'])
  for date_int, capital in capital_by_day.items():
    df_return.at[convert_trading_date(date_int), 'capital'] = capital

  return df_allocations, df_return, jsond['stats'], symphony_name, symphony_id

def convert_trading_date(date_int):
  """Convert a day count since 1970-01-01 into a naive ``datetime``.

  Args:
    date_int: Number of days since 1970-01-01, as an int or anything
      ``int()`` accepts (e.g. a numeric string).

  Returns:
    ``datetime`` at midnight of the resulting day.
  """
  # Build the epoch directly: this runs once per data point, so re-parsing
  # a constant date string on every call would be wasted work.
  return datetime(1970, 1, 1) + timedelta(days=int(date_int))

In [None]:
import time
start_date, end_date = get_start_end_date()

# Backtest every symphony in df. Each raw response is written to
# bin/BT-<end_date>/<sid>.json; successful payloads are collected in rlist
# and failures in flist as (index, sid, status_code).
rlist, flist = [], []
for idx, row in df.iterrows():
    # Pause for a second every 20 rows and report progress.
    if idx % 20 == 0:
        print('Sleeping at index {} with failure {}'.format(idx, len(flist)))
        time.sleep(1)
    sid = row['symphony_sid']
    filename = "bin/BT-{}/{}.json".format(end_date, sid)
    ok_status_code, status_code, jsond = fetch_backtest_raw(sid, start_date, end_date)
    # The payload is written regardless of the outcome, so a failed backtest
    # also leaves a file behind.
    write_json(jsond, filename)
    if ok_status_code:
        # Collect this symphony's own backtest payload.
        rlist.append(jsond)
    else:
        flist.append((idx, sid, status_code))

## Step 8: Export the backtest information 

In [None]:
from os import listdir
from os.path import isfile, join

# Directory of raw backtest JSON files from a fixed, earlier run.
dirpath = 'bin/BT-2024-12-30/'
jsonfiles = [join(dirpath, f) for f in listdir(dirpath) if isfile(join(dirpath, f))]

# Per-symphony results, all keyed by the symphony id found in the payload.
dict_allocation, dict_return, dict_stats, dict_name = {}, {}, {}, {}
for index, jsonfile in enumerate(jsonfiles):
    if index % 20 == 0:
        print('Index: {}'.format(index))
    try:
        jsond = read_json(jsonfile)
        df_allocations, df_return, stats, symphony_name, symphony_id = get_backtest_and_symphony_name(jsond)
    except Exception as exc:
        # Skip unusable files (e.g. payloads of failed backtests) but report
        # why. Catching Exception rather than everything keeps the loop
        # interruptible.
        print('Fail jsonfile {}: {!r}'.format(jsonfile, exc))
        continue
    dict_name[symphony_id] = symphony_name
    dict_stats[symphony_id] = stats
    dict_return[symphony_id] = df_return
    dict_allocation[symphony_id] = df_allocations

In [140]:
import quantstats as qs
def get_portfolio_values(df):
    """Return the first column of ``df`` restricted to rows that are not all zero.

    Rows in which every value equals 0 are dropped; per the original author
    these are days before the backtest started and non-trading days.
    """
    has_nonzero = (df != 0).any(axis=1)
    active_rows = df.loc[has_nonzero]
    return active_rows.iloc[:, 0]

# portfolio_values = get_portfolio_values(dict_return['jLlgKCXl1P4iP84bSmC1'])
# qs.stats.smart_sharpe(portfolio_values)

In [None]:
import quantstats as qs
import inspect

dict_quant_stats = {}
qs.extend_pandas()
# qs.stats members that are never evaluated here.
skip_func = {'comp','compsum', 'distribution', 'implied_volatility', 'monthly_returns', 
             'to_drawdown_series', 'outliers', 'remove_outliers', 'rolling_sharpe', 'rolling_sortino',
             'rolling_volatility', 'pct_rank'}
function_names = [f for f in dir(qs.stats) if f[0] != '_' and f not in skip_func]

# Decide once which stats can be called with the series alone: they take a
# `returns` or `prices` argument and no `benchmark`. The signatures do not
# depend on the symphony, so this is done outside the per-symphony loop.
series_only_funcs = {}
for function_name in function_names:
    func = getattr(qs.stats, function_name)
    args = set(inspect.getfullargspec(func).args)
    if ('returns' in args or 'prices' in args) and 'benchmark' not in args:
        series_only_funcs[function_name] = func

# sid -> {stat name: value} over the whole backtest period.
for key, value in dict_return.items():
    portfolio_values = get_portfolio_values(value)
    temp_dict = {}
    for function_name, func in series_only_funcs.items():
        try:
            temp_dict[function_name] = func(portfolio_values)
        except Exception:
            # One failing stat must not abort the run; same convention as
            # the out-of-sample stats loop.
            temp_dict[function_name] = None
    dict_quant_stats[key] = temp_dict


In [142]:
def extract_date(row):
    """Return the date part of ``row['last_semantic_update_at']`` as a Timestamp.

    Only the first 10 characters of the value are parsed. A value that is
    not a string (e.g. the NaN that ``read_csv`` produces for an empty cell)
    yields ``pd.NaT`` instead of raising.
    """
    raw = row['last_semantic_update_at']
    if not isinstance(raw, str):
        return pd.NaT
    return pd.to_datetime(raw[:10])
# Derive one date per symphony from its last_semantic_update_at value.
df_response['backtest_start_date'] = df_response.apply(extract_date, axis=1)
# Lookup table: symphony_sid -> backtest_start_date.
sid_to_oos = dict(df_response[['symphony_sid', 'backtest_start_date']].values)

In [None]:
dict_quant_oos_stats = {}

# Stats that can be called with the series alone: they take a `returns` or
# `prices` argument and no `benchmark`. Resolved once, since the signatures
# do not depend on the symphony.
oos_funcs = []
for function_name in function_names:
    func = getattr(qs.stats, function_name)
    args = set(inspect.getfullargspec(func).args)
    if ('returns' in args or 'prices' in args) and 'benchmark' not in args:
        oos_funcs.append((function_name, func))

# sid -> {stat name: value}, computed only on rows from the symphony's
# start date in sid_to_oos onwards.
for key, value in dict_return.items():
    oos_start_date = sid_to_oos.get(key)
    if oos_start_date is None or pd.isna(oos_start_date):
        # A backtest can exist for a symphony whose metadata is missing.
        print('SID {} has no OOS start date; skipping.'.format(key))
        continue
    portfolio_values = get_portfolio_values(value.loc[oos_start_date:])
    temp_dict = {}
    for function_name, func in oos_funcs:
        try:
            temp_dict[function_name] = func(portfolio_values)
        except Exception:
            # A stat that cannot be computed on this window is recorded as
            # None; Exception (not everything) keeps the loop interruptible.
            temp_dict[function_name] = None
    dict_quant_oos_stats[key] = temp_dict

In [144]:
# dict_allocation, dict_return, dict_stats, dict_name, dict_quant_oos_stats, dict_quant_stats
def convert_sid_dict_to_df(sid_to_name, sid_to_values):
    """Build one DataFrame row per symphony from per-symphony value dicts.

    Args:
        sid_to_name: dict mapping symphony id -> symphony name. Its order
            determines the row order.
        sid_to_values: dict mapping symphony id -> {column: value}.

    Returns:
        DataFrame with the columns ``sid`` and ``name`` followed by every
        column found in ``sid_to_values``, in first-seen order. A column
        missing for a symphony is filled with ``None``. Symphonies that are
        absent from ``sid_to_values`` are reported and skipped.
    """
    # A dict is used as an ordered set so that the column order is the same
    # on every run (a plain set of strings iterates in varying order).
    columns = {}
    for values_in_dict in sid_to_values.values():
        for column in values_in_dict:
            columns.setdefault(column, None)

    df_dict = {'sid': [], 'name': []}
    for column in columns:
        df_dict[column] = []

    for sid, name in sid_to_name.items():
        if sid not in sid_to_values:
            print('SID {} {} not in backtest.'.format(sid, name))
            continue
        backtest_stats = sid_to_values[sid]
        df_dict['sid'].append(sid)
        df_dict['name'].append(name)
        for column in columns:
            df_dict[column].append(backtest_stats.get(column))
    return pd.DataFrame(df_dict)


def merge_dicts(dict1, dict2):
    """Merge two dicts of dicts one level deep and return the result.

    For a key present in both inputs, the inner dicts are combined, with
    entries from ``dict2`` winning on conflicts. Keys present in only one
    input are carried over as they are.

    Neither ``dict1`` nor its inner dicts are modified.
    """
    result = dict1.copy()
    for key, value in dict2.items():
        if key in result:
            # Merge into a copy: dict1.copy() is shallow, so updating in
            # place would also change the caller's inner dict.
            merged = result[key].copy()
            merged.update(value)
            result[key] = merged
        else:
            result[key] = value
    return result

In [145]:
# Combine the stats from the backtest responses with the quantstats results
# per symphony, tabulate them, and write the table as the BACKTEST CSV.
dict_backtest_stats = merge_dicts(dict_stats, dict_quant_stats)
df_backtest_stats = convert_sid_dict_to_df(dict_name, dict_backtest_stats)
df_backtest_stats.to_csv(get_csv_name('BACKTEST'))

In [146]:
df_oos_stats = convert_sid_dict_to_df(dict_name, dict_quant_oos_stats)
# Written under its own name: 'OOS' is already the file name of the symphony
# metadata export (df_response), which is read back with pd.read_csv, so
# reusing it here would overwrite that file with a different table.
df_oos_stats.to_csv(get_csv_name('OOS_STATS'))

In [None]:
# dict_quant_stats['apfULtEPpM7VqWodougq']
# dict_quant_stats is a plain dict (sid -> {stat name: value}) and has no
# .columns attribute; list the distinct stat names instead.
sorted({stat_name for stats in dict_quant_stats.values() for stat_name in stats})