In [2]:
import numpy as np
import pandas as pd
import quantstats as qs
from pathlib import Path
from tqdm import tqdm
import matplotlib.pyplot as plt

from pypfopt.risk_models import sample_cov
from pypfopt.expected_returns import mean_historical_return

from robustcov.utils import init_mu_cov
from robustcov.runner import PortfolioCreater
from robustcov.optimizers import ConvexOptimizer
from robustcov.optimizers import NCOOptimizer


# Register quantstats' metrics as methods on pandas objects; calc_stats below
# relies on this for the `portfolio_returns.sharpe()` call.
qs.extend_pandas()

In [3]:
# Load the price history, parse the ISO-formatted 'Date' column and use it as
# the index, all in one chain so the raw frame is never left half-processed.
df = (
    pd.read_csv(Path('..', 'data', 'indices.csv'))
    .assign(Date=lambda raw: pd.to_datetime(raw['Date'], format='%Y-%m-%d'))
    .set_index('Date')
)

# Expected returns and covariance estimated from the full price history.
# NOTE(review): `mu` and `cov` are reassigned per country in the optimisation
# loop further down, so these full-sample values are only a first look.
mu = mean_historical_return(df).to_numpy()
cov = sample_cov(df).to_numpy()


### Country clustering

In [4]:
# Column labels are split on ':' and the first part is used as the country
# (presumably labels look like '<country>:<index name>' — confirm against the CSV).
# The 'sample' marker column added at the bottom of this cell is excluded so
# that re-running the cell does not turn it into a bogus "country" and break
# the boolean column masks built from `countries` later on.
names = [col for col in df.columns if col != 'sample']
countries = np.array([name.split(':')[0] for name in names])
unique_countries, counts = np.unique(countries, return_counts=True)

# Chronological split: rows dated before 2020-06-01 are tagged 'train',
# everything from that date onwards is tagged 'test'.
df['sample'] = np.where(df.index < '2020-06-01', 'train', 'test')

In [7]:
def calc_stats(returns, weights, country, sample):
    """Summarise the performance of a weighted portfolio.

    Parameters
    ----------
    returns : pandas.DataFrame
        Per-period returns, one column per asset.
    weights : pandas.DataFrame
        Weight table whose columns line up positionally with the columns of
        ``returns``. The rows are averaged into a single weight per column
        (presumably one row per optimisation trial — confirm with the caller).
    country : str
        Label stored under the ``'country'`` key of the result.
    sample : str
        Label stored under the ``'sample'`` key of the result.

    Returns
    -------
    tuple
        ``(stats, portfolio_returns)`` where ``stats`` is a dict of the two
        labels plus the computed metrics and ``portfolio_returns`` is the
        per-period portfolio return Series.

    Notes
    -----
    ``portfolio_returns.sharpe()`` is not a built-in pandas method; it needs
    ``qs.extend_pandas()`` to have been called beforehand.
    """
    # One averaged weight per asset column.
    asset_weights = weights.T.mean(axis=1).values
    # Row-wise sum of weighted asset returns (NaNs are skipped by pandas' sum).
    portfolio_returns = returns.mul(asset_weights, axis=1).sum(axis=1)

    summary = {
        'sample': sample,
        'country': country,
        'sharpe': portfolio_returns.sharpe(),
        'cumreturn': portfolio_returns.add(1).prod(),
    }

    # Remaining metrics come straight from quantstats; insertion order is kept
    # so the resulting dict has a stable column order when tabulated.
    quantstats_metrics = (
        ('win_rate', qs.stats.win_rate),
        ('avg_win', qs.stats.avg_win),
        ('consecutive_wins', qs.stats.consecutive_wins),
        ('consecutive_loss', qs.stats.consecutive_losses),
        ('max_drawdown', qs.stats.max_drawdown),
        ('expected_return', qs.stats.expected_return),
        ('1st_percentile_loss', qs.stats.outlier_loss_ratio),
        ('99st_percentile_win', qs.stats.outlier_win_ratio),
        ('profit_factor', qs.stats.profit_factor),
    )
    for key, metric in quantstats_metrics:
        summary[key] = metric(portfolio_returns)

    return summary, portfolio_returns


In [10]:


# Split the prices by the 'sample' label. `drop` returns a new frame without
# the label column, so neither `df` nor a filtered slice is mutated in place
# and the remaining columns stay positionally aligned with `countries`.
train = df.loc[df['sample'] == 'train'].drop(columns='sample')
test = df.loc[df['sample'] == 'test'].drop(columns='sample')

statistics = []            # one stats dict per (country, sample) pair
portf_train_returns = {}   # country -> in-sample portfolio return Series
portf_test_returns = {}    # country -> out-of-sample portfolio return Series

for country in tqdm(unique_countries, desc='countries'):
    country_mask = countries == country
    stocks = train.loc[:, country_mask]

    # Skip small universes before doing any estimation work.
    # NOTE(review): the threshold of 4 presumably exists because
    # max_clusters = n_assets // 2 must be at least 2 — confirm.
    if stocks.shape[1] < 4:
        tqdm.write(f'{country} skipped')
        continue

    train_asset_returns = stocks.pct_change()
    test_asset_returns = test.loc[:, country_mask].pct_change()

    # Equal-weight average of the in-sample asset returns. Not used further in
    # this cell; kept in case a later cell reads it.
    buy_and_hold = train_asset_returns.mean(axis=1)

    # Optimiser inputs are estimated from the training window only.
    mu = mean_historical_return(stocks).values
    cov = sample_cov(stocks).values

    optimizers = [
        # ConvexOptimizer()
        NCOOptimizer(max_clusters=len(cov) // 2)
    ]

    estimator = PortfolioCreater(
        min_var_portf=False,
        optimizers=optimizers,
        print_every=False,
        bandwidth=1,
        trials=10
    )

    results = estimator.estimate(mu, cov)
    weights = pd.DataFrame(results).T

    # The same weights are applied to both windows; calc_stats returns the
    # summary dict together with the portfolio return Series.
    train_stats, train_returns = calc_stats(train_asset_returns, weights, country, 'train')
    test_stats, test_returns = calc_stats(test_asset_returns, weights, country, 'test')

    statistics.extend([train_stats, test_stats])

    portf_train_returns[country] = train_returns
    portf_test_returns[country] = test_returns


Austria skipped
Botswana skipped
Bulgaria skipped
Colombia skipped
Croatia skipped
Egypt skipped
Greece skipped
20/58

  return _np.product(1 + returns) ** (1 / len(returns)) - 1


Hungary skipped
Kenya skipped
Malaysia skipped
Malta skipped
Norway skipped
Pakistan skipped
Philippines skipped
Poland skipped
Singapore skipped
Slovenia skipped
Taiwan skipped
Thailand skipped
Tunisia skipped
Ukraine skipped
56/58