In [None]:
import xarray as xr
xr.set_options(keep_attrs=True,
               display_expand_data=False)
import pandas as pd

import plotly.express as px
import plotly.io as pio
# pio.renderers.default='png'

from stats import get_factor_data


# MDS

In [None]:
from numpy import sqrt, cos, sin, arctan2, array

def transform_coordinates(coordinates: pd.DataFrame, transformation_type=None, factor: str = 'SPY'):
    """
    Re-orient a two-column coordinate table relative to one reference row.

    Parameters
    ----------
    coordinates : pd.DataFrame
        One row per point and exactly two columns (x first, then y).
    transformation_type : {None, 'rotate', 'normalize'}, optional
        * ``None`` -- return ``coordinates`` unchanged (the same object).
        * ``'rotate'`` -- rotate all points about the origin so that the
          ``factor`` row lands on the positive x-axis.
        * ``'normalize'`` -- apply the same rotation, then divide every
          coordinate by the distance of the ``factor`` row from the origin,
          which places that row at (1, 0).
    factor : str, default 'SPY'
        Index label of the reference row. Only looked up when a
        transformation is requested.

    Returns
    -------
    pd.DataFrame
        Transformed coordinates with the index and columns of the input.

    Raises
    ------
    ValueError
        If ``transformation_type`` is not one of the supported values.
        (Previously such a value fell through and returned ``None``.)
    KeyError
        If ``factor`` is not a label of ``coordinates.index``.

    Notes
    -----
    With ``'normalize'``, a reference row located exactly at the origin has
    length zero, so the division produces inf/NaN values.
    """
    if transformation_type is None:
        return coordinates

    if transformation_type not in ('rotate', 'normalize'):
        raise ValueError(
            f"Unknown transformation_type {transformation_type!r}; "
            "expected None, 'rotate' or 'normalize'."
        )

    x, y = coordinates.loc[factor]

    # Rotating by the negative of the reference row's polar angle brings that
    # row onto the positive x-axis.
    angle = -arctan2(y, x)
    rotation_matrix = array([[cos(angle), -sin(angle)],
                             [sin(angle), cos(angle)]])

    # Points are stored as rows, so apply the rotation from the right:
    # (R @ X.T).T == X @ R.T
    rotated = pd.DataFrame(coordinates.to_numpy() @ rotation_matrix.T,
                           index=coordinates.index,
                           columns=coordinates.columns)

    if transformation_type == 'normalize':
        rotated = rotated.div(sqrt(x**2 + y**2))

    return rotated


In [None]:
from sklearn.manifold import MDS
from numpy import sqrt

def multidimensional_scaling(correlation_matrix: pd.DataFrame, init=None) -> pd.DataFrame:
    """
    Perform multidimensional scaling on a correlation matrix.

    Correlations are converted to dissimilarities with
    ``sqrt(1 - correlation**2)``, so a correlation of +1 or -1 maps to a
    dissimilarity of 0 and a correlation of 0 maps to 1.

    Parameters
    ----------
    correlation_matrix : pd.DataFrame
        The input correlation matrix as a pandas DataFrame.
    init : array-like, optional
        Initial positions of the points in the embedding space, forwarded to
        ``MDS.fit_transform``. If None, random initialization is used.

    Returns
    -------
    pd.DataFrame
        One row per row of ``correlation_matrix`` (same index) with the
        columns 'dim1' and 'dim2'; the column index is named 'dimension'.
    """

    # Floating-point rounding can leave |correlation| a hair above 1, which
    # would make 1 - corr**2 slightly negative and the square root NaN.
    # Clamp at zero before taking the root (NaN inputs stay NaN).
    squared_dissimilarity = (1 - correlation_matrix**2).clip(lower=0)
    dissimilarity_matrix = sqrt(squared_dissimilarity)

    # Pass n_init explicitly to suppress warning when init is not None:
    n_init = 4 if init is None else 1
    embedding = MDS(dissimilarity='precomputed', random_state=42, n_init=n_init)
    coordinates = embedding.fit_transform(dissimilarity_matrix, init=init)

    return pd.DataFrame(coordinates,
                        index=dissimilarity_matrix.index,
                        columns=pd.Index(['dim1', 'dim2'], name='dimension'))


def mds_ts_df(corr: xr.DataArray, start_date = None, transformation=None, factor='SPY', corr_type=63) -> pd.DataFrame:
    """
    Run MDS on every date of a correlation array and stack the results.

    Each date is embedded with ``multidimensional_scaling``; the
    (untransformed) coordinates of one date are used as the initial positions
    for the next date. The optional transformation is applied afterwards and
    does not feed into the next date's initialisation.

    Parameters
    ----------
    corr : xr.DataArray
        Correlation array with a ``date`` and a ``corr_type`` coordinate;
        selecting one value of each must leave a 2-D array that
        ``to_pandas()`` turns into a correlation matrix.
    start_date : optional
        Lower bound of the date slice (``slice(start_date, None)``).
        None keeps every date.
    transformation : {None, 'rotate', 'normalize'}, optional
        Passed to ``transform_coordinates`` for every date.
    factor : str, default 'SPY'
        Reference row passed to ``transform_coordinates``.
    corr_type : default 63
        Value of the ``corr_type`` coordinate to select.

    Returns
    -------
    pd.DataFrame
        Coordinates for all dates, indexed by ('date', <name of the
        correlation matrix's index>).

    Raises
    ------
    ValueError
        If the date slice selects no dates.
    """
    # Take the dates from the array that was passed in, not from a notebook global.
    dates = corr.sel(date=slice(start_date, None)).date.values
    if len(dates) == 0:
        raise ValueError(f"corr has no dates for start_date={start_date!r}")

    coordinates = None
    entity_name = None
    mds_by_date = {}
    for date in dates:
        df = corr.sel(date=date, corr_type=corr_type).to_pandas()
        entity_name = df.index.name
        # Warm start: the previous date's embedding seeds this one.
        coordinates = multidimensional_scaling(df, init=coordinates)
        mds_by_date[date] = transform_coordinates(coordinates, transformation, factor=factor)

    return pd.concat(mds_by_date).rename_axis(index=['date', entity_name])


# def draw_mds(df: pd.DataFrame) -> px.scatter:
#     # df.plot.scatter(x='dim1', y='dim2')
#     fig_format = {'template': 'plotly_white', 'height': 750, 'width': 750}
#     fig = (px.scatter(df.join(factor_master).reset_index(), 
#                       x='dim1', y='dim2', text='asset', color='asset_class',
#                      **fig_format)
#            .update_traces(textposition='middle right', textfont=dict(color='lightgray'))
#            .update_xaxes(title=None)
#            .update_yaxes(title=None))
#     return fig


def draw_mds_ts(df: pd.DataFrame) -> px.scatter:
    """
    Build an animated scatter plot of MDS coordinates, one frame per date.

    NOTE: a later cell in this notebook defines ``draw_mds_ts`` again; once
    that cell has run, its definition replaces this one.

    Parameters
    ----------
    df : pd.DataFrame
        Must provide the columns 'dim1', 'dim2', 'asset', 'asset_class'
        and 'date'.

    Returns
    -------
    The figure created by ``px.scatter``: 750x750, 'plotly_white' template,
    points labelled by asset and coloured by asset class, axis titles removed.
    """
    figure = px.scatter(
        df,
        x='dim1',
        y='dim2',
        text='asset',
        color='asset_class',
        animation_frame='date',
        template='plotly_white',
        height=750,
        width=750,
    )
    # Labels sit to the right of each marker, in a light grey font.
    figure.update_traces(textposition='middle right', textfont=dict(color='lightgray'))
    figure.update_xaxes(title=None)
    figure.update_yaxes(title=None)
    return figure

In [None]:
# Largest absolute value among the column maxima and minima of df1's
# 'dim1' and 'dim2' columns.
# NOTE(review): df1 is only assigned in the "Run MDS" cell further down, so
# this cell needs that cell to have run first -- confirm the intended order.
max_tick = max(
    abs(extreme)
    for extreme in (*df1[['dim1', 'dim2']].max(), *df1[['dim1', 'dim2']].min())
)

In [None]:
def draw_mds_ts(df: pd.DataFrame, axis_limit=None) -> px.scatter:
    """
    Build an animated scatter plot of MDS coordinates, one frame per date.

    Parameters
    ----------
    df : pd.DataFrame
        Must provide the columns 'dim1', 'dim2', 'asset', 'asset_class'
        and 'date'.
    axis_limit : float, optional
        When given, both axes are fixed to ``[-axis_limit, axis_limit]``.
        Plotly's animation guide recommends fixing the axis ranges so the
        data stays visible in every frame. A suitable value is, for example,
        ``df[['dim1', 'dim2']].abs().max().max()``. With the default None
        the ranges are left to Plotly, as before.

    Returns
    -------
    The figure created by ``px.scatter``: 750x750, 'plotly_white' template,
    points labelled by asset and coloured by asset class, axis titles removed.
    """
    fig_format = {'template': 'plotly_white', 'height': 750, 'width': 750}

    # range_x / range_y of None is px.scatter's default (automatic range).
    axis_range = None if axis_limit is None else [-axis_limit, axis_limit]

    fig = (px.scatter(df,
                      x='dim1', y='dim2', text='asset', color='asset_class',
                      animation_frame='date',
                      range_x=axis_range, range_y=axis_range,
                      **fig_format)
           .update_traces(textposition='middle right', textfont=dict(color='lightgray'))
           .update_xaxes(title=None)
           .update_yaxes(title=None))

    return fig


# Collect data 

In [None]:
# Load the factor dataset once and bind it to both `factor_data` and `data`.
factor_data = data = get_factor_data()
# Table built from the attrs of `data.asset`, transposed.
# NOTE(review): presumably one row per asset with metadata columns such as
# 'asset_class' (the run cell joins it on 'asset') -- confirm against
# get_factor_data.
factor_master = pd.DataFrame(data.asset.attrs).transpose()


# Run MDS

In [None]:
# Build and show one animated MDS figure per coordinate transformation.
for transformation in (None, 'rotate', 'normalize'):
    df = mds_ts_df(data.corr, start_date='2020', transformation=transformation)
    # Move the index levels into columns, attach the factor_master columns by
    # asset, and convert the dates to strings before plotting.
    df1 = (df.reset_index()
             .join(factor_master, on='asset')
             .assign(date=lambda frame: frame['date'].astype(str)))
    fig = draw_mds_ts(df1)
    fig.show()