In [129]:
import json
from collections import Counter
from datetime import datetime

import numpy as np

In [130]:
def filter_dates_before(input_dict, specified_date):
    """
    Keeps only the entries whose date key is strictly earlier than the specified date.

    Parameters:
    input_dict (dict): A dictionary with dates as keys in 'yyyy-mm-dd' format.
    specified_date (str or None): Exclusive upper bound in 'yyyy-mm-dd' format.
        When None, no filtering is applied.

    Returns:
    dict: A new dictionary containing only the entries dated before the
    specified date, in the same order as in `input_dict`. When
    `specified_date` is None the result is a shallow copy of `input_dict`,
    so the caller never receives an alias of the input.

    Raises:
    ValueError: If `specified_date` (when not None) or any key that gets
    compared does not match the '%Y-%m-%d' format.
    """
    if specified_date is None:
        # Copy instead of returning the input itself, so the "new dictionary"
        # contract holds on every path and callers cannot mutate the source.
        return dict(input_dict)

    # Parse the cutoff once; every key is compared against it.
    cutoff = datetime.strptime(specified_date, '%Y-%m-%d')

    return {
        date_str: value
        for date_str, value in input_dict.items()
        if datetime.strptime(date_str, '%Y-%m-%d') < cutoff
    }

In [131]:


def read_input_file(input_file_name:str, date_limit=None, expected_history_len=None):
    '''Loads a JSON input file and returns its pre-processed content.

    Parameters:
    input_file_name (str): Path of the JSON file. The file must provide the
        keys 'evaluation_date' and 'assets'; every asset must hold a
        'history' dictionary keyed by 'yyyy-mm-dd' dates.
    date_limit (str or None): When given, only history entries dated strictly
        before this 'yyyy-mm-dd' date are kept (later data is discarded).
        None keeps every history entry.
    expected_history_len (int or None): Number of history entries an asset
        must have to be retained. None (the default) uses the most common
        history length among the assets, measured after the date filter.

    Returns:
    dict: {'evaluation_date': ..., 'assets': {...}} where 'assets' contains
    only the assets whose (date-filtered) history has the expected length.

    Raises:
    KeyError: If 'evaluation_date', 'assets' or an asset's 'history' is missing.
    '''
    with open(input_file_name, 'r') as file:
        data = json.load(file)

    ######################
    # DATA PREPROCESSING #
    ######################

    assets = data['assets']

    # Filter by date
    for asset in assets.values():
        asset['history'] = filter_dates_before(asset['history'], date_limit)

    # Filter out all assets with a number of history items different than the trend.
    # The trend is the most common history length; it is computed from the data
    # rather than hard-coded, because the count changes as soon as `date_limit`
    # shortens the histories (a fixed count would then discard every asset).
    # We recognize that this approach works well only in cases where the number
    # of outliers is reduced.
    if expected_history_len is None and assets:
        length_counts = Counter(len(asset['history']) for asset in assets.values())
        expected_history_len = length_counts.most_common(1)[0][0]

    return {
        'evaluation_date': data['evaluation_date'],
        'assets': {
            name: asset
            for name, asset in assets.items()
            if len(asset['history']) == expected_history_len
        },
    }


In [132]:
# Load the dataset; date_limit is left at its default (None), so no history
# entry is discarded by date.
data = read_input_file(input_file_name="../eth_hackathon/input_one_day.json")

In [133]:
def covariance_matrix(data):
    '''Returns the matrix of pairwise correlation coefficients of the assets in `data`.

    NOTE: despite the function name (kept so existing callers keep working),
    the result comes from `np.corrcoef`, i.e. it is the correlation matrix
    with a unit diagonal, not the raw covariance that `np.cov` would give.

    Parameters:
    data (dict): Must contain data['assets'], mapping each asset name to a
        dictionary with a 'history' dictionary of {'yyyy-mm-dd': value}.
        All histories must have the same number of entries. The histories are
        assumed to cover the same dates; this is not checked here.

    Returns:
    numpy.ndarray: For two or more assets, a square array whose rows and
    columns follow the iteration order of data['assets']. An empty (0, 0)
    array when there are no assets. For a single asset the value is whatever
    `np.corrcoef` returns for one row.

    Raises:
    ValueError: If the histories do not all have the same length.
    '''
    histories = [asset['history'] for asset in data['assets'].values()]
    if not histories:
        # Nothing to correlate; avoid indexing into an empty list.
        return np.empty((0, 0))

    # number of history elements (must be equal for all assets)
    num_history_dates = len(histories[0])

    # Each row of assets_matrix represents a variable (an asset), and each
    # column a single observation (a date) of all those variables.
    assets_matrix = np.empty((len(histories), num_history_dates))

    for i, history in enumerate(histories):
        if len(history) != num_history_dates:
            # Without this check a one-entry history would silently be
            # broadcast across the whole row.
            raise ValueError(
                f"history length mismatch: expected {num_history_dates}, "
                f"got {len(history)}"
            )
        # 'yyyy-mm-dd' keys sort chronologically as plain strings. Sorting
        # aligns the columns across assets even when the histories were
        # stored in different orders.
        assets_matrix[i, :] = [history[date] for date in sorted(history)]

    return np.corrcoef(assets_matrix)

# Show the asset-by-asset matrix; the values are correlation coefficients
# (np.corrcoef), which is why the diagonal is 1.
print(covariance_matrix(data=data))

[[ 1.         -0.01979352 -0.14791098 ... -0.17340911 -0.04348772
   0.00799467]
 [-0.01979352  1.          0.87165607 ...  0.54388361  0.75285394
   0.9037155 ]
 [-0.14791098  0.87165607  1.         ...  0.62456203  0.72692153
   0.79919744]
 ...
 [-0.17340911  0.54388361  0.62456203 ...  1.          0.49634856
   0.5453978 ]
 [-0.04348772  0.75285394  0.72692153 ...  0.49634856  1.
   0.82822657]
 [ 0.00799467  0.9037155   0.79919744 ...  0.5453978   0.82822657
   1.        ]]


In [134]:
# Sanity check: print a marker for every asset whose key is empty/falsy.
for a in data['assets']:
    if a:
        continue
    print('ciao')