In [13]:
import os

os.listdir('data')

['yf_data.csv',
 'bonds.csv',
 'cost.csv',
 'amzn.csv',
 'nvda.csv',
 'coststats.csv',
 'nflx.csv',
 'jpm.csv',
 'tslastats.csv',
 'aapl.csv']

In [10]:
import pandas as pd

filenames = ['aapl','amzn','nvda','cost','jpm','nflx']
df = pd.DataFrame()
date, id_, vol, close = [], [], [], []

for f in filenames:
    data = pd.read_csv(f'data/{f}.csv')[6:]
    id_.extend([f.upper() for _ in range(len(data))])
    date.extend(data['Security'].values)
    vol.extend(data['Unnamed: 2'].values.astype(int))
    close.extend(data[f'{f.upper()} US Equity'].values.astype(float))
    
df['ID']=id_
df['close']=close
df['DATE']=date
df['volume']=vol

df['DATE'] = pd.to_datetime(df['DATE'])
df.set_index(['DATE'], inplace=True)

df

Unnamed: 0_level_0,ID,close,volume
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-05-03,AAPL,184.820,62696844
2024-05-02,AAPL,173.030,94214915
2024-05-01,AAPL,169.300,50383147
2024-04-30,AAPL,170.330,65934776
2024-04-29,AAPL,173.500,68169419
...,...,...,...
2010-06-10,NFLX,16.951,24771964
2010-06-09,NFLX,16.490,39739875
2010-06-08,NFLX,15.761,24122266
2010-06-07,NFLX,15.594,26230715


In [39]:

for f in filenames:
    data = pd.read_csv(f'data/{f}.csv')[6:]
    print(data[f'{f.upper()} US Equity'], data['Security'])

6       184.82
7       173.03
8        169.3
9       170.33
10       173.5
         ...  
3527     8.424
3528     8.795
3529     9.142
3530     9.239
3531     9.513
Name: AAPL US Equity, Length: 3526, dtype: object 6       2024-05-03
7       2024-05-02
8       2024-05-01
9       2024-04-30
10      2024-04-29
           ...    
3527    2010-05-07
3528    2010-05-06
3529    2010-05-05
3530    2010-05-04
3531    2010-05-03
Name: Security, Length: 3526, dtype: object
6       187.34
7       184.72
8          179
9          175
10      180.96
         ...  
3594     5.944
3595     6.271
3596     6.302
3597     6.138
3598     5.974
Name: AMZN US Equity, Length: 3593, dtype: object 6       2024-05-03
7       2024-05-02
8       2024-05-01
9       2024-04-30
10      2024-04-29
           ...    
3594    2010-02-01
3595    2010-01-29
3596    2010-01-28
3597    2010-01-27
3598    2010-01-26
Name: Security, Length: 3593, dtype: object
6       881.43
7       858.17
8       830.41
9       864.02
10  

In [11]:
df.to_csv('bloomberg20240503.csv')

In [4]:
import pandas as pd
from typing import Dict

def security_price_correlation(X: pd.DataFrame, date_range: tuple) -> Dict[str, float]:
    # Assert that the necessary columns are present
    assert 'close' in X.columns and 'ID' in X.columns and 'DATE' in X.columns, "DataFrame must contain 'close', 'id', and 'date' columns."
    
    # Filter DataFrame for the specified date range
    X_filtered = X[(X['DATE'] >= date_range[0]) & (X['DATE'] <= date_range[1])]
    
    # Pivot the DataFrame to have dates as index and stocks as columns with close prices as values
    X_pivot = X_filtered.pivot(index='DATE', columns='ID', values='close')
    
    # Drop rows with any NaN values to ensure valid correlation computation
    X_pivot = X_pivot.dropna()
    
    # Compute the correlation matrix
    correlation_matrix = X_pivot.corr()
    
    # Initialize dictionary to store pairwise correlations
    pairwise_corr = {}
    
    # Extract pairwise correlations from the matrix
    stocks = correlation_matrix.columns
    for i in range(len(stocks)):
        for j in range(i + 1, len(stocks)):  # Ensure that each pair is only computed once
            # Construct the key as 'stock1:stock2'
            key = f"{stocks[i]}:{stocks[j]}"
            # Store the correlation in the dictionary
            pairwise_corr[key] = correlation_matrix.iloc[i, j]
    
    return pairwise_corr


In [7]:
D = security_price_correlation(df.reset_index(),('2023-01-01','2023-03-01'))

In [8]:
D

{'AAPL:AMZN': 0.7234532113000218,
 'AAPL:COST': 0.843234627768679,
 'AAPL:JPM': 0.7320844609484425,
 'AAPL:NFLX': 0.6993811830413621,
 'AAPL:NVDA': 0.9338000377467748,
 'AMZN:COST': 0.8888641240152809,
 'AMZN:JPM': 0.4677407313365499,
 'AMZN:NFLX': 0.8348191869567221,
 'AMZN:NVDA': 0.5989684224995746,
 'COST:JPM': 0.6210836483671388,
 'COST:NFLX': 0.8416841380525045,
 'COST:NVDA': 0.6929102763383007,
 'JPM:NFLX': 0.4333657642099699,
 'JPM:NVDA': 0.731280359149206,
 'NFLX:NVDA': 0.5166951974786309}