In [51]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize

# 1. Expanded stock universe
industries = {
    'Technology': ['AAPL', 'MSFT', 'GOOG', 'AMZN', 'NVDA', 'ADBE', 'INTC', 'CSCO', 'CRM', 'ORCL', 'TXN', 'IBM', 'HPQ', 'QCOM', 'MU'],
    'Consumer Staples': ['PEP', 'KO', 'PG', 'MDLZ', 'GIS', 'HSY', 'CL', 'K', 'WMT', 'COST', 'TGT', 'KR', 'SYY', 'TSN'],
    'Healthcare': ['PFE', 'MRK', 'JNJ', 'UNH', 'ABT', 'ABBV', 'GILD', 'BIIB', 'LLY', 'BMY', 'DHR', 'TMO', 'CVS', 'ANTM'],
    'Financials': ['JPM', 'GS', 'MS', 'BAC', 'C', 'AXP', 'USB', 'BK', 'WFC', 'BLK', 'TROW', 'SPGI', 'SCHW', 'V', 'MA'],
    'Energy': ['XOM', 'CVX', 'COP', 'SLB', 'PSX', 'MPC', 'EOG', 'HAL', 'BKR', 'VLO', 'OXY', 'HES'],
    'Industrials': ['BA', 'CAT', 'GE', 'MMM', 'DE', 'UPS', 'FDX', 'LMT', 'RTX', 'HON', 'ETN', 'GD', 'EMR'],
    'Utilities': ['NEE', 'DUK', 'SO', 'D', 'EXC', 'AEP', 'XEL', 'ES', 'PEG', 'WEC', 'SRE', 'PCG'],
    'Consumer Discretionary': ['TSLA', 'HD', 'NKE', 'SBUX', 'MCD', 'LOW', 'TJX', 'LVS', 'MAR', 'ROST', 'DLTR', 'YUM'],
    'Real Estate': ['AMT', 'PLD', 'EQIX', 'PSA', 'DLR', 'VTR', 'SPG', 'O', 'SBAC', 'CCI', 'WY', 'EQR'],
    'Communication Services': ['FB', 'GOOGL', 'DIS', 'CMCSA', 'VZ', 'T', 'NFLX', 'TMUS', 'CHTR', 'TWTR', 'VIAC'],
    'Materials': ['LIN', 'APD', 'SHW', 'ECL', 'PPG', 'NUE', 'DD', 'FCX', 'VMC', 'MLM', 'IP', 'LYB']
}

# Combine all stocks
stocks = [ticker for industry in industries.values() for ticker in industry]

# 2. Fetch historical data for the last 6 months (or longer for correlation computation)
data = yf.download(stocks, start='2024-01-01', end='2024-10-18')['Adj Close']


[*********************100%***********************]  142 of 142 completed

4 Failed downloads:
['TWTR', 'FB', 'VIAC', 'ANTM']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')


In [52]:
data['KR'].pct_change()

Date
2024-01-02 00:00:00+00:00         NaN
2024-01-03 00:00:00+00:00    0.005403
2024-01-04 00:00:00+00:00   -0.013328
2024-01-05 00:00:00+00:00    0.003268
2024-01-08 00:00:00+00:00   -0.001737
                               ...   
2024-10-11 00:00:00+00:00    0.002717
2024-10-14 00:00:00+00:00   -0.001626
2024-10-15 00:00:00+00:00    0.015741
2024-10-16 00:00:00+00:00    0.009084
2024-10-17 00:00:00+00:00   -0.001942
Name: KR, Length: 201, dtype: float64

In [53]:
data['GIS'].pct_change().shift(-1)

Date
2024-01-02 00:00:00+00:00   -0.000300
2024-01-03 00:00:00+00:00   -0.014836
2024-01-04 00:00:00+00:00   -0.011409
2024-01-05 00:00:00+00:00   -0.005385
2024-01-08 00:00:00+00:00    0.005152
                               ...   
2024-10-11 00:00:00+00:00    0.000283
2024-10-14 00:00:00+00:00    0.005647
2024-10-15 00:00:00+00:00   -0.003229
2024-10-16 00:00:00+00:00   -0.004507
2024-10-17 00:00:00+00:00         NaN
Name: GIS, Length: 201, dtype: float64

In [54]:
#previous_pairs = 
set(selected_pairs) - set(check_correlation_threshold(data, selected_pairs))

{('AEP', 'ES'),
 ('BK', 'BAC'),
 ('BK', 'BLK'),
 ('BK', 'WFC'),
 ('C', 'BK'),
 ('CAT', 'EMR'),
 ('CCI', 'SBAC'),
 ('CRM', 'ADBE'),
 ('CSCO', 'TXN'),
 ('D', 'ES'),
 ('FDX', 'ETN'),
 ('LMT', 'HON'),
 ('MS', 'C'),
 ('MS', 'SCHW'),
 ('NEE', 'EXC'),
 ('NFLX', 'DIS'),
 ('NVDA', 'QCOM'),
 ('O', 'AMT'),
 ('O', 'SBAC'),
 ('PLD', 'AMT'),
 ('PLD', 'SBAC'),
 ('SBAC', 'PLD'),
 ('SRE', 'ES'),
 ('TXN', 'QCOM'),
 ('USB', 'SCHW'),
 ('V', 'SCHW'),
 ('WEC', 'DUK'),
 ('WEC', 'ES'),
 ('WEC', 'EXC'),
 ('XEL', 'ES')}

In [55]:
selected_pairs

[('NVDA', 'QCOM'),
 ('CSCO', 'TXN'),
 ('CRM', 'ADBE'),
 ('TXN', 'QCOM'),
 ('LLY', 'TMO'),
 ('MS', 'C'),
 ('MS', 'SCHW'),
 ('C', 'BK'),
 ('USB', 'SCHW'),
 ('BK', 'BAC'),
 ('BK', 'WFC'),
 ('BK', 'BLK'),
 ('V', 'SCHW'),
 ('CAT', 'EMR'),
 ('FDX', 'CAT'),
 ('FDX', 'ETN'),
 ('LMT', 'HON'),
 ('NEE', 'EXC'),
 ('D', 'DUK'),
 ('D', 'EXC'),
 ('D', 'AEP'),
 ('D', 'ES'),
 ('AEP', 'DUK'),
 ('AEP', 'EXC'),
 ('AEP', 'ES'),
 ('XEL', 'EXC'),
 ('XEL', 'AEP'),
 ('XEL', 'ES'),
 ('XEL', 'WEC'),
 ('XEL', 'SRE'),
 ('WEC', 'DUK'),
 ('WEC', 'EXC'),
 ('WEC', 'ES'),
 ('SRE', 'EXC'),
 ('SRE', 'ES'),
 ('PLD', 'AMT'),
 ('PLD', 'O'),
 ('PLD', 'SBAC'),
 ('O', 'AMT'),
 ('O', 'SBAC'),
 ('SBAC', 'PLD'),
 ('SBAC', 'EQR'),
 ('CCI', 'PLD'),
 ('CCI', 'SBAC'),
 ('NFLX', 'DIS')]

In [56]:
correlation_threshold = 0.40
principal = 500000

# 3. Define lagged correlation function for signals
def ew_lagged_return_correlation(stock_a, stock_b, span=60):
    """Calculates the 1-day lagged exponential weighted correlation between two stock price series."""
    # Calculate daily returns
    returns_a = stock_a.pct_change()
    returns_b = stock_b.pct_change()

    # Calculate lagged returns
    lagged_returns_a = returns_a.shift(1)
    lagged_returns_b = returns_b.shift(1)

    # Calculate exponentially weighted moving averages for lagged correlations
    correlation_a_with_lagged_b = returns_a.ewm(span=span).corr(lagged_returns_b)
    correlation_b_with_lagged_a = returns_b.ewm(span=span).corr(lagged_returns_a)

    return correlation_a_with_lagged_b, correlation_b_with_lagged_a

# 4. Generate signals based on lagged asset returns and correlation
def generate_lagged_signals(data, correlation_threshold=correlation_threshold, span=30):
    """Generates signals based on lagged asset return and correlation."""
    signals = pd.Series(index=data.columns)
    selected_pairs = []
    selected_non_lagged_stocks = []

    print("\nCorrelated Pairs Above Threshold:")
    for industry_name, industry_stocks in industries.items():
        for stock_a in industry_stocks:
            for stock_b in industry_stocks:
                if stock_a != stock_b:
                    # Calculate 1-day lagged return correlations
                    corr_a_with_lagged_b, corr_b_with_lagged_a = ew_lagged_return_correlation(data[stock_a], data[stock_b], span=span)

                    # Today's correlation values
                    correlation_today_a_lag_b = corr_a_with_lagged_b.iloc[-1]
                    correlation_today_b_lag_a = corr_b_with_lagged_a.iloc[-1]

                    # Apply the trading rules based on correlation values
                    if abs(correlation_today_a_lag_b) > correlation_threshold or abs(correlation_today_b_lag_a) > correlation_threshold:
                        # Print the correlated pair
                        print(f"{stock_a} (correlated with {stock_b}) - correlation a->lagged_b: {correlation_today_a_lag_b:.2f}, b->lagged_a: {correlation_today_b_lag_a:.2f}")

                        # Use the lagged return of stock_b
                        lagged_return_b = data[stock_b].pct_change().shift(1).iloc[-1]

                        # Case 1: Positive correlation and lagged return positive -> go long
                        if correlation_today_a_lag_b > correlation_threshold and lagged_return_b > 0:
                            signals[stock_a] = 1
                            selected_pairs.append((stock_a, stock_b))
                            selected_non_lagged_stocks.append(stock_a)

                        # Case 2: Negative correlation and lagged return negative -> go long
                        elif correlation_today_a_lag_b < -correlation_threshold and lagged_return_b < 0:
                            signals[stock_a] = 1
                            selected_pairs.append((stock_a, stock_b))
                            selected_non_lagged_stocks.append(stock_a)

                        # Case 3: Do nothing when the other conditions are not met
                        else:
                            signals[stock_a] = 0

    return signals, selected_pairs, selected_non_lagged_stocks

# 5. Mean-Variance Optimization Function
def mean_variance_optimizer(returns):
    """Optimizes the portfolio weights to achieve the best risk-return trade-off."""
    mean_returns = returns.mean()
    cov_matrix = returns.cov()

    # Number of assets
    num_assets = len(mean_returns)

    # Objective Function: Negative Sharpe Ratio
    def portfolio_performance(weights):
        """Calculates portfolio performance."""
        portfolio_return = np.dot(weights, mean_returns)
        portfolio_risk = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
        return -portfolio_return / portfolio_risk  # Minimize negative Sharpe ratio

    # Constraints: sum of weights = 1
    constraints = ({'type': 'eq', 'fun': lambda weights: np.sum(weights) - 1})
    # Bounds: weights between 0 and 1 (long-only)
    bounds = tuple((0, 1) for _ in range(num_assets))

    # Initial guess: Equal weights
    initial_weights = num_assets * [1. / num_assets]

    # Optimize portfolio using SLSQP
    optimized = minimize(portfolio_performance, initial_weights, method='SLSQP', bounds=bounds, constraints=constraints)
    
    return optimized.x

# 6. Today's trading signals based on lagged assets and filtered for optimization
def trade_today(data, principal=100000, correlation_threshold=correlation_threshold):
    """Generates signals for trading today and applies mean-variance optimization."""
    # Generate signals based on lagged asset strategy
    signals, selected_pairs, selected_non_lagged_stocks = generate_lagged_signals(data, correlation_threshold)

    # Filter the returns data to only include selected non-lagged stocks
    selected_data = data[selected_non_lagged_stocks]
    
    # Calculate returns over the last 6 months for the selected stocks (for mean-variance optimization)
    six_month_returns = selected_data.pct_change().dropna()

    # Optimize portfolio weights based on the selected non-lagged stocks
    optimized_weights = mean_variance_optimizer(six_month_returns)

    # Calculate dollar allocation for each asset
    allocation = {stock: weight * principal for stock, weight in zip(selected_non_lagged_stocks, optimized_weights) if weight > 0.001}

    return allocation, selected_pairs

# 7. Rebalancing function: check correlations for future rebalancing
def check_correlation_threshold(data, selected_pairs, correlation_threshold=correlation_threshold, span=30):
    """Checks if the selected pairs have dropped below the correlation threshold."""
    dropped_pairs = []
    for stock_a, stock_b in selected_pairs:
        # Recompute the rolling correlation
        corr_a_with_lagged_b, corr_b_with_lagged_a = ew_lagged_return_correlation(data[stock_a], data[stock_b], span=span)
        
        # Today's correlation values
        correlation_today_a_lag_b = corr_a_with_lagged_b.iloc[-1]
        correlation_today_b_lag_a = corr_b_with_lagged_a.iloc[-1]

        # Check if either correlation has dropped below the threshold
        if abs(correlation_today_a_lag_b) < correlation_threshold and abs(correlation_today_b_lag_a) < correlation_threshold:
            dropped_pairs.append((stock_a, stock_b))

    return dropped_pairs

# 8. Run today's trade and get the results
allocation, selected_pairs = trade_today(data, principal=principal)

# Output: Print the allocation and pairs for the next rebalancing
print("\nOptimized Portfolio Allocation (in dollars):")
for stock, alloc in allocation.items():
    print(f"{stock}: ${alloc:.2f}")

print("\nSelected Correlated Pairs for Trading:")
for stock_a, stock_b in selected_pairs:
    print(f"{stock_a} (based on {stock_b})")

# Output of selected pairs for future use
print("\nPairs to monitor for future rebalancing:")
for stock_a, stock_b in selected_pairs:
    print(f"Pair: {stock_a} and {stock_b}")

# # 9. Optional: Check if correlations have dropped below threshold for rebalancing
# dropped_pairs = check_correlation_threshold(data, selected_pairs)
# if dropped_pairs:
#     print("\nPairs to drop (correlation below threshold):")
#     for stock_a, stock_b in dropped_pairs:
#         print(f"{stock_a} (correlated with {stock_b})")
# else:
#     print("\nNo pairs to drop.")



Correlated Pairs Above Threshold:
NVDA (correlated with QCOM) - correlation a->lagged_b: -0.41, b->lagged_a: -0.12
ADBE (correlated with CRM) - correlation a->lagged_b: -0.34, b->lagged_a: -0.46
CSCO (correlated with TXN) - correlation a->lagged_b: -0.45, b->lagged_a: -0.16
CRM (correlated with ADBE) - correlation a->lagged_b: -0.46, b->lagged_a: -0.34
TXN (correlated with CSCO) - correlation a->lagged_b: -0.16, b->lagged_a: -0.45
TXN (correlated with QCOM) - correlation a->lagged_b: -0.54, b->lagged_a: -0.06
QCOM (correlated with NVDA) - correlation a->lagged_b: -0.12, b->lagged_a: -0.41
QCOM (correlated with TXN) - correlation a->lagged_b: -0.06, b->lagged_a: -0.54
PFE (correlated with DHR) - correlation a->lagged_b: 0.15, b->lagged_a: -0.41
UNH (correlated with DHR) - correlation a->lagged_b: 0.05, b->lagged_a: 0.40
DHR (correlated with PFE) - correlation a->lagged_b: -0.41, b->lagged_a: 0.15
DHR (correlated with UNH) - correlation a->lagged_b: 0.40, b->lagged_a: 0.05
MS (correlate

In [8]:
# Output: Print the allocation and pairs for the next rebalancing
print("\nOptimized Portfolio Allocation (in dollars):")
tmp = 0
for stock, alloc in allocation.items():
    tmp += alloc
tmp


Optimized Portfolio Allocation (in dollars):


476911.3920632361

# BREAK

In [21]:
import yfinance as yf
import numpy as np
import pandas as pd
from scipy.optimize import minimize

stocks = ['ASTS', 'LUMN', 'SMMT', 'VKTX', '196170.KQ', '0020.HK']

# Download the adjusted close price data for the last year
end_date = "2024-10-09"
start_date = "2023-10-09"
data = yf.download(stocks, start=start_date, end=end_date)['Adj Close']

# Drop any rows with missing data
data = data.dropna()

# Calculate daily returns
returns = data.pct_change().dropna()

# Define a function to calculate portfolio performance
def portfolio_performance(weights, returns):
    portfolio_return = np.sum(returns.mean() * weights) * 252
    portfolio_volatility = np.sqrt(np.dot(weights.T, np.dot(returns.cov() * 252, weights)))
    sharpe_ratio = portfolio_return / portfolio_volatility
    return portfolio_return, portfolio_volatility, sharpe_ratio

# Define the objective function (negative Sharpe ratio for minimization)
def negative_sharpe_ratio(weights, returns):
    return -portfolio_performance(weights, returns)[2]

# Constraints: sum of weights is 1
constraints = ({'type': 'eq', 'fun': lambda weights: np.sum(weights) - 1})

# Bounds: each weight is between 0 and 1
bounds = [(0, 1)] * len(stocks)

# Initial guess (equal distribution)
initial_weights = [1 / len(stocks)] * len(stocks)

# Optimize the portfolio for maximum Sharpe ratio
optimized_result = minimize(negative_sharpe_ratio, initial_weights, args=(returns,), method='SLSQP', bounds=bounds, constraints=constraints)

# Get the optimized weights and performance metrics
optimized_weights = optimized_result.x
optimized_return, optimized_volatility, optimized_sharpe_ratio = portfolio_performance(optimized_weights, returns)

# Display results
print("Optimized Weights:", optimized_weights)
print("Expected Annual Return:", optimized_return)
print("Expected Annual Volatility:", optimized_volatility)
print("Sharpe Ratio:", optimized_sharpe_ratio)

[*********************100%***********************]  6 of 6 completed


Optimized Weights: [0.08631445 0.42526524 0.13584154 0.12507876 0.07977838 0.14772163]
Expected Annual Return: 2.618655976471768
Expected Annual Volatility: 0.6308087017574668
Sharpe Ratio: 4.151267998009621


In [24]:

optimized_weights * 500000

array([ 43157.22566575, 212632.61880221,  67920.77019487,  62539.37806597,
        39889.18991862,  73860.81735258])