<a href="https://www.kaggle.com/code/ferhat00/bist100-optimizer?scriptVersionId=289568166" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# 🇹🇷 BIST 100 Portfolio Optimizer

## Complete Optimization + Analysis

**Features**: Data Download | Portfolio Optimization | Performance Analysis | Interactive Visualizations

## 📦 Setup

In [30]:
!pip install quantstats yfinance plotly pandas matplotlib seaborn scipy nbformat



In [31]:
import pandas as pd
import numpy as np
import yfinance as yf
import quantstats as qs
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from scipy.optimize import minimize
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries used
# below, so upcoming API removals will not be announced while this filter is active.
warnings.filterwarnings('ignore')
# Select the matplotlib style sheet.
plt.style.use('seaborn-v0_8-darkgrid')
# quantstats hook; presumably attaches its helpers to pandas objects - confirm in the quantstats docs.
qs.extend_pandas()
print('✅ Setup complete!')

✅ Setup complete!


## 🎯 Step 1: Define BIST 100 Universe

In [32]:
# Candidate universe: 100 symbols, each written with the '.IS' suffix and passed
# as-is to yf.download in the download step.
# NOTE(review): index membership changes over time; symbols the data provider cannot
# resolve come back without prices - verify this list against the current constituents.
BIST100_TICKERS = [
    'AEFES.IS', 'AKBNK.IS', 'AKSA.IS', 'AKSEN.IS', 'ALARK.IS',
    'ANSGR.IS', 'ARCLK.IS', 'ASELS.IS', 'BIMAS.IS', 'BISAS.IS',
    'BRSAN.IS', 'BRYAT.IS', 'BTCIM.IS', 'BUCIM.IS', 'CCOLA.IS',
    'CIMSA.IS', 'CLEBI.IS', 'DOAS.IS', 'DOHOL.IS', 'ECILC.IS',
    'EGEEN.IS', 'EKGYO.IS', 'ENKAI.IS', 'EREGL.IS', 'FENER.IS',
    'FROTO.IS', 'GARAN.IS', 'GSRAY.IS', 'GUBRF.IS', 'GENTS.IS',
    'HALKB.IS', 'HEKTS.IS', 'IPDRO.IS', 'ISCTR.IS', 'ISFIN.IS',
    'KCHOL.IS', 'KOZAA.IS', 'KOZAL.IS', 'KRDMD.IS', 'MGROS.IS',
    'OYAKC.IS', 'MAKIM.IS', 'OTKAR.IS', 'PETKM.IS', 'SAHOL.IS',
    'SASA.IS', 'SISE.IS', 'SKBNK.IS', 'TAVHL.IS', 'TCELL.IS',
    'THYAO.IS', 'TKFEN.IS', 'TOASO.IS', 'TSKB.IS', 'TSPOR.IS',
    'TTKOM.IS', 'TTRAK.IS', 'TUKAS.IS', 'TUPRS.IS', 'ULKER.IS',
    'VAKBN.IS', 'VESTL.IS', 'AGHOL.IS', 'YKBNK.IS', 'ZOREN.IS',
    'KUYUM.IS', 'PGSUS.IS', 'ODAS.IS', 'MAVI.IS', 'ENJSA.IS',
    'MPARK.IS', 'SOKM.IS', 'KONTR.IS', 'TUREX.IS', 'CANTE.IS',
    'GENIL.IS', 'GRSLT.IS', 'YESIL.IS', 'MRGNE.IS', 'MIATK.IS',
    'DGKLY.IS', 'GRSEL.IS', 'KCAER.IS', 'ASTOR.IS', 'EUPWR.IS',
    'GRTHO.IS', 'CWENE.IS', 'KLVNE.IS', 'PAMEL.IS', 'ENJYO.IS',
    'REEDR.IS', 'TABGD.IS', 'BINHO.IS', 'PASEU.IS', 'OBAMS.IS',
    'ALTIN.IS', 'EFORC.IS', 'GLRMK.IS', 'DSFAK.IS', 'BALSU.IS'
]
print(f'📊 {len(BIST100_TICKERS)} stocks defined')

📊 100 stocks defined


## 📥 Step 2: Download Historical Data

In [33]:
# Configure date range
# NOTE: datetime.now() ties the window to the moment this cell runs, so every
# re-run shifts the period (and therefore every downstream result).
end_date = datetime.now()
start_date = end_date - timedelta(days=730)  # 730 calendar days, i.e. roughly a 2-year lookback

print(f'📅 Period: {start_date.date()} to {end_date.date()}')

📅 Period: 2024-01-02 to 2026-01-01


In [34]:
# Fetch price history for the whole universe with one yf.download call
print('Downloading BIST 100 data...')
download_options = dict(
    start=start_date,
    end=end_date,
    group_by='ticker',  # the extraction cell below relies on this ticker-grouped column layout
    threads=True,
    progress=True,
)
data = yf.download(BIST100_TICKERS, **download_options)
print('✓ Download complete!')

Downloading BIST 100 data...


[*********************100%***********************]  100 of 100 completed

10 Failed downloads:
['MRGNE.IS', 'KLVNE.IS', 'DGKLY.IS', 'DSFAK.IS', 'GRSLT.IS', 'ENJYO.IS', 'KUYUM.IS', 'IPDRO.IS']: YFTzMissingError('possibly delisted; no timezone found')
['BISAS.IS', 'ALTIN.IS']: YFPricesMissingError('possibly delisted; no price data found  (1d 2024-01-02 19:43:41.053679 -> 2026-01-01 19:43:41.053679)')


✓ Download complete!


In [35]:
# Diagnostic: report the type, shape and column layout of the downloaded frame
column_index = data.columns
print('Data type:', type(data))
print('Data shape:', data.shape)
print('\nColumn structure:')
if not isinstance(column_index, pd.MultiIndex):
    # Flat columns: list the first ten labels
    print('Single level columns:', column_index.tolist()[:10])
else:
    print('MultiIndex columns - Levels:', column_index.nlevels)
    outer_level = column_index.levels[0]
    if len(outer_level) > 0:
        print('Level 0 (first 5):', outer_level[:5].tolist())
    else:
        print('Level 0 (first 5):', 'empty')
    if column_index.nlevels > 1:
        print('Level 1:', column_index.levels[1].tolist())
    else:
        print('Level 1:', 'N/A')
print('\nFirst few rows:')
data.head()

Data type: <class 'pandas.core.frame.DataFrame'>
Data shape: (502, 510)

Column structure:
MultiIndex columns - Levels: 2
Level 0 (first 5): ['AEFES.IS', 'AGHOL.IS', 'AKBNK.IS', 'AKSA.IS', 'AKSEN.IS']
Level 1: ['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']

First few rows:


Ticker,TSPOR.IS,TSPOR.IS,TSPOR.IS,TSPOR.IS,TSPOR.IS,BRSAN.IS,BRSAN.IS,BRSAN.IS,BRSAN.IS,BRSAN.IS,...,GUBRF.IS,GUBRF.IS,GUBRF.IS,GUBRF.IS,GUBRF.IS,KOZAA.IS,KOZAA.IS,KOZAA.IS,KOZAA.IS,KOZAA.IS
Price,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume,...,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2024-01-02,1.643031,1.649375,1.598625,1.617656,23801863.0,591.0,650.0,591.0,650.0,1033508.0,...,155.0,169.100006,154.600006,169.100006,34025794.0,42.540001,43.419998,42.540001,43.18,6884983.0
2024-01-03,1.617656,1.624,1.541531,1.541531,25549503.0,680.0,689.5,638.0,644.0,2106339.0,...,173.5,179.399994,161.399994,163.5,70903373.0,43.200001,44.34,41.200001,41.900002,14225017.0
2024-01-04,1.547875,1.566906,1.535187,1.560562,21663203.0,647.5,708.0,624.0,634.5,3434629.0,...,164.0,167.300003,161.399994,161.600006,18969438.0,41.82,43.34,41.259998,43.34,13218453.0
2024-01-05,1.560562,1.630343,1.547875,1.592281,48984925.0,644.0,651.0,612.5,625.5,1243994.0,...,162.300003,162.800003,151.199997,152.800003,21863978.0,43.34,43.5,42.240002,43.0,8944651.0
2024-01-08,1.617656,1.750875,1.617656,1.750875,53788174.0,628.5,645.0,615.0,616.5,1024228.0,...,152.800003,156.800003,148.5,151.5,23067667.0,43.060001,43.459999,42.799999,42.959999,11452361.0


In [36]:
# Extract one price series per ticker ('Adj Close' when present, otherwise 'Close'),
# then keep only the tickers whose history is complete enough to optimise on.
MIN_COMPLETENESS = 0.60  # a ticker needs a price on at least this share of rows
MAX_FFILL_DAYS = 20      # longest run of consecutive missing rows that is forward-filled

prices = pd.DataFrame()

if data.empty:
    print('❌ No data downloaded - all tickers may have failed')
elif isinstance(data.columns, pd.MultiIndex):
    # Handle MultiIndex columns (group_by='ticker'): level 0 = ticker, level 1 = price field
    print(f'Processing MultiIndex data with {data.columns.nlevels} levels...')
    tickers_in_data = data.columns.get_level_values(0).unique()
    print(f'Found {len(tickers_in_data)} tickers in data')

    # Collect the series first and build the frame once; inserting columns one by
    # one into a DataFrame fragments it and is slower.
    series_by_ticker = {}
    for ticker in tickers_in_data:
        for field in ('Adj Close', 'Close'):
            if (ticker, field) in data.columns:
                series_by_ticker[ticker] = data[(ticker, field)]
                break
        else:
            # Previously such tickers vanished without any message
            print(f'Skipping {ticker}: no Adj Close/Close column')
    prices = pd.DataFrame(series_by_ticker)
else:
    # Handle single-level columns (the resulting column keeps the field name)
    print('Processing single-level columns...')
    if 'Adj Close' in data.columns:
        prices = data[['Adj Close']].copy()
    elif 'Close' in data.columns:
        prices = data[['Close']].copy()

print(f'\n✓ Extracted {len(prices.columns)} stocks')
print(f'✓ Rows: {len(prices)}')

# Clean data (relaxed criteria: 60% completeness + 20-day forward-fill)
tickers = []
if len(prices.columns) > 0:
    min_data = len(prices) * MIN_COMPLETENESS
    prices = prices.loc[:, prices.count() >= min_data]
    # .ffill() replaces the deprecated fillna(method='ffill'). Forward-filling cannot
    # fill gaps at the start of a series or gaps longer than the limit, so any column
    # that still contains a NaN afterwards is dropped by dropna(axis=1).
    prices = prices.ffill(limit=MAX_FFILL_DAYS).dropna(axis=1)
    tickers = prices.columns.tolist()

    if len(tickers) > 0:
        print(f'\n✓ {len(tickers)} stocks with sufficient data')
        print(f'✓ Period: {prices.index[0].date()} to {prices.index[-1].date()}')
        print(f'✓ Trading days: {len(prices)}')
    else:
        print('\n⚠️ No stocks passed the data quality filters')
else:
    print('\n❌ No price data extracted')

Processing MultiIndex data with 2 levels...
Found 100 tickers in data

✓ Extracted 100 stocks
✓ Rows: 502

✓ 86 stocks with sufficient data
✓ Period: 2024-01-02 to 2025-12-31
✓ Trading days: 502


In [37]:
# Preview data: show the last rows of the cleaned price table
prices.tail()

Unnamed: 0_level_0,TSPOR.IS,BRSAN.IS,BRYAT.IS,TOASO.IS,MAKIM.IS,BUCIM.IS,TABGD.IS,PGSUS.IS,ISFIN.IS,GRSEL.IS,...,PAMEL.IS,CANTE.IS,VAKBN.IS,ANSGR.IS,MAVI.IS,GENTS.IS,ENJSA.IS,SAHOL.IS,GUBRF.IS,KOZAA.IS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-12-25,1.11,523.5,2183.0,247.399994,16.440001,7.25,225.5,206.5,19.02,323.5,...,83.900002,2.11,29.68,23.639999,43.0,10.49,89.0,84.599998,351.0,107.699997
2025-12-26,1.1,513.5,2147.0,251.75,15.97,7.32,232.399994,204.5,18.799999,324.0,...,86.0,2.07,30.200001,23.360001,42.560001,10.25,88.900002,84.0,362.75,107.599998
2025-12-29,1.07,528.5,2124.0,247.699997,15.55,7.31,215.399994,195.399994,18.48,317.5,...,82.300003,2.02,30.4,23.32,42.459999,9.74,89.550003,83.949997,352.0,103.599998
2025-12-30,1.08,529.5,2125.0,248.600006,15.31,7.55,213.0,192.199997,16.690001,319.5,...,84.949997,2.01,30.34,22.98,43.18,9.65,90.349998,83.5,345.25,103.900002
2025-12-31,1.07,555.0,2200.0,247.0,14.95,8.0,209.100006,191.699997,16.35,320.0,...,82.050003,2.0,30.68,23.200001,43.52,9.52,90.349998,84.25,348.0,104.400002


## 🎯 Step 3: Portfolio Optimization

In [38]:
# Period-over-period simple returns; rows containing a NaN (at least the first row,
# which has no predecessor) are removed
returns = prices.pct_change()
returns = returns.dropna()

# Annualise the mean return and the covariance with a factor of 252
expected_returns = returns.mean().mul(252)
cov_matrix = returns.cov().mul(252)

# Risk-free rate (Turkish 1-year bond ~30%)
risk_free_rate = 0.30

print(f'✓ Returns calculated for {len(tickers)} stocks')
print(f'✓ Risk-free rate: {risk_free_rate*100:.1f}%')

✓ Returns calculated for 86 stocks
✓ Risk-free rate: 30.0%


In [39]:
# Optimization helper functions
def portfolio_stats(weights, returns, cov):
    """Return ``(expected_return, volatility)`` for a weighted portfolio.

    Args:
        weights: Array of asset weights.
        returns: Per-asset expected returns, multiplied element-wise with ``weights``.
        cov: Covariance matrix of the assets.

    Returns:
        Tuple of the summed weighted returns and ``sqrt(weights' @ cov @ weights)``.
    """
    cov_times_w = np.dot(cov, weights)
    variance = np.dot(weights.T, cov_times_w)
    weighted_return = np.sum(returns * weights)
    return weighted_return, np.sqrt(variance)

def neg_sharpe(weights, returns, cov, rf):
    """Return the negated Sharpe ratio, ``-(return - rf) / volatility``.

    Negated so that a minimiser maximises the Sharpe ratio.
    """
    port_ret, port_vol = portfolio_stats(weights, returns, cov)
    excess_return = port_ret - rf
    return -excess_return / port_vol

def minimize_vol(weights, returns, cov):
    """Return only the volatility component of ``portfolio_stats``."""
    _, port_vol = portfolio_stats(weights, returns, cov)
    return port_vol

print('✓ Optimization functions defined')

✓ Optimization functions defined


In [40]:
# Run optimizations: long-only, fully invested, each weight capped at max_weight
n_assets = len(tickers)
max_weight = 0.15

# Fail early with a readable message instead of a ZeroDivisionError (empty universe)
# or a silently failed solve (caps that cannot add up to 100%).
if n_assets == 0:
    raise ValueError('No tickers available for optimization - check the download and cleaning cells')
if n_assets * max_weight < 1:
    raise ValueError(
        f'{n_assets} assets capped at {max_weight:.0%} each cannot sum to 100% - '
        'raise the cap or widen the universe'
    )

init = np.full(n_assets, 1 / n_assets)  # equal-weight starting point
cons = {'type': 'eq', 'fun': lambda x: np.sum(x) - 1}  # weights must sum to 1
bnds = tuple((0, max_weight) for _ in range(n_assets))

print('Running optimizations...')

# Max Sharpe
max_sharpe = minimize(neg_sharpe, init, args=(expected_returns, cov_matrix, risk_free_rate),
                      method='SLSQP', bounds=bnds, constraints=cons)
if not max_sharpe.success:
    # warnings are globally silenced in this notebook, so report through print
    print(f'⚠️ Max Sharpe optimization did not converge: {max_sharpe.message}')
ms_ret, ms_vol = portfolio_stats(max_sharpe.x, expected_returns, cov_matrix)
ms_sharpe = (ms_ret - risk_free_rate) / ms_vol

# Min Volatility (the objective takes no risk-free rate)
min_vol = minimize(minimize_vol, init, args=(expected_returns, cov_matrix),
                   method='SLSQP', bounds=bnds, constraints=cons)
if not min_vol.success:
    print(f'⚠️ Min Volatility optimization did not converge: {min_vol.message}')
mv_ret, mv_vol = portfolio_stats(min_vol.x, expected_returns, cov_matrix)
mv_sharpe = (mv_ret - risk_free_rate) / mv_vol

print('✓ Optimizations complete')

Running optimizations...
✓ Optimizations complete


## 📊 Step 4: Optimization Results

In [41]:
# Summary table: one record per strategy; "Positions" counts weights above 0.1%
summary_records = [
    {
        'Strategy': 'Maximum Sharpe',
        'Return (%)': ms_ret*100,
        'Volatility (%)': ms_vol*100,
        'Sharpe Ratio': ms_sharpe,
        'Positions': (max_sharpe.x > 0.001).sum(),
    },
    {
        'Strategy': 'Minimum Volatility',
        'Return (%)': mv_ret*100,
        'Volatility (%)': mv_vol*100,
        'Sharpe Ratio': mv_sharpe,
        'Positions': (min_vol.x > 0.001).sum(),
    },
]
results = pd.DataFrame(summary_records)

results

Unnamed: 0,Strategy,Return (%),Volatility (%),Sharpe Ratio,Positions
0,Maximum Sharpe,91.301533,23.405638,2.619093,12
1,Minimum Volatility,40.476658,18.531383,0.565347,26


In [42]:
# Label the optimised weights by ticker, then keep holdings above 1%, largest first
max_sharpe_weights = pd.Series(max_sharpe.x, index=tickers)
min_vol_weights = pd.Series(min_vol.x, index=tickers)

max_sharpe_top = max_sharpe_weights.loc[max_sharpe_weights.gt(0.01)].sort_values(ascending=False)
min_vol_top = min_vol_weights.loc[min_vol_weights.gt(0.01)].sort_values(ascending=False)

print('MAX SHARPE TOP POSITIONS:')
print(max_sharpe_top.iloc[:10])
print('\nMIN VOLATILITY TOP POSITIONS:')
print(min_vol_top.iloc[:10])

MAX SHARPE TOP POSITIONS:
GRSEL.IS    0.150000
ASELS.IS    0.150000
PASEU.IS    0.150000
GENIL.IS    0.144922
GRTHO.IS    0.140993
TAVHL.IS    0.086405
MPARK.IS    0.062383
GENTS.IS    0.042947
ENKAI.IS    0.030296
GARAN.IS    0.020117
dtype: float64

MIN VOLATILITY TOP POSITIONS:
TUPRS.IS    0.150000
TTRAK.IS    0.125815
BTCIM.IS    0.074048
ANSGR.IS    0.066613
GENIL.IS    0.061203
TABGD.IS    0.058805
MPARK.IS    0.058759
PASEU.IS    0.047685
TAVHL.IS    0.042595
GENTS.IS    0.041841
dtype: float64


In [43]:
# Visualize optimal portfolios: bar chart of up to 15 of the largest Max Sharpe holdings
ms_bar_data = max_sharpe_top.iloc[:15]

fig = go.Figure(
    data=[
        go.Bar(
            x=ms_bar_data.index,
            y=ms_bar_data.values*100,  # fraction -> percent
            name='Max Sharpe',
            marker_color='green',
        )
    ]
)
fig.update_layout(
    title='Maximum Sharpe Ratio Portfolio - Top 15 Holdings',
    xaxis_title='Ticker',
    yaxis_title='Weight (%)',
    height=500,
)

fig.show()

In [44]:
# Bar chart of up to 15 of the largest Minimum Volatility holdings
mv_bar_data = min_vol_top.iloc[:15]

fig2 = go.Figure(
    data=[
        go.Bar(
            x=mv_bar_data.index,
            y=mv_bar_data.values*100,  # fraction -> percent
            name='Min Volatility',
            marker_color='blue',
        )
    ]
)
fig2.update_layout(
    title='Minimum Volatility Portfolio - Top 15 Holdings',
    xaxis_title='Ticker',
    yaxis_title='Weight (%)',
    height=500,
)

fig2.show()

## 📈 Step 5: Efficient Frontier

In [45]:
# Calculate efficient frontier: for each target return, find the lowest-volatility
# portfolio that hits it; targets the solver cannot satisfy are left out.
target_returns = np.linspace(expected_returns.min(), expected_returns.max(), 50)
frontier_ret = []
frontier_vol = []

for goal_return in target_returns:
    cons_ef = [
        # fully invested
        {'type': 'eq', 'fun': lambda x: np.sum(x) - 1},
        # portfolio return equals this iteration's target (bound via default argument)
        {'type': 'eq',
         'fun': lambda x, goal=goal_return: portfolio_stats(x, expected_returns, cov_matrix)[0] - goal},
    ]
    solution = minimize(minimize_vol, init, args=(expected_returns, cov_matrix),
                        method='SLSQP', bounds=bnds, constraints=cons_ef)
    if not solution.success:
        continue
    point_ret, point_vol = portfolio_stats(solution.x, expected_returns, cov_matrix)
    frontier_ret.append(point_ret)
    frontier_vol.append(point_vol)

print(f'✓ Efficient frontier calculated ({len(frontier_ret)} points)')

✓ Efficient frontier calculated (35 points)


In [46]:
# Plot efficient frontier together with the two optimised portfolios (values in percent)
frontier_line = go.Scatter(
    x=np.array(frontier_vol)*100,
    y=np.array(frontier_ret)*100,
    mode='lines',
    name='Efficient Frontier',
    line=dict(color='blue', width=3),
)

max_sharpe_marker = go.Scatter(
    x=[ms_vol*100],
    y=[ms_ret*100],
    mode='markers',
    name=f'Max Sharpe ({ms_sharpe:.2f})',
    marker=dict(size=15, color='green', symbol='star'),
)

min_vol_marker = go.Scatter(
    x=[mv_vol*100],
    y=[mv_ret*100],
    mode='markers',
    name='Min Volatility',
    marker=dict(size=15, color='red', symbol='diamond'),
)

fig = go.Figure(data=[frontier_line, max_sharpe_marker, min_vol_marker])
fig.update_layout(
    title='BIST 100 Efficient Frontier',
    xaxis_title='Volatility (%)',
    yaxis_title='Expected Return (%)',
    height=600,
    hovermode='closest',
)

fig.show()

## 🎲 Step 6: Monte Carlo Simulation

In [47]:
# Monte Carlo simulation: random long-only portfolios, evaluated in one vectorised
# pass instead of a 100,000-iteration Python loop.
np.random.seed(42)
num_portfolios = 100000

# One row of raw weights per portfolio, normalised so every row sums to 1.
# The seeded generator fills the matrix row by row, so the draws follow the same
# sequence as sampling n_assets values per portfolio in a loop.
mc_weights = np.random.random((num_portfolios, n_assets))
mc_weights /= mc_weights.sum(axis=1, keepdims=True)

# Plain arrays so the matrix products below are positional
mu = np.asarray(expected_returns, dtype=float)
sigma = np.asarray(cov_matrix, dtype=float)

mc_returns = mc_weights @ mu
# Row-wise quadratic form w' @ sigma @ w for every portfolio
mc_volatility = np.sqrt(((mc_weights @ sigma) * mc_weights).sum(axis=1))
mc_sharpe = (mc_returns - risk_free_rate) / mc_volatility

print(f'✓ {num_portfolios:,} random portfolios simulated')

✓ 100,000 random portfolios simulated


In [48]:
# Plot Monte Carlo results with the efficient frontier and optimal portfolios on top
fig = go.Figure()

# Random portfolios, coloured by Sharpe ratio. Scattergl (WebGL) is used for this
# trace because it holds one marker per simulated portfolio; an SVG Scatter trace
# of that size makes the figure slow to render and the saved notebook very large.
fig.add_trace(go.Scattergl(
    x=mc_volatility*100,
    y=mc_returns*100,
    mode='markers',
    name='Random Portfolios',
    marker=dict(
        size=3,
        color=mc_sharpe,
        colorscale='RdYlGn',
        showscale=True,
        colorbar=dict(title='Sharpe'),
        opacity=0.5
    )
))

# Efficient frontier
fig.add_trace(go.Scatter(
    x=np.array(frontier_vol)*100,
    y=np.array(frontier_ret)*100,
    mode='lines',
    name='Efficient Frontier',
    line=dict(color='black', width=3)
))

# Optimal portfolios (green = Max Sharpe, red = Min Volatility)
fig.add_trace(go.Scatter(
    x=[ms_vol*100, mv_vol*100],
    y=[ms_ret*100, mv_ret*100],
    mode='markers',
    name='Optimal Portfolios',
    marker=dict(size=20, color=['green', 'red'], symbol='star',
                line=dict(width=2, color='white'))
))

fig.update_layout(
    title='Portfolio Optimization: Monte Carlo + Efficient Frontier',
    xaxis_title='Volatility (%)',
    yaxis_title='Expected Return (%)',
    height=700
)

fig.show()

## 📝 Summary & Export

In [49]:
# Save optimal weights: keep tickers held (> 0.1%) by at least one strategy,
# ordered by their Max Sharpe weight, and write them to CSV
all_weights = pd.DataFrame({
    'Ticker': tickers,
    'Max_Sharpe_Weight': max_sharpe.x,
    'Min_Vol_Weight': min_vol.x
})

is_held = all_weights[['Max_Sharpe_Weight', 'Min_Vol_Weight']].gt(0.001).any(axis=1)
optimal_df = all_weights.loc[is_held].sort_values('Max_Sharpe_Weight', ascending=False)

optimal_df.to_csv('bist100_optimal_weights.csv', index=False)
print('✓ Saved: bist100_optimal_weights.csv')

optimal_df.head(20)

✓ Saved: bist100_optimal_weights.csv


Unnamed: 0,Ticker,Max_Sharpe_Weight,Min_Vol_Weight
9,GRSEL.IS,0.15,0.02904681
45,ASELS.IS,0.15,0.01777027
12,PASEU.IS,0.15,0.04768495
56,GENIL.IS,0.1449219,0.06120328
15,GRTHO.IS,0.1409929,0.02531555
13,TAVHL.IS,0.08640538,0.04259528
70,MPARK.IS,0.06238296,0.05875914
81,GENTS.IS,0.04294705,0.04184145
18,ENKAI.IS,0.03029643,0.0350923
31,GARAN.IS,0.02011653,3.71715e-18


## ✅ Optimization Complete!

### Key Results:
- **Maximum Sharpe Ratio Portfolio**: Optimized for risk-adjusted returns
- **Minimum Volatility Portfolio**: Lowest risk exposure
- **Efficient Frontier**: All optimal risk-return combinations
- **Monte Carlo**: 100,000 random portfolios for comparison

### Next Steps:
1. Review optimal weights CSV file
2. Implement chosen portfolio strategy
3. Monitor performance over time
4. Rebalance periodically