In [1]:
from vnstock import Quote
import pandas as pd

# Tickers to download, plus the date window and bar size shared by every request
symbols = ['REE', 'FMC', 'DHC']
start_date = '2024-01-01'
end_date = '2025-03-19'
interval = '1D'
print(f"Fetching historical price data for: {symbols}")

# symbol -> price-history DataFrame; only non-empty results are stored
all_historical_data = {}

for ticker in symbols:
    try:
        print(f"\nProcessing {ticker}...")
        history = Quote(symbol=ticker).history(
            start=start_date,
            end=end_date,
            interval=interval,
            to_df=True,
        )

        if history.empty:
            print(f"No historical data available for {ticker}")
            continue

        all_historical_data[ticker] = history
        print(f"Successfully fetched {len(history)} records for {ticker}")
    except Exception as exc:
        # Best effort: report the failure and carry on with the remaining tickers
        print(f"Error fetching data for {ticker}: {exc}")

def _merge_on_time(frames):
    """Outer-join the given DataFrames on their 'time' column and sort by 'time'.

    Parameters
    ----------
    frames : iterable of pandas.DataFrame
        Each frame must contain a 'time' column.

    Returns
    -------
    pandas.DataFrame
        The merged, time-sorted frame, or an empty DataFrame when `frames`
        yields nothing.
    """
    merged = None
    for frame in frames:
        if merged is None:
            merged = frame
        else:
            merged = pd.merge(merged, frame, on='time', how='outer')
    return pd.DataFrame() if merged is None else merged.sort_values('time')


# Bind both results up front so the names exist even when nothing was fetched;
# otherwise later cells fail with a NameError instead of seeing an empty frame.
combined_data = pd.DataFrame()
combined_prices = pd.DataFrame()

if all_historical_data:
    fetched = {
        symbol: data
        for symbol, data in all_historical_data.items()
        if not data.empty
    }

    # Wide table with every column prefixed by its symbol; 'time' stays
    # unprefixed because it is the join key. rename() returns a new frame, so
    # the per-symbol frames in all_historical_data are left untouched.
    combined_data = _merge_on_time(
        data.rename(columns={col: f'{symbol}_{col}' for col in data.columns if col != 'time'})
        for symbol, data in fetched.items()
    )

    if not combined_data.empty:
        # Display sample of combined data
        print("\nSample of combined data:")
        print(combined_data.head(3))

    # Close prices only (one '<symbol>_close' column per symbol), for comparison purposes
    combined_prices = _merge_on_time(
        data[['time', 'close']].rename(columns={'close': f'{symbol}_close'})
        for symbol, data in fetched.items()
    )

    # CSV export of the close prices is currently disabled:
    # combined_close_csv_filename = './outputs/combined_close_prices.csv'
    # combined_prices.to_csv(combined_close_csv_filename, index=False, encoding='utf-8-sig')
    # print(f"Combined close price data exported to {combined_close_csv_filename}")
else:
    print("No historical data was fetched for any symbol.")

Phiên bản Vnstock 3.2.5 đã có mặt, vui lòng cập nhật với câu lệnh : `pip install vnstock --upgrade`.
Lịch sử phiên bản: https://vnstocks.com/docs/tai-lieu/lich-su-phien-ban
Phiên bản hiện tại 3.2.2

Fetching historical price data for: ['REE', 'FMC', 'DHC']

Processing REE...
Successfully fetched 300 records for REE

Processing FMC...
Successfully fetched 300 records for FMC

Processing DHC...
Successfully fetched 300 records for DHC

Sample of combined data:
        time  REE_open  REE_high  REE_low  REE_close  REE_volume  FMC_open  \
0 2024-01-02     48.54     49.21    48.20      48.54      779590     42.97   
1 2024-01-03     48.54     48.79    48.20      48.62      376152     43.54   
2 2024-01-04     48.88     49.64    48.54      48.71      899128     43.88   

   FMC_high  FMC_low  FMC_close  FMC_volume  DHC_open  DHC_high  DHC_low  \
0     44.17    42.97      43.78       18200     36.32     36.46    35.94   
1     43.88    43.50      43.69       26222     36.08     36.41    36.08   
2     44.07    43.64      43.83       19700     36.36     37.50    36.32   

   DHC_close  DHC_volume  
0      36.08      118392  
1      36.36      163177  
2      37.08      319156  


In [2]:
# Suffix that marks a close-price column in combined_prices ('<symbol>_close')
close_suffix = '_close'

# 1-3. Work on a new frame: parse 'time' as datetime and use it as the index.
#      assign()/set_index() return new objects, so combined_prices is not mutated.
prices_df = (
    combined_prices
    .assign(time=pd.to_datetime(combined_prices['time']))
    .set_index('time')
)

# 4. Keep only the close-price columns and rename them to the bare symbol.
#    Match and strip the suffix only: a substring test/replace would also hit
#    names that merely contain '_close' somewhere in the middle.
close_price_columns = [col for col in prices_df.columns if col.endswith(close_suffix)]
prices_df = prices_df[close_price_columns]
prices_df.columns = [col[:-len(close_suffix)] for col in close_price_columns]

# 5. Drop every date on which at least one symbol has no close price
prices_df = prices_df.dropna()
print(prices_df.head())

              REE    FMC    DHC
time                           
2024-01-02  48.54  43.78  36.08
2024-01-03  48.62  43.69  36.36
2024-01-04  48.71  43.83  37.08
2024-01-05  48.45  43.88  37.98
2024-01-08  47.95  43.88  39.50


In [3]:
# Risk-free rate handed to max_sharpe() and portfolio_performance() further down
risk_free_rate = 0.02
# Risk-aversion coefficient handed to max_quadratic_utility() further down
risk_aversion = 1

In [4]:
from pypfopt.expected_returns import returns_from_prices
# Shared switch for the return estimators: False selects simple returns, True log returns
log_returns = False

# Per-period returns derived from the close-price table
returns = returns_from_prices(prices_df, log_returns=log_returns)
returns.head()

Unnamed: 0_level_0,REE,FMC,DHC
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-01-03,0.001648,-0.002056,0.007761
2024-01-04,0.001851,0.003204,0.019802
2024-01-05,-0.005338,0.001141,0.024272
2024-01-08,-0.01032,0.0,0.040021
2024-01-09,-0.012304,0.013218,-0.00481


In [5]:
from pypfopt import EfficientFrontier, risk_models, expected_returns, DiscreteAllocation
from pypfopt.exceptions import OptimizationError
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import sample_cov #for covariance matrix, get more methods from risk_models
from pypfopt.efficient_frontier import EfficientFrontier


# For most portfolio optimization purposes the default simple returns
# (pct_change()) are adequate, but logarithmic returns can provide more robust
# results in some cases, especially when dealing with volatile assets or longer
# time horizons. The choice is controlled by the `log_returns` flag.
mu = mean_historical_return(prices_df, log_returns=log_returns)

# Estimate the covariance from the same kind of returns as `mu`; without the
# flag the two inputs to the optimiser silently diverge once log_returns=True.
S = sample_cov(prices_df, log_returns=log_returns)

In [6]:
import numpy as np
import pandas as pd
import os
from bokeh.plotting import figure, output_file, save, show
from bokeh.layouts import column, row
from bokeh.palettes import Category10, viridis
from bokeh.models import ColumnDataSource, HoverTool, ColorBar, LinearColorMapper
from bokeh.transform import linear_cmap
from bokeh.models.widgets import Div
from bokeh.io import output_notebook

# Route Bokeh output to the notebook and make sure the export folder exists
output_notebook()
os.makedirs('./outputs', exist_ok=True)


def _solve(run_objective):
    """Optimise a fresh EfficientFrontier(mu, S) with the given objective.

    `run_objective` receives the new frontier and must call one of its
    optimisation methods. Returns (frontier, cleaned weights, performance),
    where performance is what portfolio_performance() returns for the
    notebook's risk_free_rate.
    """
    frontier = EfficientFrontier(mu, S)
    run_objective(frontier)
    cleaned = frontier.clean_weights()
    performance = frontier.portfolio_performance(risk_free_rate=risk_free_rate)
    return frontier, cleaned, performance


# Each objective gets its own frontier instance, solved in this order:
# max Sharpe ratio, then min volatility, then max quadratic utility.
ef_max_sharpe, weights_max_sharpe, (ret_tangent, std_tangent, sharpe) = _solve(
    lambda ef: ef.max_sharpe(risk_free_rate=risk_free_rate)
)

ef_min_vol, weights_min_vol, (ret_min_vol, std_min_vol, sharpe_min_vol) = _solve(
    lambda ef: ef.min_volatility()
)

ef_max_utility, weights_max_utility, (ret_utility, std_utility, sharpe_utility) = _solve(
    lambda ef: ef.max_quadratic_utility(risk_aversion=risk_aversion, market_neutral=False)
)

# Generate random long-only portfolios for the scatter plot
n_samples = 10000
# Fixed seed so a kernel restart reproduces exactly the same cloud of points
rng = np.random.default_rng(42)
# One row per portfolio; each Dirichlet draw is non-negative and sums to 1
w = rng.dirichlet(np.ones(len(mu)), n_samples)
rets = w @ np.asarray(mu)
# Row-wise quadratic form w_i' S w_i. The naive diag(w @ S @ w.T) builds an
# n_samples x n_samples matrix (about 800 MB at 10,000 samples) only to read
# its diagonal.
stds = np.sqrt(np.einsum('ij,jk,ik->i', w, np.asarray(S), w))
# Excess return over the risk-free rate, so these values are on the same scale
# as the Sharpe ratios reported by portfolio_performance(risk_free_rate=...)
sharpes = (rets - risk_free_rate) / stds

# Efficient-frontier figure: random portfolios coloured by Sharpe ratio, with
# the three optimised portfolios highlighted on top
ef_fig = figure(
    title="Efficient Frontier with Random Portfolios",
    x_axis_label="Annual Volatility",
    y_axis_label="Expected Annual Return",
    width=600,
    height=500,
)

# Colour scale shared by the colour bar and the scatter glyphs
sharpe_palette = viridis(256)
sharpe_low, sharpe_high = min(sharpes), max(sharpes)

color_mapper = LinearColorMapper(palette=sharpe_palette, low=sharpe_low, high=sharpe_high)
cbar = ColorBar(color_mapper=color_mapper, title="Sharpe Ratio")
ef_fig.add_layout(cbar, 'right')

random_source = ColumnDataSource(data={'x': stds, 'y': rets, 'sharpe': sharpes})
ef_fig.scatter(
    'x', 'y',
    source=random_source,
    size=5,
    alpha=0.5,
    color=linear_cmap('sharpe', sharpe_palette, sharpe_low, sharpe_high),
)

# (name, marker colour, return, volatility, Sharpe ratio) for each key portfolio
key_portfolios = [
    ('Max Sharpe', 'red', ret_tangent, std_tangent, sharpe),
    ('Min Volatility', 'green', ret_min_vol, std_min_vol, sharpe_min_vol),
    ('Max Utility', 'blue', ret_utility, std_utility, sharpe_utility),
]
portfolio_names = [entry[0] for entry in key_portfolios]
portfolios_source = ColumnDataSource(data={
    'x': [entry[3] for entry in key_portfolios],
    'y': [entry[2] for entry in key_portfolios],
    'port_name': portfolio_names,
    'color': [entry[1] for entry in key_portfolios],
    # Pre-formatted strings so the tooltip shows four decimals
    'ret': [f"{entry[2]:.4f}" for entry in key_portfolios],
    'std': [f"{entry[3]:.4f}" for entry in key_portfolios],
    'sharpe': [f"{entry[4]:.4f}" for entry in key_portfolios],
})

port_scatter = ef_fig.scatter(
    'x', 'y',
    source=portfolios_source,
    size=15,
    color='color',
    line_color="black",
    line_width=2,
)

# Tooltips are attached to the key portfolios only, not to the random cloud
hover = HoverTool(
    renderers=[port_scatter],
    tooltips=[
        ("Portfolio", "@port_name"),
        ("Return", "@ret"),
        ("Volatility", "@std"),
        ("Sharpe", "@sharpe"),
    ],
)
ef_fig.add_tools(hover)

# Price history figure: one line per column of prices_df
ts_fig = figure(
    title="Asset Prices Time Series",
    x_axis_type="datetime",
    x_axis_label="Date",
    y_axis_label="Price",
    width=600,
    height=300,
)

# One palette entry per asset, capped at the ten colours Category10 provides
colors = Category10[10][:len(prices_df.columns)]
for position, ticker in enumerate(prices_df.columns):
    line_source = ColumnDataSource(data=dict(x=prices_df.index, y=prices_df[ticker]))
    ts_fig.line(
        'x', 'y',
        source=line_source,
        line_width=2,
        # Wrap around when there are more assets than colours
        color=colors[position % len(colors)],
        legend_label=str(ticker),
    )

# Clicking a legend entry toggles the corresponding line
ts_fig.legend.location = "top_left"
ts_fig.legend.click_policy = "hide"

# Create pie charts for portfolio allocations
def create_pie_chart(weights, title, width=300, height=300):
    """Build a Bokeh pie chart of portfolio weights.

    Parameters
    ----------
    weights : mapping
        Asset name -> portfolio weight. Entries with a weight of 0.0001 or
        less are not drawn.
    title : str
        Figure title.
    width, height : int
        Figure size passed to ``figure``.

    Returns
    -------
    The Bokeh figure holding one wedge per drawn asset. If no weight is above
    the threshold the figure is returned with no wedges.
    """
    radius = 0.8
    min_weight = 0.0001
    palette = Category10[10]

    # Pick each colour from the asset's position in the *unfiltered* mapping,
    # so an asset keeps its colour in every pie even when other assets are
    # dropped. Cycling through the palette keeps the colour column the same
    # length as the other columns when there are more than ten assets.
    # NOTE(review): presumably `weights` is ordered like prices_df.columns, in
    # which case these colours also match the time-series lines - confirm.
    slices = [
        (str(asset), weight, palette[position % len(palette)])
        for position, (asset, weight) in enumerate(weights.items())
        if weight > min_weight
    ]
    names = [name for name, _, _ in slices]
    shares = [share for _, share, _ in slices]
    slice_colors = [color for _, _, color in slices]

    if slices:
        # Cumulative share of the full circle gives each wedge's end angle;
        # each wedge starts where the previous one ended.
        end_angles = (np.cumsum(shares) / sum(shares) * 2 * np.pi).tolist()
        start_angles = [0.0] + end_angles[:-1]
    else:
        # Nothing to draw: avoid dividing by a zero total
        start_angles, end_angles = [], []

    # Prepare data for plotting; 'values' is pre-formatted for the tooltip
    source = ColumnDataSource(data=dict(
        asset_name=names,
        values=[f"{share:.2%}" for share in shares],
        start_angle=start_angles,
        end_angle=end_angles,
        color=slice_colors,
    ))

    # Create figure
    fig = figure(width=width, height=height, title=title,
                 tools="hover", tooltips=[("Asset", "@asset_name"), ("Weight", "@values")],
                 x_range=(-1.1, 1.1), y_range=(-1.1, 1.1))

    # Add wedges for the pie chart
    fig.wedge(x=0, y=0, radius=radius,
              start_angle='start_angle', end_angle='end_angle',
              line_color="white", fill_color='color', source=source)

    # Remove axes and grid
    fig.axis.visible = False
    fig.grid.visible = False

    return fig

# One weight pie per optimised portfolio
pie_max_sharpe = create_pie_chart(weights_max_sharpe, "Max Sharpe Portfolio Weights")
pie_min_vol = create_pie_chart(weights_min_vol, "Min Volatility Portfolio Weights")
pie_max_utility = create_pie_chart(weights_max_utility, "Max Utility Portfolio Weights")

# Add performance metrics as a header div
performance_html = f"""
<div style="padding: 10px; background: #f8f8f8; border: 1px solid #ddd;">
    <h3>Portfolio Performance Metrics</h3>
    <table style="width: 100%; border-collapse: collapse;">
        <tr style="border-bottom: 1px solid #ddd; font-weight: bold;">
            <td>Portfolio</td>
            <td>Expected Return</td>
            <td>Volatility</td>
            <td>Sharpe Ratio</td>
        </tr>
        <tr style="border-bottom: 1px solid #ddd;">
            <td>Max Sharpe</td>
            <td>{ret_tangent:.4f}</td>
            <td>{std_tangent:.4f}</td>
            <td>{sharpe:.4f}</td>
        </tr>
        <tr style="border-bottom: 1px solid #ddd;">
            <td>Min Volatility</td>
            <td>{ret_min_vol:.4f}</td>
            <td>{std_min_vol:.4f}</td>
            <td>{sharpe_min_vol:.4f}</td>
        </tr>
        <tr>
            <td>Max Utility (Risk Aversion: {risk_aversion})</td>
            <td>{ret_utility:.4f}</td>
            <td>{std_utility:.4f}</td>
            <td>{sharpe_utility:.4f}</td>
        </tr>
    </table>
</div>
"""
header = Div(text=performance_html, width=1200)

# Layout: metrics header on top; below it the frontier on the left and the
# price history plus the three pies on the right
pie_row = row(pie_max_sharpe, pie_min_vol, pie_max_utility)
right_column = column(ts_fig, pie_row)
main_row = row(ef_fig, right_column)
layout = column(header, main_row)

# Display the visualization in the notebook
show(layout)

# Also save to a standalone HTML file. The target is passed straight to save()
# instead of calling output_file(): output_file() switches Bokeh's global
# output state to file mode, after which re-running this cell makes show()
# write the file and open a browser tab as well as rendering inline.
from bokeh.resources import CDN

save(layout, filename='./outputs/index.html', title="Portfolio Optimization", resources=CDN)
print("Portfolio visualization saved to './outputs/index.html'")

Portfolio visualization saved to './outputs/index.html'
