In [1]:
import pandas as pd
import yfinance as yf 
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
import plotly.figure_factory as ff
import numpy as np
from plotly.subplots import make_subplots
from scipy.stats import norm, t



# Download one year of daily AAPL bars and keep only the OHLCV columns.
# history() returns a date-indexed frame; a single reset_index() turns the
# index into a regular 'Date' column (a second reset only adds a throw-away
# 'index' column). The trailing .copy() makes `df` an independent frame so
# that later cells can add indicator columns without SettingWithCopyWarning.
df = (
    yf.Ticker("AAPL")
    .history(period="1y")
    .reset_index()
    .loc[:, ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']]
    .rename(columns=str.lower)
    .copy()
)

In [2]:
# Preview the first ten rows to sanity-check the downloaded columns.
df.head(10)

Unnamed: 0,date,open,high,low,close,volume
0,2024-06-20 00:00:00-04:00,212.934166,213.242735,207.877826,208.703949,86172500
1,2024-06-21 00:00:00-04:00,209.410665,210.903682,206.145934,206.52417,246421400
2,2024-06-24 00:00:00-04:00,206.753099,211.709914,205.628355,207.171143,80727000
3,2024-06-25 00:00:00-04:00,208.176416,210.396046,207.638936,208.096802,56713900
4,2024-06-26 00:00:00-04:00,210.515486,213.859846,209.659488,212.257339,66213200
5,2024-06-27 00:00:00-04:00,213.690659,214.735774,211.361555,213.103409,49772700
6,2024-06-28 00:00:00-04:00,214.765608,215.064215,209.32107,209.639572,82542700
7,2024-07-01 00:00:00-04:00,211.102731,216.4975,210.933525,215.741043,60402900
8,2024-07-02 00:00:00-04:00,215.143842,219.354163,214.098742,219.244675,58046200
9,2024-07-03 00:00:00-04:00,218.975935,220.518723,218.010448,220.518723,37369800


In [3]:
# Spot-check five random rows (shown in chronological order). A fixed seed
# keeps the displayed sample identical across Restart & Run All.
df.sample(n=5, random_state=42).sort_index()

Unnamed: 0,date,open,high,low,close,volume
12,2024-07-09 00:00:00-04:00,226.869,228.332159,225.316264,227.615509,48076100
105,2024-11-18 00:00:00-05:00,224.707945,229.187145,224.628135,227.471283,44686000
126,2024-12-18 00:00:00-05:00,251.553189,253.668083,247.143828,247.453079,56774100
132,2024-12-27 00:00:00-05:00,257.20953,258.077462,252.451019,254.97493,42355300
213,2025-04-28 00:00:00-04:00,209.724976,211.223011,207.188309,209.864792,38743100


Missing value

In [4]:
# Per-column missing-value summary: total rows, null rows and null share.
total_rows = len(df)
null_counts = df.isna().sum()

ndf = pd.DataFrame({
    "col_name": list(df.columns),
    "total_rows": total_rows,
    "null_rows": null_counts.to_numpy(),
    "rate": null_counts.to_numpy() / total_rows,
})
ndf

Unnamed: 0,col_name,total_rows,null_rows,rate
0,date,250,0,0.0
1,open,250,0,0.0
2,high,250,0,0.0
3,low,250,0,0.0
4,close,250,0,0.0
5,volume,250,0,0.0


Outlier


In [5]:
# Box plots of the four price series, one box per series.
# melt() stacks the columns into (variable, value) pairs, which is the
# long format plotly express expects for a grouped box plot.
df_plot = df[["open", "close", "low", "high"]]
df_melt = df_plot.melt(var_name="variable", value_name="value")

fig = px.box(df_melt, x="variable", y="value", color="variable",
             title="Boxplot of All Stock Variables", height=600)
# The x axis already names each box, so the legend is redundant.
fig.update_layout(showlegend=False)
fig.show()

# Export the figure as a standalone page and open it in the browser.
pio.write_html(fig, file="C:/aapl-report/boxplot-variables.html", auto_open=True)

In [7]:
# Volume gets its own box plot because its scale dwarfs the price series.
fig = px.box(df, y="volume", title="Boxplot of Volume (separate)", height=800)

# Log axis with SI-style tick labels.
fig.update_yaxes(title="Volume (log scale)", type="log", tickformat=".2s")
fig.show()

pio.write_html(fig, file="C:/aapl-report/boxplot-volume.html")

In [84]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
df.describe()   

Unnamed: 0,open,high,low,close,volume
count,250.0,250.0,250.0,250.0,250.0
mean,222.581168,225.154405,220.227331,222.871561,54380120.0
std,15.454854,14.84936,15.748748,15.386981,29907850.0
min,171.724805,190.090729,168.988411,172.194199,23234700.0
25%,212.178497,214.39319,209.668371,212.541088,39534120.0
50%,224.170641,226.07002,222.231372,224.210571,47116000.0
75%,232.447929,234.263584,229.182156,232.515671,59321000.0
max,257.568678,259.474086,257.010028,258.396667,318679900.0


In [None]:
# Histograms of the four price series (one facet each) and of volume.
df_plot = df[["open", "close", "low", "high", "volume"]]

df_melt = df_plot.melt(var_name='variable', value_name='value')

# Volume is plotted separately: its magnitude is incomparable with prices.
df_melt_price = df_melt[df_melt["variable"] != "volume"]
df_melt_volume = df_melt[df_melt["variable"] == "volume"]

fig1 = px.histogram(
    df_melt_price,
    x="value",
    facet_col="variable",
    facet_col_wrap=2,
    color="variable",
    nbins=50,
    title="Distribution of Price Variables",
    height=600
)

fig2 = px.histogram(
    df_melt_volume,
    x="value",
    color="variable",
    nbins=50,
    title="Distribution of Volume (Log Scale)",
    height=400
)
# Log-scaled counts so the sparse high-volume bins stay visible.
fig2.update_yaxes(type="log", tickformat=".1s")

# Build ONE well-formed HTML page holding both figures. Each figure is
# rendered as a <div> fragment (plotly.js is loaded once, by the first
# fragment) and wrapped in a single <html>/<body>; previously the second
# figure was appended after the closing </html> tag of a full document.
# The file is written as UTF-8 explicitly to match the declared charset
# instead of relying on the platform default encoding.
report_body = "\n<hr>\n".join([
    pio.to_html(fig1, include_plotlyjs='cdn', full_html=False),
    pio.to_html(fig2, include_plotlyjs=False, full_html=False),
])
with open("C:/aapl-report/distribution/index.html", "w", encoding="utf-8") as f:
    f.write('<html>\n<head><meta charset="utf-8" /></head>\n<body>\n')
    f.write(report_body)
    f.write('\n</body>\n</html>\n')

fig1.show()
fig2.show()

In [None]:
# Time-series view: one stacked facet per price series, plus volume on its own axis.
df_plot = df[["date", "open", "close", "low", "high", "volume"]]
df_melt = df_plot.melt(id_vars="date", var_name="variable", value_name="value")

df_melt_price = df_melt[df_melt["variable"] != "volume"]
df_melt_volume = df_melt[df_melt["variable"] == "volume"]

fig1 = px.line(df_melt_price, x="date", y="value", facet_col="variable",
               facet_col_wrap=1, color="variable", title="Trend of price variable", height=600)

fig2 = px.line(df_melt_volume, x="date", y="value", color="variable",
               title="Trend of Volume (Separate Axis)", height=300)

fig1.show()
fig2.show()

# Write both figures into ONE well-formed HTML page: each figure is a <div>
# fragment (plotly.js is loaded once, by the first fragment), separated by a
# horizontal rule and wrapped in a single <html>/<body>. Previously the
# second figure was appended after the closing </html> of a full document.
# UTF-8 is set explicitly to match the declared charset.
report_body = "\n<hr>\n".join([
    pio.to_html(fig1, include_plotlyjs='cdn', full_html=False),
    pio.to_html(fig2, include_plotlyjs=False, full_html=False),
])
with open("C:/aapl-report/trends/index.html", "w", encoding="utf-8") as f:
    f.write('<html>\n<head><meta charset="utf-8" /></head>\n<body>\n')
    f.write(report_body)
    f.write('\n</body>\n</html>\n')

In [13]:
# Overlay the four price series on a single shared axis.
fig = go.Figure()

for column, label in (('open', 'Open'), ('close', 'Close'), ('low', 'Low'), ('high', 'High')):
    fig.add_trace(go.Scatter(x=df['date'], y=df[column], mode='lines', name=label))

# "x unified" shows all four values in one hover box per date.
layout_options = dict(
    title="Price Over Time",
    xaxis_title="Date",
    yaxis_title="Price",
    hovermode="x unified",
)
fig.update_layout(**layout_options)

fig.show()

pio.write_html(fig, file="C:/aapl-report/another_trends/index.html", auto_open=False)

In [None]:
# Candlestick chart (top, 70% of the height) with a volume bar chart below.
# Volume bars are green on days that closed at or above the open, red otherwise.
df['color'] = np.where(df['close'] >= df['open'], 'green', 'red')

fig = make_subplots(
    rows=2, cols=1,
    shared_xaxes=True,
    row_heights=[0.7, 0.3],
    vertical_spacing=0.05,
    subplot_titles=('AAPL Candlestick Chart', 'Volume'),
)

candles = go.Candlestick(
    x=df['date'],
    open=df['open'], high=df['high'], low=df['low'], close=df['close'],
    name='Price',
)
volume_bars = go.Bar(
    x=df['date'], y=df['volume'],
    marker_color=df['color'],
    name='Volume',
    opacity=0.6,
)
fig.add_trace(candles, row=1, col=1)
fig.add_trace(volume_bars, row=2, col=1)

# Hide the candlestick's built-in range slider on the top x axis.
fig.update_layout(
    height=800,
    showlegend=False,
    xaxis2_title='Date',
    yaxis1_title='Price',
    yaxis2_title='Volume',
    xaxis_rangeslider_visible=False,
)

fig.show()

pio.write_html(fig, file="C:/aapl-report/index.html", auto_open=True)

SMA

In [101]:
# Simple moving averages of the close, one column `sma_<window>` per window.
# The first window-1 rows of each column are NaN (incomplete window).
for window in (12, 15, 20, 30, 35, 40, 45, 50, 100):
    df[f'sma_{window}'] = df['close'].rolling(window=window).mean()

fig = go.Figure()

# Plot only a short / medium / long subset to keep the chart readable.
for window in (12, 20, 50):
    fig.add_trace(go.Scatter(x=df['date'], y=df[f'sma_{window}'], mode='lines', name=f'SMA {window}'))

# Close is added last, in bold black, on top of the averages.
fig.add_trace(go.Scatter(x=df['date'], y=df['close'], mode='lines', name='Close',
                         line=dict(color='black', width=2)))

fig.update_layout(title="Close with SMA", xaxis_title='Date', yaxis_title='Price', template='plotly_white')

pio.write_html(fig, file="C:/aapl-report/MovingAverage/index.html")

Tính EMA

In [102]:
# Exponential moving averages of the close, one column `ema_<span>` per span.
# adjust=False selects the recursive form of the EMA.
for span in (1, 2, 3, 5, 8, 10, 12, 15, 26, 30, 35, 40, 45, 50, 60):
    df[f'ema_{span}'] = df['close'].ewm(span=span, adjust=False).mean()

# MACD line = EMA12 - EMA26; signal = 9-span EMA of MACD; histogram = MACD - signal.
macd_line = df['ema_12'] - df['ema_26']
df['macd'] = macd_line
df['signal'] = macd_line.ewm(span=9, adjust=False).mean()
df['histogram'] = df['macd'] - df['signal']

Bollinger Bands

In [48]:
# Bollinger Bands: a 20-day simple moving average (middle band) with the
# upper/lower bands placed 2 rolling standard deviations away from it.
# The bands must be centred on the moving average -- centring them on the
# raw close (as before) yields an envelope the price can never touch.
# The mean is recomputed here so the cell does not depend on the SMA cell.
rolling_close_20 = df['close'].rolling(window=20)
df['std_20'] = rolling_close_20.std()
df['middle_bb'] = rolling_close_20.mean()
df['lower_bb'] = df['middle_bb'] - 2 * df['std_20']
df['upper_bb'] = df['middle_bb'] + 2 * df['std_20']

RSI


In [49]:
# 14-day RSI using simple rolling means of the gains and the losses.
delta = df['close'].diff()

# Split the daily change into its positive and negative parts
# (entries failing the condition are replaced by 0).
upward_moves = delta.where(delta > 0, 0)
downward_moves = delta.where(delta < 0, 0)

gain = upward_moves.rolling(window=14).mean()
loss = -downward_moves.rolling(window=14).mean()  # negated so losses are positive

rs = gain / loss
df['rsi'] = 100 - (100 / (1 + rs))

Phát hiện nho nhỏ: MACD có thể được xấp xỉ bằng hiệu giữa EMA12 và chính nó trễ đi k ngày (k ≈ 14–15), sau khi chia cho hệ số 3: MACD(t) ≈ (EMA12(t) − EMA12(t−k)) / 3.

In [150]:
# Compare MACD with a scaled k-day difference of EMA12:
#   (EMA12(t) - EMA12(t-k)) / 3
k = 14
lagged_ema_12 = df['ema_12'].shift(k)
df['ema2_diff_k'] = (df['ema_12'] - lagged_ema_12) / 3

fig = go.Figure()

macd_trace = go.Scatter(
    x=df['date'], y=df['macd'],
    mode='lines', name='MACD',
    line=dict(color='brown'),
)
approx_trace = go.Scatter(
    x=df['date'], y=df['ema2_diff_k'],
    mode='lines', name=f'EMA12(t) - EMA12(t-{k})',
    line=dict(color='green', dash='dot'),
)
fig.add_trace(macd_trace)
fig.add_trace(approx_trace)

fig.update_layout(
    title=f'Approximate MACD bằng EMA12(t) - EMA12(t-{k})',
    xaxis_title='Date',
    yaxis_title='Value',
    template='plotly_white',
    hovermode='x unified',
    height=600,
)

fig.show()

pio.write_html(fig, file="C:/aapl-report/approximateMACD/index.html")

GMMA

In [None]:
# GMMA chart: a short-span EMA ribbon and a long-span EMA ribbon drawn over
# the candlesticks, each ribbon in a single colour.
ema_short = [3, 5, 8, 10, 12, 15]
ema_long  = [30, 35, 40, 45, 50, 60]

short_colors = '#4B9CD3'
long_colors  = "#FF7A7A"

fig = go.Figure()

fig.add_trace(go.Candlestick(
    x=df['date'],
    open=df['open'], high=df['high'], low=df['low'], close=df['close'],
    name='Candlestick',
))

# Short ribbon first, then the long ribbon; the EMA columns come from the EMA cell.
for spans, ribbon_color in ((ema_short, short_colors), (ema_long, long_colors)):
    for i in spans:
        fig.add_trace(go.Scatter(
            x=df['date'], y=df[f'ema_{i}'],
            mode='lines',
            name=f'EMA {i}',
            line=dict(dash='solid', color=ribbon_color),
        ))

fig.update_layout(
    title='GMMA',
    xaxis_title='Date',
    yaxis_title='Price',
    template='plotly_white',
    hovermode='x unified',
)

pio.write_html(fig, file="C:/aapl-report/GMMA/index.html")


In [None]:
# Restyled "Close with SMA" chart: close in bold black, SMAs in pastel colours.
# Writes to the same MovingAverage/index.html path as the SMA cell.
fig = go.Figure()

fig.add_trace(go.Scatter(x=df['date'], y=df['close'], mode='lines', name='Close',
                         line=dict(color='black', width=2)))

for window, sma_color in ((12, '#FDB366'), (20, '#80E27E'), (50, '#7FC8F8')):
    fig.add_trace(go.Scatter(x=df['date'], y=df[f'sma_{window}'], mode='lines',
                             name=f'SMA {window}', line=dict(color=sma_color)))

fig.update_layout(
    title="Close with SMA",
    xaxis_title='Date',
    yaxis_title='Price',
    height=800,
    template='plotly_white',
)

fig.write_html("C:/aapl-report/MovingAverage/index.html")

MACD

In [None]:
# Price (candles + EMA12/EMA26) on the left axis with MACD, signal and
# histogram overlaid on a secondary right-hand axis (y2). The two axis
# ranges are padded so price occupies the upper part of the canvas and the
# MACD group the lower part.
fig = go.Figure()

fig.add_trace(go.Candlestick(
    x=df['date'],
    open=df['open'], high=df['high'], low=df['low'], close=df['close'],
    name='Candlestick',
))

# EMA overlays on the price axis.
for span, ema_color in ((12, '#fc1d1d'), (26, '#68cccc')):
    fig.add_trace(go.Scatter(
        x=df['date'], y=df[f'ema_{span}'],
        mode='lines',
        name=f'EMA {span}',
        line=dict(dash='solid', color=ema_color, width=2.5),
    ))

# MACD and signal lines on the secondary axis.
for column, label, line_color in (('macd', 'MACD', '#ab63fa'), ('signal', 'Signal', '#19d3f3')):
    fig.add_trace(go.Scatter(
        x=df['date'], y=df[column],
        mode='lines', name=label,
        line=dict(color=line_color, width=3),
        yaxis='y2',
    ))

# Histogram bars: green when MACD >= signal, red otherwise.
colors = np.where(df['histogram'] >= 0, "#0AF26B", "#FF0000").tolist()

fig.add_trace(go.Bar(
    x=df['date'],
    y=df['histogram'],
    name='MACD His',
    marker_color=colors,
    opacity=0.6,
    yaxis='y2',
))

# Dotted zero line for the MACD axis.
fig.add_shape(
    type='line',
    x0=df['date'].min(), x1=df['date'].max(),
    y0=0, y1=0,
    line=dict(color='gray', dash='dot'),
    yref='y2', xref='x',
)

# Price axis: extend the bottom by half the price range.
# Note: the upper bound uses the highest *high*, the lower bound the lowest *close*.
min_close = df['close'].min()
max_close = df['high'].max()
range_close = max_close - min_close
display_min = min_close - range_close / 2
display_max = max_close

# MACD axis: extend the top by 1.8x the MACD range.
min_macd = df['macd'].min()
range_macd = df['macd'].max() - min_macd
max_macd = df['macd'].max() + 1.8 * range_macd

fig.update_layout(
    xaxis=dict(domain=[0, 1]),
    yaxis=dict(title='Close + MACD', range=[display_min, display_max], side='left'),
    yaxis2=dict(title='MACD', range=[min_macd, max_macd], overlaying='y', side='right', showgrid=False),
    height=1000,
    width=1800,
    legend=dict(orientation='h', y=1.02),
    margin=dict(t=50, b=50),
)

fig.write_html("C:/aapl-report/MACD/index.html")

In [None]:
# Three stacked panels sharing the x axis:
#   row 1 - candlesticks with EMA12/EMA26 and the Bollinger envelope
#   row 2 - MACD, signal line and histogram
#   row 3 - RSI with the 70 / 30 reference levels
fig = make_subplots(
    rows=3, cols=1,
    shared_xaxes=True, shared_yaxes=False,
    vertical_spacing=0.008,
    row_heights=[0.5, 0.25, 0.25],
)

# --- row 1: price ---
fig.add_trace(
    go.Candlestick(x=df['date'], low=df['low'], high=df['high'], open=df['open'], close=df['close'],
                   name='CandleStick'),
    row=1, col=1,
)
for span, ema_color in ((12, '#fc1d1d'), (26, '#68cccc')):
    fig.add_trace(
        go.Scatter(x=df['date'], y=df[f'ema_{span}'], name=f'EMA {span}', line=dict(color=ema_color, width=3)),
        row=1, col=1,
    )
fig.add_trace(
    go.Scatter(x=df['date'], y=df['upper_bb'], name='Upper BB', line=dict(color='grey'), opacity=0.4),
    row=1, col=1,
)
# 'tonexty' fills up to the previously added trace, i.e. the upper band.
fig.add_trace(
    go.Scatter(x=df['date'], y=df['lower_bb'], name='Lower BB', line=dict(color='gray'),
               fill='tonexty', fillcolor='rgba(150, 150, 150, 0.1)', opacity=0.2),
    row=1, col=1,
)

# --- row 2: MACD ---
for column, label, line_color in (('macd', 'MACD', 'purple'), ('signal', 'Signal', 'deepskyblue')):
    fig.add_trace(
        go.Scatter(x=df['date'], y=df[column], name=label, line=dict(color=line_color, width=3)),
        row=2, col=1,
    )
histogram_colors = np.where(df['histogram'] >= 0, '#2ECC71', '#E74C3C')
fig.add_trace(
    go.Bar(x=df['date'], y=df['histogram'], name='Histogram', marker_color=histogram_colors),
    row=2, col=1,
)

# --- row 3: RSI ---
fig.add_trace(go.Scatter(x=df['date'], y=df['rsi'], name='RSI', line=dict(color='orange')), row=3, col=1)

first_date = df['date'].min()
last_date = df['date'].max()

# Labels for the overbought / oversold levels, anchored at the left edge.
for level, label, anchor in ((70, "Overbought (70)", 'bottom'), (30, "Oversold (30)", 'top')):
    fig.add_annotation(x=first_date, y=level, text=label, showarrow=False, xanchor='left',
                       yanchor=anchor, font=dict(color='gray', size=12), row=3, col=1)

# Dashed horizontal reference lines at the same two levels.
for level in (70, 30):
    fig.add_shape(type="line", x0=first_date, x1=last_date, y0=level, y1=level,
                  line=dict(color='gray', dash='dash', width=2), row=3, col=1)

# Layout
fig.update_layout(height=900, width=1800, title_text="EMA, Bollinger, MACD and RSI",
                  xaxis_rangeslider_visible=False)

fig.write_html("C:/aapl-report/Boiling_MACD_RSI/index.html")

Return
Ta sẽ lấy thêm dữ liệu để phân tích các phân phối thống kê (3 năm)

In [78]:
# Longer (3-year) AAPL history used for the return / risk statistics.
ohlcv_columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']
history_3y = yf.Ticker("AAPL").history(period="3y").reset_index()

df_for_returns = (
    history_3y[ohlcv_columns]
    .rename(columns=str.lower)                                # same lower-case schema as `df`
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)
df_for_returns

Unnamed: 0,date,open,high,low,close,volume
0,2022-06-21 00:00:00-04:00,131.313283,134.895806,131.214871,133.724594,81000500
1,2022-06-22 00:00:00-04:00,132.661690,135.584795,131.795595,133.212860,73409200
2,2022-06-23 00:00:00-04:00,134.659614,136.401655,133.488402,136.086716,72433800
3,2022-06-24 00:00:00-04:00,137.690983,139.669255,137.563046,139.423203,89116800
4,2022-06-27 00:00:00-04:00,140.446774,141.224309,138.744095,139.423203,70207900
...,...,...,...,...,...,...
747,2025-06-12 00:00:00-04:00,199.080002,199.679993,197.360001,199.199997,43904600
748,2025-06-13 00:00:00-04:00,199.729996,200.369995,195.699997,196.449997,51447300
749,2025-06-16 00:00:00-04:00,197.300003,198.690002,196.559998,198.419998,43020700
750,2025-06-17 00:00:00-04:00,197.199997,198.389999,195.210007,195.639999,38856200


In [105]:
# Daily simple return: close-to-close percentage change (first row is NaN).
close_3y = df_for_returns['close']
df_for_returns['returns'] = close_3y.pct_change()

# Calendar-month bucket of each trading day. Converting to a Period drops
# the timezone, which is what the warning below this cell reports.
trading_dates = df_for_returns['date']
df_for_returns['month'] = trading_dates.dt.to_period('M')


Converting to PeriodArray/Index representation will drop timezone information.



In [None]:
# Distribution of daily returns: histogram + KDE (from create_distplot) with
# fitted Gaussian and Student's t densities, plus mean / median markers.
returns = df_for_returns['returns'].dropna().values
bin_width = 0.0014
mean_return = returns.mean()
median_return = np.median(returns)

# create_distplot normalises the histogram to a probability density by
# default, so the KDE and the fitted PDFs below share the same y scale.
fig = ff.create_distplot(
    [returns],
    group_labels=['Daily Return'],
    bin_size=bin_width,
    show_rug=False
)

x = np.linspace(returns.min(), returns.max(), 500)

# Fitted PDFs are plotted as densities. They must NOT be rescaled by
# len(returns) * bin_width: that converts them to expected bin counts,
# which is the wrong unit on a density-normalised histogram.
mu, sigma = norm.fit(returns)
y_norm = norm.pdf(x, mu, sigma)

fig.add_trace(go.Scatter(
    x=x,
    y=y_norm,
    mode='lines',
    name='Gaussian PDF',
    line=dict(color='red', width=2, dash='dash')
))

df_t, loc_t, scale_t = t.fit(returns)
y_t = t.pdf(x, df_t, loc=loc_t, scale=scale_t)

fig.add_trace(go.Scatter(
    x=x,
    y=y_t,
    mode='lines',
    name="Student's t PDF",
    line=dict(color='green', width=2.5, dash='dot')
))

# Height of the mean / median markers: the tallest curve drawn so far
# (the histogram trace carries no y data and is skipped). This name was
# previously used without ever being defined, raising NameError.
y_max = max(max(trace.y) for trace in fig.data if trace.y is not None)

# Mean and median are drawn as ordinary traces so they appear in the legend.
# The earlier add_vline() duplicates were removed: they showed the same two
# lines as layout shapes, and the `shapes=[...]` passed to update_layout
# below would have overwritten them anyway.
#mean
fig.add_trace(go.Scatter(
    x=[mean_return, mean_return],
    y=[0, y_max],
    mode='lines',
    name='Mean',
    line=dict(color='blue', width=1.5, dash='dash'),
    hoverinfo='skip'
))

#median
fig.add_trace(go.Scatter(
    x=[median_return, median_return],
    y=[0, y_max],
    mode='lines',
    name='Median',
    line=dict(color='purple', width=1.5, dash='dash'),
    hoverinfo='skip'
))


fig.update_layout(
    height=800,
    title="Return Distribution: KDE + Gaussian vs Student's t",
    xaxis_title='Return',
    yaxis_title='Density',
    legend=dict(x=0.01, y=0.99, borderwidth=1),
    # Full-height dashed reference line at a return of zero.
    shapes=[
        dict(
            type='line',
            x0=0, x1=0,
            y0=0, y1=1,
            xref='x',
            yref='paper',
            line=dict(color='gray', width=1, dash='dash')
        )
    ]
)


fig.write_html("C:/aapl-report/RETURN/Distribution/index.html")

Violin plot cho từng quý 

In [None]:
# Violin plots of daily returns per calendar quarter, four quarters per row.
df_viz = df_for_returns.dropna(subset=['returns']).copy()
df_viz['quarter'] = df_viz['date'].dt.to_period('Q').astype(str)

# Exclude 2022Q2 (the sample starts late in that quarter).
df_viz = df_viz[df_viz['quarter'] != '2022Q2']

quarters_sorted = sorted(df_viz['quarter'].unique())

# Four quarters per subplot row; the last row takes whatever remains.
group1 = quarters_sorted[:4]
group2 = quarters_sorted[4:8]
group3 = quarters_sorted[8:]

fig = make_subplots(
    rows=3, cols=1,
    shared_xaxes=False, shared_yaxes=True,
    subplot_titles=["Q3/2022 - Q2/2023", "Q3/2023 - Q2/2024", "Q3/2024 - Current"],
    vertical_spacing=0.06,
)

for row_number, quarter_group in enumerate((group1, group2, group3), start=1):
    for q in quarter_group:
        data_q = df_viz[df_viz['quarter'] == q]
        fig.add_trace(
            go.Violin(y=data_q['returns'], name=q, box_visible=True, line_color='#1f77b4',
                      meanline_visible=True, points=False),
            row=row_number, col=1,
        )

# Same y range on every row so the violins are directly comparable.
ymin = df_viz['returns'].min()
ymax = df_viz['returns'].max()
for row_number in (1, 2, 3):
    fig.update_yaxes(range=[ymin, ymax], row=row_number, col=1)

fig.update_layout(height=1200, title_text="Violin Plot by quarter")

fig.write_html("C:/aapl-report/RETURN/ViolinPlotQuarter/index.html")


Converting to PeriodArray/Index representation will drop timezone information.



In [None]:
# All quarters in one figure as horizontal violins (one row per quarter).
# Relies on `quarters_sorted` and `df_viz` from the quarterly-violin cell.
fig = go.Figure()

violin_style = dict(
    orientation='h',
    box_visible=True,
    meanline_visible=True,
    points=False,
    line_color='steelblue',
    fillcolor='lightsteelblue',
    opacity=0.7,
)

for q in quarters_sorted:
    data_q = df_viz[df_viz['quarter'] == q]
    # The quarter label is repeated per observation to place the violin on the y axis.
    quarter_labels = [q] * len(data_q)
    fig.add_trace(go.Violin(x=data_q['returns'], y=quarter_labels, **violin_style))

fig.update_layout(title='Violin Plot of Returns', height=1000)

fig.write_html("C:/aapl-report/RETURN/ViolinPlot/index.html")

Volatility

In [73]:
# Monthly volatility = standard deviation of the daily returns within each month.
# Expects `df_for_returns['month']` to still hold Period values (the .dt.to_timestamp
# call below requires it).
monthly_volatility = (
    df_for_returns
    .groupby('month')['returns']
    .std()
    .reset_index()
)

# Convert the period to a timestamp, then split into year / month number
# (used as the heatmap rows / columns).
month_start = monthly_volatility['month'].dt.to_timestamp()
monthly_volatility['month'] = month_start
monthly_volatility['year'] = month_start.dt.year
monthly_volatility['month_num'] = month_start.dt.month

In [198]:
# Year x month grid of monthly volatility, most recent year in the first row.
volatility_grid = monthly_volatility.pivot(index='year', columns='month_num', values='returns')
heatmap_data = volatility_grid.sort_index(ascending=False)

# Cell labels as percentages; months without data stay blank.
cell_labels = [
    [f"{v:.2%}" if pd.notna(v) else "" for v in row]
    for row in heatmap_data.values
]
month_labels = [str(m) for m in heatmap_data.columns]
year_labels = [str(y) for y in heatmap_data.index]

fig = go.Figure(data=go.Heatmap(
    z=heatmap_data.values,
    x=month_labels,
    y=year_labels,
    colorscale='YlGnBu',
    colorbar=dict(title='Volatility'),
    text=cell_labels,
    hovertemplate="Month: %{x}<br>Year: %{y}<br>Volatility: %{z:.4f}<extra></extra>",
    showscale=True,
))

# Render the prepared labels inside the cells.
fig.update_traces(texttemplate="%{text}", textfont_size=12)

fig.update_layout(
    title="Monthly Volatility Heatmap",
    xaxis_title="Month",
    yaxis_title="Year",
    height=700, width=1700,
)
fig.show()
fig.write_html("C:/aapl-report/Risk/Volatility/index.html")

Drawdown

In [113]:
# Drawdown = relative distance of the close below its running maximum
# (0 at a new high, negative otherwise).
running_peak = df_for_returns['close'].cummax()
df_for_returns['cummax'] = running_peak
df_for_returns['drawdown'] = (df_for_returns['close'] - running_peak) / running_peak

# NOTE: this replaces the Period-typed 'month' column with 'YYYY-MM' strings.
month_period = df_for_returns['date'].dt.to_period('M')
df_for_returns['month'] = month_period.astype(str)


Converting to PeriodArray/Index representation will drop timezone information.



In [199]:
# Split the 'YYYY-MM' month label into its year and month parts.
df_for_returns['year'] = df_for_returns['month'].str.slice(0, 4)
df_for_returns['mon'] = df_for_returns['month'].str.slice(5)

# Deepest drawdown reached within each month.
monthly_dd = df_for_returns.groupby('month')['drawdown'].min().reset_index()
monthly_dd['year'] = monthly_dd['month'].str.slice(0, 4)
monthly_dd['mon'] = monthly_dd['month'].str.slice(5)

heatmap_data = monthly_dd.pivot(index='year', columns='mon', values='drawdown')

# Reversed "Reds" scale: the most negative drawdowns get the darkest red.
reversed_reds = px.colors.sequential.Reds[::-1]

fig = px.imshow(
    heatmap_data,
    labels=dict(x="Month", y="Year", color="Max Drawdown"),
    x=heatmap_data.columns,
    y=heatmap_data.index,
    color_continuous_scale=reversed_reds,
    text_auto=".2%",
)

fig.update_layout(
    title="Monthly Max Drawdown Heatmap",
    xaxis_title="Month",
    coloraxis_colorbar=dict(title="Drawdown"),
    autosize=False,
    yaxis_autorange="reversed",
    height=800,
    width=1700,
)

fig.show()

fig.write_html("C:/aapl-report/Risk/Drawdown/index.html")

In [201]:
# Price and running maximum on the left axis; drawdown (filled area) and
# daily return on a percentage-formatted right axis.
fig = go.Figure()

dates_3y = df_for_returns['date']

# Left axis (y1): close and its cumulative maximum.
fig.add_trace(go.Scatter(x=dates_3y, y=df_for_returns['close'], name='Close',
                         line=dict(color='black'), yaxis='y1'))
fig.add_trace(go.Scatter(x=dates_3y, y=df_for_returns['cummax'], name='Cumulative Max',
                         line=dict(color='red', dash='dot'), yaxis='y1'))

# Right axis (y2): drawdown filled down from zero, and the daily return.
fig.add_trace(go.Scatter(x=dates_3y, y=df_for_returns['drawdown'], name='Drawdown %',
                         fill='tozeroy', fillcolor='rgba(0,100,255,0.3)',
                         line=dict(color='blue'), yaxis='y2'))
fig.add_trace(go.Scatter(x=dates_3y, y=df_for_returns['returns'], name='Return %',
                         mode='lines', line=dict(color='green'), yaxis='y2'))

# Price axis is extended downward by half its range; the y2 range is fixed by hand.
close_min = df_for_returns['close'].min()
close_max = df_for_returns['close'].max()
range_close = close_max - close_min

yaxis_range = [close_min - 0.5 * range_close, close_max]
yaxis2_range = [-0.35, 0.8]

price_axis = dict(title="Price", side="left", range=yaxis_range)
drawdown_axis = dict(
    title="Drawdown %",
    overlaying="y",
    side="right",
    tickformat=".0%",
    range=yaxis2_range,
    showgrid=False,
)

fig.update_layout(
    title="Drawdown",
    xaxis=dict(title="Date"),
    yaxis=price_axis,
    yaxis2=drawdown_axis,
    height=900, width=1600,
    legend=dict(x=0.01, y=0.99),
)

fig.show()

fig.write_html("C:/aapl-report/Risk/Visualize/index.html")

Tháng 4–6/2025: đường drawdown tụt mạnh nhất, vượt −30%.
Khả năng hồi phục sau mỗi cú rơi
Năm 2023: giá giảm mạnh, drawdown lớn, nhưng hồi phục khá nhanh và vượt đỉnh cũ → AAPL vẫn trong trạng thái “khỏe”.

Đợt gần đây (2025): giá chưa hề trở lại đỉnh → có thể cổ phiếu bắt đầu suy yếu hoặc thị trường vào giai đoạn chỉnh sâu hơn.


Correlation

In [None]:
# Correlation heatmap of price, trend indicators, RSI, volume and returns
# over the 1-year frame.
df['returns'] = df['close'].pct_change()

# Columns the matrix needs but that no earlier cell creates: the EMA cell
# has no 20-span EMA, and 'abs_returns' was only defined in a LATER cell.
# Without these two lines the cell raises KeyError on Restart & Run All.
df['abs_returns'] = df['returns'].abs()
df['ema_20'] = df['close'].ewm(span=20, adjust=False).mean()

cols = ['close', 'sma_20', 'sma_50', 'ema_20', 'rsi', 'volume', 'returns', 'abs_returns']

# Pairwise Pearson correlations (NaN rows from the rolling warm-up are
# excluded pairwise by DataFrame.corr).
corr_matrix = df[cols].corr().round(2)

fig = go.Figure(data=go.Heatmap(
    z=corr_matrix.values,
    x=cols,
    y=cols,
    colorscale='RdBu',
    zmin=-1,
    zmax=1,
    colorbar=dict(title='Correlation'),
    text=corr_matrix.values,
    # `text` alone is not drawn on a heatmap; the template prints it in each cell.
    texttemplate="%{text}",
    hovertemplate="Correlation between %{y} and %{x}: %{z:.2f}<extra></extra>"
))

fig.update_layout(
    title="Correlation Heatmap (1 Year)",
    height=750,
    width=800
)

fig.show()

fig.write_html("C:/aapl-report/Correlation/Heatmap/index.html")

In [204]:
# Scatter of close price against volume with a least-squares regression line.
fig = go.Figure()

x = df['close']
y = df['volume']

# Keep only rows where both values are present (polyfit cannot handle NaN).
mask = x.notna() & y.notna()
x = x[mask]
y = y[mask]

# Degree-1 fit: volume ~ slope * close + intercept.
slope, intercept = np.polyfit(x, y, 1)
regression_line = slope * x + intercept

# Plot the same masked points the regression was fitted on. The hover label
# names the x value "Close" (it previously read "Abs Return", copied from
# the returns-vs-volume chart).
fig.add_trace(go.Scatter(
    x=x,
    y=y,
    mode='markers',
    name='Data',
    marker=dict(size=6, opacity=0.6),
    hovertemplate='Close: %{x:.2f}<br>Volume: %{y:.0f}<extra></extra>'
))

fig.add_trace(go.Scatter(
    x=x,
    y=regression_line,
    mode='lines',
    name='Linear Regression',
    line=dict(color='red'),
    hoverinfo='skip'
))

fig.update_layout(
    title='Close vs Volume',
    xaxis_title='Close',
    yaxis_title='Volume',
    height=800,
    width=1700
)

fig.show()

fig.write_html("C:/aapl-report/Correlation/ClosevsVolume/index.html")

In [None]:
# Scatter of absolute daily return against volume with a fitted line.
# Requires `df['returns']`, which the correlation cell creates.
df['abs_returns'] = df['returns'].abs()

# Rows where either value is missing are dropped (the first return is NaN).
complete_pairs = df[['abs_returns', 'volume']].dropna()
x = complete_pairs['abs_returns']
y = complete_pairs['volume']

# Linear regression: volume ~ slope * |return| + intercept
slope, intercept = np.polyfit(x, y, 1)
regression_line = slope * x + intercept

points = go.Scatter(
    x=x, y=y,
    mode='markers',
    name='Data',
    marker=dict(size=6, opacity=0.6),
    hovertemplate='Abs Return: %{x:.4f}<br>Volume: %{y:.0f}<extra></extra>',
)
fitted_line = go.Scatter(
    x=x, y=regression_line,
    mode='lines',
    name='Linear Regression',
    line=dict(color='red'),
    hoverinfo='skip',
)

fig = go.Figure()
fig.add_trace(points)
fig.add_trace(fitted_line)

fig.update_layout(
    title='Scatter Plot: Absolute Returns vs Volume',
    xaxis_title='Absolute Returns',
    yaxis_title='Volume',
    height=800,
    width=1700,
)

fig.show()

fig.write_html("C:/aapl-report/Correlation/ReturnsvsVolume/index.html")

In [206]:
# Scatter of close price against RSI with a fitted line.
# RSI is NaN during its rolling warm-up, so those rows are dropped first.
complete_pairs = df[['close', 'rsi']].dropna()
x = complete_pairs['close']
y = complete_pairs['rsi']

# Degree-1 least-squares fit: rsi ~ slope * close + intercept.
slope, intercept = np.polyfit(x, y, 1)
regression_line = slope * x + intercept

points = go.Scatter(
    x=x, y=y,
    mode='markers',
    name='Data',
    marker=dict(size=6, opacity=0.6),
    hovertemplate='Close: %{x:.2f}<br>RSI: %{y:.2f}<extra></extra>',
)
fitted_line = go.Scatter(
    x=x, y=regression_line,
    mode='lines',
    name='Linear Regression',
    line=dict(color='red'),
    hoverinfo='skip',
)

fig = go.Figure()
fig.add_trace(points)
fig.add_trace(fitted_line)

fig.update_layout(
    title='Scatter Plot: Close vs RSI',
    xaxis_title='Close Price',
    yaxis_title='RSI',
    height=800,
    width=1700,
)

fig.show()

fig.write_html("C:/aapl-report/Correlation/RSIvsClose/index.html")