- 單年的 return 散佈圖+折線圖
- 單年每季的箱型圖
- 各年每月標準差

In [2]:
import pandas as pd
import numpy as np
import datetime
import plotly.express as px
import plotly.graph_objects as go
# from collections import defaultdict

In [6]:
# Download the fund CSV from GitHub into the module-level DataFrame.
data = pd.read_csv(
    'https://github.com/vanikk06/NOMURA_PROJECT/raw/master/Analysis/Statistics_of_Quantify/Nomura_Global_Equity_Fund_c.csv'
)

- 【野村環球基金-臺幣累積】：1997/1/4 ~ 2020/4/9
- 🧡假日無資料
- 共5874筆

In [7]:
# Remove the 'Return(1)' and 'Return(10)' columns and parse the date strings.
data = data.drop(['Return(1)', 'Return(10)'], axis=1)
data['Date'] = pd.to_datetime(data['Date'], format='%Y/%m/%d')

# Season label -> the month numbers it covers.
season = {'s1':[1,2,3], 's2':[4,5,6], 's3':[7,8,9], 's4':[10,11,12]}
# Invert to month -> label so the whole column is labelled with one vectorised
# map. This does not depend on the frame's index labels, and a month outside
# 1-12 becomes NaN instead of producing a list shorter than the frame.
month_to_season = {month: label for label, months in season.items() for month in months}
seasonal = data['Month'].map(month_to_season).tolist()
data['Seasonal'] = seasonal

data.head()

Unnamed: 0,Date,Year,Month,Day,Price,Seasonal
0,1997-01-04,1997,1,4,12.14,s1
1,1997-01-06,1997,1,6,12.13,s1
2,1997-01-07,1997,1,7,12.12,s1
3,1997-01-08,1997,1,8,12.17,s1
4,1997-01-09,1997,1,9,12.2,s1


### 單年的 return 分佈圖

In [107]:
def single_year_distribution(return_range=None, year=1997):
    '''Plot one year's percentage returns together with that year's mean return.

    Side effects on the module-level ``data`` frame: adds (or overwrites) the
    column ``Return_<n>`` holding ``Price.pct_change(n) * 100`` and the column
    ``Year_mean`` holding, on every row, the mean of ``Return_<n>`` over that
    row's ``Year``. A plotly figure is shown.

    Args:
        return_range: Look-back (in rows) passed to ``pct_change``. A falsy
            value (``None`` or ``0``) means 1.
        year: Value of the ``Year`` column to plot.

    Returns:
        The rows of ``data`` whose ``Year`` equals ``year``, including the
        two columns described above.
    '''
    periods = return_range if return_range else 1
    ReturnName = 'Return_{}'.format(periods)
    data[ReturnName] = data['Price'].pct_change(periods) * 100

    # Broadcast each year's mean return onto every row of that year.
    data['Year_mean'] = data.groupby('Year')[ReturnName].transform('mean')

    # Keep only the requested year for plotting.
    SingleYear = data[data['Year'] == year]

    # Returns as a line with markers, the yearly mean as a flat reference line.
    line = go.Scatter(x=SingleYear['Date'], y=SingleYear[ReturnName], mode='lines+markers', name=ReturnName)
    line_mean = go.Scatter(x=SingleYear['Date'], y=SingleYear['Year_mean'], mode='lines', name='mean')
    fig = go.Figure([line, line_mean])
    fig.update_layout(title = str(year) + ' 年 ' + ReturnName + ' 的分佈圖',
                      xaxis_title = 'Date',
                      yaxis_title = ReturnName)
    fig.show()
    return SingleYear

In [108]:
# Plot the 10-row returns for 1997 and display that year's rows.
single_year_distribution(return_range=10, year=1997)

Unnamed: 0,Date,Year,Month,Day,Price,Seasonal,Return_10,Year_mean
0,1997-01-04,1997,1,4,12.14,s1,,-0.39592
1,1997-01-06,1997,1,6,12.13,s1,,-0.39592
2,1997-01-07,1997,1,7,12.12,s1,,-0.39592
3,1997-01-08,1997,1,8,12.17,s1,,-0.39592
4,1997-01-09,1997,1,9,12.20,s1,,-0.39592
5,1997-01-10,1997,1,10,12.08,s1,,-0.39592
6,1997-01-11,1997,1,11,11.98,s1,,-0.39592
7,1997-01-13,1997,1,13,11.97,s1,,-0.39592
8,1997-01-14,1997,1,14,12.11,s1,,-0.39592
9,1997-01-15,1997,1,15,12.10,s1,,-0.39592


### 單年每季的箱型圖

In [105]:
def single_year_seasonal(return_range=None, year=1997):
    '''Show per-season violin plots of one year's returns and return their std.

    Adds (or overwrites) the column ``Return_<n>`` on the module-level
    ``data`` frame, where ``n`` is ``return_range`` (1 when it is falsy).

    Args:
        return_range: Look-back (in rows) passed to ``pct_change``.
        year: Value of the ``Year`` column to analyse.

    Returns:
        A frame indexed by (Year, Seasonal) holding the population standard
        deviation (``ddof=0``) of the return column for the requested year.
    '''
    if return_range:
        ReturnName = 'Return_{}'.format(return_range)
        data[ReturnName] = data['Price'].pct_change(return_range)*100
    else:
        ReturnName = 'Return_1'
        data[ReturnName] = data['Price'].pct_change()*100

    # Rows of the requested year, restricted to the columns used below.
    year_rows = data[data['Year'] == year]
    year_rows = year_rows[['Year', 'Seasonal', ReturnName]]
    seasonal_std = year_rows.groupby(['Year', 'Seasonal']).std(ddof=0)

    # One violin (with an inner box) per season; only outlier points are drawn.
    chart_title = str(year) + ' 年' + ReturnName + '四季箱型圖'
    fig = px.violin(year_rows, x='Seasonal', y=ReturnName, box=True, points='outliers')
    fig.update_layout(title=chart_title, xaxis_title='Season', yaxis_title=ReturnName)
    fig.show()

    return seasonal_std

In [106]:
# Seasonal violin plots and standard deviations of the 10-row returns for 1997.
single_year_seasonal(return_range=10, year=1997)

Unnamed: 0_level_0,Unnamed: 1_level_0,Return_10
Year,Seasonal,Unnamed: 2_level_1
1997,s1,3.936812
1997,s2,1.693532
1997,s3,4.786684
1997,s4,2.565381


### 各年每月標準差

In [162]:
def yearStdBarChart(return_range, start, end):
    '''Plot the monthly standard deviation of returns as one bar trace per year.

    Adds (or overwrites) the column ``Return_<return_range>`` on the
    module-level ``data`` frame and shows a plotly bar chart.

    Args:
        return_range: Look-back (in rows) passed to ``pct_change``.
        start: First year to plot.
        end: Last year to plot (inclusive).

    Years between ``start`` and ``end`` that have no rows in ``data`` are
    skipped. Months without rows in a plotted year are left as gaps.
    '''
    months = list(range(1, 13))
    monthStr = ['{:02d}'.format(month) for month in months]
    ReturnName = 'Return_{}'.format(return_range)
    data[ReturnName] = data['Price'].pct_change(return_range)*100

    # Aggregate only the return column. std() over the whole frame would also
    # be applied to the non-numeric columns (such as 'Seasonal'), which newer
    # pandas versions refuse instead of silently dropping.
    stdSeries = data.groupby(['Year', 'Month'])[ReturnName].std(ddof=0)
    yearsWithData = set(stdSeries.index.get_level_values('Year'))

    fig = go.Figure()
    for year in range(start, end + 1):
        if year not in yearsWithData:
            continue
        # Reindex to all twelve months so each value stays under its own
        # label even when the year is missing some months.
        yearStd = stdSeries.loc[year].reindex(months)
        fig.add_trace(go.Bar(x=monthStr, y=yearStd.tolist(), name=str(year)))
    fig.update_layout(title="Year Standard Deviation", xaxis_title="Month", yaxis_title="Std")
    fig.show()

In [166]:
# Monthly standard deviation of the 10-row returns for 1997 through 2010.
yearStdBarChart(return_range=10, start=1997, end=2010)

---

各年每月的標準差

In [150]:
# Population standard deviation (ddof=0) of Return_10 for every (Year, Month) pair.
month_std = data[['Year', 'Month', 'Return_10']]
month_std_df = (
    month_std
    .groupby(['Year', 'Month'])
    .std(ddof=0)
    .reset_index()
)
# Store both key columns with object dtype.
month_std_df = month_std_df.astype({'Year': 'object', 'Month': 'object'})
month_std_df.head()

Unnamed: 0,Year,Month,Return_10
0,1997,1,0.866681
1,1997,2,3.916207
2,1997,3,3.92605
3,1997,4,1.180831
4,1997,5,1.409766


In [149]:
# One violin (with an inner box) per month over the monthly Return_10 std values.
px.violin(
    month_std_df,
    x='Month',
    y='Return_10',
    box=True,
)

In [48]:
# Season label -> the month numbers it covers.
season = {'s1':[1,2,3], 's2':[4,5,6], 's3':[7,8,9], 's4':[10,11,12]}
# Invert to month -> label so the whole column is labelled with one vectorised
# map. This does not depend on the frame's index labels, and a month outside
# 1-12 becomes NaN instead of producing a list shorter than the frame.
month_to_season = {month: label for label, months in season.items() for month in months}
seasonal = data['Month'].map(month_to_season).tolist()
data['Seasonal'] = seasonal

In [49]:
# Percentage change of Price over 10 rows, expressed in percent.
data['Return_10'] = 100 * data['Price'].pct_change(periods=10)
data.tail()

Unnamed: 0,Date,Year,Month,Day,Price,Seasonal,Return_10
5869,2020-04-01,2020,4,1,16.53,s2,5.353728
5870,2020-04-06,2020,4,6,17.45,s2,10.094637
5871,2020-04-07,2020,4,7,17.41,s2,12.759067
5872,2020-04-08,2020,4,8,17.83,s2,18.15772
5873,2020-04-09,2020,4,9,17.99,s2,10.639606


In [76]:
# Rows for 1997, restricted to the grouping keys and the return column.
Season_std = data.loc[data['Year'] == 1997, ['Year', 'Seasonal', 'Return_10']]
# Population standard deviation (ddof=0) of Return_10 per season.
Season_std_df = Season_std.groupby(['Year', 'Seasonal']).std(ddof=0)
Season_std_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Return_10
Year,Seasonal,Unnamed: 2_level_1
1997,s1,3.936812
1997,s2,1.693532
1997,s3,4.786684
1997,s4,2.565381


In [45]:
# One violin (with an inner box) per season for Return_10; only outlier points are drawn.
px.violin(
    data_1997,
    x='Seasonal',
    y='Return_10',
    box=True,
    points='outliers',
)

In [124]:
# Choose the look-back (in rows) used for the return column.
return_range = 5
data[f'Return_{return_range}'] = 100 * data['Price'].pct_change(periods=return_range)

In [127]:
# Convert the per-year frame to a nested dict of the form {column name: {year: value}}.
Return_std_dict = Return_std_df.to_dict()
Return_std_dict
# Example lookup of one year's value (valid when the frame holds a Return_10 column):
# Return_std_dict['Return_10'][1997]

{'Return_5': {1997: 2.690258306978477,
  1998: 3.182998237553209,
  1999: 2.5620306192430444,
  2000: 3.1864318891472756,
  2001: 3.3103203579511344,
  2002: 3.010971229770601,
  2003: 2.055002014768955,
  2004: 1.2893827586285267,
  2005: 1.073367455040242,
  2006: 1.3672870214202977,
  2007: 1.8880822060754319,
  2008: 3.304727353125022,
  2009: 2.6806587500831847,
  2010: 2.0189722358231825,
  2011: 2.4456180955735003,
  2012: 1.7767296954577207,
  2013: 1.4044132380660053,
  2014: 1.4916695277753147,
  2015: 1.7033930958283467,
  2016: 1.4551868953739038,
  2017: 1.0140580558543557,
  2018: 2.3082978534683254,
  2019: 1.4925795273785434,
  2020: 5.643486590371704}}

In [128]:
# Population standard deviation (ddof=0) of the selected return column for each year.
return_col = 'Return_{}'.format(return_range)
Return_std_df = data[['Year', return_col]].groupby('Year').std(ddof=0)
# Rebuild the lookup dict from the frame computed just above, so the mapping
# never uses a dict left over from a different return_range.
Return_std_dict = Return_std_df.to_dict()
data['Year_std'] = data['Year'].map(Return_std_dict[return_col])

In [129]:
# Show the first 20 rows.
data.head(n=20)

Unnamed: 0,Date,Year,Month,Day,Price,Return_5,Year_std
0,1997-01-04,1997,1,4,12.14,,2.690258
1,1997-01-06,1997,1,6,12.13,,2.690258
2,1997-01-07,1997,1,7,12.12,,2.690258
3,1997-01-08,1997,1,8,12.17,,2.690258
4,1997-01-09,1997,1,9,12.2,,2.690258
5,1997-01-10,1997,1,10,12.08,-0.494234,2.690258
6,1997-01-11,1997,1,11,11.98,-1.236603,2.690258
7,1997-01-13,1997,1,13,11.97,-1.237624,2.690258
8,1997-01-14,1997,1,14,12.11,-0.493016,2.690258
9,1997-01-15,1997,1,15,12.1,-0.819672,2.690258


In [155]:
# Per-year mean of the numeric columns only. Without numeric_only=True, newer
# pandas versions raise on string columns such as 'Seasonal' instead of
# silently dropping them.
data_1997.groupby('Year').mean(numeric_only=True)

Unnamed: 0_level_0,Month,Day,Price,Return_10,Year_std
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1997,6.585366,16.069686,11.131394,-0.39592,3.619078


In [44]:
# Keep only the rows whose Year is 1997.
data_1997 = data.loc[data['Year'] == 1997]
data_1997.head()

Unnamed: 0,Date,Year,Month,Day,Price,Seasonal,Return_10,Year_mean
0,1997-01-04,1997,1,4,12.14,s1,,-0.39592
1,1997-01-06,1997,1,6,12.13,s1,,-0.39592
2,1997-01-07,1997,1,7,12.12,s1,,-0.39592
3,1997-01-08,1997,1,8,12.17,s1,,-0.39592
4,1997-01-09,1997,1,9,12.2,s1,,-0.39592


In [88]:
# Return_10 over 1997 as a line with markers.
line = go.Scatter(
    x=data_1997['Date'],
    y=data_1997['Return_10'],
    mode='lines+markers',
    name='Return_10',
)
# The Year_std column drawn as a plain line on the same axes.
line_std = go.Scatter(
    x=data_1997['Date'],
    y=data_1997['Year_std'],
    mode='lines',
    name='std',
)
fig = go.Figure()
fig.add_trace(line)
fig.add_trace(line_std)
fig.update_layout(
    title='1997年 return_10 的散佈圖',
    xaxis_title='Date',
    yaxis_title='return_10',
)
fig.show()

---

In [83]:
# Population standard deviation (ddof=0) of the Return_10 column of data_1997.
std_1997 = data_1997['Return_10'].std(ddof=0)
std_1997

3.619078049742046