# 讀取資料

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the merged dataset from the Excel workbook (path is relative to the working directory)
merge_data = pd.read_excel(io='merge_data_fill_na.xlsx')

In [3]:
# Columns carried into the research sample (selected from merge_data further down)
col = [
    'ret',
    'size_ln', 'op_exp_ln',
    'ROA', 'ROE', 'op_pm',
    'AT', 'FCF_sd', 'Lev', 'DY', 'TQ',
]

In [4]:
# Natural-log transforms of the `size` and `op_exp` columns.
# np.log maps 0 to -inf (and negatives to NaN, with a RuntimeWarning). A -inf value
# is not treated as missing by DataFrame.dropna(), so it would flow into the model
# input. Non-positive inputs are therefore masked to NaN *before* taking the log,
# which lets the later dropna() remove those rows. Rows with positive inputs are
# unaffected.
merge_data["size_ln"] = np.log(merge_data["size"].where(merge_data["size"] > 0))
merge_data["op_exp_ln"] = np.log(merge_data["op_exp"].where(merge_data["op_exp"] > 0))

In [5]:
# Define the standardization (z-score) function
def standardize(group):
    """Return ``group`` centred on its mean and scaled by its standard deviation.

    ``group`` must support ``.mean()``, ``.std()`` and element-wise arithmetic
    (e.g. a pandas Series, for which ``.std()`` defaults to the sample
    standard deviation, ddof=1). No guard is applied: a group whose standard
    deviation is zero or undefined produces non-finite results.
    """
    centered = group - group.mean()
    spread = group.std()
    return centered / spread

# Standardize FCF within each stock code: every `code` group is z-scored separately
fcf_by_code = merge_data.groupby('code')['FCF']
merge_data['FCF_sd'] = fcf_by_code.transform(standardize)

In [6]:
# Keep only the columns listed in `col`, then drop every row that has a
# missing value in any of them
research_data = merge_data.loc[:, col].dropna()

# 要跑模型的變數

In [8]:
# Dependent variable (y): the 'ret' column
y = research_data.loc[:, 'ret']

# Independent variables (X): the regressors, in the column order passed to the model
X = research_data.loc[:, ["TQ", "size_ln", "FCF_sd", "Lev", "ROE", "op_exp_ln", "DY"]]

# 迴歸模型建立

In [11]:
# statsmodels installation guide: https://www.statsmodels.org/dev/install.html
# Uncomment the next line only if statsmodels is missing from the kernel's environment:
# %pip install statsmodels

In [12]:
import statsmodels.api as sm

In [15]:
# Specify the OLS regression of y on X; add_constant prepends the intercept column
model = sm.OLS(endog=y, exog=sm.add_constant(X))
# Estimate the model
result = model.fit()

In [16]:
# Show the fitted model's summary; as the cell's last expression it is rendered as the cell output
result.summary()

0,1,2,3
Dep. Variable:,ret,R-squared:,0.001
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,1.994
Date:,"Fri, 15 Nov 2024",Prob (F-statistic):,0.052
Time:,02:35:26,Log-Likelihood:,-18300.0
No. Observations:,9716,AIC:,36620.0
Df Residuals:,9708,BIC:,36670.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.1480,0.132,1.125,0.261,-0.110,0.406
TQ,-0.1788,0.075,-2.385,0.017,-0.326,-0.032
size_ln,0.0947,0.037,2.574,0.010,0.023,0.167
FCF_sd,-0.0141,0.018,-0.808,0.419,-0.048,0.020
Lev,0.0696,0.030,2.284,0.022,0.010,0.129
ROE,0.0084,0.004,1.960,0.050,1.54e-07,0.017
op_exp_ln,-0.0666,0.026,-2.597,0.009,-0.117,-0.016
DY,-0.0233,0.016,-1.482,0.138,-0.054,0.008

0,1,2,3
Omnibus:,2987.942,Durbin-Watson:,1.766
Prob(Omnibus):,0.0,Jarque-Bera (JB):,44964.294
Skew:,1.062,Prob(JB):,0.0
Kurtosis:,13.323,Cond. No.,182.0


In [None]:
# Descriptive statistics for the columns of the cleaned research sample
research_data.describe()

## 偏態 與 峰態

In [None]:
from scipy import stats

In [None]:
# Skewness, computed down each column (axis 0)
skew = stats.skew(research_data, axis=0)
# Kurtosis, computed down each column. fisher=False selects Pearson's definition
# (a normal distribution scores 3), which the author notes is the EViews convention.
kurtosis = stats.kurtosis(research_data, axis=0, fisher=False)

# Results are printed in the column order of research_data
print('skew      :  {}'.format(skew))
print('kurtosis  :  {}'.format(kurtosis))