### SAS 코드 구현(ADJRET, B/M)

In [1]:
import numpy as np
import pandas as pd
import datetime

In [2]:
# Load the Compustat extract (the file name suggests it already carries the CRSP permno link).
Compustat = pd.read_csv("./compustat_permno.csv")
# Count the rows with a negative pstkrv; the next cell resets those values to 0.
np.sum(Compustat['pstkrv']<0)

21

In [3]:
# Keep only rows that carry a usable permno: not missing and not the
# placeholder value 0.  The earlier filter compared the column against the
# string '0', which never equals a numeric permno and so removed nothing;
# pd.to_numeric makes the test work for numeric and string columns alike.
has_permno = Compustat['permno'].notna()
nonzero_permno = pd.to_numeric(Compustat['permno'], errors='coerce') != 0
# .copy() detaches the filtered frame so the column assignments below do not
# operate on a view of the original (SettingWithCopyWarning).
Compustat = Compustat[has_permno & nonzero_permno].copy()

# Parse datadate (YYYYMMDD) into datetime and extract the calendar year.
Compustat['datadate'] = pd.to_datetime(Compustat['datadate'].astype(str), format='%Y%m%d')
Compustat['year'] = Compustat['datadate'].dt.year

# Negative pstkrv values are replaced by 0.
Compustat['pstkrv'] = np.where(Compustat['pstkrv'] < 0, 0, Compustat['pstkrv'])

# BVPS: book value of preferred stock.
# Preference order: pstkrv, then pstkl, then pstk, and 0 when all are missing.
# (The original chained .fillna(pstkl) twice; the second call was a no-op.)
Compustat['BVPS'] = Compustat['pstkrv'].fillna(Compustat['pstkl']).fillna(Compustat['pstk']).fillna(0)

# BE = SEQ + TXDB + ITCB - BVPS, with a missing ITCB treated as 0.
# A missing SEQ or TXDB leaves BE missing.
Compustat['BE'] = Compustat['seq'] + Compustat['txdb'] + Compustat['itcb'].fillna(0) - Compustat['BVPS']
# Non-positive book equity (BE <= 0) is set to NaN.
Compustat['BE'] = Compustat['BE'].where(Compustat['BE']>0)

compustat_BE = Compustat[['gvkey', 'datadate', 'year', 'BE', 'permno', 'permco' ]]
compustat_BE


Unnamed: 0,gvkey,datadate,year,BE,permno,permco
9,1000,1970-12-31,1970,10.544,25881.0,23369.0
10,1000,1971-12-31,1971,8.382,25881.0,23369.0
11,1000,1972-12-31,1972,7.309,25881.0,23369.0
12,1000,1973-12-31,1973,8.798,25881.0,23369.0
13,1000,1974-12-31,1974,8.279,25881.0,23369.0
...,...,...,...,...,...,...
434210,296318,2012-12-31,2012,2908.515,13013.0,53885.0
434218,296753,2012-12-31,2012,,13255.0,53991.0
434223,296885,2012-12-31,2012,19.018,13707.0,54281.0
434228,297209,2011-12-31,2011,,13104.0,53928.0


In [4]:
# For every (gvkey, permno, calendar year) keep only the record with the
# latest datadate: sort so that record comes last, then drop the others.
firm_year_keys = ['gvkey', 'permno', 'year']
compustat_BE = (
    compustat_BE
    .sort_values(by=firm_year_keys + ['datadate'])
    .drop_duplicates(subset=firm_year_keys, keep='last')
)

compustat_BE

Unnamed: 0,gvkey,datadate,year,BE,permno,permco
9,1000,1970-12-31,1970,10.544,25881.0,23369.0
10,1000,1971-12-31,1971,8.382,25881.0,23369.0
11,1000,1972-12-31,1972,7.309,25881.0,23369.0
12,1000,1973-12-31,1973,8.798,25881.0,23369.0
13,1000,1974-12-31,1974,8.279,25881.0,23369.0
...,...,...,...,...,...,...
434210,296318,2012-12-31,2012,2908.515,13013.0,53885.0
434218,296753,2012-12-31,2012,,13255.0,53991.0
434223,296885,2012-12-31,2012,19.018,13707.0,54281.0
434228,297209,2011-12-31,2011,,13104.0,53928.0


In [5]:
# Load the CRSP monthly file and lower-case every column name so later cells
# can refer to columns consistently.
crsp = pd.read_csv("./CRSP_M.csv")
crsp.columns = crsp.columns.str.lower()
crsp

Unnamed: 0,date,dlstcd,permno,shrcd,exchcd,siccd,dlret,permco,prc,vol,ret,shrout,altprc,rf
0,19610131,,10006,10,1,3740.0,,22156,50.25,939.0,0.322368,1420.0,50.2500,0.0019
1,19610131,,10014,10,1,3710.0,,22157,4.00,395.0,0.000000,2504.0,4.0000,0.0019
2,19610131,,10030,10,1,3310.0,,22160,41.75,280.0,0.087948,1627.0,41.7500,0.0019
3,19610131,,10057,11,1,3540.0,,20020,54.00,152.0,0.142857,500.0,54.0000,0.0019
4,19610131,,10102,10,1,2810.0,,22164,79.50,480.0,0.032468,3965.0,79.5000,0.0019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2921188,20121231,574.0,76999,11,3,7372.0,-0.765517,11056,,123365.0,,6855.0,0.3120,0.0001
2921189,20121231,580.0,93007,11,3,9999.0,-0.774834,53201,,121619.0,,57097.0,0.6307,0.0001
2921190,20121231,584.0,38790,11,2,1311.0,-0.762470,1933,,21350.0,,19048.0,0.3321,0.0001
2921191,20121231,584.0,89761,11,2,3714.0,2.520000,44123,,39636.0,,7107.0,0.3700,0.0001


In [6]:
# Security-level market equity: |altprc| * shrout / 1000.
# It is defined only when both the absolute price and the shares outstanding
# are strictly positive; every other row gets NaN.
abs_price = crsp['altprc'].abs()
priced_with_shares = (abs_price > 0) & (crsp['shrout'] > 0)
crsp['Meq'] = (abs_price * crsp['shrout'] / 1000).where(priced_with_shares)

# Convert the YYYYMMDD date to datetime and keep the calendar year.
crsp['date'] = pd.to_datetime(crsp['date'].astype(str), format='%Y%m%d')
crsp['year'] = crsp['date'].dt.year

print(crsp.shape)
crsp.head()

(2921193, 16)


Unnamed: 0,date,dlstcd,permno,shrcd,exchcd,siccd,dlret,permco,prc,vol,ret,shrout,altprc,rf,Meq,year
0,1961-01-31,,10006,10,1,3740.0,,22156,50.25,939.0,0.322368,1420.0,50.25,0.0019,71.355,1961
1,1961-01-31,,10014,10,1,3710.0,,22157,4.0,395.0,0.0,2504.0,4.0,0.0019,10.016,1961
2,1961-01-31,,10030,10,1,3310.0,,22160,41.75,280.0,0.087948,1627.0,41.75,0.0019,67.92725,1961
3,1961-01-31,,10057,11,1,3540.0,,20020,54.0,152.0,0.142857,500.0,54.0,0.0019,27.0,1961
4,1961-01-31,,10102,10,1,2810.0,,22164,79.5,480.0,0.032468,3965.0,79.5,0.0019,315.2175,1961


In [7]:
# Fill in missing delisting returns (Shumway, 1997):
#   * delisting code 500, 520, 574, 580, 584 or 551-573 -> -0.3
#   * any other delisting code                          -> -1
# Rows that already have dlret, or have no delisting code, are left unchanged.
delisted_without_ret = crsp['dlstcd'].notna() & crsp['dlret'].isna()
partial_loss_code = (
    crsp['dlstcd'].isin([500, 520, 574, 580, 584])
    | crsp['dlstcd'].between(551, 573)
)
crsp['dlret'] = np.select(
    [delisted_without_ret & partial_loss_code, delisted_without_ret],
    [-0.3, -1.0],
    default=crsp['dlret'],
)

# Sanity check: no delisted row may still lack a delisting return.
still_missing = crsp['dlret'].isna() & crsp['dlstcd'].notna()
count = int(still_missing.sum())
print(count)
crsp

0


Unnamed: 0,date,dlstcd,permno,shrcd,exchcd,siccd,dlret,permco,prc,vol,ret,shrout,altprc,rf,Meq,year
0,1961-01-31,,10006,10,1,3740.0,,22156,50.25,939.0,0.322368,1420.0,50.2500,0.0019,71.355000,1961
1,1961-01-31,,10014,10,1,3710.0,,22157,4.00,395.0,0.000000,2504.0,4.0000,0.0019,10.016000,1961
2,1961-01-31,,10030,10,1,3310.0,,22160,41.75,280.0,0.087948,1627.0,41.7500,0.0019,67.927250,1961
3,1961-01-31,,10057,11,1,3540.0,,20020,54.00,152.0,0.142857,500.0,54.0000,0.0019,27.000000,1961
4,1961-01-31,,10102,10,1,2810.0,,22164,79.50,480.0,0.032468,3965.0,79.5000,0.0019,315.217500,1961
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2921188,2012-12-31,574.0,76999,11,3,7372.0,-0.765517,11056,,123365.0,,6855.0,0.3120,0.0001,2.138760,2012
2921189,2012-12-31,580.0,93007,11,3,9999.0,-0.774834,53201,,121619.0,,57097.0,0.6307,0.0001,36.011078,2012
2921190,2012-12-31,584.0,38790,11,2,1311.0,-0.762470,1933,,21350.0,,19048.0,0.3321,0.0001,6.325841,2012
2921191,2012-12-31,584.0,89761,11,2,3714.0,2.520000,44123,,39636.0,,7107.0,0.3700,0.0001,2.629590,2012


In [8]:
# Delisting-adjusted return:
#   * delisted and ret present -> compound ret with dlret
#   * delisted and ret missing -> dlret alone
#   * not delisted             -> ret unchanged
was_delisted = crsp['dlstcd'].notna()
has_ret = crsp['ret'].notna()
compounded = (1 + crsp['ret']) * (1 + crsp['dlret']) - 1
crsp['retadj'] = np.select(
    [was_delisted & has_ret, was_delisted & ~has_ret],
    [compounded, crsp['dlret']],
    default=crsp['ret'],
)

# Excess return over the risk-free rate.
crsp['eretadj'] = crsp['retadj'] - crsp['rf']

# Order by Meq (then permco, date); the next cell takes a running sum of Meq
# within each (date, permco) group and depends on this ordering.
crsp_M2 = crsp.sort_values(by=['Meq', 'permco', 'date'])
crsp_M2


Unnamed: 0,date,dlstcd,permno,shrcd,exchcd,siccd,dlret,permco,prc,vol,ret,shrout,altprc,rf,Meq,year,retadj,eretadj
1694053,1994-05-31,580.0,75961,11,3,3660.0,-0.025001,10252,,14609.0,,8.0,1.18750,0.0031,0.009500,1994,-0.025001,-0.028101
1684488,1994-04-29,,75961,11,3,3660.0,,10252,0.03125,62141.0,-0.750000,338.0,0.03125,0.0027,0.010563,1994,-0.750000,-0.752700
1157936,1987-01-30,,31908,10,3,7370.0,,1497,,970.0,,275.0,-0.06250,0.0042,0.017188,1987,,
822289,1982-01-29,,35933,11,3,6799.0,,1785,-0.04688,,-0.250000,591.0,-0.04688,0.0080,0.027706,1982,-0.250000,-0.258000
827437,1982-02-26,,35933,11,3,6799.0,,1785,-0.04688,,0.000000,591.0,-0.04688,0.0092,0.027706,1982,0.000000,-0.009200
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369147,1974-02-28,,83097,10,3,6552.0,,25998,-4.12500,,-0.057143,0.0,-4.12500,0.0058,,1974,-0.057143,-0.062943
374282,1974-03-29,500.0,83097,10,3,6552.0,-0.545455,25998,,,,0.0,-3.87500,0.0056,,1974,-0.545455,-0.551055
881578,1982-12-31,,78233,11,3,,,26525,,,,11701.0,,0.0067,,1982,,
881580,1982-12-31,,78250,11,3,,,26858,,,,3092.0,,0.0067,,1982,,


In [9]:
# Collapse CRSP to one row per (date, permco), with ME holding the sum of Meq
# over the rows of that group.
# NOTE: crsp_M2 is sorted by Meq ascending (previous cell); the running sum
# below relies on that row order being preserved by .copy().
crsp_M3 = crsp_M2.copy()

# Number of rows in each (date, permco) group.
crsp_M3['group_size'] = crsp_M3.groupby(['date', 'permco'])['Meq'].transform('size')

# Groups with a single row: ME is simply that row's Meq.
crsp_M3['ME'] = np.where(crsp_M3['group_size'] == 1, crsp_M3['Meq'], np.nan)

# Groups with two or more rows: ME is the running sum of Meq within the group,
# so the largest running sum in a group equals the group total.
crsp_M3.loc[crsp_M3['group_size'] > 1, 'ME'] = crsp_M3.groupby(['date', 'permco'])['Meq'].cumsum()

# Sort so that, within each (date, permco) group, the largest ME comes FIRST
# (ME is sorted in descending order).
crsp_M3 = crsp_M3.sort_values(by=['date', 'permco', 'ME'], ascending=[True, True, False]) 

# Keep only the first row of each group, i.e. the row carrying the largest ME.
crsp_M3 = crsp_M3.drop_duplicates(subset=['date', 'permco'], keep='first').reset_index(drop=True)
crsp_M3 = crsp_M3.drop(columns=['group_size'])

crsp_M3


Unnamed: 0,date,dlstcd,permno,shrcd,exchcd,siccd,dlret,permco,prc,vol,ret,shrout,altprc,rf,Meq,year,retadj,eretadj,ME
0,1961-01-31,,17670,10,1,2080.0,,74,32.00,159.0,0.075000,955.0,32.00,0.0019,30.56000,1961,0.075000,0.073100,30.56000
1,1961-01-31,,18702,10,1,5810.0,,267,35.00,225.0,-0.075908,1587.0,35.00,0.0019,55.54500,1961,-0.075908,-0.077808,55.54500
2,1961-01-31,,68523,11,1,3830.0,,301,95.00,860.0,0.183801,2476.0,95.00,0.0019,235.22000,1961,0.183801,0.181901,235.22000
3,1961-01-31,,20714,10,1,5610.0,,584,19.75,119.0,0.025974,1688.0,19.75,0.0019,33.33800,1961,0.025974,0.024074,33.33800
4,1961-01-31,,11287,10,1,3740.0,,921,14.25,346.0,0.117647,1760.0,14.25,0.0019,25.08000,1961,0.117647,0.115747,25.08000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2892460,2012-12-31,,13730,11,1,2911.0,,54291,29.05,239843.0,,20500.0,29.05,0.0001,595.52500,2012,,,595.52500
2892461,2012-12-31,,13656,11,3,9999.0,,54517,32.56,21929.0,0.032667,23100.0,32.56,0.0001,752.13600,2012,0.032667,0.032567,752.13600
2892462,2012-12-31,,16852,11,3,3670.0,,56028,5.28,105896.0,0.900649,1511.0,5.28,0.0001,7.97808,2012,0.900649,0.900549,7.97808
2892463,2012-12-31,,17279,11,1,2111.0,,56249,116.67,196351.0,-0.037058,129445.0,116.67,0.0001,15102.34815,2012,-0.037058,-0.037158,15102.34815


##### SAS 6

In [10]:
def _me_snapshot(frame, month, year_shift, value_name):
    """Return permno / t / ME rows of `frame` observed in calendar `month`.

    Rows with a missing ME are excluded.  `t` is the observation year plus
    `year_shift`, and the ME column is renamed to `value_name`.
    """
    in_month = frame['date'].dt.month == month
    picked = frame.loc[in_month & frame['ME'].notna(), ['permno', 'date', 'ME']].copy()
    picked['t'] = picked['date'].dt.year + year_shift
    return picked[['permno', 't', 'ME']].rename(columns={'ME': value_name})


crsp_M3 = crsp_M3.copy()

crsp_M3['date'] = pd.to_datetime(crsp_M3['date'], format='%Y-%m-%d')

sort_keys = ['permno', 't']

# June market equity, keyed by the same calendar year.
me_jun = _me_snapshot(crsp_M3, 6, 0, 'me_jun').sort_values(by=sort_keys)

# December market equity, keyed by the following year (t = year + 1).
me_last_Dec = _me_snapshot(crsp_M3, 12, 1, 'me_last_Dec').sort_values(by=sort_keys)

# Book equity, non-missing only, also keyed by the following year.
be_last_year = compustat_BE.loc[compustat_BE['BE'].notna(), ['permno', 'year', 'BE']].copy()
be_last_year['t'] = be_last_year['year'] + 1
be_last_year = (
    be_last_year[['permno', 't', 'BE']]
    .rename(columns={'BE': 'be_last_year'})
    .sort_values(by=sort_keys)
)

me_jun, me_last_Dec, be_last_year

(         permno     t       me_jun
 1112263   10000  1986    11.734594
 1183863   10000  1987     0.851594
 1112264   10001  1986     6.033125
 1183864   10001  1987     5.822125
 1258733   10001  1988     6.200000
 ...         ...   ...          ...
 2778355   93435  2010   102.653400
 2825178   93435  2011    28.538400
 2778356   93436  2010  2220.908340
 2825179   93436  2011  3028.966530
 2870168   93436  2012  3295.556670
 
 [239521 rows x 3 columns],
          permno     t  me_last_Dec
 1147621   10000  1987     1.981566
 1147622   10001  1987     6.937000
 1221511   10001  1988     5.828000
 1295310   10001  1989     6.362250
 1365986   10001  1990    10.347750
 ...         ...   ...          ...
 2802015   93435  2011   155.017720
 2847854   93435  2012    23.864000
 2802016   93436  2011  2527.400040
 2847855   93436  2012  2987.490240
 2892130   93436  2013  3868.428180
 
 [242805 rows x 3 columns],
          permno     t  be_last_year
 221708  10000.0  1987         0.418
 2

In [11]:
%%time

"""
data ME_BM; 
    merge ME_Jun (in = a) BE_last_year (in = b) ME_last_Dec (in = c); 
    by permno t; 
    if a & b & c;
    BM = BE_last_year/ME_last_Dec; 
    keep permno t ME_Jun BM; 
run;
"""

# me_bm = pd.merge(me_jun, me_last_Dec, on=['permno', 't'], how='inner')
# me_bm = pd.merge(me_bm, be_last_year, on=['permno', 't'], how='inner')
# me_bm['bm'] = me_bm['be_last_year'] / me_bm['me_last_Dec']

# me_bm = me_bm[['permno', 't', 'me_jun', 'bm']]

# 모든 데이터프레임에서 permno를 int로 변환
me_jun['permno'] = me_jun['permno'].astype(int)
me_last_Dec['permno'] = me_last_Dec['permno'].astype(int)
be_last_year['permno'] = be_last_year['permno'].astype(int)

# BM 계산
me_bm = pd.merge(me_jun, me_last_Dec, on=['permno', 't'], how='inner')
me_bm = pd.merge(me_bm, be_last_year, on=['permno', 't'], how='inner')

# BM 값 계산 (Book-to-Market)
me_bm['bm'] = me_bm['be_last_year'] / me_bm['me_last_Dec']

# 필요한 열만 선택
me_bm = me_bm[['permno', 't', 'me_jun', 'bm']]

# 결과 출력
me_bm

CPU times: total: 31.2 ms
Wall time: 109 ms


Unnamed: 0,permno,t,me_jun,bm
0,10000,1987,0.851594,0.210944
1,10001,1987,5.822125,1.014415
2,10001,1988,6.200000,1.207618
3,10001,1989,7.007000,1.145192
4,10001,1990,10.052250,0.818149
...,...,...,...,...
174164,93434,2011,24.998000,0.796865
174165,93434,2012,36.358170,0.569317
174166,93435,2011,28.538400,0.540183
174167,93436,2011,3028.966530,0.081921


In [12]:
# SAS log for comparison:
# NOTE: There were 174169 observations read from the data set WORK.ME_BM.
# The row count reported by .shape is expected to match that number.
me_bm.shape

(174169, 4)

In [13]:
%%time

"""
* Match each permno's monthly return to the corresponding BM and ME;

data ret; 
    set CRSP_M3; 
    if month(date)>6 then t = year(date); 
    else t = year(date)-1; 
run; 
"""

crsp_M3['t'] = crsp_M3['date'].apply(lambda date: date.year if date.month > 6 else date.year-1)

# proc sort Mata = ret; by permno t date; run; 
crsp_M3 = crsp_M3.sort_values(by=['permno', 't', 'date'])

CPU times: total: 1.2 s
Wall time: 3.58 s


In [14]:
# SAS log for comparison:
# The data set WORK.RET has 2892465 observations and 20 variables.
crsp_M3.shape

(2892465, 20)

In [15]:
%%time

"""
data ret_ME_BM; 
    merge ret (in = a) ME_BM (in = b); 
    by permno t; 
    if a; 
run;
"""

ret_me_bm = pd.merge(crsp_M3, me_bm, on=['permno', 't'], how='left')
ret_me_bm = ret_me_bm.drop_duplicates(subset=['permno', 'date', 'year'], keep='last')

CPU times: total: 312 ms
Wall time: 957 ms


In [16]:
# SAS equivalent: proc sort data = ret_ME_BM; by permno date; run;
# Order the rows by security and then chronologically.
ret_me_bm = ret_me_bm.sort_values(by=['permno', 'date'])

In [17]:
# SAS log for comparison:
# The data set WORK.RET_ME_BM has 2892465 observations and 22 variables.
ret_me_bm.shape

(2892465, 22)

In [18]:
%%time

"""
* Also add the mktcap and stock price from the previous month; 
data ret_ME_BM; 
    set ret_ME_BM;
    
    altprc_lag1 = lag1(altprc); 
    ME_lag1 = lag1(ME);

    permno_lag1 = lag1(permno); 
    date_lag1 = lag1(date);

    if (permno NE permno_lag1) or (intck('month',date_lag1,date)>1) then do; 
        altprc_lag1 = .; 
        ME_lag1 = .; 
            end; 
run;
"""

altprc_lag_df = pd.pivot_table(ret_me_bm, index='date', columns='permno', values='altprc').sort_index().shift(1)
altprc_lag = altprc_lag_df.reset_index().melt(id_vars='date', var_name='permno', value_name='altprc_lag1').dropna()

me_lag_df = pd.pivot_table(ret_me_bm, index='date', columns='permno', values='ME').sort_index().shift(1)
me_lag = me_lag_df.reset_index().melt(id_vars='date', var_name='permno', value_name='me_lag1').dropna()

ret_me_bm = pd.merge(ret_me_bm, altprc_lag, on=['date', 'permno'], how='left')
ret_me_bm = pd.merge(ret_me_bm, me_lag, on=['date', 'permno'], how='left')

CPU times: total: 3.02 s
Wall time: 9.92 s


In [19]:
# SAS log for comparison:
# NOTE: The data set WORK.TT has 38518 observations and 26 variables.
# Count the rows that have no lagged price.
ret_me_bm['altprc_lag1'].isna().sum()

38518

In [20]:
%%time

"""
* Exclude observations with missing values; 

data assignment1_data; 
    retain permno date year exchcd siccd retadj eretadj altprc_lag1 ME_lag1 ME_Jun BM; 
        set ret_ME_BM; 
        if nmiss(retadj, ME_lag1, ME_Jun, BM) = 0; 
        keep permno date year exchcd siccd retadj eretadj altprc_lag1 ME_lag1 ME_Jun BM;         
run;
"""

ret_me_bm = ret_me_bm[['permno', 'date', 'year', 'exchcd', 'siccd', 'retadj', 'eretadj', 'altprc_lag1', 'me_lag1', 'me_jun', 'bm']]
ret_me_bm = ret_me_bm.dropna(subset=['retadj', 'me_lag1', 'me_jun', 'bm'])

CPU times: total: 78.1 ms
Wall time: 318 ms


In [21]:
# SAS log for comparison:
# The data set WORK.ASSIGNMENT1_DATA has 1983365 observations and 11 variables.
ret_me_bm

Unnamed: 0,permno,date,year,exchcd,siccd,retadj,eretadj,altprc_lag1,me_lag1,me_jun,bm
36,10001,1987-07-31,1987,3,4920.0,0.021277,0.016677,5.8750,5.822125,5.822125,1.014415
37,10001,1987-08-31,1987,3,4920.0,0.083333,0.078633,6.0000,5.946000,5.822125,1.014415
38,10001,1987-09-30,1987,3,4920.0,-0.022308,-0.026808,6.5000,6.441500,5.822125,1.014415
39,10001,1987-10-30,1987,3,4920.0,0.020000,0.014000,6.2500,6.200000,5.822125,1.014415
40,10001,1987-11-30,1987,3,4920.0,-0.029412,-0.032912,6.3750,6.324000,5.822125,1.014415
...,...,...,...,...,...,...,...,...,...,...,...
2892460,93436,2012-08-31,2012,3,9999.0,0.040117,0.040017,27.4200,2890.945440,3295.556670,0.074994
2892461,93436,2012-09-28,2012,3,9999.0,0.026648,0.026548,28.5200,3006.920640,3295.556670,0.074994
2892462,93436,2012-10-31,2012,3,9999.0,-0.039228,-0.039328,29.2800,3097.004160,3295.556670,0.074994
2892463,93436,2012-11-30,2012,3,9999.0,0.202215,0.202115,28.1314,3200.762561,3295.556670,0.074994


##### Save sample data and summary stats

In [22]:
# Preview the first 25 rows of the final sample.
ret_me_bm.iloc[:25]

Unnamed: 0,permno,date,year,exchcd,siccd,retadj,eretadj,altprc_lag1,me_lag1,me_jun,bm
36,10001,1987-07-31,1987,3,4920.0,0.021277,0.016677,5.875,5.822125,5.822125,1.014415
37,10001,1987-08-31,1987,3,4920.0,0.083333,0.078633,6.0,5.946,5.822125,1.014415
38,10001,1987-09-30,1987,3,4920.0,-0.022308,-0.026808,6.5,6.4415,5.822125,1.014415
39,10001,1987-10-30,1987,3,4920.0,0.02,0.014,6.25,6.2,5.822125,1.014415
40,10001,1987-11-30,1987,3,4920.0,-0.029412,-0.032912,6.375,6.324,5.822125,1.014415
41,10001,1987-12-31,1987,3,4920.0,-0.033535,-0.037435,6.1875,6.138,5.822125,1.014415
42,10001,1988-01-29,1988,3,4920.0,0.06383,0.06093,5.875,5.828,5.822125,1.014415
43,10001,1988-02-29,1988,3,4920.0,0.08,0.0754,6.25,6.2,5.822125,1.014415
44,10001,1988-03-31,1988,3,4920.0,-0.076296,-0.080696,6.75,6.696,5.822125,1.014415
45,10001,1988-04-29,1988,3,4920.0,0.030612,0.026012,6.125,6.076,5.822125,1.014415


In [23]:
from pandas.tseries.offsets import MonthEnd

# Move every date to its calendar month end so it can be compared with the
# 'YYYY-12-31' labels below.
ret_me_bm['date'] = ret_me_bm['date'] + MonthEnd(0)

# One row of cross-sectional statistics of eretadj per December, 1970-2012.
stat_columns = ['mean', 'std', 'min', 'max', "N of permno's"]
summ_dates = [str(year) + '-12-31' for year in range(1970, 2013)]
summ_stats1 = pd.DataFrame(index=summ_dates, columns=stat_columns)

for date in summ_dates:
    # Filter once per date and read both columns from the same slice.
    december = ret_me_bm.loc[ret_me_bm['date'] == date, ['eretadj', 'permno']]
    excess = december['eretadj']
    summ_stats1.loc[date] = [
        excess.mean(),
        excess.std(),
        excess.min(),
        excess.max(),
        len(december['permno'].unique()),
    ]

summ_stats1

Unnamed: 0,mean,std,min,max,N of permno's
1970-12-31,0.08084,0.110938,-0.3042,0.707338,1659
1971-12-31,0.118065,0.111185,-0.221091,0.9963,1740
1972-12-31,-0.023024,0.089014,-0.414868,0.948024,1861
1973-12-31,-0.038037,0.133792,-0.501349,1.160267,2497
1974-12-31,-0.083865,0.132955,-0.691211,1.159667,3112
1975-12-31,-0.011791,0.121296,-1.0048,1.4952,3455
1976-12-31,0.095709,0.142698,-0.504,1.638857,3495
1977-12-31,0.01559,0.10888,-1.0049,1.267827,3477
1978-12-31,0.005084,0.116381,-0.9502,1.706486,3368
1979-12-31,0.054384,0.139125,-1.0095,1.4905,3330


In [24]:
# Report the December statistics for one year per decade.
specific_years = ['1970-12-31', '1980-12-31', '1990-12-31', '2000-12-31', '2010-12-31']

# Look the rows up by their date-string index labels.
summ_stats_answer = summ_stats1.loc[specific_years]

summ_stats_answer

Unnamed: 0,mean,std,min,max,N of permno's
1970-12-31,0.08084,0.110938,-0.3042,0.707338,1659
1980-12-31,-0.049328,0.125828,-1.0131,2.272614,3425
1990-12-31,-0.001738,0.185068,-1.006,1.994,4039
2000-12-31,-0.027174,0.258517,-0.999667,3.245,4560
2010-12-31,0.082409,0.141006,-0.8441,2.400847,3269


In [25]:
import logging
from fpdf import FPDF

# Step 1: write the log file
log_filename = 'program.log'
# filemode='w' truncates the log file instead of appending to it.
# NOTE(review): logging.basicConfig does nothing when the root logger already
# has handlers (possible inside a notebook kernel); in that case program.log
# would not be written -- confirm in the target environment.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename=log_filename,
    filemode='w'
)

# Emit one sample record at each of four levels.
logging.info("This is an info message")
logging.warning("This is a warning message")
logging.error("This is an error message")
logging.debug("This is a debug message")

# Step 2: convert the log file to PDF
class PDF(FPDF):
    """FPDF document with a 'Log Report' title header and a page-number footer."""

    def header(self):
        """Write the centred, bold report title."""
        self.set_font('Arial', 'B', 12)
        self.cell(200, 10, 'Log Report', align='C', ln=True)

    def footer(self):
        """Write the centred, italic page number near the bottom of the page."""
        # A negative y is passed to place the cursor relative to the page bottom.
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        page_label = f'Page {self.page_no()}'
        self.cell(0, 10, page_label, border=0, ln=0, align='C')

def log_to_pdf(log_file, output_pdf):
    """Render a text log file as a PDF with one log line per row.

    Args:
        log_file: Path of the text log file to read.
        output_pdf: Path of the PDF file to write.
    """
    pdf = PDF()
    pdf.add_page()

    pdf.set_font('Arial', '', 12)

    # Copy the log file into the PDF line by line.
    with open(log_file, 'r') as file:
        for line in file:
            # Drop the line terminator so it is not emitted as cell text.
            text = line.rstrip('\r\n')
            # The built-in Arial font is limited to Latin-1; replace any other
            # character with '?' rather than aborting the whole report.
            text = text.encode('latin-1', 'replace').decode('latin-1')
            pdf.cell(200, 10, text, ln=True)

    # Save the PDF file.
    pdf.output(output_pdf)

# Generate the PDF report from the log file written above.
log_to_pdf(log_filename, 'log_report.pdf')

# Confirmation message (Korean: "The PDF file has been created.").
print("PDF 파일이 생성되었습니다.")


2024-09-23 03:16:04,168 - INFO - Logging to the console
