In [1]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

## GET (SEMI-)ANNUAL AVERAGE RETURNS OVER TIME OF THE AVAILABLE FUNDS

In [41]:
import numpy as np

In [3]:
# Import CIKs from the dataframe of all cleaned filings
from utils import *
ciks = ', '.join(list(df_from_filings().cik.unique()))

In [5]:
# Get all cik number available (intersection between our dataframe and the data on CRSP)
ciks_data = db.raw_sql(f'''
        select distinct comp_cik
        from crsp_q_mutualfunds.crsp_cik_map
        where comp_cik in ({ciks})
''')
cik_available = list(ciks_data.comp_cik)
cik_available = ', '.join([str(i) for i in cik_available])

In [6]:
# With the mapping table of CRSP, get all companies' fund number
crsp_fundno_available = db.raw_sql(f'''
        select *
        from crsp_q_mutualfunds.crsp_cik_map
        where comp_cik in ({cik_available})
''')
crsp_fundno_available

Unnamed: 0,crsp_fundno,comp_cik,series_cik,contract_cik
0,3569.0,1830437.0,S000074043,C000231400
1,3691.0,1576367.0,S000013666,C000037424
2,3966.0,1707560.0,S000066202,C000213944
3,5463.0,1650149.0,S000054431,C000170953
4,5669.0,1750821.0,S000070728,C000224837
...,...,...,...,...
1782,99514.0,1587551.0,S000074761,C000232932
1783,99591.0,1810747.0,S000075090,C000233895
1784,99710.0,1587551.0,S000060087,C000206947
1785,99711.0,1587551.0,S000060087,C000225339


In [13]:
print(len(crsp_fundno_available['crsp_fundno'].unique()))

print(len(crsp_fundno_available['comp_cik'].unique()))

1787
183


In [7]:
# Convert the list of fund numbers to a list of string type for future SQL queries
list_float_fundnos = list(crsp_fundno_available.crsp_fundno.unique())
crsp_fundno_available_list_str = ', '.join([str(i) for i in list_float_fundnos])

In [14]:
# Get all available returns for each fund
monthly_returns_all_funds = db.raw_sql(f'''
        select *
        from crsp_q_mutualfunds.monthly_returns
        where crsp_fundno in ({crsp_fundno_available_list_str})
        order by caldt desc
''')
monthly_returns_all_funds = pd.merge(left=monthly_returns_all_funds,right=crsp_fundno_available, how='inner', left_on='crsp_fundno', right_on='crsp_fundno')
monthly_returns_all_funds

Unnamed: 0,crsp_fundno,caldt,mret,comp_cik,series_cik,contract_cik
0,88660.0,2022-03-31,0.025914,1604813.0,S000054995,C000172974
1,88660.0,2022-02-28,-0.019510,1604813.0,S000054995,C000172974
2,88660.0,2022-01-31,-0.063994,1604813.0,S000054995,C000172974
3,88660.0,2021-12-31,0.056790,1604813.0,S000054995,C000172974
4,88660.0,2021-11-30,-0.007847,1604813.0,S000054995,C000172974
...,...,...,...,...,...,...
90891,6157.0,1997-02-28,0.001866,1738073.0,S000062375,
90892,6157.0,1997-01-31,0.002886,1738073.0,S000062375,
90893,6157.0,1996-12-31,-0.005900,1738073.0,S000062375,
90894,6157.0,1996-11-29,0.011823,1738073.0,S000062375,


In [67]:
# Create a new dataframe with some statistic and the average return
monthly_returns_all_funds_aggregated = monthly_returns_all_funds[['caldt', 'mret', 'comp_cik', 'crsp_fundno']].groupby(by=['comp_cik','caldt']).agg(
    count=('crsp_fundno', 'count'),
    mret=('mret', 'mean'),
)

monthly_returns_all_funds_aggregated.sort_values(by=['comp_cik', 'caldt'], inplace=True)

# Create temporary column to compute later the cumulative return
monthly_returns_all_funds_aggregated['mret_add_1']=monthly_returns_all_funds_aggregated['mret']+1

# Compite the cumulative return of each CIK number
monthly_returns_all_funds_aggregated['cum_return']=monthly_returns_all_funds_aggregated.groupby(['comp_cik'])['mret_add_1'].cumprod()-1

# Display dataframe
display(monthly_returns_all_funds_aggregated)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mret,mret_add_1,cum_return
comp_cik,caldt,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1567101.0,2013-10-31,1,,,
1567101.0,2013-11-29,1,-0.002499,0.997501,-0.002499
1567101.0,2013-12-31,1,-0.006302,0.993698,-0.008786
1567101.0,2014-01-31,1,0.014236,1.014236,0.005325
1567101.0,2014-02-28,1,0.005596,1.005596,0.010951
...,...,...,...,...,...
1860434.0,2021-11-30,2,-0.008976,0.991024,-0.013280
1860434.0,2021-12-31,3,0.016158,1.016158,0.002663
1860434.0,2022-01-31,3,-0.068211,0.931789,-0.065729
1860434.0,2022-02-28,3,-0.023373,0.976627,-0.087566


In [97]:
df_from_filings()

Unnamed: 0,cik,report_type,report_identity,file,text
0,0001566196,N-CSR,0001193125-13-485974,full-submission.txt,UNITED STATES SECURITIES AND EXCHANGE COMMISS...
1,0001566196,N-CSR,0001193125-14-452923,full-submission.txt,UNITED STATES SECURITIES AND EXCHANGE COMMISS...
2,0001566196,N-CSR,0001193125-16-805370,full-submission.txt,UNITED STATES SECURITIES AND EXCHANGE COMMISS...
3,0001566196,N-CSR,0001193125-15-414451,full-submission.txt,UNITED STATES SECURITIES AND EXCHANGE COMMISS...
4,0001566196,N-CSR,0001193125-17-380306,full-submission.txt,UNITED STATES SECURITIES AND EXCHANGE COMMISS...
...,...,...,...,...,...
2639,0001864290,N-CSRS,0001821268-22-000009,full-submission.txt,UNITED STATES SECURITIES AND EXCHANGE COMMISS...
2640,0001864843,N-CSR,0001193125-22-065581,full-submission.txt,UNITED STATES SECURITIES AND EXCHANGE COMMISS...
2641,0001865389,N-CSRS,0001193125-22-098364,full-submission.txt,UNITED STATES SECURITIES AND EXCHANGE COMMISS...
2642,0001866278,N-CSR,0001580642-21-005722,full-submission.txt,united states securities and exchange commiss...


In [96]:
df = monthly_returns_all_funds_aggregated.reset_index()
df[df['caldt'].dt.month ==6]

AttributeError: Can only use .dt accessor with datetimelike values

In [92]:
monthly_returns_all_funds_aggregated.reset_index()['caldt'][0]

datetime.date(2013, 10, 31)