In [1]:
import pandas as pd
import numpy as np
import requests
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style='darkgrid')

from sklearn.metrics import r2_score

def get_fund_k_history(fund_code: str, pz: int = 1000, page: int = 1,
                       timeout: float = 10.0) -> pd.DataFrame:
    '''
    Fetch one page of historical net-value records for a fund.

    Parameters
    ----------
    fund_code : 6-digit fund code
    pz : page size, sent to the API as ``pageSize``
    page : page index, sent to the API as ``pageIndex`` (1 is the newest page)
    timeout : seconds to wait for the HTTP response before giving up

    Return
    ------
    DataFrame : columns ``date``, ``NAV``, ``NAV-Cum`` and ``change``.
        ``date`` is parsed to datetime and the other three columns to numeric;
        values that cannot be parsed become NaT / NaN. When the response
        carries no records the frame is empty but keeps the same columns.

    Raises
    ------
    requests.HTTPError : the server answered with an error status code
    '''
    # Request headers
    # NOTE(review): the token below is hard-coded; consider moving it to configuration.
    EastmoneyFundHeaders = {
        'User-Agent': 'EMProjJijin/6.2.8 (iPhone; iOS 13.6; Scale/2.00)',
        'GTOKEN': '98B423068C1F4DEF9842F82ADF08C5db',
        'clientInfo': 'ttjj-iPhone10,1-iOS-iOS13.6',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'fundmobapi.eastmoney.com',
        'Referer': 'https://mpservice.com/516939c37bdb4ba2b1138c50cf69a2e1/release/pages/FundHistoryNetWorth',
    }
    # Request parameters
    data = {
        'FCODE': f'{fund_code}',
        'appType': 'ttjj',
        'cToken': '1',
        'deviceid': '1',
        'pageIndex': f'{page}',
        'pageSize': f'{pz}',
        'plat': 'Iphone',
        'product': 'EFund',
        'serverVersion': '6.2.8',
        'version': '6.2.8'
    }
    url = 'https://fundmobapi.eastmoney.com/FundMNewApi/FundMNHisNetList'
    # A timeout keeps a stalled connection from hanging the notebook kernel.
    response = requests.get(
        url, headers=EastmoneyFundHeaders, data=data, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    json_response = response.json()

    columns = ['date', 'NAV', 'NAV-Cum', 'change']
    # Treat a null body, a missing 'Datas' key and a null 'Datas' value all as
    # "no records" rather than crashing on len(None) / a KeyError.
    datas = (json_response or {}).get('Datas') or []
    if not datas:
        return pd.DataFrame([], columns=columns)

    df = pd.DataFrame(
        [
            {
                'date': record['FSRQ'],
                'NAV': record['DWJZ'],
                'NAV-Cum': record['LJJZ'],
                'change': record['JZZZL'],
            }
            for record in datas
        ],
        columns=columns,
    )
    # Convert every numeric column, including 'change', so downstream
    # arithmetic never meets raw strings.
    for numeric_column in ('NAV', 'NAV-Cum', 'change'):
        df[numeric_column] = pd.to_numeric(df[numeric_column], errors='coerce')
    df['date'] = pd.to_datetime(df['date'], errors='coerce')
    return df


In [2]:
# Fund code -> label. Labels follow the pattern "<theme> - test" / "<theme> - hold".
fund_codes = {
    '003318': "500dibo - test", 
    '019359': "500dibo - hold", 
    '014907': "xiaofeidianzi - test",
    '018897': "xiaofeidianzi - hold",
    '008021': "rengongzhineng - test",
    '012734': "rengongzhineng - hold",
    '012737': "chuangxinyao - test",
    '019667': "chuangxinyao - hold",
    '501012': "zhongyao - test",
    '016892': "zhongyao - hold",
    '005562': "honglidibo - test",
    '020603': "honglidibo - hold",
    # '004424': "yule",
    # '004753': "chuanmei", -- not too correlated
}

# Download the NAV history of every fund and tag each frame with its code and label.
l_dfs = []
for fund_code, fund_name in fund_codes.items():
    fund_data = get_fund_k_history(fund_code).assign(code=fund_code, name=fund_name)
    l_dfs.append(fund_data)

# Stack the per-fund frames; each fund keeps its own 0..n-1 row index.
df_fund = pd.concat(l_dfs)
df_fund.head()

Unnamed: 0,date,NAV,NAV-Cum,change,code,name
0,2024-04-09,1.3326,1.3326,0.61,3318,500dibo - test
1,2024-04-08,1.3245,1.3245,-1.38,3318,500dibo - test
2,2024-04-03,1.3431,1.3431,0.19,3318,500dibo - test
3,2024-04-02,1.3406,1.3406,0.05,3318,500dibo - test
4,2024-04-01,1.3399,1.3399,1.85,3318,500dibo - test


In [3]:
# Earliest available date per fund label.
df_fund.groupby('name')['date'].agg('min')

name
500dibo - hold          2023-09-11
500dibo - test          2020-02-28
chuangxinyao - hold     2023-11-14
chuangxinyao - test     2021-07-07
honglidibo - hold       2024-03-12
honglidibo - test       2020-02-28
rengongzhineng - hold   2022-03-01
rengongzhineng - test   2020-04-23
xiaofeidianzi - hold    2023-10-17
xiaofeidianzi - test    2022-02-16
zhongyao - hold         2022-11-11
zhongyao - test         2020-02-28
Name: date, dtype: datetime64[ns]

In [4]:
# Regress the "hold" fund's NAV on the "test" fund's NAV for the 500dibo pair.
# The two series are inner-joined on `date` so every (test, hold) pair comes
# from the same day; pairing by row position silently misaligns the data (or
# fails with a shape error) whenever one fund is missing a date.
pair_start = '2023-09-11'
in_window = df_fund['date'] >= pair_start
test_nav = df_fund.loc[in_window & (df_fund['name'] == '500dibo - test'), ['date', 'NAV']]
hold_nav = df_fund.loc[in_window & (df_fund['name'] == '500dibo - hold'), ['date', 'NAV']]
# dropna removes rows whose NAV or date could not be parsed upstream.
paired = test_nav.merge(hold_nav, on='date', suffixes=('_test', '_hold')).dropna()
assert len(paired) >= 2, 'need at least two common dates to fit a line'

s1, s2 = paired['NAV_test'], paired['NAV_hold']
X_b = np.c_[np.ones(len(s1)), s1.to_numpy()]  # design matrix: intercept column + test NAV
# lstsq solves the least-squares problem directly instead of inverting X'X.
theta_best, *_ = np.linalg.lstsq(X_b, s2.to_numpy(), rcond=None)

intercept, slope = theta_best[0], theta_best[1]
print(s1.corr(s2))
print(intercept, slope)
r2_score(s2, intercept + slope * s1)

0.9999780427227951
-0.0073434564833321225 1.0048900481553327


0.9999560859277115

In [5]:
# Regress the "hold" fund's NAV on the "test" fund's NAV for the chuangxinyao pair.
# The two series are inner-joined on `date` so every (test, hold) pair comes
# from the same day; pairing by row position silently misaligns the data (or
# fails with a shape error) whenever one fund is missing a date.
pair_start = '2023-11-24'
in_window = df_fund['date'] >= pair_start
test_nav = df_fund.loc[in_window & (df_fund['name'] == 'chuangxinyao - test'), ['date', 'NAV']]
hold_nav = df_fund.loc[in_window & (df_fund['name'] == 'chuangxinyao - hold'), ['date', 'NAV']]
# dropna removes rows whose NAV or date could not be parsed upstream.
paired = test_nav.merge(hold_nav, on='date', suffixes=('_test', '_hold')).dropna()
assert len(paired) >= 2, 'need at least two common dates to fit a line'

s1, s2 = paired['NAV_test'], paired['NAV_hold']
X_b = np.c_[np.ones(len(s1)), s1.to_numpy()]  # design matrix: intercept column + test NAV
# lstsq solves the least-squares problem directly instead of inverting X'X.
theta_best, *_ = np.linalg.lstsq(X_b, s2.to_numpy(), rcond=None)

intercept, slope = theta_best[0], theta_best[1]
print(s1.corr(s2))

print(intercept, slope)
r2_score(s2, intercept + slope * s1)

0.9998129071548827
0.024832455949158394 1.5806351127281


0.999625849313498

In [6]:
# Regress the "hold" fund's NAV on the "test" fund's NAV for the rengongzhineng pair.
# The two series are inner-joined on `date` so every (test, hold) pair comes
# from the same day; pairing by row position silently misaligns the data (or
# fails with a shape error) whenever one fund is missing a date.
pair_start = '2022-03-11'
in_window = df_fund['date'] >= pair_start
test_nav = df_fund.loc[in_window & (df_fund['name'] == 'rengongzhineng - test'), ['date', 'NAV']]
hold_nav = df_fund.loc[in_window & (df_fund['name'] == 'rengongzhineng - hold'), ['date', 'NAV']]
# dropna removes rows whose NAV or date could not be parsed upstream.
paired = test_nav.merge(hold_nav, on='date', suffixes=('_test', '_hold')).dropna()
assert len(paired) >= 2, 'need at least two common dates to fit a line'

s1, s2 = paired['NAV_test'], paired['NAV_hold']
X_b = np.c_[np.ones(len(s1)), s1.to_numpy()]  # design matrix: intercept column + test NAV
# lstsq solves the least-squares problem directly instead of inverting X'X.
theta_best, *_ = np.linalg.lstsq(X_b, s2.to_numpy(), rcond=None)

intercept, slope = theta_best[0], theta_best[1]
print(s1.corr(s2))

print(intercept, slope)
r2_score(s2, intercept + slope * s1)

0.9919135986066606
0.03143138548282798 1.1966602735037515


0.983892587100815

In [7]:
# Regress the "hold" fund's NAV on the "test" fund's NAV for the xiaofeidianzi pair.
# The two series are inner-joined on `date` so every (test, hold) pair comes
# from the same day; pairing by row position silently misaligns the data (or
# fails with a shape error) whenever one fund is missing a date.
pair_start = '2023-10-25'
in_window = df_fund['date'] >= pair_start
test_nav = df_fund.loc[in_window & (df_fund['name'] == 'xiaofeidianzi - test'), ['date', 'NAV']]
hold_nav = df_fund.loc[in_window & (df_fund['name'] == 'xiaofeidianzi - hold'), ['date', 'NAV']]
# dropna removes rows whose NAV or date could not be parsed upstream.
paired = test_nav.merge(hold_nav, on='date', suffixes=('_test', '_hold')).dropna()
assert len(paired) >= 2, 'need at least two common dates to fit a line'

s1, s2 = paired['NAV_test'], paired['NAV_hold']
X_b = np.c_[np.ones(len(s1)), s1.to_numpy()]  # design matrix: intercept column + test NAV
# lstsq solves the least-squares problem directly instead of inverting X'X.
theta_best, *_ = np.linalg.lstsq(X_b, s2.to_numpy(), rcond=None)

intercept, slope = theta_best[0], theta_best[1]
print(s1.corr(s2))

print(intercept, slope)
r2_score(s2, intercept + slope * s1)

0.9987492317340898
-0.043927512087530596 1.4333848766908623


0.9975000278894344

In [8]:
# Dates on which the zhongyao "test" fund has a record but the "hold" fund does not.
# Both date sets are built once and compared with a vectorised `isin`, instead of
# re-filtering df_fund and recomputing unique() for every test date.
check_start = '2022-11-11'
in_window = df_fund['date'] >= check_start
test_dates = df_fund.loc[in_window & (df_fund['name'] == 'zhongyao - test'), 'date']
hold_dates = df_fund.loc[in_window & (df_fund['name'] == 'zhongyao - hold'), 'date']
# drop_duplicates keeps first-appearance order, matching the order unique() produced.
test_dates[~test_dates.isin(hold_dates)].drop_duplicates().tolist()


[Timestamp('2022-11-28 00:00:00'),
 Timestamp('2022-11-24 00:00:00'),
 Timestamp('2022-11-23 00:00:00'),
 Timestamp('2022-11-22 00:00:00'),
 Timestamp('2022-11-21 00:00:00'),
 Timestamp('2022-11-17 00:00:00'),
 Timestamp('2022-11-16 00:00:00'),
 Timestamp('2022-11-15 00:00:00'),
 Timestamp('2022-11-14 00:00:00')]

In [9]:
# Regress the "hold" fund's NAV on the "test" fund's NAV for the zhongyao pair.
# The two series are inner-joined on `date` so every (test, hold) pair comes
# from the same day; pairing by row position silently misaligns the data (or
# fails with a shape error) whenever one fund is missing a date.
pair_start = '2022-11-29'
in_window = df_fund['date'] >= pair_start
test_nav = df_fund.loc[in_window & (df_fund['name'] == 'zhongyao - test'), ['date', 'NAV']]
hold_nav = df_fund.loc[in_window & (df_fund['name'] == 'zhongyao - hold'), ['date', 'NAV']]
# dropna removes rows whose NAV or date could not be parsed upstream.
paired = test_nav.merge(hold_nav, on='date', suffixes=('_test', '_hold')).dropna()
assert len(paired) >= 2, 'need at least two common dates to fit a line'

s1, s2 = paired['NAV_test'], paired['NAV_hold']
X_b = np.c_[np.ones(len(s1)), s1.to_numpy()]  # design matrix: intercept column + test NAV
# lstsq solves the least-squares problem directly instead of inverting X'X.
theta_best, *_ = np.linalg.lstsq(X_b, s2.to_numpy(), rcond=None)

intercept, slope = theta_best[0], theta_best[1]
print(s1.corr(s2))

print(intercept, slope)
r2_score(s2, intercept + slope * s1)

0.9982268575694311
0.011480447566149451 0.7644602284518763


0.9964568591729415

In [10]:
# Regress the "hold" fund's NAV on the "test" fund's NAV for the honglidibo pair.
# The two series are inner-joined on `date` so every (test, hold) pair comes
# from the same day; pairing by row position silently misaligns the data (or
# fails with a shape error) whenever one fund is missing a date.
pair_start = '2024-03-14'
in_window = df_fund['date'] >= pair_start
test_nav = df_fund.loc[in_window & (df_fund['name'] == 'honglidibo - test'), ['date', 'NAV']]
hold_nav = df_fund.loc[in_window & (df_fund['name'] == 'honglidibo - hold'), ['date', 'NAV']]
# dropna removes rows whose NAV or date could not be parsed upstream.
paired = test_nav.merge(hold_nav, on='date', suffixes=('_test', '_hold')).dropna()
assert len(paired) >= 2, 'need at least two common dates to fit a line'

s1, s2 = paired['NAV_test'], paired['NAV_hold']
X_b = np.c_[np.ones(len(s1)), s1.to_numpy()]  # design matrix: intercept column + test NAV
# lstsq solves the least-squares problem directly instead of inverting X'X.
theta_best, *_ = np.linalg.lstsq(X_b, s2.to_numpy(), rcond=None)

intercept, slope = theta_best[0], theta_best[1]
print(s1.corr(s2))

print(intercept, slope)
r2_score(s2, intercept + slope * s1)

0.9935372439823131
0.07927750001695077 0.504527891091394


0.9871162551799711