## IC均值加权

### 对 factor013、factor015、factor016 合成因子

In [26]:
import pandas as pd
import numpy as np

# Load the factor table produced by the Gram-Schmidt orthogonalization step.
orthogonalized_factors_df = pd.read_csv('zhengjiao2.csv')

# Load one IC table per file, kept in the same order as the file list.
ic_files = ['ic013.csv', 'ic015.csv', 'ic016.csv']
ics = [pd.read_csv(file) for file in ic_files]

# 合成因子
def weighted_factor_synthesis(orthogonalized_factors_df, ics):
    """Combine factor values per date, weighted by each IC table.

    For every IC table in ``ics`` and every date it shares with
    ``orthogonalized_factors_df``, the ``factor`` values of that date are
    multiplied by the IC weight of that date and summed into one number.

    Args:
        orthogonalized_factors_df: DataFrame with at least ``date`` and
            ``factor`` columns.
        ics: Iterable of DataFrames, each with a ``date`` column. The weight
            of a row is the row-wise mean of every column after the first
            one (the first column is skipped positionally -- presumably it
            is ``date``; verify against the IC files).

    Returns:
        DataFrame with columns ``date`` and ``synthesized_factor``, one row
        per (IC table, shared date) pair. Rows follow the order of ``ics``
        and, within one table, the order in which dates first appear in it.
        The frame is empty (but still has both columns) when nothing overlaps.
    """
    columns = ['date', 'synthesized_factor']

    # Group the factor values once instead of re-scanning the whole table
    # with a boolean mask for every (IC table, date) pair.
    factors_by_date = {
        date: values.to_numpy()
        for date, values in orthogonalized_factors_df.groupby('date')['factor']
    }

    rows = []
    for ic_df in ics:
        # Row-wise mean of all columns after the first is the IC weight.
        ic_mean = ic_df.iloc[:, 1:].mean(axis=1)

        seen = set()
        for date, ic_weight in zip(ic_df['date'], ic_mean):
            # Only dates present in the factor table contribute; when an IC
            # table repeats a date, its first row supplies the weight.
            if date in seen or date not in factors_by_date:
                continue
            seen.add(date)
            rows.append({
                'date': date,
                'synthesized_factor': np.sum(factors_by_date[date] * ic_weight),
            })

    # Build the frame once: concatenating inside the loop is quadratic and
    # starting from an empty frame leaves object-dtype columns.
    return pd.DataFrame(rows, columns=columns)

# Build the IC-weighted composite factor from the inputs loaded above.
combined_factor = weighted_factor_synthesis(
    orthogonalized_factors_df=orthogonalized_factors_df,
    ics=ics,
)

# Save the composite factor to CSV (the row index is written as well).
combined_factor.to_csv('icmeanfactor.csv')


## 加权

In [13]:
import pandas as pd

# Load the composite factor rows from icmeanfactor.csv.
icmea_df = pd.read_csv('icmeanfactor.csv')

# Sum synthesized_factor over all rows that share a date.
per_date_totals = icmea_df.groupby(by='date')['synthesized_factor'].sum()
icmea_sum = per_date_totals.reset_index()

# Keep a single row per date.
icmea_unique = icmea_sum.drop_duplicates(subset=['date'])

# Order the rows by date.
icmea_sorted = icmea_unique.sort_values(by='date')

# Write the per-date result to a new CSV file without the row index.
icmea_sorted.to_csv('icmean_date.csv', index=False)


## 映射

In [11]:
import pandas as pd

# Load the per-date composite factor and the (date, asset) rows to map onto.
icmean_date = pd.read_csv('icmean_date.csv')
factor_data = pd.read_csv('factor_data013.csv')

# Outer-join on date so every (date, asset) row of factor_data is kept,
# whether or not a composite factor exists for that date.
merged_data = pd.merge(icmean_date, factor_data[['date', 'asset']], on='date', how='outer')

# Drop rows without an asset (e.g. dates that appear only in icmean_date).
merged_data = merged_data.dropna(subset=['asset'])

# Fill missing synthesized_factor values with the column mean.
# The result is assigned back instead of calling fillna(inplace=True) on the
# column selection: that form is chained assignment, which pandas deprecates
# and which does not modify merged_data under Copy-on-Write.
mean_value = merged_data['synthesized_factor'].mean()
merged_data['synthesized_factor'] = merged_data['synthesized_factor'].fillna(mean_value)

merged_data = merged_data[['date', 'asset', 'synthesized_factor']]
# Save the result to a new CSV file without the row index.
merged_data.to_csv('icmean_date_asset.csv', index=False)


In [1]:
import pandas as pd
# Load the mapped result and preview its first rows.
df = pd.read_csv('icmean_date_asset.csv')
df.head()

Unnamed: 0,date,asset,synthesized_factor
0,2021-01-06,300612.SZ,-0.000567
1,2021-01-06,600526.SH,0.000131
2,2021-01-06,600527.SH,-0.005436
3,2021-01-06,600528.SH,-0.008112
4,2021-01-06,600529.SH,-0.003674
