In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler,LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU,Dense
from tqdm import tqdm


In [76]:
data = pd.read_csv('./raw_stocks.csv')
#单独取出股票的数据以及去除退市、st的数据
data = data.loc[(data.loc[:,'status']==1)&(data.loc[:,'isST']==0),:]
data.loc[:,['amount','volume']] = data.loc[:,['amount','volume']].fillna(0)
data = data.fillna(1)
data.loc[:,['open', 'high', 'low', 'close', 'preclose', 'volume','amount', 'pbMRQ', \
            'peTTM', 'turn', 'tradestatus', 'pctChg']]= \
                data.loc[:,['open', 'high', 'low',  'close', 'preclose', 'volume','amount',\
                             'pbMRQ', 'peTTM', 'turn', 'tradestatus', 'pctChg']].astype('float')
data['date'] = pd.to_datetime(data['date'])

codes = data.loc[:,'code'].unique().tolist()
min_date = data.date.min()
max_date = data.date.max()
date_series = data.date.unique()
date_series = sorted(date_series)
date_list = date_series*len(codes)

ndate = len(date_series)
code_ndate_list = []
for i in codes:
    temp_l = [i]*ndate
    code_ndate_list.extend(temp_l)
code_ndate_df = pd.DataFrame(zip(code_ndate_list,date_list))
code_ndate_df.columns=['code','date']

code_ndate_df = code_ndate_df.merge(data,how='left',on=['code','date'])
#data = data.merge(date_df,how='left',left_on='date',right_on='date_dt')
#data.drop(columns = ['date_dt','is_holiday'],inplace=True)
#data.head(10)
data = code_ndate_df
del code_ndate_df

data.sort_values(by=['code','date'],inplace=True)

data.reset_index(drop=True,inplace=True)

data.loc[:,'isonline'] = 1
data.loc[data.close.isna(),'isonline']=0

"""
data.loc[:,['open', 'high', 'low', 'close', 'preclose', 'volume','amount', 'pbMRQ', 'peTTM', 'turn', 'tradestatus', 'pctChg','industry']]\
    = data.groupby(['code'])[['open', 'high', 'low', 'close', 'preclose', 'volume','amount', 'pbMRQ', \
            'peTTM', 'turn', 'tradestatus', 'pctChg','industry']].fillna(method='bfill')
"""


data['year'] = data['date'].dt.year
data['month'] = data['date'].dt.month
data['day'] = data['date'].dt.day
data['dayofweek'] = data['date'].dt.dayofweek
data.loc[:,'market']= data.code.str[0:2]
#data_sh = data.loc[data.loc[:,'market']=='sh',:]
data = data.loc[data.loc[:,'market']=='sz',:]
data = data.drop(columns=['market'])

In [77]:

nna = data.loc[data.loc[:,'close'].isna(),:].groupby('code',as_index=False)['date'].count()
data = data.loc[~data.code.isin(nna.code),:]


In [78]:
data.isnull().sum()

code           0
date           0
open           0
high           0
low            0
close          0
preclose       0
volume         0
amount         0
pbMRQ          0
peTTM          0
turn           0
tradestatus    0
pctChg         0
isST           0
industry       0
type           0
status         0
isonline       0
year           0
month          0
day            0
dayofweek      0
dtype: int64

In [79]:
gen_trend = data.groupby(['date'],as_index=False)[['volume','amount']].sum()
gen_trend = gen_trend.sort_values(by=['date'],ascending=True).reset_index(drop=True)
#增加行业一天以上的数据情况
industry_trend = data.groupby(['date','industry'],as_index=False)[['volume','amount']].sum()
industry_trend = industry_trend.sort_values(by=['industry','date'],ascending=True).reset_index(drop=True)

In [80]:
date_series = pd.date_range(start=min_date,end = max_date,freq='D')
date_df =pd.DataFrame(date_series)
date_df.columns=['date']
#针对不同类型的holiday做了不同权重的分配
from chinese_calendar import is_holiday,get_holiday_detail
#from sklearn.preprocessing import LabelEncoder
#lencoder = LabelEncoder()
holidays = {"New Year's Day":1,"Spring Festival":7,"Mid-autumn Festival":3,"Labour Day":2,"Tomb-sweeping Day":4,"Dragon Boat Festival":3,"National Day":6}
date_df.loc[:,'is_holiday'] = pd.to_datetime(date_df.loc[:,'date']).apply(lambda x:get_holiday_detail(x)[1])
date_df.loc[:,'is_holiday'] = date_df.loc[:,'is_holiday'].apply(lambda x:holidays[x] if x in holidays else 0)
date_df.loc[:,'is_holiday'].groupby(date_df.loc[:,'is_holiday']).count()
date_df.loc[:,'last_holiday'] = date_df.loc[:,'is_holiday'].shift(1)
date_df.loc[:,'next_holiday'] = date_df.loc[:,'is_holiday'].shift(-1)
date_df.loc[:,'last2_holiday'] = date_df.loc[:,'is_holiday'].shift(2)
date_df.loc[:,'next2_holiday'] = date_df.loc[:,'is_holiday'].shift(-2)
date_df.loc[:,['last_holiday','next_holiday','last2_holiday','next2_holiday']]=date_df.loc[:,['last_holiday','next_holiday','last2_holiday','next2_holiday']].fillna(0)
date_df.loc[:,['last_holiday','next_holiday','last2_holiday','next2_holiday']] = date_df.loc[:,['last_holiday','next_holiday','last2_holiday','next2_holiday']].astype('int')
date_df.loc[:,'date_dt']= pd.to_datetime(date_df.loc[:,'date'])
date_df.drop(columns=['date'],inplace=True)
data.date = pd.to_datetime(data.date)


In [81]:
data = data.merge(date_df,how='left',left_on='date',right_on='date_dt')

data.drop(columns=['date_dt','is_holiday'],inplace=True)


In [82]:
gen_trend.rename(columns ={'amount':'market_amt','volume':'market_vol'},inplace=True)
industry_trend.rename(columns = {'amount':'indu_amount','volume':'indu_volume'},inplace=True)
data = data.merge(gen_trend,how='left',on='date')
data = data.merge(industry_trend,how='left',on=['date','industry'])

In [83]:
windows = [3,5,12,20]  # 可以根据需要调整窗口大小
rol_cols = []
for window in windows:
    rolling_mean = data.groupby(['code'],as_index=False)[['open','high','low','close','volume','amount']].rolling(window=window).mean()
    rolling_mean.columns = ['code']+[f'{col}_roll_mean_{window}' for col in rolling_mean.columns if col not in ['code']]
    rolling_mean.drop(columns=['code'],inplace=True)
    rolling_std = data.groupby(['code'],as_index=False)[['open','high','low','close','volume','amount','pbMRQ','peTTM','turn']].rolling(window=window).std()
    rolling_std.columns = ['code']+[f'{col}_roll_std_{window}' for col in rolling_std.columns if col not in ['code']]
    rolling_std.drop(columns=['code'],inplace=True)
    df = pd.concat([rolling_mean,rolling_std],axis=1)
    rol_cols.append(df)
lags = [5,12,20]  # 可以根据需要调整滞后阶数
for lag in lags:
    shift_value = data.groupby(['code'],as_index=False)[['open','high','low','close','volume','amount','pbMRQ','peTTM','turn']].shift(lag)
    shift_value.columns = [f'{col}_roll_shift_{lag}' for col in shift_value.columns if col not in ['code']]
    #shift_value.drop(columns=['code'],inplace=True)
    rol_cols.append(shift_value)
leads = [-3,-4,-5,-6,-7]

y_cols=[]
for lead in leads:
    shift_value = data.groupby(['code'],as_index=False)[['close']].shift(lead)
    shift_value.loc[:,'increase'] = shift_value.loc[:,'close']/data.loc[:,'close']-1
    shift_value.loc[:,'if_inc'] = 0
    shift_value.loc[shift_value.loc[:,'increase']>=0.1,'if_inc']=1
    new_shift_value = pd.DataFrame(shift_value.loc[:,'if_inc'])
    temp_cols = [f'{col}_increase_lead_{lead}' for col in new_shift_value.columns if col not in ['code']]
    new_shift_value.columns = temp_cols
    rol_cols.append(new_shift_value)
    y_cols.extend(temp_cols)
df = pd.concat(rol_cols,axis=1)

data = pd.concat([data,df],axis=1)
del df,date_df

In [84]:
desc_y_cols = data.loc[:,y_cols].astype('int')
desc_y_cols.loc[:,'sum_col'] = desc_y_cols.sum(axis=1)
desc_y_cols.loc[:,'if_inc'] = desc_y_cols.loc[:,'sum_col'].apply(lambda x:1 if x>0 else 0 )
#desc_y_cols.loc[:,'if_inc'].groupby(desc_y_cols.loc[:,'if_inc']).count()
data.loc[:,'if_inc'] = desc_y_cols.loc[:,'if_inc']

In [85]:
data.loc[:,'if_inc'].groupby(data.loc[:,'if_inc']).count()

if_inc
0    1579035
1     203517
Name: if_inc, dtype: int64

In [86]:
ts_cols = data.columns.tolist()
ts_cols = list(set(ts_cols)-set(['date','code','isST','type','status','industry','if_inc'])-set(y_cols))
ts_cols

['volume_roll_std_5',
 'close_roll_shift_20',
 'close_roll_shift_12',
 'turn_roll_std_20',
 'turn_roll_std_3',
 'isonline',
 'peTTM',
 'amount_roll_mean_5',
 'peTTM_roll_std_3',
 'next_holiday',
 'high_roll_shift_12',
 'open_roll_std_20',
 'pbMRQ_roll_shift_12',
 'preclose',
 'close_roll_std_20',
 'high',
 'turn',
 'high_roll_std_5',
 'volume_roll_mean_12',
 'amount_roll_shift_20',
 'open_roll_shift_12',
 'low_roll_shift_12',
 'turn_roll_shift_12',
 'volume_roll_shift_20',
 'last2_holiday',
 'open_roll_std_3',
 'pbMRQ_roll_shift_5',
 'open',
 'open_roll_std_12',
 'volume_roll_std_3',
 'open_roll_mean_5',
 'turn_roll_std_5',
 'peTTM_roll_shift_12',
 'pbMRQ',
 'close_roll_std_12',
 'high_roll_std_12',
 'amount_roll_shift_5',
 'close_roll_shift_5',
 'open_roll_mean_20',
 'amount_roll_mean_12',
 'close_roll_mean_3',
 'low_roll_std_3',
 'amount',
 'amount_roll_mean_3',
 'close_roll_mean_20',
 'volume_roll_mean_20',
 'pbMRQ_roll_std_20',
 'close_roll_std_3',
 'peTTM_roll_std_5',
 'indu_amoun

In [87]:
code_encoder = LabelEncoder()
ind_encoder = LabelEncoder()
data.loc[:,'code'] = code_encoder.fit_transform(data.loc[:,'code'])
data.loc[:,'industry'] = ind_encoder.fit_transform(data.loc[:,'industry'].astype('str'))

#scaler = StandardScaler()
#data.loc[:,ts_cols] = scaler.fit_transform(data.loc[:,ts_cols])

In [88]:
x_cols = ts_cols+['code','industry']

In [89]:
data.dropna(subset=['amount_roll_shift_20'],inplace=True,how='any')

In [90]:
a = data.isnull().sum()
a[a>0]

Series([], dtype: int64)

In [91]:
train_length = 132
steps = 7


In [18]:
import numpy as np
from scipy.interpolate import interp1d


def time_warp_multivariate(time_series,cat_n=2,distortion_factor=0.1):
    """
    对多维时间序列应用时间扭曲。
    
    参数:
    - time_series: 多维时间序列数据 (numpy array)
    - distortion_factor: 扭曲强度因子
    
    返回:
    - warped_series: 扭曲后的多维时间序列数据
    """
    n_samples, n_timesteps, n_features = time_series.shape
    warped_series = np.empty_like(time_series)
    num_features = n_features-cat_n
    for i in range(n_samples):
        for j in range(n_features):
            if j < num_features:
                original_series = time_series[i, :, j]
                # 扭曲原始时间轴
                original_time = np.arange(n_timesteps)
                distortion = distortion_factor * np.random.randn(n_timesteps)
                warped_time = original_time + distortion
                warped_time = np.clip(warped_time, 0, n_timesteps - 1)
                
                # 插值以生成扭曲后的时间序列
                interpolation = interp1d(warped_time, original_series, kind='linear', fill_value='extrapolate')
                warped_series[i, :, j] = interpolation(original_time)
            else:
                warped_series[i, :, j] = original_time
    return warped_series

In [92]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

def create_date_window(df, date_col, code_col, x_cols, label_col, train_length, steps,ratio=0.6,keep=-3):
    # 获取唯一的代码列表
    codes = df[code_col].unique()
    
    X_train,X_eval,X_test, y_train,y_eval,y_test,X_pred = [], [],[],[],[],[],[]
    
    for code in tqdm(codes):
        # 筛选出特定代码的数据并按日期排序
        df_code = df[df[code_col] == code].sort_values(by=date_col)
        
        dates = df_code[date_col].values
        labels = df_code[label_col].values
        data = df_code[x_cols].astype('float32').values
        
        scaler = MinMaxScaler()
        data = scaler.fit_transform(data)

        n = len(dates)
        m = n%steps
        s = n//(train_length+steps)
           
        X,y = [],[]
        for i in range(m, n - train_length, steps):
            end_index = i + train_length
            
            temp_ar = data[i:end_index]
            temp_labels = labels[i:end_index]
            if temp_ar.shape[0] == train_length:
                X.append(temp_ar)
                y.append(temp_labels[-1])

        if len(X) != 0:
            #ratio=0.6
            train_size = int(ratio*(len(X)-1))
            eval_size = int((1-ratio)/2*(len(X)-1))

            X_train.extend(X[0:train_size])
            y_train.extend(y[0:train_size])
            #X_eval.extend(X[train_size:(train_size+eval_size)])
            #y_eval.extend(y[train_size:(train_size+eval_size)])
            #X_test.extend(X[(train_size+eval_size):-1])
            #y_test.extend(y[(train_size+eval_size):-1])
            X_eval.extend(X[train_size:keep])
            y_eval.extend(y[train_size:keep])
            X_test.append(X[keep])
            y_test.append(y[keep])
            X_pred.append([X[-1]])

    X_train= np.array(X_train)
    X_eval = np.array(X_eval)
    y_train = np.array(y_train,dtype=np.float32)
    y_eval=np.array(y_eval,dtype=np.float32)
    X_test = np.array(X_test)
    y_test = np.array(y_test,dtype=np.float32)
    X_pred = np.array(X_pred)

    #X_train = time_warp_multivariate(X_train, distortion_factor=0.1)
    #X_eval = time_warp_multivariate(X_eval, distortion_factor=0.1)
    #X_pred = time_warp_multivariate(X_pred, distortion_factor=0.1)

    
    return X_train,X_eval,X_test, y_train,y_eval,y_test,X_pred



df = pd.DataFrame(data)
del data
date_col = 'date'
code_col = 'code'
label_col = 'if_inc'

train_length = 132
steps = 7



X_train,X_eval,X_test, y_train,y_eval,y_test,X_pred = create_date_window(df, date_col, code_col, x_cols, label_col, train_length, steps,0.6,-2)

del df
#print("X shape:", X_train.shape)  # 输出应为 (num_samples, train_length, num_features)
#print("y shape:", y_train.shape)


100%|██████████| 2056/2056 [01:36<00:00, 21.32it/s]


In [66]:
#diy under sample
indices = np.where(y_train==0)[0]
ind_1 = np.where(y_train==1)[0]
X_train_cls_0 = X_train[indices]
X_train_cls_1 = X_train[ind_1]

ratio = y_train[y_train==1].shape[0]/y_train[y_train==0].shape[0]
sample_size = int(ratio*len(X_train_cls_0))
sample_indices = np.random.choice(len(X_train_cls_0),sample_size,replace=False)
sampled_X_train_cls_0 = X_train_cls_0[sample_indices]
sampled_y_train_cls_0 = y_train[sample_indices]
print(sampled_y_train_cls_0.shape)

resampled_train_X = np.concatenate([X_train_cls_1,sampled_X_train_cls_0],axis=0)
resampled_train_y = np.concatenate([y_train[ind_1],sampled_y_train_cls_0],axis=0)

order = np.arange(len(resampled_train_y))
np.random.shuffle(order)

resampled_train_X = resampled_train_X[order]
resampled_train_y = resampled_train_y[order]

del X_train_cls_0,X_train_cls_1,sampled_X_train_cls_0,sampled_y_train_cls_0,X_train,y_train

(14129,)


In [None]:
#smote 

In [32]:
del data

In [21]:
resampled_train_X.shape

(28258, 132, 114)

In [44]:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, GRU, Dense, Dropout,BatchNormalization,Bidirectional
from tensorflow.keras import initializers
from tensorflow.keras.metrics import Recall,Precision,F1Score,AUC
#from tensorflow.keras.optimizers import Adam,RMSprop
from tensorflow.keras.optimizers import legacy
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping
#from tensorflow.keras.optimizers.legacy import Adam
#class_weights = {0: 1.0, 1: y_train[y_train==0].shape[0]/y_train[y_train==1].shape[0]}
#class_weights = {0: 1.0, 1: resampled_train_y[resampled_train_y==0].shape[0]/resampled_train_y[resampled_train_y==1].shape[0]}
# 定义模型
model = Sequential()

# 添加 1D 卷积层：因为卷基层有更好的特征提取能力，kernel_size为3 会生成一个大小为3的时间窗提取特征，因此kernel_size越小，会提取更多更细微的特征。
model.add(Conv1D(filters=64, kernel_size=3, activation='relu',kernel_regularizer=l2(0.03), input_shape=(train_length, len(x_cols)),kernel_initializer=initializers.HeNormal()))
#批量归一化：作用1.对每一层的输入进行标准化 2.减少内部协变量偏移 3.提供正则化效果
model.add(BatchNormalization())  
#池化：提取重要特征 例：[1,2,3,4,5,6,7,8] 经过pool_size=2的池化就会提取出【2，4，6，8】
model.add(MaxPooling1D(pool_size=2))

# 添加更多卷积层：网络越深，可以增加kernel_size
model.add(Conv1D(filters=128, kernel_size=5, activation='relu',kernel_regularizer=l2(0.03)))
model.add(BatchNormalization())  
model.add(MaxPooling1D(pool_size=2))

# 添加 GRU 层
model.add(Bidirectional(GRU(128, return_sequences=True,kernel_initializer=initializers.HeNormal(),recurrent_dropout=0.3)))
model.add(BatchNormalization())  
model.add(Dropout(0.4))

# 添加更多的 GRU 层（可选）
model.add(Bidirectional(GRU(64,kernel_initializer=initializers.HeNormal(),recurrent_dropout=0.3)))
model.add(BatchNormalization())  
model.add(Dropout(0.4))
# 添加输出层
model.add(Dense(128, activation='relu', kernel_initializer=initializers.HeNormal(),kernel_regularizer=l2(0.03))) 
model.add(Dense(64, activation='relu', kernel_initializer=initializers.HeNormal(),kernel_regularizer=l2(0.03))) 
model.add(Dense(1, activation='sigmoid',kernel_initializer=initializers.HeNormal()))  # 对于回归任务可以使用 'linear'

optimizer = legacy.Adam(learning_rate=0.001,clipnorm=1.0)
#optimizer = RMSprop(learning_rate=0.001)
# 编译模型
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=[Recall(),Precision(),AUC(name='prc', curve='PR')])
early_stopping = EarlyStopping(
    monitor='val_precision_9', 
    mode = 'max',          # 监控的指标，这里选择验证集上的损失
    patience=10,                # 在指标停止改进后的容忍轮次
    restore_best_weights=True     # 是否恢复到最佳的权重
)

# 训练模型
#model.fit(X_train,y_train, epochs=50, batch_size=64, validation_data=(X_eval, y_eval), callbacks=[early_stopping],class_weight=class_weights)
model.fit(resampled_train_X,resampled_train_y, epochs=50, batch_size=64, validation_data=(X_eval, y_eval), callbacks=[early_stopping])


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50


<keras.src.callbacks.History at 0x1364f2130>

In [71]:
from tensorflow.keras.models import load_model
model = load_model('conv_gru_blance_prc.h5')
#model.save('conv_gru_blance_prc.h5')

#GRU 神经网络

In [101]:
y_pred = model.predict(X_test)



In [102]:
def evaluate_model(y_true,y_pred):
    #
    data = pd.DataFrame({'y_true':y_true,'y_pred':y_pred})
    data.loc[:,'rnk'] = data.loc[:,'y_pred'].rank(ascending=False)
    nsamples = len(data)
    data.loc[:,'pert'] = data.loc[:,'rnk']/nsamples
    data.loc[:,'cat']= pd.cut(data.loc[:,'y_pred'],bins=10)
    new_df = data.groupby(['cat'])['y_true'].agg(['count','sum'])
    new_df.loc[:,'id']= [i for i in range(len(new_df))]
    new_df.sort_values(by=['id'],ascending=False,inplace=True)
    new_df.reset_index(inplace=True)
    new_df.loc[:,'accsum']= new_df.loc[:,'sum'].cumsum()
    good_cnt = new_df.loc[:,'sum'].sum()
    new_df.loc[:,'recall'] = new_df.loc[:,'accsum']/good_cnt
    return new_df

In [103]:
result = pd.concat([pd.Series(y_pred.reshape((-1,))),pd.Series(y_test.reshape((-1,)))],axis=1,ignore_index=True)
result.columns = ['prob','act']
result = result.sort_values(by=['prob'],ascending=False).reset_index(drop=True)
print(result.loc[0:10,:])
print("top 10 acc: ",result.loc[0:10,'act'].sum())
cls_eval = evaluate_model(pd.Series(y_test.reshape((-1,))),pd.Series(y_pred.reshape((-1,))))
#print(f"dateis:{date_test[i]}")
print(cls_eval)

        prob  act
0   0.722259  0.0
1   0.717964  0.0
2   0.715934  1.0
3   0.712860  0.0
4   0.712605  0.0
5   0.712443  0.0
6   0.710410  0.0
7   0.710308  0.0
8   0.709085  0.0
9   0.708576  1.0
10  0.706418  0.0
top 10 acc:  2.0
              cat  count   sum  id  accsum    recall
0  (0.687, 0.722]     42   7.0   9     7.0  0.050000
1  (0.651, 0.687]     70   5.0   8    12.0  0.085714
2  (0.615, 0.651]    181  15.0   7    27.0  0.192857
3   (0.58, 0.615]    451  29.0   6    56.0  0.400000
4   (0.544, 0.58]    428  26.0   5    82.0  0.585714
5  (0.509, 0.544]    338  27.0   4   109.0  0.778571
6  (0.473, 0.509]    226  17.0   3   126.0  0.900000
7  (0.437, 0.473]    147  14.0   2   140.0  1.000000
8  (0.402, 0.437]     91   0.0   1   140.0  1.000000
9  (0.366, 0.402]     82   0.0   0   140.0  1.000000


In [None]:
y_pred

array([[0.22064424],
       [0.22064424],
       [0.22064424],
       ...,
       [0.22064424],
       [0.22064424],
       [0.22064424]], dtype=float32)

In [None]:
import tensorflow as tf
from tensorflow.keras import backend as K

def focal_loss(gamma=2.0, alpha=0.25):
    """
    Focal Loss for classification.
    :param gamma: Focusing parameter (default=2.0).
    :param alpha: Balance parameter (default=0.25).
    :return: A loss function.
    """
    def focal_loss_fixed(y_true, y_pred):
        # Ensure alpha is of float type
        #alpha = tf.cast(alpha, tf.float32)
        #gamma = tf.cast(gamma, tf.float32)
        
        # 1. Define epsilon for numerical stability
        epsilon = K.epsilon()
        
        # 2. Clip predictions to avoid log(0) issues
        y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
        
        # 3. Calculate alpha_t for each sample
        alpha_t = y_true * alpha + (K.ones_like(y_true) - y_true) * (1 - alpha)
        
        # 4. Calculate p_t for each sample
        p_t = y_true * y_pred + (K.ones_like(y_true) - y_true) * (1 - y_pred)
        
        # 5. Calculate focal loss
        fl = - alpha_t * K.pow((1 - p_t), gamma) * K.log(p_t)
        
        # 6. Return mean of focal loss
        return K.mean(fl)
    
    return focal_loss_fixed


In [None]:
model.save('cov_gru_128_loss_bidirection_features_blance.h5')

  saving_api.save_model(


### transformer


In [None]:
import tensorflow as tf

# Define the Positional Encoding layer to add time-related information to the input
class PositionalEncoding(tf.keras.layers.Layer):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        # Create a positional encoding matrix (max_len, d_model) with sine and cosine functions
        self.pos_encoding = self.positional_encoding(max_len, d_model)

    def get_angles(self, pos, i, d_model):
        angle_rates = 1 / tf.pow(10000, (2 * (i // 2)) / tf.cast(d_model, tf.float32))
        return pos * angle_rates

    def positional_encoding(self, max_len, d_model):
        angle_rads = self.get_angles(tf.range(max_len)[:, tf.newaxis],
                                     tf.range(d_model)[tf.newaxis, :],
                                     d_model)
        # Apply sin to even indices in the array; cos to odd indices
        angle_rads[:, 0::2] = tf.math.sin(angle_rads[:, 0::2])
        angle_rads[:, 1::2] = tf.math.cos(angle_rads[:, 1::2])

        pos_encoding = angle_rads[tf.newaxis, ...]
        return tf.cast(pos_encoding, tf.float32)

    def call(self, x):
        # Add positional encoding to the input tensor
        return x + self.pos_encoding[:, :tf.shape(x)[1], :]

# Multi-head attention layer
def create_attention_head(d_model, num_heads):
    return tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)

# Transformer Encoder Layer
def transformer_encoder_layer(d_model, num_heads, ff_dim, dropout_rate=0.1):
    inputs = tf.keras.Input(shape=(None, d_model))
    
    # Multi-head self-attention
    attention_output = create_attention_head(d_model, num_heads)(inputs, inputs)
    attention_output = tf.keras.layers.Dropout(dropout_rate)(attention_output)
    attention_output = tf.keras.layers.LayerNormalization(epsilon=1e-6)(attention_output + inputs)
    
    # Feed-forward network
    ff_output = tf.keras.layers.Dense(ff_dim, activation='relu')(attention_output)
    ff_output = tf.keras.layers.Dense(d_model)(ff_output)
    ff_output = tf.keras.layers.Dropout(dropout_rate)(ff_output)
    outputs = tf.keras.layers.LayerNormalization(epsilon=1e-6)(ff_output + attention_output)
    
    return tf.keras.Model(inputs=inputs, outputs=outputs)

# Full Transformer model for time series classification
def create_transformer_model(num_features, d_model, num_heads, ff_dim, num_layers, num_classes):
    inputs = tf.keras.Input(shape=(None, num_features))
    
    # Linear layer to map input features to d_model dimensions
    x = tf.keras.layers.Dense(d_model)(inputs)
    
    # Positional encoding layer
    x = PositionalEncoding(d_model)(x)
    
    # Stacking transformer encoder layers
    for _ in range(num_layers):
        x = transformer_encoder_layer(d_model, num_heads, ff_dim)(x)
    
    # Use the first time step (or another aggregation method) for classification
    x = tf.keras.layers.GlobalAveragePooling1D()(x)
    
    # Output layer for classification
    outputs = tf.keras.layers.Dense(num_classes, activation="softmax")(x)
    
    return tf.keras.Model(inputs=inputs, outputs=outputs)

# Example of training the transformer model for classification

# Assuming you have input data X_train of shape (batch_size, sequence_length, num_features)
X_train = tf.random.normal((32, 132, 114))  # 32 samples, 132 time steps, 114 features
y_train = tf.random.uniform((32,), maxval=2, dtype=tf.int32)  # Binary classification

# Create the model
model = create_transformer_model(num_features=114, d_model=64, num_heads=8, ff_dim=128, num_layers=2, num_classes=2)

# Compile the model with loss function, optimizer, and evaluation metrics
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32)
