In [1]:
from ovito.io import *
from ovito.data import *
from ovito.modifiers import ExpressionSelectionModifier, DeleteSelectedModifier, \
InvertSelectionModifier, ExpandSelectionModifier
#, CoordinationAnalysisModifier, WignerSeitzAnalysisModifier, ClusterAnalysisModifier, ConstructSurfaceModifier,
from ovito.pipeline import *
import glob
import numpy as np




  import ovito._extensions.pyscript


In [2]:
def NearestModify(Pipeline, particle_id=2001, particle_type=2, num_neighbors=4):
    """Append the "keep one particle and its nearest neighbours" modifier chain.

    Four modifiers are appended to ``Pipeline.modifiers`` in this order:

    1. ``ExpressionSelectionModifier`` selecting particles that satisfy
       ``ParticleType==<particle_type> && ParticleIdentifier==<particle_id>``.
    2. ``ExpandSelectionModifier`` in ``ExpansionMode.Nearest`` with
       ``num_neighbors`` neighbours.
    3. ``InvertSelectionModifier``.
    4. ``DeleteSelectedModifier``.

    Parameters
    ----------
    Pipeline : OVITO pipeline object exposing a ``modifiers`` list; it is
        modified in place.
    particle_id : int, default 2001
        Identifier used in the selection expression.
    particle_type : int, default 2
        Particle type used in the selection expression.
    num_neighbors : int, default 4
        Neighbour count handed to ``ExpandSelectionModifier``.

    Returns
    -------
    None

    Notes
    -----
    With the default arguments the generated expression string and modifier
    settings are identical to the previously hard-coded ones.
    """
    stages = (
        ExpressionSelectionModifier(
            expression=f'ParticleType=={particle_type} && ParticleIdentifier=={particle_id}'
        ),
        ExpandSelectionModifier(
            mode=ExpandSelectionModifier.ExpansionMode.Nearest,
            num_neighbors=num_neighbors,
        ),
        InvertSelectionModifier(),
        DeleteSelectedModifier(),
    )
    # Order matters: select -> expand -> invert -> delete.
    for stage in stages:
        Pipeline.modifiers.append(stage)
    return
    
def Nearest4PID(InputFile, ):
    """Collect, for every frame of a file, the IDs of the surviving type-1 particles.

    The file is loaded with ``import_file`` and the modifier chain installed by
    ``NearestModify`` is applied. For each frame the pipeline is evaluated and
    the ``'Particle Identifier'`` values of particles whose ``"Particle Type"``
    equals 1 are gathered.

    Parameters
    ----------
    InputFile : path handed unchanged to ``import_file``.

    Returns
    -------
    list of list
        One inner list of particle identifiers per frame, in frame order.
    """
    pipeline = import_file(InputFile)
    NearestModify(pipeline)

    def _type1_ids(frame_index):
        # Evaluate the pipeline for this frame and keep only type-1 particle IDs.
        particles = pipeline.compute(frame_index).particles
        type_column = particles["Particle Type"]
        id_column = particles['Particle Identifier']
        return list(id_column[type_column == 1])

    return [_type1_ids(frame_index) for frame_index in range(pipeline.source.num_frames)]

def MigrationLabel(Temper, FilePath, ):
    """Build a per-frame migration label from consecutive neighbour-ID sets.

    Files named ``{FilePath}/dump_temp.{Temper}.{i}`` for ``i = 1..FileNum``
    are processed with ``Nearest4PID`` and their per-frame ID lists are
    concatenated. For every file after the first, the first frame is dropped
    before concatenation (presumably it duplicates the last frame of the
    previous file - TODO confirm against the simulation setup).

    A frame is labelled 1 when its set of IDs differs from the previous
    frame's set, otherwise 0. The first frame is always labelled 0.

    Parameters
    ----------
    Temper : value interpolated into the file name (e.g. ``300``).
    FilePath : directory containing the dump files.

    Returns
    -------
    (Time_List, Mig_Label) : tuple of two lists of equal length
        ``Time_List`` is ``0, 1, 2, ...`` and ``Mig_Label`` holds the 0/1
        labels. With no matching files both lists are ``[0]``.
    """
    # Count only the files of the requested temperature. A bare "dump_temp*"
    # pattern also counts other temperatures stored in the same directory and
    # would make the loop below open files that do not exist.
    FileNum = len(glob.glob(f"{FilePath}/dump_temp.{Temper}.*"))

    Pid_List = []
    for i in range(1, FileNum + 1):
        Input_File = f"{FilePath}/dump_temp.{Temper}.{i}"
        Pid_tmp = Nearest4PID(Input_File)
        if i != 1:
            # Skip the first frame of every continuation file.
            Pid_tmp = Pid_tmp[1:]
        Pid_List += Pid_tmp

    # Frame 0 has no predecessor, so it is labelled 0 by definition.
    Mig_Label = [0]
    Mig_Label += [
        int(set(previous) != set(current))
        for previous, current in zip(Pid_List, Pid_List[1:])
    ]

    # Derive the time axis from the labels so both lists always have the same
    # length (equals FileNum*1000+1 when every file holds 1001 frames).
    Time_List = list(range(len(Mig_Label)))
    return Time_List, Mig_Label

# Driver cell: compute the migration labels for the dump files stored under
# `path` at temperature `temper`. The results stay in the notebook namespace
# as `time_series` and `y_label`.
temper = 300 
path = "./data"
time_series, y_label = MigrationLabel(temper, f"{path}")
    # Disabled: write the (time, label) pairs to y_label.txt as two integer columns.
    # save_data = np.column_stack((np.array(time_series), np.array(y_label)))
    # np.savetxt(f"{path}/y_label.txt", save_data, fmt='%d', header='time mig_label')
# def MigrationLabel_fixed(Temper, FilePath, n):
#     Pid_List = []
#     FileNum = len(glob.glob(f"{FilePath}/dump_temp*"))
#     Time_List = list(range(FileNum*1000+1))
#     for i in range(1, FileNum+1):
#         Input_File = f"{FilePath}/dump_temp.{Temper}.{i}"  
#         Pid_tmp = Nearest4PID(Input_File)
#         if i != 1:
#             del(Pid_tmp[0])
#         Pid_List += Pid_tmp
#     Mig_Label = [0]

#     for t in range(1, len(Pid_List)-1):
#         InterSection = list(set(Pid_List[t-1]) & set(Pid_List[t+1]))
#         if len(InterSection) == 4:
#             # Check if the Pid_list will return to its original state after n steps
#             Mig_Label += [0]
#         else:
#             if(t+n<len(Pid_List)-1):
#                 if set(Pid_List[t+n]) == set(Pid_List[t-1]):
#                     Mig_Label += [0]
#                 else:
#                     Mig_Label += [1]
#     Mig_Label += [0]
#     return Time_List, Mig_Label


In [3]:

def get_migration_label(
    path:str
):
    """Read the migration labels stored in a whitespace-separated text file.

    The first line of the file is treated as a header and skipped. On every
    following non-blank line the second whitespace-separated field is parsed
    as an integer and converted to ``bool``.

    Parameters
    ----------
    path : str
        Path of the label file (e.g. ``'data/y_label_2002.txt'``).

    Returns
    -------
    numpy.ndarray
        1-D boolean array with one entry per data line. An empty file or a
        header-only file yields an empty boolean array.

    Raises
    ------
    IndexError
        If a non-blank data line has fewer than two fields.
    ValueError
        If the second field is not an integer literal.
    """
    flags = []
    with open(path, 'r', encoding='utf-8') as fin:
        next(fin, None)  # skip the header line (no-op for an empty file)
        for line in fin:
            parts = line.split()
            if not parts:
                # Blank or trailing lines carry no label; skip them rather than crash.
                continue
            flags.append(bool(int(parts[1])))
    # Explicit dtype keeps the result boolean even when no labels were read.
    return np.array(flags, dtype=bool)

In [4]:
# Quick check: display the boolean label array parsed from the 2002 label file.
get_migration_label('data/y_label_2002.txt')

array([False, False, False, ..., False, False, False])

In [5]:
def get_original_data(
   path:str,
   segment_length:int = 1001
):
    """Load the per-frame time series stored in a whitespace-separated text file.

    The first line is treated as a header and skipped; blank lines are
    ignored. Every data line must hold at least 16 numeric fields; only the
    first 16 are used. Rows at indices ``segment_length, 2*segment_length, ...``
    (strictly below ``n_rows - 1``) are removed before the columns are split
    (presumably the duplicated first frame of each continuation segment -
    TODO confirm against how the file was produced).

    Parameters
    ----------
    path : str
        Path of the data file (e.g. ``'data/timedt.dataHe1.300'``).
    segment_length : int, default 1001
        Stride of the row indices that are dropped.

    Returns
    -------
    t, msd, csp, xyz, r_, v_xyz, v_, angle : tuple of numpy.ndarray
        Descriptions below follow the original author's notes.

        t      column 0      - time series (unit: ps)
        msd    column 1      - MSD (mean squared displacement) of the single He
        csp    columns 2-7   - CSP (centro-symmetry parameter), 6 values per row
        xyz    columns 8-10  - x, y, z coordinates of the single He
        r_     sqrt(col 11)  - distance of the single He from the origin
        v_xyz  columns 12-14 - velocity components of the single He
        v_     sqrt(col 15)  - speed of the single He
        angle  arccos(v_xyz / v_) evaluated row-wise, shape (n, 3)

    Raises
    ------
    ValueError
        If ``segment_length`` is not positive or a field is not numeric.
    IndexError
        If a data line has fewer than 16 fields.

    Notes
    -----
    Rows whose value in column 15 is zero produce NaN in ``angle`` (division
    by zero), exactly as in the plain NumPy expression.
    """
    n_cols = 16
    if segment_length <= 0:
        raise ValueError("segment_length must be a positive integer")

    rows = []
    with open(path, 'r', encoding='utf-8') as fin:
        next(fin, None)  # skip the header line (no-op for an empty file)
        for line in fin:
            # split() without an argument tolerates repeated spaces and tabs.
            fields = line.split()
            if not fields:
                continue
            values = [float(field) for field in fields]
            if len(values) < n_cols:
                raise IndexError(
                    f"expected at least {n_cols} columns, got {len(values)}"
                )
            rows.append(values[:n_cols])

    # reshape keeps a well-formed (0, 16) table when no data rows were read.
    table = np.array(rows, dtype=float).reshape(-1, n_cols)

    # Drop the rows at every multiple of segment_length, except a final row.
    indices_to_remove = np.arange(segment_length, len(table) - 1, segment_length)
    table = np.delete(table, indices_to_remove, axis=0)

    t = table[:, 0]
    msd = table[:, 1]
    csp = table[:, 2:8]
    xyz = table[:, 8:11]
    r_ = np.sqrt(table[:, 11])      # file stores the squared value
    v_xyz = table[:, 12:15]
    v_ = np.sqrt(table[:, 15])      # file stores the squared value
    angle = np.arccos(v_xyz / v_[:, np.newaxis])

    return t, msd, csp, xyz, r_, v_xyz, v_, angle


In [15]:
# Load the time-series tuples (t, msd, csp, xyz, r_, v_xyz, v_, angle) from the
# two data files "dataHe1" and "dataHe2" at 300.
data1 = get_original_data('data/timedt.dataHe1.300')
data2 = get_original_data('data/timedt.dataHe2.300')

In [37]:
# Read the two label files as column vectors and place them side by side:
# labels[:, 0] comes from y_label_2001.txt, labels[:, 1] from y_label_2002.txt.
# np.column_stack requires both files to contain the same number of rows.
label1 = get_migration_label('data/y_label_2001.txt').reshape(-1, 1)
label2 = get_migration_label('data/y_label_2002.txt').reshape(-1, 1)
labels = np.column_stack((label1, label2))

In [40]:
# NOTE(review): numpy is already imported in the first cell; this import is redundant.
import numpy as np
# Split the frame indices into the four combinations of the two label columns
# (the boolean labels compare equal to 1/0):
#   10 -> only column 0 set, 01 -> only column 1 set,
#   11 -> both set,          negative -> neither set.
positive10_indices = np.where((labels[:, 0] == 1) & (labels[:, 1] == 0))[0]
positive01_indices = np.where((labels[:, 0] == 0) & (labels[:, 1] == 1))[0]
positive11_indices = np.where((labels[:, 0] == 1) & (labels[:, 1] == 1))[0]
negative_indices = np.where((labels[:, 0] == 0) & (labels[:,1]==0))[0]

In [41]:
# Balance the four classes by keeping as many samples per class as the
# smallest class contains.
# NOTE(review): if any class is empty, n_samples is 0 and nothing is selected.
n_samples = min(len(positive10_indices), len(positive01_indices), len(positive11_indices), len(negative_indices))
# In-place shuffles using NumPy's global RNG.
# NOTE(review): no seed is set in the visible cells, so the selection differs
# between runs, and re-running this cell reshuffles the index arrays again.
np.random.shuffle(positive10_indices)
np.random.shuffle(positive01_indices)
np.random.shuffle(positive11_indices)
np.random.shuffle(negative_indices)

# Take the first n_samples entries of each shuffled index array.
selected_positive01_indices = positive01_indices[:n_samples]
selected_positive10_indices = positive10_indices[:n_samples]
selected_positive11_indices = positive11_indices[:n_samples]
selected_negative_indices = negative_indices[:n_samples]

# Merge and sort the indices for the steps that follow
selected_indices = np.sort(np.concatenate([selected_positive01_indices, selected_positive10_indices, selected_positive11_indices, selected_negative_indices]))


In [None]:
# Build one window per selected frame: the frame itself plus the frames before it.
# NOTE(review): `data` is not defined in any visible cell (only `data1` and
# `data2` are), and this cell has no execution count. On a fresh kernel it
# raises NameError. Confirm which per-frame array is meant to be sliced here.
n_frames = 3  # each sample includes the frame itself and the 2 preceding frames

final_data = []
for idx in selected_indices:
    start_idx = max(idx - n_frames + 1, 0)  # clamp so the index never drops below 0
    extracted_data = data[start_idx:idx + 1]  # slice data from start_idx through idx (inclusive)
    final_data.append(extracted_data)

# Convert to a numpy array if needed.
# Windows starting at idx < n_frames - 1 are shorter than n_frames; dtype=object
# lets NumPy store windows of differing length.
final_data = np.array(final_data, dtype=object)