## RNN for Skyrmion trajectories prediction

**Imports**

In [50]:
# Standard imports
import pandas as pd
import numpy as np

import matplotlib as mpl
import matplotlib.pyplot as plt

# change the following to %matplotlib notebook for interactive plotting
%matplotlib inline

# Optionally, tweak styles.
mpl.rc('figure',  figsize=(20, 10))
mpl.rc('image', cmap='gray')

import trackpy as tp

from tqdm import tqdm  # for progress bar

import tensorflow as tf

from collections import deque

import random

**Read the data**

In [2]:
directory = 'Rec_EDGE_300K_1L_50MA.out'

data = pd.read_csv(directory + '/trajectories.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,y,x,mass,size,ecc,signal,raw_mass,ep,frame,particle
0,0,24.420047,61.809992,33.895088,3.785002,0.048289,0.451671,91.250977,0.000493,0,0
1,1,31.518261,109.009463,33.850101,3.792741,0.067499,0.45527,91.282349,0.000493,0,1
2,2,51.658864,41.007417,34.208199,3.811746,0.062159,0.449871,92.517654,0.000486,0,2
3,3,60.994689,82.173861,34.559098,3.818268,0.046138,0.45527,93.368622,0.000482,0,3
4,4,61.572998,129.252586,33.747531,3.807508,0.059245,0.45347,91.835289,0.00049,0,4


**Drop the unused columns**

In [3]:
unused_columns = ['Unnamed: 0', 'mass', 'size', 'ecc', 'signal', 'raw_mass', 'ep']

data = data.drop(columns=unused_columns)
data.head()

Unnamed: 0,y,x,frame,particle
0,24.420047,61.809992,0,0
1,31.518261,109.009463,0,1
2,51.658864,41.007417,0,2
3,60.994689,82.173861,0,3
4,61.572998,129.252586,0,4


**Fill in missing values with average positions (if a skyrmion is missing for more than one frame, it might not be very precise, but it should not be a big issue here)**

In [4]:
no_skyrmions = data[data['frame'] == 0].shape[0]
no_skyrmions

15

In [5]:
# ids of initial particles
ids = list(range(no_skyrmions))

# iterate through the frames
for f in tqdm(data['frame'].unique()):
    for p in range(no_skyrmions):
        # this means the skyrmion p is missing in frame f
        if not any(data[data['frame'] == f]['particle'] == p):
            
            # find previous coorinates
            x_prev = data[(data['frame'] == f-1) & (data['particle'] == p)]['x'].values[0]
            y_prev = data[(data['frame'] == f-1) & (data['particle'] == p)]['y'].values[0]
            
            x_next = x_prev
            y_next = y_prev
            
            #find next coordinates
            for next_frame in range((f+1).astype(int), len(data['frame'].unique())):
                if any(data[data['frame'] == f]['particle'] == p):
                    x_next = data[(data['frame'] == next_frame) & (data['particle'] == p)]['x'].values[0]
                    y_next = data[(data['frame'] == next_frame) & (data['particle'] == p)]['y'].values[0]
                    break
                    
            # new coordinates
            x_new = (x_prev + x_next) / 2
            y_new = (y_prev + y_next) / 2
            
            data = data.append({'y' : y_new,
                                'x' : x_new,
                                'frame' : f,
                                'particle': p}, ignore_index=True)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 800/800 [00:15<00:00, 51.98it/s]


In [6]:
data = data.sort_values(by=['frame', 'particle'])

In [7]:
data

Unnamed: 0,y,x,frame,particle
0,24.420047,61.809992,0.0,0.0
1,31.518261,109.009463,0.0,1.0
2,51.658864,41.007417,0.0,2.0
3,60.994689,82.173861,0.0,3.0
4,61.572998,129.252586,0.0,4.0
...,...,...,...,...
11922,26.838018,9158.734705,799.0,10.0
11927,92.153535,8909.539660,799.0,11.0
11935,160.675052,8812.401110,799.0,12.0
11934,156.663224,8600.194927,799.0,13.0


**Check that there are no more missing values**

In [8]:
for f in tqdm(data['frame'].unique()):
    if (data[data['frame'] == f]['particle'].shape[0] < no_skyrmions):
        print(f)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 800/800 [00:00<00:00, 3026.97it/s]


**Put data in list**

In [9]:
no_skyrmions = int(max(data[data['frame'] == 0]['particle']) + 1)

frames = None

# iterate through the frames
for f in tqdm(data['frame'].unique()):
    coordinates = None
    for p in data[data['frame'] == f]['particle']:
        particle = data[(data['frame'] == f) & (data['particle'] == p)]
        coordinates = np.append(coordinates, [particle['x'].values[0], particle['y'].values[0]]) if coordinates is not None else [particle['x'].values[0], particle['y'].values[0]]
    
    frames = np.append(frames, coordinates) if frames is not None else [coordinates]
                                                                        
frames = frames.reshape(-1, 2 * no_skyrmions)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 800/800 [00:06<00:00, 119.21it/s]


**Place frames in dataframe**

In [79]:
main_df = pd.DataFrame(columns=['frame', 'coordinates'])

for i in tqdm(range(len(frames))):
    main_df = main_df.append({'frame': i, 'coordinates': frames[i]}, ignore_index=True)

main_df.set_index('frame', inplace=True)
main_df

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 800/800 [00:01<00:00, 523.25it/s]


Unnamed: 0_level_0,coordinates
frame,Unnamed: 1_level_1
0,"[61.80999150562753, 24.420046719048628, 109.00..."
1,"[70.80009877085162, 22.786106233538195, 115.03..."
2,"[78.53846153846153, 23.79722075869336, 130.692..."
3,"[92.43681939593179, 22.50037668652832, 141.306..."
4,"[104.59782115297321, 22.04513325984048, 153.67..."
...,...
795,"[8981.526291116494, 162.7594343308071, 8905.46..."
796,"[8990.157367074604, 162.0370600843532, 8920.93..."
797,"[9007.540353356892, 162.55978798586568, 8926.0..."
798,"[9017.586395147311, 165.976863084922, 8941.781..."


**Set prediction parameters**

In [80]:
SEQ_LEN = 20
FUTURE_PERIOD_TO_PREDICT = 3

main_df['future'] = main_df['coordinates'].shift(-FUTURE_PERIOD_TO_PREDICT)
main_df

Unnamed: 0_level_0,coordinates,future
frame,Unnamed: 1_level_1,Unnamed: 2_level_1
0,"[61.80999150562753, 24.420046719048628, 109.00...","[92.43681939593179, 22.50037668652832, 141.306..."
1,"[70.80009877085162, 22.786106233538195, 115.03...","[104.59782115297321, 22.04513325984048, 153.67..."
2,"[78.53846153846153, 23.79722075869336, 130.692...","[114.654700661428, 17.817739838317603, 167.487..."
3,"[92.43681939593179, 22.50037668652832, 141.306...","[130.2578821609651, 15.584474619733086, 177.42..."
4,"[104.59782115297321, 22.04513325984048, 153.67...","[137.88253604193972, 15.713794233289649, 183.3..."
...,...,...
795,"[8981.526291116494, 162.7594343308071, 8905.46...","[9017.586395147311, 165.976863084922, 8941.781..."
796,"[8990.157367074604, 162.0370600843532, 8920.93...","[9030.630412102242, 161.97515649452268, 8950.1..."
797,"[9007.540353356892, 162.55978798586568, 8926.0...",
798,"[9017.586395147311, 165.976863084922, 8941.781...",


**Separate training nd validtion data**

In [81]:
last_20pct = main_df.index.values[-int(0.20*len(frames))]
last_20pct

640

In [82]:
validation_main_df = main_df[(main_df.index >= last_20pct)]
main_df = main_df[(main_df.index < last_20pct)]

In [87]:
def preprocess_df(df):
    df.dropna(inplace=True)
    
    sequential_data = []
    prev_frames = deque(maxlen=SEQ_LEN)
    
    for i in tqdm(df.values):
        # not taking future
        prev_frames.append([n for n in i[:-1]])
        if(len(prev_frames) == SEQ_LEN):
            sequential_data.append([np.array(prev_frames), i[-1]])
            
    random.shuffle(sequential_data)
    
    X = []
    y = []
    
    for seq, target in sequential_data:
        X.append(seq)
        y.append(target)
    
    return np.array(X), y

In [94]:
train_x, train_y = preprocess_df(main_df)
val_x, val_y = preprocess_df(validation_main_df)

print(f"train data: {train_x.shape} validation: {val_x.shape}")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.dropna(inplace=True)
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 640/640 [00:00<00:00, 49265.96it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 157/157 [00:00<00:00, 52491.49it/s]

train data: (621, 20, 1, 30) validation: (138, 20, 1, 30)





In [102]:
optimizer = 'NAdam'
loss = 'mae'
metrics = ['accuracy']

epochs = 10
batch_size = 64

In [131]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, SimpleRNN, BatchNormalization

In [122]:
model = Sequential()

model.add(SimpleRNN(128, input_shape=(train_x.shape[1:]), return_sequences=True, activation='relu'))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(SimpleRNN(128, activation='relu'))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2 * no_skyrmions, activation='relu'))

In [123]:
model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [124]:
history = model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size, validation_data=(val_x, val_y))

ValueError: Data cardinality is ambiguous:
  x sizes: 621
  y sizes: 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30
Please provide data which shares the same first dimension.