## Linear and Polynomial Regression for Skyrmion trajectories prediction

**Imports**

In [1]:
# Standard imports
import pandas as pd
import numpy as np

from tqdm import tqdm  # for progress bar

**Read the data**

In [2]:
directory = 'Rec_EDGE_300K_1L_50MA.out'

data = pd.read_csv(directory + '/trajectories.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,y,x,mass,size,ecc,signal,raw_mass,ep,frame,particle
0,0,24.420047,61.809992,33.895088,3.785002,0.048289,0.451671,91.250977,0.000493,0,0
1,1,31.518261,109.009463,33.850101,3.792741,0.067499,0.45527,91.282349,0.000493,0,1
2,2,51.658864,41.007417,34.208199,3.811746,0.062159,0.449871,92.517654,0.000486,0,2
3,3,60.994689,82.173861,34.559098,3.818268,0.046138,0.45527,93.368622,0.000482,0,3
4,4,61.572998,129.252586,33.747531,3.807508,0.059245,0.45347,91.835289,0.00049,0,4


**Drop the unused columns**

In [3]:
unused_columns = ['Unnamed: 0', 'mass', 'size', 'ecc', 'signal', 'raw_mass', 'ep']

data = data.drop(columns=unused_columns)
data.head()

Unnamed: 0,y,x,frame,particle
0,24.420047,61.809992,0,0
1,31.518261,109.009463,0,1
2,51.658864,41.007417,0,2
3,60.994689,82.173861,0,3
4,61.572998,129.252586,0,4


**Fill in missing values with average positions (if a skyrmion is missing for more than one frame, it might not be very precise, but it should not be a big issue here)**

In [4]:
no_skyrmions = data[data['frame'] == 0].shape[0]
no_skyrmions

15

In [5]:
# ids of initial particles
ids = list(range(no_skyrmions))

# iterate through the frames
for f in tqdm(data['frame'].unique()):
    for p in range(no_skyrmions):
        # this means the skyrmion p is missing in frame f
        if not any(data[data['frame'] == f]['particle'] == p):
            
            # find previous coorinates
            x_prev = data[(data['frame'] == f-1) & (data['particle'] == p)]['x'].values[0]
            y_prev = data[(data['frame'] == f-1) & (data['particle'] == p)]['y'].values[0]
            
            x_next = x_prev
            y_next = y_prev
            
            #find next coordinates
            for next_frame in range((f+1).astype(int), len(data['frame'].unique())):
                if any(data[data['frame'] == f]['particle'] == p):
                    x_next = data[(data['frame'] == next_frame) & (data['particle'] == p)]['x'].values[0]
                    y_next = data[(data['frame'] == next_frame) & (data['particle'] == p)]['y'].values[0]
                    break
                    
            # new coordinates
            x_new = (x_prev + x_next) / 2
            y_new = (y_prev + y_next) / 2
            
            data = data.append({'y' : y_new,
                                'x' : x_new,
                                'frame' : f,
                                'particle': p}, ignore_index=True)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 800/800 [00:15<00:00, 51.34it/s]


In [6]:
data = data.sort_values(by=['frame', 'particle'])

In [7]:
data

Unnamed: 0,y,x,frame,particle
0,24.420047,61.809992,0.0,0.0
1,31.518261,109.009463,0.0,1.0
2,51.658864,41.007417,0.0,2.0
3,60.994689,82.173861,0.0,3.0
4,61.572998,129.252586,0.0,4.0
...,...,...,...,...
11922,26.838018,9158.734705,799.0,10.0
11927,92.153535,8909.539660,799.0,11.0
11935,160.675052,8812.401110,799.0,12.0
11934,156.663224,8600.194927,799.0,13.0


**Check that there are no more missing values**

In [8]:
for f in tqdm(data['frame'].unique()):
    if (data[data['frame'] == f]['particle'].shape[0] < no_skyrmions):
        print(f)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 800/800 [00:00<00:00, 2875.06it/s]


**Format the data so that it is in the format (frame, next_frame)**

In [9]:
frames = []

# iterate through the frames
for f in tqdm(data['frame'].unique()):
    coordinates = None
    for p in data[data['frame'] == f]['particle']:
        particle = data[(data['frame'] == f) & (data['particle'] == p)]
        coordinates = np.append(coordinates, [particle['x'].values[0], particle['y'].values[0]]) if coordinates is not None else [particle['x'].values[0], particle['y'].values[0]]
    frames.append(list(coordinates))

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 800/800 [00:06<00:00, 121.64it/s]


**Place data in DataFrame**

In [10]:
# data in columns ['frame', 'next_frame']
df = pd.DataFrame(columns=['frame', 'next_frame'])

for i in range(1, len(frames)):
    df = df.append({'frame': frames[i-1], 'next_frame': frames[i]}, ignore_index=True)

df

Unnamed: 0,frame,next_frame
0,"[61.80999150562753, 24.420046719048628, 109.00...","[70.80009877085162, 22.786106233538195, 115.03..."
1,"[70.80009877085162, 22.786106233538195, 115.03...","[78.53846153846153, 23.79722075869336, 130.692..."
2,"[78.53846153846153, 23.79722075869336, 130.692...","[92.43681939593179, 22.50037668652832, 141.306..."
3,"[92.43681939593179, 22.50037668652832, 141.306...","[104.59782115297321, 22.04513325984048, 153.67..."
4,"[104.59782115297321, 22.04513325984048, 153.67...","[114.654700661428, 17.817739838317603, 167.487..."
...,...,...
794,"[8971.234802590348, 161.42225471763803, 8889.3...","[8981.526291116494, 162.7594343308071, 8905.46..."
795,"[8981.526291116494, 162.7594343308071, 8905.46...","[8990.157367074604, 162.0370600843532, 8920.93..."
796,"[8990.157367074604, 162.0370600843532, 8920.93...","[9007.540353356892, 162.55978798586568, 8926.0..."
797,"[9007.540353356892, 162.55978798586568, 8926.0...","[9017.586395147311, 165.976863084922, 8941.781..."


**Split data for training and testing**

In [11]:
from sklearn.model_selection import train_test_split

X = df['frame'].tolist()
y = df['next_frame'].tolist()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)

**Train model with Linear Regression**

In [12]:
from sklearn.linear_model import LinearRegression

lin_model = LinearRegression()
lin_model.fit(X_train, y_train)

LinearRegression()

**Model evaluation**

In [13]:
from sklearn.metrics import mean_squared_error, r2_score

# model evaluation for training set
y_train_predict = lin_model.predict(X_train)
rmse = (np.sqrt(mean_squared_error(y_train, y_train_predict)))
r2 = r2_score(y_train, y_train_predict)

print("The model performance for training set")
print("--------------------------------------")
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))
print("\n")

# model evaluation for testing set
y_test_predict = lin_model.predict(X_test)
rmse = (np.sqrt(mean_squared_error(y_test, y_test_predict)))
r2 = r2_score(y_test, y_test_predict)

print("The model performance for testing set")
print("--------------------------------------")
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))

The model performance for training set
--------------------------------------
RMSE is 3.435986414665386
R2 score is 0.9881696083884253


The model performance for testing set
--------------------------------------
RMSE is 3.5791488423876037
R2 score is 0.9859669037408627


**Train with Polynoial Regression**

In [14]:
from sklearn.preprocessing import PolynomialFeatures

def create_polynomial_regression_model(degree):
    poly_features = PolynomialFeatures(degree=degree)

    # transform the features to higher degree features.
    X_train_poly = poly_features.fit_transform(X_train)

    # fit the transformed features to Linear Regression
    poly_model = LinearRegression()
    poly_model.fit(X_train_poly, y_train)

    # predicting on training data-set
    y_train_predicted = poly_model.predict(X_train_poly)

    # predicting on test data-set
    y_test_predict = poly_model.predict(poly_features.fit_transform(X_test))

    # evaluating the model on training dataset
    rmse_train = np.sqrt(mean_squared_error(y_train, y_train_predicted))
    r2_train = r2_score(y_train, y_train_predicted)

    # evaluating the model on test dataset
    rmse_test = np.sqrt(mean_squared_error(y_test, y_test_predict))
    r2_test = r2_score(y_test, y_test_predict)

    print("The model performance for the training set")
    print("-------------------------------------------")
    print("RMSE of training set is {}".format(rmse_train))
    print("R2 score of training set is {}".format(r2_train))

    print("\n")

    print("The model performance for the test set")
    print("-------------------------------------------")
    print("RMSE of test set is {}".format(rmse_test))
    print("R2 score of test set is {}".format(r2_test))

In [15]:
create_polynomial_regression_model(2)

The model performance for the training set
-------------------------------------------
RMSE of training set is 1.477664193172833
R2 score of training set is 0.9978300150049069


The model performance for the test set
-------------------------------------------
RMSE of test set is 6.739025988622936
R2 score of test set is 0.9503022773760215


In [16]:
create_polynomial_regression_model(3)

The model performance for the training set
-------------------------------------------
RMSE of training set is 1.0231375975174897e-07
R2 score of training set is 1.0


The model performance for the test set
-------------------------------------------
RMSE of test set is 22.630242458555895
R2 score of test set is 0.7460104255008766


## How many frames in the future can I predict given one frame?

**Try to predict n frames in the future**

In [17]:
n = 5

# data in format (frame, next n frames)
dfn = pd.DataFrame(columns=['X', 'y'])

for i in range(n, len(frames)):
    target = []
    for j in range(n-1, -1, -1):
        target = target + frames[i-j]
    dfn = dfn.append({'X': frames[i-2], 'y': target}, ignore_index=True)

dfn

Unnamed: 0,X,y
0,"[92.43681939593179, 22.50037668652832, 141.306...","[70.80009877085162, 22.786106233538195, 115.03..."
1,"[104.59782115297321, 22.04513325984048, 153.67...","[78.53846153846153, 23.79722075869336, 130.692..."
2,"[114.654700661428, 17.817739838317603, 167.487...","[92.43681939593179, 22.50037668652832, 141.306..."
3,"[130.2578821609651, 15.584474619733086, 177.42...","[104.59782115297321, 22.04513325984048, 153.67..."
4,"[137.88253604193972, 15.713794233289649, 183.3...","[114.654700661428, 17.817739838317603, 167.487..."
...,...,...
790,"[8963.531319216798, 166.59466643040668, 8880.4...","[8940.46656641604, 155.12932330827067, 8861.91..."
791,"[8971.234802590348, 161.42225471763803, 8889.3...","[8954.623459439574, 159.4531853577779, 8867.34..."
792,"[8981.526291116494, 162.7594343308071, 8905.46...","[8963.531319216798, 166.59466643040668, 8880.4..."
793,"[8990.157367074604, 162.0370600843532, 8920.93...","[8971.234802590348, 161.42225471763803, 8889.3..."


In [18]:
from sklearn.model_selection import train_test_split

X = dfn['X'].tolist()
y = dfn['y'].tolist()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)

**Linear Regression**

In [19]:
from sklearn.linear_model import LinearRegression

lin_model = LinearRegression()
lin_model.fit(X_train, y_train)

LinearRegression()

In [20]:
from sklearn.metrics import mean_squared_error, r2_score

# model evaluation for training set
y_train_predict = lin_model.predict(X_train)
rmse = (np.sqrt(mean_squared_error(y_train, y_train_predict)))
r2 = r2_score(y_train, y_train_predict)

print("The model performance for training set")
print("--------------------------------------")
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))
print("\n")

# model evaluation for testing set
y_test_predict = lin_model.predict(X_test)
rmse = (np.sqrt(mean_squared_error(y_test, y_test_predict)))
r2 = r2_score(y_test, y_test_predict)

print("The model performance for testing set")
print("--------------------------------------")
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))

The model performance for training set
--------------------------------------
RMSE is 3.3385514284927384
R2 score is 0.9885757950488309


The model performance for testing set
--------------------------------------
RMSE is 3.525847658528089
R2 score is 0.9861678142482105


**Poly regression**

In [21]:
from sklearn.preprocessing import PolynomialFeatures

def create_polynomial_regression_model(degree):
    poly_features = PolynomialFeatures(degree=degree)

    # transform the features to higher degree features.
    X_train_poly = poly_features.fit_transform(X_train)

    # fit the transformed features to Linear Regression
    poly_model = LinearRegression()
    poly_model.fit(X_train_poly, y_train)

    # predicting on training data-set
    y_train_predicted = poly_model.predict(X_train_poly)

    # predicting on test data-set
    y_test_predict = poly_model.predict(poly_features.fit_transform(X_test))

    # evaluating the model on training dataset
    rmse_train = np.sqrt(mean_squared_error(y_train, y_train_predicted))
    r2_train = r2_score(y_train, y_train_predicted)

    # evaluating the model on test dataset
    rmse_test = np.sqrt(mean_squared_error(y_test, y_test_predict))
    r2_test = r2_score(y_test, y_test_predict)

    print("The model performance for the training set")
    print("-------------------------------------------")
    print("RMSE of training set is {}".format(rmse_train))
    print("R2 score of training set is {}".format(r2_train))

    print("\n")

    print("The model performance for the test set")
    print("-------------------------------------------")
    print("RMSE of test set is {}".format(rmse_test))
    print("R2 score of test set is {}".format(r2_test))

In [22]:
create_polynomial_regression_model(2)

The model performance for the training set
-------------------------------------------
RMSE of training set is 2.7627584984303724
R2 score of training set is 0.9920769193136397


The model performance for the test set
-------------------------------------------
RMSE of test set is 7.581637208477363
R2 score of test set is 0.9352817611348883


In [23]:
create_polynomial_regression_model(3)

The model performance for the training set
-------------------------------------------
RMSE of training set is 9.230284616724732e-08
R2 score of training set is 1.0


The model performance for the test set
-------------------------------------------
RMSE of test set is 18.47457704517218
R2 score of test set is 0.8365884007144077


## How are the results affected if I try to predict on using my prediction as input data? For how many rounds can this be accurate enough?

**Try to predict n times**

In [24]:
n = 50

# data in columns format (frame, next frame, next next frame, ...)
df_n_rounds = pd.DataFrame(columns=['X', 'y_1'])

for i in range(1, n):
    df_n_rounds['y_' + str(i+1)] = ''

for i in range(n+1):
    col = []
    for j in range(i, len(frames)-n+i):
        col.append(frames[j])
    if i == 0:
        df_n_rounds['X'] = col
    else:
        df_n_rounds['y_' + str(i)] = col

df_n_rounds

Unnamed: 0,X,y_1,y_2,y_3,y_4,y_5,y_6,y_7,y_8,y_9,...,y_41,y_42,y_43,y_44,y_45,y_46,y_47,y_48,y_49,y_50
0,"[61.80999150562753, 24.420046719048628, 109.00...","[70.80009877085162, 22.786106233538195, 115.03...","[78.53846153846153, 23.79722075869336, 130.692...","[92.43681939593179, 22.50037668652832, 141.306...","[104.59782115297321, 22.04513325984048, 153.67...","[114.654700661428, 17.817739838317603, 167.487...","[130.2578821609651, 15.584474619733086, 177.42...","[137.88253604193972, 15.713794233289649, 183.3...","[151.1877135081929, 20.428405998494586, 191.79...","[165.68593072270755, 20.306249618180708, 203.6...",...,"[548.3095747526077, 55.4499866274405, 542.7064...","[559.3591679425583, 54.50747056649596, 551.733...","[571.6816533108394, 56.70940531421341, 561.582...","[579.4599029964786, 54.63856222177928, 574.765...","[595.5333143912487, 52.484301747407294, 584.49...","[608.6178598298512, 54.07367324497946, 596.594...","[616.5485332302082, 54.14087619468538, 610.544...","[630.1560680698611, 48.47305567995223, 624.534...","[645.4160783268949, 54.60836552550953, 632.508...","[655.4035507361932, 59.76984000425239, 648.778..."
1,"[70.80009877085162, 22.786106233538195, 115.03...","[78.53846153846153, 23.79722075869336, 130.692...","[92.43681939593179, 22.50037668652832, 141.306...","[104.59782115297321, 22.04513325984048, 153.67...","[114.654700661428, 17.817739838317603, 167.487...","[130.2578821609651, 15.584474619733086, 177.42...","[137.88253604193972, 15.713794233289649, 183.3...","[151.1877135081929, 20.428405998494586, 191.79...","[165.68593072270755, 20.306249618180708, 203.6...","[175.51762512381282, 22.851074986890406, 214.4...",...,"[559.3591679425583, 54.50747056649596, 551.733...","[571.6816533108394, 56.70940531421341, 561.582...","[579.4599029964786, 54.63856222177928, 574.765...","[595.5333143912487, 52.484301747407294, 584.49...","[608.6178598298512, 54.07367324497946, 596.594...","[616.5485332302082, 54.14087619468538, 610.544...","[630.1560680698611, 48.47305567995223, 624.534...","[645.4160783268949, 54.60836552550953, 632.508...","[655.4035507361932, 59.76984000425239, 648.778...","[664.2836707746479, 64.61612382629107, 654.376..."
2,"[78.53846153846153, 23.79722075869336, 130.692...","[92.43681939593179, 22.50037668652832, 141.306...","[104.59782115297321, 22.04513325984048, 153.67...","[114.654700661428, 17.817739838317603, 167.487...","[130.2578821609651, 15.584474619733086, 177.42...","[137.88253604193972, 15.713794233289649, 183.3...","[151.1877135081929, 20.428405998494586, 191.79...","[165.68593072270755, 20.306249618180708, 203.6...","[175.51762512381282, 22.851074986890406, 214.4...","[187.1709068399876, 20.589291241101826, 222.27...",...,"[571.6816533108394, 56.70940531421341, 561.582...","[579.4599029964786, 54.63856222177928, 574.765...","[595.5333143912487, 52.484301747407294, 584.49...","[608.6178598298512, 54.07367324497946, 596.594...","[616.5485332302082, 54.14087619468538, 610.544...","[630.1560680698611, 48.47305567995223, 624.534...","[645.4160783268949, 54.60836552550953, 632.508...","[655.4035507361932, 59.76984000425239, 648.778...","[664.2836707746479, 64.61612382629107, 654.376...","[681.5213026017112, 66.98795180722891, 667.341..."
3,"[92.43681939593179, 22.50037668652832, 141.306...","[104.59782115297321, 22.04513325984048, 153.67...","[114.654700661428, 17.817739838317603, 167.487...","[130.2578821609651, 15.584474619733086, 177.42...","[137.88253604193972, 15.713794233289649, 183.3...","[151.1877135081929, 20.428405998494586, 191.79...","[165.68593072270755, 20.306249618180708, 203.6...","[175.51762512381282, 22.851074986890406, 214.4...","[187.1709068399876, 20.589291241101826, 222.27...","[204.5807988024767, 16.58671837790025, 232.493...",...,"[579.4599029964786, 54.63856222177928, 574.765...","[595.5333143912487, 52.484301747407294, 584.49...","[608.6178598298512, 54.07367324497946, 596.594...","[616.5485332302082, 54.14087619468538, 610.544...","[630.1560680698611, 48.47305567995223, 624.534...","[645.4160783268949, 54.60836552550953, 632.508...","[655.4035507361932, 59.76984000425239, 648.778...","[664.2836707746479, 64.61612382629107, 654.376...","[681.5213026017112, 66.98795180722891, 667.341...","[688.0223759593558, 65.73148848773107, 682.724..."
4,"[104.59782115297321, 22.04513325984048, 153.67...","[114.654700661428, 17.817739838317603, 167.487...","[130.2578821609651, 15.584474619733086, 177.42...","[137.88253604193972, 15.713794233289649, 183.3...","[151.1877135081929, 20.428405998494586, 191.79...","[165.68593072270755, 20.306249618180708, 203.6...","[175.51762512381282, 22.851074986890406, 214.4...","[187.1709068399876, 20.589291241101826, 222.27...","[204.5807988024767, 16.58671837790025, 232.493...","[209.63801231212915, 22.24546614164495, 243.42...",...,"[595.5333143912487, 52.484301747407294, 584.49...","[608.6178598298512, 54.07367324497946, 596.594...","[616.5485332302082, 54.14087619468538, 610.544...","[630.1560680698611, 48.47305567995223, 624.534...","[645.4160783268949, 54.60836552550953, 632.508...","[655.4035507361932, 59.76984000425239, 648.778...","[664.2836707746479, 64.61612382629107, 654.376...","[681.5213026017112, 66.98795180722891, 667.341...","[688.0223759593558, 65.73148848773107, 682.724...","[700.2392071106094, 66.80523419864559, 692.610..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
745,"[8437.670223857713, 135.98620055197793, 8354.1...","[8446.438829999202, 139.95640392125608, 8373.5...","[8466.83463489252, 134.7544481120683, 8386.770...","[8469.190992493744, 138.5072679613964, 8395.26...","[8482.545345967035, 138.31156939246756, 8406.1...","[8492.787600980304, 137.14967777071794, 8418.2...","[8506.449072044425, 145.54815139558673, 8428.5...","[8510.666243413116, 139.60396165322837, 8440.1...","[8522.268657609671, 140.52361003052246, 8452.9...","[8535.609989238463, 139.60087358359183, 8459.2...",...,"[8883.88434368326, 159.93483379880618, 8802.23...","[8897.635639854818, 161.3784685634001, 8813.47...","[8910.625345877144, 159.51474424855718, 8821.4...","[8923.702892001807, 159.2881834613647, 8836.41...","[8935.34010033835, 157.95111422237778, 8847.53...","[8940.46656641604, 155.12932330827067, 8861.91...","[8954.623459439574, 159.4531853577779, 8867.34...","[8963.531319216798, 166.59466643040668, 8880.4...","[8971.234802590348, 161.42225471763803, 8889.3...","[8981.526291116494, 162.7594343308071, 8905.46..."
746,"[8446.438829999202, 139.95640392125608, 8373.5...","[8466.83463489252, 134.7544481120683, 8386.770...","[8469.190992493744, 138.5072679613964, 8395.26...","[8482.545345967035, 138.31156939246756, 8406.1...","[8492.787600980304, 137.14967777071794, 8418.2...","[8506.449072044425, 145.54815139558673, 8428.5...","[8510.666243413116, 139.60396165322837, 8440.1...","[8522.268657609671, 140.52361003052246, 8452.9...","[8535.609989238463, 139.60087358359183, 8459.2...","[8543.693538597363, 136.3635159741765, 8465.12...",...,"[8897.635639854818, 161.3784685634001, 8813.47...","[8910.625345877144, 159.51474424855718, 8821.4...","[8923.702892001807, 159.2881834613647, 8836.41...","[8935.34010033835, 157.95111422237778, 8847.53...","[8940.46656641604, 155.12932330827067, 8861.91...","[8954.623459439574, 159.4531853577779, 8867.34...","[8963.531319216798, 166.59466643040668, 8880.4...","[8971.234802590348, 161.42225471763803, 8889.3...","[8981.526291116494, 162.7594343308071, 8905.46...","[8990.157367074604, 162.0370600843532, 8920.93..."
747,"[8466.83463489252, 134.7544481120683, 8386.770...","[8469.190992493744, 138.5072679613964, 8395.26...","[8482.545345967035, 138.31156939246756, 8406.1...","[8492.787600980304, 137.14967777071794, 8418.2...","[8506.449072044425, 145.54815139558673, 8428.5...","[8510.666243413116, 139.60396165322837, 8440.1...","[8522.268657609671, 140.52361003052246, 8452.9...","[8535.609989238463, 139.60087358359183, 8459.2...","[8543.693538597363, 136.3635159741765, 8465.12...","[8555.416489178977, 138.29708004122296, 8474.3...",...,"[8910.625345877144, 159.51474424855718, 8821.4...","[8923.702892001807, 159.2881834613647, 8836.41...","[8935.34010033835, 157.95111422237778, 8847.53...","[8940.46656641604, 155.12932330827067, 8861.91...","[8954.623459439574, 159.4531853577779, 8867.34...","[8963.531319216798, 166.59466643040668, 8880.4...","[8971.234802590348, 161.42225471763803, 8889.3...","[8981.526291116494, 162.7594343308071, 8905.46...","[8990.157367074604, 162.0370600843532, 8920.93...","[9007.540353356892, 162.55978798586568, 8926.0..."
748,"[8469.190992493744, 138.5072679613964, 8395.26...","[8482.545345967035, 138.31156939246756, 8406.1...","[8492.787600980304, 137.14967777071794, 8418.2...","[8506.449072044425, 145.54815139558673, 8428.5...","[8510.666243413116, 139.60396165322837, 8440.1...","[8522.268657609671, 140.52361003052246, 8452.9...","[8535.609989238463, 139.60087358359183, 8459.2...","[8543.693538597363, 136.3635159741765, 8465.12...","[8555.416489178977, 138.29708004122296, 8474.3...","[8569.926518992972, 139.0579129970643, 8482.14...",...,"[8923.702892001807, 159.2881834613647, 8836.41...","[8935.34010033835, 157.95111422237778, 8847.53...","[8940.46656641604, 155.12932330827067, 8861.91...","[8954.623459439574, 159.4531853577779, 8867.34...","[8963.531319216798, 166.59466643040668, 8880.4...","[8971.234802590348, 161.42225471763803, 8889.3...","[8981.526291116494, 162.7594343308071, 8905.46...","[8990.157367074604, 162.0370600843532, 8920.93...","[9007.540353356892, 162.55978798586568, 8926.0...","[9017.586395147311, 165.976863084922, 8941.781..."


In [25]:
from sklearn.model_selection import train_test_split

X = df_n_rounds['X'].tolist()
y = df_n_rounds['y_1'].tolist()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)

**Linear Regression**

In [26]:
from sklearn.linear_model import LinearRegression

lin_model = LinearRegression()
lin_model.fit(X_train, y_train)

LinearRegression()

In [27]:
from sklearn.metrics import mean_squared_error, r2_score

# model evaluation for training set
y_train_predict = lin_model.predict(X_train)
rmse = (np.sqrt(mean_squared_error(y_train, y_train_predict)))
r2 = r2_score(y_train, y_train_predict)

print("The model performance for training set")
print("--------------------------------------")
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))
print("\n")

# model evaluation for testing set
y_test_predict = lin_model.predict(X_test)
rmse = (np.sqrt(mean_squared_error(y_test, y_test_predict)))
r2 = r2_score(y_test, y_test_predict)

print("The model performance for testing set")
print("--------------------------------------")
print('RMSE is {}'.format(rmse))
print('R2 score is {}'.format(r2))

The model performance for training set
--------------------------------------
RMSE is 3.4216421378407977
R2 score is 0.9881815863411103


The model performance for testing set
--------------------------------------
RMSE is 3.542423187412883
R2 score is 0.9853221120870211


**Let's feed the prediction into the model to predict the next frames**

In [32]:
y_i_predict =  lin_model.predict(X)
for i in range(1, n):
    y_i_predict = lin_model.predict(y_i_predict)
    rmse = (np.sqrt(mean_squared_error(df_n_rounds['y_' + str(i+1)].tolist(), y_i_predict)))
    r2 = r2_score(df_n_rounds['y_' + str(i+1)].tolist(), y_i_predict)

    print("The model performance for y_{} -> y_{}".format(i, i+1))
    print("--------------------------------------")
    print('RMSE is {}'.format(rmse))
    print('R2 score is {}'.format(r2))
    print("\n")

The model performance for y_1 -> y_2
--------------------------------------
RMSE is 4.088886337504698
R2 score is 0.9821609173155942


The model performance for y_2 -> y_3
--------------------------------------
RMSE is 4.6096097787500705
R2 score is 0.9768303638256536


The model performance for y_3 -> y_4
--------------------------------------
RMSE is 5.030527195416445
R2 score is 0.9720078683771687


The model performance for y_4 -> y_5
--------------------------------------
RMSE is 5.394038465690987
R2 score is 0.9676808854265829


The model performance for y_5 -> y_6
--------------------------------------
RMSE is 5.702346094745274
R2 score is 0.9636946702918652


The model performance for y_6 -> y_7
--------------------------------------
RMSE is 5.978272645537575
R2 score is 0.9599106846969748


The model performance for y_7 -> y_8
--------------------------------------
RMSE is 6.218565633136309
R2 score is 0.9565880107653213


The model performance for y_8 -> y_9
-----------------

## Analyse behaviour of subsets of skyrmions (only cosider the ones in the top half, bottom half, centre)

In [29]:
top = data.copy()
bottom = data.copy()
centre = data.copy()

# drop particles that do not start in the top/bottom/centre of the 