**This notebook contains all the code for running the time-series linear regression models. It has two sections: models trained on steady-state dynamics data and models trained on transient dynamics data. The two sections use different datasets: the steady-state models are built on a 32-agent system with 3000 time steps, and the transient models are built on a 10-agent system with 250 time steps.**

**Note: The transient and steady-state datasets are different, so each must be imported from its own path in Drive.**

## **IMPORTS**

In [0]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from scipy.integrate import odeint
from matplotlib import animation
import matplotlib
import time
import sys
import math
from matplotlib import animation, rc, rcParams
from IPython.display import HTML
import time
import csv
from IPython.display import display
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
import torch.nn.functional as F
import random
import numpy as np
import matplotlib
import pandas as pd
import dill
from PIL import Image
import random
import torchvision
import torch.optim as optim
import matplotlib.pyplot as plt
import plotly.express as plotx

## **STEADY STATE BEHAVIOR MODEL**

### **SETTING UP DATA**

In [129]:
# Mount Google Drive at /content/drive so the dataset paths used below resolve.
# This import is only available inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# ENTER THE FILE PATH BY UPLOADING IT IN DRIVE OR LOCALLY
file_name = '/content/drive/My Drive/522_HW4_Files/swarm_03242020_3000steps.csv'

# Read the whole CSV into memory, one list of cell strings per row.
with open(file_name) as csvDataFile:
    my_data = [row for row in csv.reader(csvDataFile)]


def _parse_pair(cell):
    """Parse a cell such as '[1.5 2.5]' into [1.5, 2.5].

    Surrounding square brackets are stripped, the remainder is split on
    whitespace and the first two tokens are converted to float.
    """
    parts = cell.lstrip('[').rstrip(']').split()
    return [float(parts[0]), float(parts[1])]


# Cells alternate position, velocity, position, velocity, ... along each row.
# Blank rows are skipped and short rows are rejected explicitly. The previous
# bare `except: pass` silently re-appended the values parsed from the
# preceding cell whenever a row was too short.
data_rows = [(row_idx, row) for row_idx, row in enumerate(my_data) if row]
pairs_per_row = len(data_rows[0][1]) // 2 if data_rows else 0

pos_from_file = []
vel_from_file = []
for row_idx, row in data_rows:
    if len(row) < 2 * pairs_per_row:
        raise ValueError(
            "row %d of %s has %d cells, expected at least %d"
            % (row_idx, file_name, len(row), 2 * pairs_per_row)
        )
    pos_from_file.append([_parse_pair(row[2 * j]) for j in range(pairs_per_row)])
    vel_from_file.append([_parse_pair(row[2 * j + 1]) for j in range(pairs_per_row)])

In [0]:
# Stack the parsed rows into arrays indexed as [row, pair, component].
pos = np.asarray([np.asarray(row_pairs) for row_pairs in pos_from_file])
vel = np.asarray([np.asarray(row_pairs) for row_pairs in vel_from_file])

# Component 0 / 1 of every position and velocity pair.
x, y = pos[:, :, 0], pos[:, :, 1]
vx, vy = vel[:, :, 0], vel[:, :, 1]

### **CREATING TRAINING DATA SET**

In [0]:
def create_train(array,in_samples,out_samples):
    """Slice a series into sliding (input window, target window) pairs.

    Args:
        array: sequence supporting ``len()`` and slicing.
        in_samples: number of consecutive samples in each input window.
        out_samples: number of samples directly after the window that form
            the target.

    Returns:
        ``(x_train, y_train)``: two lists of equal length where
        ``x_train[i]`` is ``array[i:i+in_samples]`` and ``y_train[i]`` is the
        ``out_samples`` values that follow it.
    """
    # Number of start positions whose target window still fits completely in
    # the series. The earlier bound (len - in_samples - 1) dropped the last
    # valid pair for out_samples == 1 and produced truncated targets for
    # out_samples > 2.
    n_windows = len(array) - in_samples - out_samples + 1
    x_train = []
    y_train = []
    for start in range(max(n_windows, 0)):
        split = start + in_samples
        x_train.append(array[start:split])
        y_train.append(array[split:split + out_samples])
    return x_train,y_train

### **TRAINING A L2-NORM REGRESSION MODEL**

In [0]:
# FITTING A LINEAR REGRESSION MODEL USING THE TIME-SERIES DATA ABOVE

NUMBER_OF_AGENTS = 32
# Read the sample count from `pos`, not `x`: later cells rebind `x` to the
# predicted trajectories, so `x.shape[0]` is only right on the first run.
TOTAL_SAMPLES = pos.shape[0]
TRAIN_SAMPLES_LENGTH = 1000
TEST_SAMPLES = TOTAL_SAMPLES - TRAIN_SAMPLES_LENGTH

# Training prefix of each state component, indexed [row, agent].
posx = pos[:TRAIN_SAMPLES_LENGTH, :, 0]
posy = pos[:TRAIN_SAMPLES_LENGTH, :, 1]
velx = vel[:TRAIN_SAMPLES_LENGTH, :, 0]
vely = vel[:TRAIN_SAMPLES_LENGTH, :, 1]

# Per-agent training windows, labels and fitted weights, keyed by agent index.
x_array, x_label = {}, {}
y_array, y_label = {}, {}
vx_array, vx_label = {}, {}
vy_array, vy_label = {}, {}
solx, soly, solvx, solvy = {}, {}, {}, {}

# DEFINE THE NUMBER OF IN_SAMPLES GIVEN TO THE MODEL AND THE NUMBER OF
# OUT_SAMPLES THE MODEL HAS TO PREDICT AT ANY GIVEN INSTANCE.
# FOR EXAMPLE: THE MODEL TAKES 10 VALUES (IN_SAMPLES = 10) AND PREDICTS THE
# 11TH TO 15TH SAMPLES (OUT_SAMPLES = 5)
in_samples = 50
out_samples = 1

# (training series, window store, label store, weight store) per component.
_components = (
    (posx, x_array, x_label, solx),
    (posy, y_array, y_label, soly),
    (velx, vx_array, vx_label, solvx),
    (vely, vy_array, vy_label, solvy),
)

for a in range(NUMBER_OF_AGENTS):
    for _series, _windows, _labels, _weights in _components:
        _windows[a], _labels[a] = create_train(_series[:, a], in_samples, out_samples)
        # NOTE(review): rcond=1 makes lstsq treat every singular value smaller
        # than the largest one as zero, i.e. an effectively rank-1 fit. Kept
        # as-is to preserve existing results; confirm this is intended
        # (rcond=None selects NumPy's machine-precision cutoff).
        _weights[a] = np.linalg.lstsq(
            np.asarray(_windows[a]), np.asarray(_labels[a]), rcond=1
        )[0]

### **TESTING THE MODEL ON TEST SET**

In [0]:
# DEFINING TEST DATA
# Everything after the training prefix is held out for evaluation.
test_posx, test_posy = pos[TRAIN_SAMPLES_LENGTH:, :, 0], pos[TRAIN_SAMPLES_LENGTH:, :, 1]
test_velx, test_vely = vel[TRAIN_SAMPLES_LENGTH:, :, 0], vel[TRAIN_SAMPLES_LENGTH:, :, 1]

# Seed windows: the last `in_samples` rows of the training data, all agents.
xin, yin = posx[-in_samples:, :], posy[-in_samples:, :]
vxin, vyin = velx[-in_samples:, :], vely[-in_samples:, :]

In [0]:
# Flatten each agent's least-squares solution and stack them, giving one
# weight row per agent for every state component.
solution_x, solution_y, solution_vx, solution_vy = (
    np.asarray([np.asarray(per_agent[agent]).reshape(-1) for agent in range(NUMBER_OF_AGENTS)])
    for per_agent in (solx, soly, solvx, solvy)
)

In [0]:
def predict_next_sample(in_array,weight_vector):
    """Return the matrix product of ``weight_vector`` with ``in_array``.

    In this notebook ``weight_vector`` is meant to be of size
    (1, in_samples) and ``in_array`` of size (in_samples, 1).
    """
    return np.matmul(weight_vector, in_array)

In [0]:
# RUNNING THE LEARNT MODEL ON TEST DATA
def _rollout(weights, seed_window, n_steps):
    """Autoregressively predict ``n_steps`` samples for every agent.

    Args:
        weights: array with one weight row per agent, (n_agents, in_samples).
        seed_window: the most recent observations, (in_samples, n_agents).
        n_steps: how many future samples to generate.

    Returns:
        ``(predictions, final_window)``: ``predictions`` has shape
        (n_steps, n_agents); ``final_window`` is the input window after the
        last step (the state the original loop left behind).
    """
    window = np.array(seed_window, dtype=float)  # copy, never touch the caller's data
    predictions = np.empty((n_steps, window.shape[1]))
    for step in range(n_steps):
        # Dot each agent's weights with that agent's own column of the window.
        # Same values as diag(weights @ window) without forming the full
        # n_agents x n_agents product.
        next_row = np.einsum('ak,ka->a', weights, window)
        predictions[step] = next_row
        # Slide the window: drop the oldest row, append the new prediction.
        window = np.vstack([window[1:], next_row])
    return predictions, window


# Predictions take their width from the data rather than a hard-coded 32.
xpr, xin = _rollout(solution_x, posx[-in_samples:, :], TEST_SAMPLES)
ypr, yin = _rollout(solution_y, posy[-in_samples:, :], TEST_SAMPLES)
vxpr, vxin = _rollout(solution_vx, velx[-in_samples:, :], TEST_SAMPLES)
vypr, vyin = _rollout(solution_vy, vely[-in_samples:, :], TEST_SAMPLES)

In [0]:
# OPTIONAL: PLOT GROUND TRUTH VS PREDICTED X-VELOCITY ON TEST DATA, ONE FIGURE PER AGENT
do_you_want_to_plot = False
if do_you_want_to_plot:
    for a in range(NUMBER_OF_AGENTS):
        truth = plt.plot(test_velx[:, a], color='red', label='Ground Truth')[0]
        predicted = plt.plot(vxpr[:, a], color='blue', label='Predicted')[0]
        plt.legend(handles=[truth, predicted])
        plt.show()

### **EVALUATING THE MODEL PREDICTION USING MEAN FIELD ANALYSIS**

In [0]:
AGENT_COUNT = 32

# Predicted trajectories and the matching ground truth.
x, y = xpr, ypr
x_gt, y_gt = test_posx, test_posy

# Mean field: per-row sum over agents divided by the agent count.
mean_x, mean_y, mean_x_gt, mean_y_gt = [
    np.sum(coords, axis=1) / AGENT_COUNT for coords in (x, y, x_gt, y_gt)
]

In [140]:
# Euclidean distance between the predicted and the true mean field per sample.
mean_field_distance = np.sqrt(
    np.square(mean_x - mean_x_gt) + np.square(mean_y - mean_y_gt)
)

title = "Mean Field Error [Trained on Steady State]"
xlabel = "Samples in Test Data"
ylabel = "Distance between Predicted and True Mean Field"

ts = list(range(len(mean_field_distance)))
df = pd.DataFrame(list(zip(ts, mean_field_distance)), columns=[xlabel, ylabel])

fig = plotx.line(df, x=xlabel, y=ylabel)
# Centered title, fixed 500x500 canvas with explicit margins, white background.
fig.update_layout(
    title=dict(text=title, y=0.95, x=0.5, xanchor='center', yanchor='top'),
    autosize=False,
    width=500,
    height=500,
    margin=dict(l=50, r=50, b=100, t=100, pad=4),
    paper_bgcolor="White",
)
fig.update_yaxes(range=[0, 6])
fig.show()

### **GENERATING A VIDEO OF SWARM ON TEST SET**

In [0]:
############ REPLAYING FILES TO GENERATE VIDEO OF SWARM ON TEST DATA ############
from matplotlib.lines import Line2D
import matplotlib.lines as mlines

# Proxy artists that exist only to provide the two legend entries.
blue_dot = mlines.Line2D([], [], color='blue', marker='.', linestyle='None',
                          markersize=10, label='Prediction')
red_dot = mlines.Line2D([], [], color='red', marker='.', linestyle='None',
                          markersize=10, label='Ground Truth')
pred_len = len(xpr)
print("Predicted number of time steps: ", pred_len)

x = xpr
y = ypr

x_gt = test_posx
y_gt = test_posy

# Animate at most 700 frames, and never index past the available data.
n_frames = min(700, len(x), len(x_gt))
n_agents = x.shape[1]

fig = plt.figure()
fig.set_size_inches(5, 5)
ax = plt.axes(xlim=(2,8),ylim=(2,8))
# Attach the legend once, to the animation axes. Calling plt.legend before the
# figure existed created a stray empty figure, and calling it inside animate()
# rebuilt the legend on every frame.
ax.legend(handles=[blue_dot, red_dot])

# One segment per agent joining its ground-truth and predicted position.
g = [
    ax.plot([x_gt[0][j], x[0][j]], [y_gt[0][j], y[0][j]],
            c='mediumspringgreen', marker='.')[0]
    for j in range(n_agents)
]
d, = ax.plot(x[0],y[0], c='blue',marker='.',linestyle='None')
e, = ax.plot(x_gt[0],y_gt[0], c='red',marker='.',linestyle='None')


def animate(i):
    """Move every artist to frame ``i`` and return all artists that changed."""
    for j in range(n_agents):
        g[j].set_data([x_gt[i][j], x[i][j]],[y_gt[i][j], y[i][j]])

    d.set_data(x[i],y[i])
    e.set_data(x_gt[i],y_gt[i])
    # With blit=True only the returned artists are redrawn, so the segments
    # must be returned too; keep them first so the dots stay on top.
    return [*g, d, e]

# call the animator.  blit=True means only re-draw the parts that have changed.
anim = animation.FuncAnimation(fig, animate, frames=n_frames, interval=20, blit=True)
anim.save('animation.mp4',writer=animation.FFMpegWriter(fps=10))
rc('animation', html='jshtml')
anim

## **TRANSIENT BEHAVIOR MODEL**

### **SETTING UP DATA**

In [0]:
# READ THE TRANSIENT DATA FROM THE CSV FILE IN DRIVE
file_name = "/content/drive/My Drive/522_PROJECT/diff_init_04112020/10_agent_2.csv"

# Read the whole CSV into memory, one list of cell strings per row.
with open(file_name) as csvDataFile:
    my_data = [row for row in csv.reader(csvDataFile)]


def _parse_pair(cell):
    """Parse a cell such as '[1.5 2.5]' into [1.5, 2.5].

    Surrounding square brackets are stripped, the remainder is split on
    whitespace and the first two tokens are converted to float.
    """
    parts = cell.lstrip('[').rstrip(']').split()
    return [float(parts[0]), float(parts[1])]


# Cells alternate position, velocity, position, velocity, ... along each row.
# Blank rows are skipped and short rows are rejected explicitly. The previous
# bare `except: pass` silently re-appended the values parsed from the
# preceding cell whenever a row was too short.
data_rows = [(row_idx, row) for row_idx, row in enumerate(my_data) if row]
pairs_per_row = len(data_rows[0][1]) // 2 if data_rows else 0

pos_from_file = []
vel_from_file = []
for row_idx, row in data_rows:
    if len(row) < 2 * pairs_per_row:
        raise ValueError(
            "row %d of %s has %d cells, expected at least %d"
            % (row_idx, file_name, len(row), 2 * pairs_per_row)
        )
    pos_from_file.append([_parse_pair(row[2 * j]) for j in range(pairs_per_row)])
    vel_from_file.append([_parse_pair(row[2 * j + 1]) for j in range(pairs_per_row)])

In [0]:
# Stack the parsed rows into arrays indexed as [row, pair, component].
pos = np.asarray([np.asarray(row_pairs) for row_pairs in pos_from_file])
vel = np.asarray([np.asarray(row_pairs) for row_pairs in vel_from_file])

# Component 0 / 1 of every position and velocity pair.
x, y = pos[:, :, 0], pos[:, :, 1]
vx, vy = vel[:, :, 0], vel[:, :, 1]

# Number of rows that were loaded.
TOTAL_SAMPLES = len(x)

### **CREATING TRAINING DATA SET**

In [0]:
def create_train(array,in_samples,out_samples):
    """Slice a series into sliding (input window, target window) pairs.

    Args:
        array: sequence supporting ``len()`` and slicing.
        in_samples: number of consecutive samples in each input window.
        out_samples: number of samples directly after the window that form
            the target.

    Returns:
        ``(x_train, y_train)``: two lists of equal length where
        ``x_train[i]`` is ``array[i:i+in_samples]`` and ``y_train[i]`` is the
        ``out_samples`` values that follow it.
    """
    # Number of start positions whose target window still fits completely in
    # the series. The earlier bound (len - in_samples - 1) dropped the last
    # valid pair for out_samples == 1 and produced truncated targets for
    # out_samples > 2.
    n_windows = len(array) - in_samples - out_samples + 1
    x_train = []
    y_train = []
    for start in range(max(n_windows, 0)):
        split = start + in_samples
        x_train.append(array[start:split])
        y_train.append(array[split:split + out_samples])
    return x_train,y_train

### **TRAINING A L2-NORM REGRESSION MODEL**

In [0]:
NUMBER_OF_AGENTS = 10
# SPLIT INDEX: 0: TRAIN_SAMPLES_LENGTH is for TRAINING AND TRAIN_SAMPLES_LENGTH: end is for testing
TRAIN_SAMPLES_LENGTH = 150
# Take the sample count from the loaded data instead of hard-coding 250, so
# the number of predicted steps always matches the length of the held-out
# ground truth (pos[TRAIN_SAMPLES_LENGTH:]).
TOTAL_SAMPLES = pos.shape[0]
TEST_SAMPLES_LENGTH = TOTAL_SAMPLES - TRAIN_SAMPLES_LENGTH

# Training prefix of each state component, indexed [row, agent].
posx = pos[:TRAIN_SAMPLES_LENGTH, :, 0]
posy = pos[:TRAIN_SAMPLES_LENGTH, :, 1]
velx = vel[:TRAIN_SAMPLES_LENGTH, :, 0]
vely = vel[:TRAIN_SAMPLES_LENGTH, :, 1]

# Per-agent training windows, labels and fitted weights, keyed by agent index.
x_array, x_label = {}, {}
y_array, y_label = {}, {}
vx_array, vx_label = {}, {}
vy_array, vy_label = {}, {}
solx, soly, solvx, solvy = {}, {}, {}, {}

# Window length fed to the model and number of samples it predicts per step.
in_samples = 50
out_samples = 1

# (training series, window store, label store, weight store) per component.
_components = (
    (posx, x_array, x_label, solx),
    (posy, y_array, y_label, soly),
    (velx, vx_array, vx_label, solvx),
    (vely, vy_array, vy_label, solvy),
)

for a in range(NUMBER_OF_AGENTS):
    for _series, _windows, _labels, _weights in _components:
        _windows[a], _labels[a] = create_train(_series[:, a], in_samples, out_samples)
        # NOTE(review): rcond=1 makes lstsq treat every singular value smaller
        # than the largest one as zero, i.e. an effectively rank-1 fit. Kept
        # as-is to preserve existing results; confirm this is intended
        # (rcond=None selects NumPy's machine-precision cutoff).
        _weights[a] = np.linalg.lstsq(
            np.asarray(_windows[a]), np.asarray(_labels[a]), rcond=1
        )[0]

### **TESTING THE MODEL ON TEST SET**

In [0]:
# Everything after the training prefix is held out for evaluation.
test_posx, test_posy = pos[TRAIN_SAMPLES_LENGTH:, :, 0], pos[TRAIN_SAMPLES_LENGTH:, :, 1]
test_velx, test_vely = vel[TRAIN_SAMPLES_LENGTH:, :, 0], vel[TRAIN_SAMPLES_LENGTH:, :, 1]

# Seed windows: the last `in_samples` rows of the training data, all agents.
xin, yin = posx[-in_samples:, :], posy[-in_samples:, :]
vxin, vyin = velx[-in_samples:, :], vely[-in_samples:, :]

In [0]:
# Flatten each agent's least-squares solution and stack them, giving one
# weight row per agent for every state component.
solution_x, solution_y, solution_vx, solution_vy = (
    np.asarray([np.asarray(per_agent[agent]).reshape(-1) for agent in range(NUMBER_OF_AGENTS)])
    for per_agent in (solx, soly, solvx, solvy)
)

In [0]:
def predict_next_sample(in_array,weight_vector):
    """Return the matrix product of ``weight_vector`` with ``in_array``.

    In this notebook ``weight_vector`` is meant to be of size
    (1, in_samples) and ``in_array`` of size (in_samples, 1).
    """
    return np.matmul(weight_vector, in_array)

In [0]:
def _rollout(weights, seed_window, n_steps):
    """Autoregressively predict ``n_steps`` samples for every agent.

    Args:
        weights: array with one weight row per agent, (n_agents, in_samples).
        seed_window: the most recent observations, (in_samples, n_agents).
        n_steps: how many future samples to generate.

    Returns:
        ``(predictions, final_window)``: ``predictions`` has shape
        (n_steps, n_agents); ``final_window`` is the input window after the
        last step (the state the original loop left behind).
    """
    window = np.array(seed_window, dtype=float)  # copy, never touch the caller's data
    predictions = np.empty((n_steps, window.shape[1]))
    for step in range(n_steps):
        # Dot each agent's weights with that agent's own column of the window.
        # Same values as diag(weights @ window) without forming the full
        # n_agents x n_agents product.
        next_row = np.einsum('ak,ka->a', weights, window)
        predictions[step] = next_row
        # Slide the window: drop the oldest row, append the new prediction.
        window = np.vstack([window[1:], next_row])
    return predictions, window


# Roll every state component forward over the held-out span, starting from
# the last `in_samples` training rows.
xpr, xin = _rollout(solution_x, posx[-in_samples:, :], TEST_SAMPLES_LENGTH)
ypr, yin = _rollout(solution_y, posy[-in_samples:, :], TEST_SAMPLES_LENGTH)
vxpr, vxin = _rollout(solution_vx, velx[-in_samples:, :], TEST_SAMPLES_LENGTH)
vypr, vyin = _rollout(solution_vy, vely[-in_samples:, :], TEST_SAMPLES_LENGTH)

In [0]:
# OPTIONAL: PLOT GROUND TRUTH VS PREDICTED X-VELOCITY ON TEST DATA, ONE FIGURE PER AGENT
do_you_want_to_plot = False
if do_you_want_to_plot:
    for a in range(NUMBER_OF_AGENTS):
        truth = plt.plot(test_velx[:, a], color='red', label='Ground Truth')[0]
        predicted = plt.plot(vxpr[:, a], color='blue', label='Predicted')[0]
        plt.legend(handles=[truth, predicted])
        plt.show()

### **EVALUATING THE MODEL PREDICTION USING MEAN FIELD ANALYSIS**

In [0]:
AGENT_COUNT = 10

# Predicted trajectories and the matching ground truth.
x, y = xpr, ypr
x_gt, y_gt = test_posx, test_posy

# Mean field: per-row sum over agents divided by the agent count.
mean_x, mean_y, mean_x_gt, mean_y_gt = [
    np.sum(coords, axis=1) / AGENT_COUNT for coords in (x, y, x_gt, y_gt)
]

In [152]:
# Euclidean distance between the predicted and the true mean field per sample.
mean_field_distance = np.sqrt(
    np.square(mean_x - mean_x_gt) + np.square(mean_y - mean_y_gt)
)

title = "Mean Field Error [Trained on Transients]"
xlabel = "Samples in Test Data"
ylabel = "Distance between Predicted and True Mean Field"

ts = list(range(len(mean_field_distance)))
df = pd.DataFrame(list(zip(ts, mean_field_distance)), columns=[xlabel, ylabel])

fig = plotx.line(df, x=xlabel, y=ylabel)
# Centered title, fixed 500x500 canvas with explicit margins, white background.
fig.update_layout(
    title=dict(text=title, y=0.95, x=0.5, xanchor='center', yanchor='top'),
    autosize=False,
    width=500,
    height=500,
    margin=dict(l=50, r=50, b=100, t=100, pad=4),
    paper_bgcolor="White",
)
fig.update_yaxes(range=[0, 6])
fig.show()

### **GENERATING A VIDEO OF SWARM ON TEST SET**

In [0]:
############ REPLAYING FILES TO GENERATE VIDEO OF SWARM ON TEST DATA  ############
from matplotlib.lines import Line2D
import matplotlib.lines as mlines

# Proxy artists that exist only to provide the two legend entries.
blue_dot = mlines.Line2D([], [], color='blue', marker='.', linestyle='None',
                          markersize=10, label='Prediction')
red_dot = mlines.Line2D([], [], color='red', marker='.', linestyle='None',
                          markersize=10, label='Ground Truth')
pred_len = len(xpr)
print("Predicted number of time steps: ", pred_len)

x = xpr
y = ypr

x_gt = test_posx
y_gt = test_posy

# Animate the test span, but never index past the available data.
n_frames = min(TEST_SAMPLES_LENGTH, len(x), len(x_gt))

fig = plt.figure()
fig.set_size_inches(5, 5)
ax = plt.axes(xlim=(2,8),ylim=(2,8))
# Attach the legend once, to the animation axes. Calling plt.legend before the
# figure existed created a stray empty figure, and calling it inside animate()
# rebuilt the legend on every frame.
ax.legend(handles=[blue_dot, red_dot])

# One segment per agent joining its ground-truth and predicted position.
g = [
    ax.plot([x_gt[0][j], x[0][j]], [y_gt[0][j], y[0][j]],
            c='mediumspringgreen', marker='.')[0]
    for j in range(NUMBER_OF_AGENTS)
]
d, = ax.plot(x[0],y[0], c='blue',marker='.',linestyle='None')
e, = ax.plot(x_gt[0],y_gt[0], c='red',marker='.',linestyle='None')


def animate(i):
    """Move every artist to frame ``i`` and return all artists that changed."""
    for j in range(NUMBER_OF_AGENTS):
        g[j].set_data([x_gt[i][j], x[i][j]],[y_gt[i][j], y[i][j]])

    d.set_data(x[i],y[i])
    e.set_data(x_gt[i],y_gt[i])
    # With blit=True only the returned artists are redrawn, so the segments
    # must be returned too; keep them first so the dots stay on top.
    return [*g, d, e]

# call the animator.  blit=True means only re-draw the parts that have changed.
anim = animation.FuncAnimation(fig, animate, frames=n_frames, interval=20, blit=True)
anim.save('compare_700steps.mp4',writer=animation.FFMpegWriter(fps=10))
rc('animation', html='jshtml')
anim