### Acknowledgement

While building the final model used for our competition results, we consulted the [TensorFlow Transformer tutorial](https://www.tensorflow.org/text/tutorials/transformer) and PyTorch tutorials with practical examples, such as [Language Modeling](https://pytorch.org/tutorials/beginner/transformer_tutorial.html) and [Language Translation](https://pytorch.org/tutorials/beginner/translation_transformer.html).

### Setup

In [5]:
import torch
from torch.utils.data import Dataset, DataLoader
import os, os.path 
import numpy 
import pickle
from glob import glob
import pandas as pd
import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.optim.lr_scheduler import ReduceLROnPlateau
import matplotlib.pyplot as plt
import time
from tqdm.notebook import tqdm
import math

"""
    number of trajectories in each city
    # austin --  train: 43041 test: 6325 
    # miami -- train: 55029 test:7971
    # pittsburgh -- train: 43544 test: 6361
    # dearborn -- train: 24465 test: 3671
    # washington-dc -- train: 25744 test: 3829
    # palo-alto -- train:  11993 test:1686

    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds
    
"""

# Directory prefix that the data loaders prepend to "<split>/<city>_inputs".
ROOT_PATH = "./"
# City names and split names accepted by the dataset loaders.
cities = ["austin", "miami", "pittsburgh", "dearborn", "washington-dc", "palo-alto"]
splits = ["train", "test"]

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


### Dataset

In [2]:
from glob import glob
import pickle
import numpy as np

# Same values as the constants in the setup cell; repeated here so this cell
# can be run on its own.
ROOT_PATH = "./"

cities = ["austin", "miami", "pittsburgh", "dearborn", "washington-dc", "palo-alto"]
splits = ["train", "test"]

def transform_data(np_data, bch_id):
    """Append a curvature feature to one trajectory.

    Velocities and accelerations are finite differences (``np.gradient`` with
    unit sample spacing) of the positions, and the curvature is
    ``|x'' * y' - x' * y''| / speed ** 3``.

    Args:
        np_data: Indexable collection of trajectories; ``np_data[bch_id]`` must
            be convertible to a two-column (x, y) table.
        bch_id: Index of the trajectory to transform.

    Returns:
        Array of shape ``(num_steps, 3)`` with columns x, y, curvature.
        Samples whose speed is zero get curvature 0 (the original expression
        evaluated 0/0 there and produced NaN features).
    """
    df = pd.DataFrame(np_data[bch_id], columns=['x', 'y'])
    df['x_vel'] = np.gradient(df.x)
    df['y_vel'] = np.gradient(df.y)
    df['vel'] = np.sqrt(df.x_vel**2 + df.y_vel**2)
    df['x_acc'] = np.gradient(df.x_vel)
    df['y_acc'] = np.gradient(df.y_vel)
    df['acc'] = np.gradient(df.vel)

    cross = np.abs(df.x_acc * df.y_vel - df.x_vel * df.y_acc).to_numpy()
    speed_cubed = (df.vel ** 3).to_numpy()
    # Divide only where the denominator is non-zero; a stationary sample has
    # no defined curvature, so it keeps the neutral value 0.
    curvature = np.zeros_like(cross)
    np.divide(cross, speed_cubed, out=curvature, where=speed_cubed > 0)
    df['curvature'] = curvature

    out = df[['x', 'y', 'curvature']]
    return out.to_numpy()


def rotate(X, startpoint, endpoint, default_angle):
    """Build one 2x2 rotation matrix per trajectory.

    The heading of each trajectory is the direction of the displacement from
    time step ``startpoint`` to time step ``endpoint``. The returned matrix
    rotates by ``default_angle - heading`` degrees, so applying it as
    ``points @ matrix.T`` turns that displacement to ``default_angle``.

    Args:
        X: Array indexed as ``X[trajectory, step, coordinate]`` with x at
            coordinate 0 and y at coordinate 1.
        startpoint: Time-step index where the heading vector starts.
        endpoint: Time-step index where the heading vector ends.
        default_angle: Target heading in degrees.

    Returns:
        Array of shape ``(num_trajectories, 2, 2)``.
    """
    tail = X[:, startpoint]
    head = X[:, endpoint]

    # Heading in degrees, wrapped from (-180, 180] into [0, 360).
    heading = np.degrees(np.arctan2(head[:, 1] - tail[:, 1],
                                    head[:, 0] - tail[:, 0]))
    heading = np.where(heading < 0, heading + 360, heading)

    # Signed correction that brings the heading onto default_angle.
    turn = np.deg2rad(-1 * (heading - default_angle))
    cos_t = np.cos(turn)
    sin_t = np.sin(turn)

    # Row-major [[cos, -sin], [sin, cos]] for every trajectory.
    flat = np.stack([cos_t, -sin_t, sin_t, cos_t], axis=-1)
    return flat.reshape(-1, 2, 2)


def get_city_trajectories(city="palo-alto", split="train", normalized=False):
    """Load the raw pickled trajectories of one city.

    Files are read from ``ROOT_PATH + split + "/" + city + "_inputs"`` and,
    for the training split only, the matching ``"_outputs"`` file.

    Args:
        city: City name used in the file name.
        split: Split directory name; outputs are loaded only for ``"train"``.
        normalized: Accepted for signature compatibility; not used here.

    Returns:
        ``(inputs, outputs)`` as NumPy arrays; ``outputs`` is ``None`` unless
        ``split == "train"``.
    """
    # NOTE: pickle can execute arbitrary code while loading; only point this
    # at dataset files you trust.
    prefix = ROOT_PATH + split + "/" + city

    # Context managers close the files instead of leaking the handles.
    with open(prefix + "_inputs", "rb") as f_in:
        inputs = np.asarray(pickle.load(f_in))

    outputs = None
    if split == "train":
        with open(prefix + "_outputs", "rb") as f_out:
            outputs = np.asarray(pickle.load(f_out))

    return inputs, outputs


class ArgoverseDataset(Dataset):
    """Dataset of Argoverse trajectories for a single city.

    Args:
        city: One of the names in the module-level ``cities`` list.
        split: ``"train"`` or ``"test"``; outputs exist only for ``"train"``.
        transform: When truthy, every input trajectory is passed through
            ``transform_data`` (which appends a curvature column).
        normalized: When truthy, each trajectory is translated so its first
            input point becomes the origin and is then rotated with the
            matrices returned by ``rotate(inputs, 0, 49, 30)``.

    Attributes set while loading:
        inputs, outputs: Loaded (and optionally normalised) arrays.
        start_pos: First raw input point of every trajectory.
        rotate_matrix: Per-trajectory rotation matrices; invert
            ``rotate_matrix[i].T`` to map predictions back.
        n_max: Per-trajectory maximum of the normalised inputs (only set
            when ``normalized`` is truthy).
    """
    def __init__(self, city: str, split: str, transform=None, normalized=False):
        super(ArgoverseDataset, self).__init__()
        self.transform = transform
        self.normalized = normalized
        self.split = split

        self.inputs, self.outputs = self.get_city_trajectories(city=city, split=split)

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        # Training samples are (input, target) pairs; test samples are inputs only.
        if self.split == 'train':
            return (self.inputs[idx], self.outputs[idx])
        return self.inputs[idx]

    @staticmethod
    def _load_pickle(path):
        """Load one pickled trajectory file as a NumPy array, closing the file."""
        # NOTE: pickle can execute arbitrary code; only load trusted files.
        with open(path, "rb") as handle:
            return np.asarray(pickle.load(handle))

    @staticmethod
    def _normalize(trajectories, start_pos, rotation):
        """Translate then rotate every trajectory in place."""
        trajectories -= start_pos[:, np.newaxis, :]
        # Batched equivalent of trajectories[i] @ rotation[i].T for every i.
        trajectories[...] = np.matmul(trajectories, np.transpose(rotation, (0, 2, 1)))

    def get_city_trajectories(self, city="palo-alto", split="train"):
        """Load, normalise and optionally augment the trajectories of one city.

        Returns:
            ``(inputs, outputs)``; ``outputs`` is ``None`` unless
            ``split == "train"``.
        """
        assert city in cities and split in splits

        prefix = ROOT_PATH + split + "/" + city
        inputs = self._load_pickle(prefix + "_inputs")

        # Keep the raw start point and heading correction so predictions can
        # later be mapped back to the original coordinate frame.
        start_pos = inputs[:, 0, :].copy()
        rotate_factor = rotate(inputs, 0, 49, 30)

        if self.normalized:
            self._normalize(inputs, start_pos, rotate_factor)
            self.n_max = inputs.max(axis=1)

        outputs = None
        if split == "train":
            outputs = self._load_pickle(prefix + "_outputs")
            if self.normalized:
                # Outputs share the frame of their own input trajectory.
                self._normalize(outputs, start_pos, rotate_factor)

        # Add curvature as a feature for every split. This used to sit inside
        # the "train" branch, so test inputs never received the extra column.
        if self.transform:
            inputs = np.array([transform_data(inputs, i) for i in range(len(inputs))])

        self.start_pos = start_pos
        self.rotate_matrix = rotate_factor  # np.linalg.inv(rot[i].T) to reverse back

        return inputs, outputs

###  Transformer Model

In [3]:
class MultiHeadAttention(nn.Module):
    """Multi-head self-attention in which every head has the full width ``D``.

    Queries, keys and values are projected from ``D`` to ``D * H`` features,
    split into ``H`` heads of width ``D``, attended with scaled dot-product
    attention, and projected back from ``D * H`` to ``D``.
    """
    def __init__(self, D, H):
        super().__init__()
        self.H = H  # number of heads
        self.D = D  # width of the model and of each head

        self.wq = nn.Linear(D, D * H)
        self.wk = nn.Linear(D, D * H)
        self.wv = nn.Linear(D, D * H)

        self.dense = nn.Linear(D * H, D)

    def concat_heads(self, x):
        """Merge ``(B, H, S, D)`` heads back into ``(B, S, H * D)``."""
        batch, heads, seq, width = x.shape
        return x.transpose(1, 2).reshape(batch, seq, heads * width)

    def split_heads(self, x):
        """Split ``(B, S, H * D)`` features into ``(B, H, S, D)`` heads."""
        batch, seq, _ = x.shape
        return x.reshape(batch, seq, self.H, self.D).transpose(1, 2)

    def forward(self, x, mask):
        """Return ``(output, attention_weights)`` for input ``x``.

        ``mask`` is either ``None`` or a tensor that broadcasts against the
        ``(B, H, S, S)`` score tensor; entries equal to 1 are suppressed.
        """
        q, k, v = (self.split_heads(proj(x))
                   for proj in (self.wq, self.wk, self.wv))

        scores = torch.matmul(q, k.transpose(-1, -2)) / math.sqrt(self.D)

        # A large negative bias drives masked positions to ~0 after softmax.
        if mask is not None:
            scores = scores + mask * -1e9

        attention_weights = torch.softmax(scores, dim=-1)
        context = torch.matmul(attention_weights, v)
        output = self.dense(self.concat_heads(context))

        return output, attention_weights

In [6]:
# Positional encodings
def get_angles(pos, i, D):
    """Return the positional-encoding angles ``pos / 10000 ** (2 * (i // 2) / D)``.

    ``pos`` and ``i`` are broadcast against each other; consecutive index
    pairs ``(2k, 2k + 1)`` share the same rate.
    """
    exponent = (2 * (i // 2)) / np.float32(D)
    angle_rates = 1 / np.power(10000, exponent)
    return pos * angle_rates


def positional_encoding(D, position=60, dim=3, device=device):
    """Build the sinusoidal positional-encoding table as a tensor.

    Even feature columns hold the sine and odd columns the cosine of the
    angles returned by ``get_angles``.

    Args:
        D: Number of feature columns.
        position: Number of positions (rows) in the table.
        dim: Rank of the returned tensor; 3 gives shape ``(1, position, D)``
            and 4 gives ``(1, 1, position, D)``.
        device: Device the tensor is created on.

    Returns:
        Tensor holding the encoding table with the requested leading axes.

    Raises:
        ValueError: If ``dim`` is neither 3 nor 4 (previously this surfaced
            as an unbound-local error on the return line).
    """
    if dim not in (3, 4):
        raise ValueError(f"dim must be 3 or 4, got {dim!r}")

    angle_rads = get_angles(np.arange(position)[:, np.newaxis],
                            np.arange(D)[np.newaxis, :],
                            D)
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])

    # Prepend one broadcast axis for dim == 3 and two for dim == 4.
    pos_encoding = angle_rads[(np.newaxis,) * (dim - 2)]
    return torch.tensor(pos_encoding, device=device)

In [7]:
def create_look_ahead_mask(size, device=device):
    """Return a ``(size, size)`` mask with ones strictly above the diagonal.

    Entry ``[i, j]`` is 1 when ``j > i`` and 0 otherwise.
    """
    return torch.triu(torch.ones((size, size), device=device), diagonal=1)

In [8]:
class TransformerLayer(nn.Module):
    """One transformer block: self-attention followed by a two-layer MLP.

    Each sub-layer is followed by dropout, a residual connection and a
    LayerNorm applied to the sum.
    """
    def __init__(self, D, H, hidden_mlp_dim, dropout_rate):
        super().__init__()
        self.dropout_rate = dropout_rate
        self.mlp_hidden = nn.Linear(D, hidden_mlp_dim)
        self.mlp_out = nn.Linear(hidden_mlp_dim, D)
        self.layernorm1 = nn.LayerNorm(D, eps=1e-9)
        self.layernorm2 = nn.LayerNorm(D, eps=1e-9)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.dropout2 = nn.Dropout(dropout_rate)

        self.mha = MultiHeadAttention(D, H)

    def forward(self, x, look_ahead_mask):
        """Return ``(output, attention_weights)`` for input ``x``."""
        # Attention sub-layer with residual connection.
        attended, attn_weights = self.mha(x, look_ahead_mask)
        after_attention = self.layernorm1(self.dropout1(attended) + x)

        # Position-wise MLP sub-layer with residual connection.
        hidden = torch.relu(self.mlp_hidden(after_attention))
        projected = self.dropout2(self.mlp_out(hidden))
        output = self.layernorm2(projected + after_attention)

        return output, attn_weights

In [9]:
class Transformer(nn.Module):
    '''
    Transformer encoder that maps an input trajectory to a flat prediction.

    The input is projected per time step to width ``D``, scaled by
    ``sqrt(D)``, given positional encodings, passed through ``num_layers``
    ``TransformerLayer`` blocks, flattened over time and projected to
    ``out_features`` values.

    Args:
        num_layers: Number of ``TransformerLayer`` blocks.
        D: Model width.
        H: Number of attention heads per block.
        hidden_mlp_dim: Hidden width of all MLPs.
        inp_features: Number of features per input time step.
        out_features: Size of the flat prediction vector.
        dropout_rate: Dropout probability.
        batch_size: Stored on the instance; not used by the methods here.
        kernel_size: Accepted for signature compatibility; not used here.

    Note:
        The first output layer is ``Linear(50 * D, ...)``, so ``forward``
        expects sequences of exactly 50 time steps.
    '''
    def __init__(self, num_layers, D, H, hidden_mlp_dim, inp_features,
                 out_features, dropout_rate, batch_size, kernel_size):
        super(Transformer, self).__init__()
        self.batch_size = batch_size
        self.sqrt_D = torch.tensor(math.sqrt(D))
        self.num_layers = num_layers
        self.input_projection = nn.Sequential(
            nn.Linear(inp_features, hidden_mlp_dim),
            nn.LeakyReLU(),
            nn.Linear(hidden_mlp_dim, hidden_mlp_dim),
            nn.LeakyReLU(),
            nn.Linear(hidden_mlp_dim, hidden_mlp_dim),
            nn.LeakyReLU(),
            nn.Linear(hidden_mlp_dim, D),
            nn.LeakyReLU()
        )

        self.output_projection = nn.Sequential(
            nn.Linear(50*D, hidden_mlp_dim),
            nn.LeakyReLU(),
            nn.Linear(hidden_mlp_dim, hidden_mlp_dim),
            nn.LeakyReLU(),
            nn.Linear(hidden_mlp_dim, hidden_mlp_dim),
            nn.LeakyReLU(),
            nn.Linear(hidden_mlp_dim, hidden_mlp_dim),
            nn.LeakyReLU(),
            nn.Linear(hidden_mlp_dim, out_features)
        )

        self.pos_encoding = positional_encoding(D)
        self.dec_layers = nn.ModuleList([TransformerLayer(D, H, hidden_mlp_dim,
                                        dropout_rate=dropout_rate
                                       ) for _ in range(num_layers)])
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x, mask):
        """Return ``(prediction, attention_weights)``.

        Args:
            x: Tensor of shape ``(B, S, inp_features)``.
            mask: Attention mask forwarded to every layer (or ``None``).

        Returns:
            ``prediction`` of shape ``(B, out_features)`` and a dict mapping
            ``'decoder_layer<k>'`` to the attention weights of layer ``k``.
        """
        B, S, _ = x.shape
        attention_weights = {}

        x = self.input_projection(x)

        # Kept in place on purpose: pos_encoding is built from a NumPy array
        # (presumably float64), and an in-place add keeps x's own dtype.
        x *= self.sqrt_D
        x += self.pos_encoding[:, :S, :]

        x = self.dropout(x)

        for i, layer in enumerate(self.dec_layers):
            x, block = layer(x=x, look_ahead_mask=mask)
            attention_weights['decoder_layer{}'.format(i + 1)] = block

        # Flatten time and features: the first output layer is
        # Linear(50 * D, ...) and cannot consume a (B, S, D) tensor.
        x = self.output_projection(x.reshape(B, -1))

        return x, attention_weights

    def auto_regressor(self, x, mask, step):
        """Roll the model forward over sliding 50-step windows.

        The first prediction is made from ``x`` as given. Each later window
        starts ``step`` samples further along the concatenated sequence, is
        re-normalised (translated to its first point and rotated with
        ``rotate(window, 0, 9, 30)``), predicted, and mapped back.

        Returns:
            ``(predictions, attention)`` where ``predictions`` holds every
            time step after the first 50, flattened per sample, and
            ``attention`` is the weight dict of the last forward pass.
        """
        B = x.shape[0]
        new_inputs = torch.clone(x)
        # `attention` is bound here so it exists even if the loop never runs.
        temp_pred, attention = self.forward(new_inputs, mask)
        temp_pred = temp_pred.reshape(B, -1, 2)
        new_inputs = torch.cat((new_inputs, temp_pred), 1)

        for idx in range(step, 60, step):
            train_inputs = new_inputs[:, idx:idx+50, :]

            starting_pos = torch.unsqueeze(train_inputs[:, 0, :], dim=1)
            Q = torch.from_numpy(
                rotate(train_inputs.cpu().detach().numpy(), 0, 9, 30)
            ).to(device=x.device, dtype=x.dtype)
            trans_inputs = torch.matmul((train_inputs - starting_pos),
                                        torch.transpose(Q, 1, 2))

            # Feed the normalised window: the prediction is mapped back with
            # Q and starting_pos below, which is only valid if the model saw
            # the normalised coordinates (the raw window was passed before).
            temp_pred, attention = self.forward(trans_inputs, mask)
            temp_pred = temp_pred.reshape(B, -1, 2)
            temp_pred = (torch.matmul(temp_pred, Q) + starting_pos)
            new_inputs = torch.cat((new_inputs, temp_pred), 1)

        return new_inputs[:, 50:].reshape(B, -1), attention

In [17]:
def make_pred(test_loader, batch_sz, model, dataset=None):
    '''
    Predict every trajectory in ``test_loader`` in original coordinates.

    Remember to use test_dataset stats, NOT train_dataset.

    Args:
        test_loader: Unshuffled loader yielding input batches in dataset order.
        batch_sz: Nominal batch size; only used to report a shorter last batch.
        model: Module called as ``model(X, mask)`` whose first return value
            holds the flat predictions.
        dataset: Object providing ``rotate_matrix``, ``start_pos`` and
            ``len()`` for the samples behind ``test_loader``. Defaults to the
            notebook-level ``test_dataset`` global, as before.

    Returns:
        Array of shape ``(len(dataset), -1)`` with one flat prediction per row.
    '''
    if dataset is None:
        dataset = test_dataset

    count_row = 0
    out = []

    # Predict in eval mode (dropout off) without autograd bookkeeping, then
    # restore whatever mode the caller had the model in.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for X in test_loader:
                if len(X) != batch_sz:
                    print(len(X))

                # The short final batch is run as-is; zero-padding it to
                # batch_sz is unnecessary and hard-coded the (50, 2) shape.
                X = X.to(device).float()
                mask = create_look_ahead_mask(X.shape[1])

                pred = model(X, mask)[0].reshape(len(X), -1, 2).cpu().numpy()

                for i in range(len(pred)):
                    # Undo the dataset normalisation: inverse rotation first,
                    # then translate back to the original starting position.
                    rotation = dataset.rotate_matrix[count_row].T
                    pred[i] = pred[i] @ np.linalg.inv(rotation)
                    pred[i] = pred[i] + dataset.start_pos[count_row, :]

                    out.append(pred[i])
                    count_row += 1
    finally:
        model.train(was_training)

    out = np.array(out).reshape(len(dataset), -1)

    return out


### Fine Tune

In [11]:
def fine_tune(city, model_path, n_epochs, learning_rate, factor, patience, device=device, batch_size=32):
    """Fine-tune a saved model on the training data of one city.

    The city's normalised training set is split 90/10 into training and
    validation subsets. The model is trained with Adam and MSE loss, the
    learning rate is reduced on validation plateaus, and the model with the
    lowest validation loss is saved to ``fine_model_<city>.pt``.

    Args:
        city: City name passed to ``ArgoverseDataset``.
        model_path: Path of the pickled model to load with ``torch.load``.
        n_epochs: Number of epochs to run.
        learning_rate: Initial Adam learning rate.
        factor: ``ReduceLROnPlateau`` multiplicative factor.
        patience: ``ReduceLROnPlateau`` patience in epochs.
        device: Device that the model and batches are placed on.
        batch_size: Batch size of both loaders (incomplete batches dropped).

    Returns:
        ``(model, (avg_train_loss, avg_val_loss))`` with one loss per epoch.
    """
    # create data
    dataset = ArgoverseDataset(city = city, split = 'train', transform=False, normalized=True)
    train_sz = int(len(dataset) * 0.9)
    val_sz = len(dataset) - train_sz
    train_set, val_set = torch.utils.data.random_split(dataset, [train_sz, val_sz])
    train_loader = DataLoader(train_set, batch_size=batch_size, drop_last=True, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size, drop_last=True)

    # load model and utility
    # NOTE(review): this loads a fully pickled model; recent PyTorch releases
    # may require an explicit weights_only=False here - confirm against the
    # installed version.
    transformer = torch.load(model_path, map_location=device)
    optimizer = torch.optim.Adam(transformer.parameters(), lr=learning_rate)
    loss_function = nn.MSELoss()
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=factor,
                                  patience=patience, verbose=True)

    # start training
    avg_train_loss, avg_val_loss = [], []
    train_time, elapsed_time = [], []
    best_val_score = float('inf')

    for epoch in tqdm(range(n_epochs)):
        print(f'Epoch {epoch+101}')
        print('Training & Validating ', end='')

        start_time = time.time()
        train_loss, val_loss = [], []

        # Training set (dropout enabled)
        transformer.train()
        for batches, (X, y) in enumerate(train_loader):
            # Use the `device` argument; the previous `DEVICE` name is not
            # defined anywhere in this notebook.
            X = X.to(device).float()
            y = y.to(device).float()

            # Track progress
            if (batches + 1) % 20 == 0:
                print('-', end='')

            # Forward pass
            optimizer.zero_grad()
            mask = create_look_ahead_mask(X.shape[1], device=device)
            out, _ = transformer(X, mask)

            # Backpropagation
            loss = loss_function(out, y.reshape(y.shape[0], -1))
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())

        print()
        avg_train = np.mean(train_loss)
        avg_train_loss.append(avg_train)

        # End the time
        end_train_time = time.time()
        train_time.append(end_train_time - start_time)

        # Evaluate on val set (dropout disabled so the score is deterministic)
        transformer.eval()
        with torch.no_grad():
            for X, y in val_loader:
                X = X.to(device).float()
                y = y.to(device).float()

                mask = create_look_ahead_mask(X.shape[1], device=device)
                out, _ = transformer(X, mask)
                loss = loss_function(out, y.reshape(y.shape[0], -1))
                val_loss.append(loss.item())

            avg_val = np.mean(val_loss)
            avg_val_loss.append(avg_val)

        end_time = time.time()
        elapsed_time.append(end_time - start_time)

        print(f'- Training Loss: {avg_train}\n- Validation Loss: {avg_val}')
        print(f'- Train Time: {sum(train_time)}\n- Elapsed Time: {sum(elapsed_time)}\n')

        scheduler.step(avg_val)

        # save better model
        if avg_val < best_val_score:
            best_val_score = avg_val
            torch.save(transformer, f'fine_model_{city}.pt')

    return transformer, (avg_train_loss, avg_val_loss)

### City: Austin

In [14]:
# Hyperparameters for fine-tuning on Austin
city = 'austin'
model_path = 'best_total.pt'  # pickled model that fine_tune loads
n_epochs = 20 # [50, 100]
learning_rate = 0.00002 # [0.001, 0.01] 0.002 -- initial Adam learning rate
factor = 0.5 # 0.1 ~ 0.99 -- ReduceLROnPlateau factor
patience = 1  # ReduceLROnPlateau patience, in epochs

# Returns the fine-tuned model and the (train, validation) loss histories.
austin_net, austin_losses = fine_tune(city, model_path, n_epochs, learning_rate, factor, patience)

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch 101
Training & Validating ------------------------------------------------------------
- Training Loss: 15.270596969620255
- Validation Loss: 14.749081401682611
- Train Time: 21.919984340667725
- Elapsed Time: 22.444457530975342

Epoch 102
Training & Validating ------------------------------------------------------------
- Training Loss: 15.226474562558261
- Validation Loss: 14.652001633572935
- Train Time: 44.263585805892944
- Elapsed Time: 45.34610414505005

Epoch 103
Training & Validating ------------------------------------------------------------
- Training Loss: 15.193496205195908
- Validation Loss: 14.68318531050611
- Train Time: 68.13679528236389
- Elapsed Time: 69.73237752914429

Epoch 104
Training & Validating ------------------------------------------------------------
- Training Loss: 15.174199531492123
- Validation Loss: 14.653462335244933
- Train Time: 91.01125025749207
- Elapsed Time: 93.08648133277893

Epoch     4: reducing learning rate of group 0 to 1.0000e-05.


### City: Miami

In [None]:
# Hyperparameters for fine-tuning on Miami
city = 'miami'
model_path = 'best_total.pt'  # pickled model that fine_tune loads
n_epochs = 20 # [50, 100]
learning_rate = 0.00002 # [0.001, 0.01] 0.002 -- initial Adam learning rate
factor = 0.5 # 0.1 ~ 0.99 -- ReduceLROnPlateau factor
patience = 1  # ReduceLROnPlateau patience, in epochs

# Returns the fine-tuned model and the (train, validation) loss histories.
miami_net, miami_losses = fine_tune(city, model_path, n_epochs, learning_rate, factor, patience)

### City: Pittsburgh

In [None]:
# Hyperparameters for fine-tuning on Pittsburgh
city = 'pittsburgh'
model_path = 'best_total.pt'  # pickled model that fine_tune loads
n_epochs = 20 # [50, 100]
learning_rate = 0.00002 # [0.001, 0.01] 0.002 -- initial Adam learning rate
factor = 0.5 # 0.1 ~ 0.99 -- ReduceLROnPlateau factor
patience = 1  # ReduceLROnPlateau patience, in epochs

# Returns the fine-tuned model and the (train, validation) loss histories.
pitts_net, pitts_losses = fine_tune(city, model_path, n_epochs, learning_rate, factor, patience)

### City: Dearborn

In [None]:
# Hyperparameters for fine-tuning on Dearborn
city = 'dearborn'
model_path = 'best_total.pt'  # pickled model that fine_tune loads
n_epochs = 20 # [50, 100]
learning_rate = 0.00002 # [0.001, 0.01] 0.002 -- initial Adam learning rate
factor = 0.5 # 0.1 ~ 0.99 -- ReduceLROnPlateau factor
patience = 1  # ReduceLROnPlateau patience, in epochs

# Returns the fine-tuned model and the (train, validation) loss histories.
dearborn_net, dearborn_losses = fine_tune(city, model_path, n_epochs, learning_rate, factor, patience)

### City: Washington D.C.

In [None]:
# Hyperparameters for fine-tuning on Washington D.C.
city = 'washington-dc'
model_path = 'best_total.pt'  # pickled model that fine_tune loads
n_epochs = 20 # [50, 100]
learning_rate = 0.00002 # [0.001, 0.01] 0.002 -- initial Adam learning rate
factor = 0.5 # 0.1 ~ 0.99 -- ReduceLROnPlateau factor
patience = 1  # ReduceLROnPlateau patience, in epochs

# Returns the fine-tuned model and the (train, validation) loss histories.
wash_net, wash_losses = fine_tune(city, model_path, n_epochs, learning_rate, factor, patience)

### City: Palo Alto

In [22]:
# Hyperparameters for fine-tuning on Palo Alto
city = 'palo-alto'
model_path = 'best_total.pt'  # pickled model that fine_tune loads
n_epochs = 20 # [50, 100]
learning_rate = 0.00002 # [0.001, 0.01] 0.002 -- initial Adam learning rate
factor = 0.5 # 0.1 ~ 0.99 -- ReduceLROnPlateau factor
patience = 1  # ReduceLROnPlateau patience, in epochs

# Returns the fine-tuned model and the (train, validation) loss histories.
palo_net, palo_losses = fine_tune(city, model_path, n_epochs, learning_rate, factor, patience)

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch 101
Training & Validating ----------------
- Training Loss: 15.139370829132261
- Validation Loss: 14.980401090673498
- Train Time: 5.84048867225647
- Elapsed Time: 5.968170166015625

Epoch 102
Training & Validating ----------------
- Training Loss: 14.961215251277391
- Validation Loss: 14.933151219342205
- Train Time: 12.256375074386597
- Elapsed Time: 12.510830879211426

Epoch 103
Training & Validating ----------------
- Training Loss: 14.877133031273454
- Validation Loss: 14.955894109365103
- Train Time: 17.986387491226196
- Elapsed Time: 18.37568211555481

Epoch 104
Training & Validating ----------------
- Training Loss: 14.809772420707725
- Validation Loss: 14.920741545187461
- Train Time: 23.75948929786682
- Elapsed Time: 24.282828092575073

Epoch 105
Training & Validating ----------------
- Training Loss: 14.754254384988847
- Validation Loss: 14.936514622456318
- Train Time: 29.39859366416931
- Elapsed Time: 30.052507638931274

Epoch 106
Training & Validating --------------

### Prediction

In [18]:
# Predict the Austin test split with the fine-tuned Austin model.
city = 'austin' 

# `test_dataset` must be a module-level name: make_pred reads its
# rotate_matrix / start_pos through the global of that name.
test_dataset = ArgoverseDataset(city = city, split = 'test', transform=False, normalized=True)
test_loader = DataLoader(test_dataset, batch_size=32)

austin_array = make_pred(test_loader, 32, austin_net)
austin_array.shape

21


(6325, 120)

In [19]:
# Save the Austin predictions as a bare CSV (no index column, no header row).
austin_df = pd.DataFrame(austin_array)
austin_df.to_csv('austin_tuned_0529.csv', index=False, header=False)

In [43]:
# Predict the Miami test split.
city = 'miami' 

# NOTE(review): `total_test_dataset` and `total_net` are not defined in the
# visible part of this notebook - presumably created in cells not shown.
# Unlike the Austin cell, this does not use the fine-tuned `miami_net`; confirm
# that is intended.
test_dataset = total_test_dataset.datasets[1]
test_loader = DataLoader(test_dataset, batch_size=32)

miami_array = make_pred(test_loader, 32, total_net)
miami_array.shape

3


(7971, 120)

In [49]:
# Predict the Pittsburgh test split.
city = 'pittsburgh' 

# NOTE(review): `total_test_dataset` and `total_net` are not defined in the
# visible part of this notebook - presumably created in cells not shown.
# Unlike the Austin cell, this does not use the fine-tuned `pitts_net`; confirm
# that is intended.
test_dataset = total_test_dataset.datasets[2]
test_loader = DataLoader(test_dataset, batch_size=32)

pitts_array = make_pred(test_loader, 32, total_net)
pitts_array.shape

25


(6361, 120)

In [45]:
# Predict the Dearborn test split.
city = 'dearborn' 

# NOTE(review): `total_test_dataset` and `total_net` are not defined in the
# visible part of this notebook - presumably created in cells not shown.
# Unlike the Austin cell, this does not use the fine-tuned `dearborn_net`;
# confirm that is intended.
test_dataset = total_test_dataset.datasets[3]
test_loader = DataLoader(test_dataset, batch_size=32)

dearborn_array = make_pred(test_loader, 32, total_net)
dearborn_array.shape

23


(3671, 120)

In [46]:
# Predict the Washington D.C. test split.
city = 'washington-dc' 

# NOTE(review): `total_test_dataset` and `total_net` are not defined in the
# visible part of this notebook - presumably created in cells not shown.
# Unlike the Austin cell, this does not use the fine-tuned `wash_net`; confirm
# that is intended.
test_dataset = total_test_dataset.datasets[4]
test_loader = DataLoader(test_dataset, batch_size=32)

wash_array = make_pred(test_loader, 32, total_net)
wash_array.shape

21


(3829, 120)

In [47]:
# Predict the Palo Alto test split.
city = 'palo-alto' 

# NOTE(review): `total_test_dataset` and `total_net` are not defined in the
# visible part of this notebook - presumably created in cells not shown.
# Unlike the Austin cell, this does not use the fine-tuned `palo_net`; confirm
# that is intended.
test_dataset = total_test_dataset.datasets[5]
test_loader = DataLoader(test_dataset, batch_size=32)

palo_array = make_pred(test_loader, 32, total_net)
palo_array.shape

22


(1686, 120)

### Write File

In [51]:
import csv

# Header row: an ID column followed by the 120 flattened prediction values.
cols = [['ID'] + ['v{}'.format(i) for i in range(120)]]

# (ID suffix, prediction array) pairs in the order they are written.
city_predictions = [
    ('austin', austin_array),
    ('miami', miami_array),
    ('pittsburgh', pitts_array),
    ('dearborn', dearborn_array),
    ('washington-dc', wash_array),
    ('palo-alto', palo_array),
]

# One pass over a single handle; newline='' is what the csv module asks for
# so that the writer controls line endings itself on every platform.
with open('output.csv', 'w', newline='') as file:
    mywriter = csv.writer(file, delimiter=',')
    mywriter.writerows(cols)

    for city_name, predictions in city_predictions:
        for i, row in enumerate(predictions):
            # Row ID is "<index>_<city>", followed by that row's predictions.
            mywriter.writerow(np.append(['{}_{}'.format(i, city_name)], row))
        # Report how many rows were written for this city.
        print(len(predictions))
    

6325
7971
6361
3671
3829
1686
