In [None]:
# Colab-only: mount Google Drive at /content/drive so files stored there can be
# reached through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Floor identifier; not referenced by any other cell visible in this notebook.
FLOOR = 2
# Paths of the training and test CSV files; both are read with pd.read_csv().
TRAIN_SET = ""
TEST_SET = "" 

# Destination handed to torch.save() in the last cell. torch.save() writes a
# file, so supply a full file path (e.g. ending in .pth).
MODEL_DIR = ""

In [None]:
import math
import os
from os.path import join

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torchvision
from sklearn.model_selection import train_test_split
from torch import nn,optim
from torchvision import datasets, transforms

## Helper functions


In [None]:
def convert_data_to_tensor(np_data, device):
    """Build a float32 tensor from ``np_data`` and move it to ``device``.

    :param np_data: array-like payload accepted by ``torch.tensor``.
    :param device: target handed to ``Tensor.to`` (e.g. ``torch.device`` or str).
    :return: ``torch.Tensor`` of dtype ``torch.float32`` located on ``device``.
    """
    as_float32 = torch.tensor(np_data, dtype=torch.float32)
    return as_float32.to(device)

def create_data_loader(np_input, np_label, batch_size, shuffle=False):
    """Wrap paired input/label arrays in a ``DataLoader`` of float32 tensors.

    Both arrays are converted with ``convert_data_to_tensor`` and placed on the
    GPU when ``torch.cuda.is_available()`` is true, otherwise on the CPU.

    :param np_input: array-like of inputs; the first dimension indexes samples.
    :param np_label: array-like of labels with the same first dimension.
    :param batch_size: number of samples per batch.
    :param shuffle: forwarded to ``DataLoader`` (reshuffle every epoch if True).
    :return: ``torch.utils.data.DataLoader`` over a ``TensorDataset``.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Plain tensors are used directly: the old ``Variable`` wrapper was never
    # imported in this notebook (NameError) and is not needed for autograd.
    # convert_data_to_tensor() already moves the data to ``device``, so no
    # additional ``.cuda()`` call is required.
    inputs = convert_data_to_tensor(np_input, device)
    labels = convert_data_to_tensor(np_label, device)

    # Fully qualified names keep this function independent of extra imports;
    # ``import torch`` alone is enough, as for the loaders built further down.
    dataset = torch.utils.data.TensorDataset(inputs, labels)
    return torch.utils.data.DataLoader(dataset=dataset,
                                       batch_size=batch_size,
                                       shuffle=shuffle)


def scale_data(rss_df, missing_values=(100, -100), min_rss=-110):
    """Rescale raw RSS readings for use as model input.

    Every reading equal to an entry of ``missing_values`` is first replaced by
    ``min_rss``. Each value is then mapped to ``value / abs(min_rss) + 1``, so
    ``min_rss`` becomes 0 and a reading of 0 becomes 1. The caller's frame is
    not modified (a copy is scaled).

    :param rss_df: ``pandas.DataFrame`` (or ``Series``) of RSS readings.
    :param missing_values: readings treated as "no signal"; defaults to the
        two markers that were previously hard-coded (100 and -100).
    :param min_rss: non-zero value substituted for missing readings and used
        as the scaling magnitude; defaults to the previous constant -110.
    :return: ``numpy.ndarray`` of scaled values, or ``-1`` (after printing an
        error) when ``rss_df`` is not a pandas object.
    """
    # isinstance() replaces the former substring test on str(type(...)), which
    # matched any class whose qualified name happened to contain "pandas".
    if not isinstance(rss_df, (pd.DataFrame, pd.Series)):
        print("ERROR: Please Input Data in pandas.DataFrame")
        return -1

    new_rss = rss_df.copy()
    for marker in missing_values:
        new_rss[new_rss == marker] = min_rss

    return (new_rss / abs(min_rss) + 1).values

def scale_xy(lon_lat, mean, std):
    """Standardise coordinates: subtract ``mean``, then divide by ``std``.

    :param lon_lat: coordinates to scale.
    :param mean: offset removed from ``lon_lat``.
    :param std: divisor applied after the offset is removed.
    :return: ``(lon_lat - mean) / std``.
    """
    centred = lon_lat - mean
    return centred / std

def rescale_xy(scale_lon_lat, mean, std):
    """Map standardised (lon, lat) pairs back with ``value * std + mean``.

    Column 0 is rescaled with ``std[0]`` / ``mean[0]`` and column 1 with
    ``std[1]`` / ``mean[1]``.

    :param scale_lon_lat: 2-D array-like whose first two columns are the
        scaled longitude and latitude.
    :param mean: indexable holding the two offsets.
    :param std: indexable holding the two scale factors.
    :return: ``numpy.ndarray`` of shape ``(n_rows, 2)``.
    """
    scaled = np.array(scale_lon_lat)

    # Each column is multiplied by its own scalar statistic, column by column.
    lon_rescaled = scaled[:, 0] * std[0] + mean[0]
    lat_rescaled = scaled[:, 1] * std[1] + mean[1]

    return np.column_stack((lon_rescaled, lat_rescaled))


def save_result(df_pred, df_true, data_name, case_name, save_path):
    """Write predictions next to the ground truth, then evaluate and plot them.

    Steps:
      1. join ``df_pred`` with ``df_true`` (aligned on the index) and write the
         result to ``<save_path>/pred_<data_name>_<case_name>.csv``;
      2. call ``eval_euclidean_distance`` (not defined in the visible part of
         this notebook - it must be provided elsewhere);
      3. call ``Plot2Dlocation`` with ``saveFlag=True``.

    :param df_pred: DataFrame of predicted coordinates.
    :param df_true: DataFrame of ground-truth coordinates.
    :param data_name: dataset label used in file names and plot titles.
    :param case_name: experiment label used in file names and plot titles.
    :param save_path: directory that receives the outputs.
    :return: None
    """
    # % SAVE RESULT
    # DataFrame.join() returns a new frame; the result used to be discarded,
    # so the CSV contained predictions only. The suffixes only take effect
    # when both frames share column names (join would otherwise raise).
    df_result = df_pred.join(df_true, lsuffix="_pred", rsuffix="_true")
    file_path = join(save_path, "pred_{}_{}.csv".format(data_name,case_name))
    df_result.to_csv(file_path,header=True)

    # % Evaluate the Result
    eval_euclidean_distance(
        df_pred = df_pred,
        df_truth = df_true,
        dataset_name = data_name,
        case_name = case_name,
        save_path = save_path,
        run_time=-1,
        file_name="Eval_Results.csv",
        saveFlag=True
    )

    # Plot - Result
    Plot2Dlocation(df_true.values, [[case_name, df_pred.values]], save_path, data_name, case_name,
                   connectDots=True, saveFlag=True)

    print("-- [complete] save pred_{}_{}.csv".format(data_name,case_name))
#end save_result

## Dataset 

### **Training dataset**

In [None]:
# Load the training CSV and separate the (LONGITUDE, LATITUDE) targets from the
# remaining columns, which are used as model inputs.
data = pd.read_csv(TRAIN_SET)
label = data[["LONGITUDE", "LATITUDE"]]
data = data.drop(
    ['TYPE', 'TIMESTAMP', 'LONGITUDE', 'LATITUDE', 'FLOOR_ID', 'FLOOR'],
    axis="columns",
)

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data, label, test_size=0.2, random_state=42
)

# Normalise the training inputs.
X_train = scale_data(X_train)

# Standardise the training targets with their own per-column mean / std.
# `mean` and `std` are reused later to map predictions back.
mean = np.mean(y_train.values, axis=0)
std = np.std(y_train.values, axis=0)
label = scale_xy(y_train.values, mean, std)

# Convert to tensors and flatten everything after the sample dimension.
data = torch.tensor(X_train)
data_t = torch.flatten(data, 1)
label_t = torch.flatten(torch.tensor(label), 1)
print(f"data shape: {data_t.shape}")
print(f"label shape: {label_t.shape}")

# Pair every input row with its target row; this list is the training dataset.
train_data = [[features, target] for features, target in zip(data_t, label_t)]





data shape: torch.Size([13077, 345])
label shape: torch.Size([13077, 2])


### Validation dataset

In [None]:
# Build the validation tensors from the held-out split (X_test / y_test).

# Normalise the validation inputs. The result gets its own name so X_test
# stays a DataFrame and this cell can be re-run: scale_data() rejects
# non-pandas input, which is what X_test became when it was overwritten.
X_val_scaled = scale_data(X_test)

# Standardise the validation targets with the TRAINING statistics (`mean`,
# `std`). The network learns to emit outputs on that scale, so scaling the
# validation targets with their own mean/std would make the validation loss
# incomparable with the training loss.
label_val = scale_xy(y_test.values, mean, std)

# Convert to tensors and flatten everything after the sample dimension.
data_val_t1 = torch.tensor(X_val_scaled)
label_val_t = torch.tensor(label_val)

data_val_t = torch.flatten(data_val_t1, 1)
label_val_t = torch.flatten(label_val_t, 1)
print("data shape: " + str(data_val_t.shape))
print("label shape: " + str(label_val_t.shape))

# Pair every input row with its target row; this list is the validation dataset.
val_data = [[features, target] for features, target in zip(data_val_t, label_val_t)]

data shape: torch.Size([3270, 345])
label shape: torch.Size([3270, 2])


### Wrap the training and validation sets in data loaders

In [None]:
# Wrap both splits in DataLoaders. Only the training loader reshuffles the
# samples every epoch; the validation loader keeps a fixed order.
trainloader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True)
testloader = torch.utils.data.DataLoader(val_data, batch_size=32)

print("Finish dataset part")

Finish dataset part


## Model 

In [None]:
class Classifier(nn.Module):
    """Fully connected network: three hidden ReLU layers with dropout, then a
    linear output layer.

    Despite the name, the last layer is a plain ``nn.Linear`` with no
    activation; the notebook trains it against an MSE-based loss.

    :param in_features: width of the flattened input. ``None`` (default) keeps
        the previous behaviour of reading ``data_val_t.shape[1]`` from the
        notebook namespace.
    :param hidden_features: width of each hidden layer (default 300).
    :param out_features: number of outputs (default 2).
    :param dropout: drop probability applied after each hidden layer.
    """

    def __init__(self, in_features=None, hidden_features=300, out_features=2, dropout=0.5):
        super().__init__()
        if in_features is None:
            # Backward-compatible default: size the first layer from the
            # validation tensor defined earlier in the notebook.
            in_features = data_val_t.shape[1]

        # Attribute names fc1..fc4 are kept so saved state_dicts still load.
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        self.fc3 = nn.Linear(hidden_features, hidden_features)
        self.fc4 = nn.Linear(hidden_features, out_features)

        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        """Flatten ``x`` to ``(batch, -1)`` and run it through the layers."""
        x = x.view(x.shape[0], -1)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))
        x = self.fc4(x)

        return x

    


In [None]:
def weight_reset(m):
    """Re-initialise ``m`` in place when it is an ``nn.Conv2d`` or ``nn.Linear``.

    Intended for ``model.apply(weight_reset)``; other module types are skipped.
    """
    if not isinstance(m, (nn.Conv2d, nn.Linear)):
        return
    m.reset_parameters()



In [None]:
LR = 0.0001

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Computation device: {device}\n")
model = Classifier()
model.apply(weight_reset)
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=LR)
criterion = nn.MSELoss()  # wrapped in sqrt below, so the optimised loss is RMSE

epochs = 500
train_losses, test_losses = [], []

for e in range(epochs):
    # ---- training pass (dropout active) ----
    model.train()
    tot_train_loss = 0
    for batch_x, batch_y in trainloader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        batch_x = batch_x.view(batch_x.shape[0], -1).float()

        optimizer.zero_grad()
        output = model(batch_x)
        loss = torch.sqrt(criterion(output, batch_y.float()))  # per-batch RMSE
        tot_train_loss += loss.item()

        loss.backward()
        optimizer.step()

    # ---- validation pass ----
    # eval() switches dropout off; without it the validation loss was computed
    # with randomly dropped units.
    model.eval()
    tot_test_loss = 0
    with torch.no_grad():
        for batch_x, batch_y in testloader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            batch_x = batch_x.view(batch_x.shape[0], -1).float()
            test_output = model(batch_x)
            loss = torch.sqrt(criterion(test_output, batch_y.float()))
            tot_test_loss += loss.item()

    # Every accumulated term is already a per-batch mean, so average over the
    # number of batches of the matching loader. (Previously both totals were
    # divided by the number of training samples.)
    train_loss = tot_train_loss / len(trainloader)
    test_loss = tot_test_loss / len(testloader)
    train_losses.append(train_loss)
    test_losses.append(test_loss)

    print("Epoch:{}/{}..".format(e+1,epochs),
         "Training Loss:{:.3f}..".format(train_loss),
         "Test Loss:{:.3f}..".format(test_loss))


Computation device: cpu

Epoch:1/500.. Training Loss:0.016.. Test Loss:0.002..
Epoch:2/500.. Training Loss:0.009.. Test Loss:0.002..
Epoch:3/500.. Training Loss:0.008.. Test Loss:0.002..
Epoch:4/500.. Training Loss:0.007.. Test Loss:0.002..
Epoch:5/500.. Training Loss:0.007.. Test Loss:0.002..
Epoch:6/500.. Training Loss:0.007.. Test Loss:0.002..
Epoch:7/500.. Training Loss:0.006.. Test Loss:0.002..
Epoch:8/500.. Training Loss:0.006.. Test Loss:0.002..
Epoch:9/500.. Training Loss:0.006.. Test Loss:0.002..
Epoch:10/500.. Training Loss:0.006.. Test Loss:0.002..
Epoch:11/500.. Training Loss:0.006.. Test Loss:0.002..
Epoch:12/500.. Training Loss:0.006.. Test Loss:0.002..
Epoch:13/500.. Training Loss:0.006.. Test Loss:0.002..
Epoch:14/500.. Training Loss:0.006.. Test Loss:0.001..
Epoch:15/500.. Training Loss:0.006.. Test Loss:0.001..
Epoch:16/500.. Training Loss:0.006.. Test Loss:0.001..
Epoch:17/500.. Training Loss:0.005.. Test Loss:0.002..
Epoch:18/500.. Training Loss:0.005.. Test Loss:0.

KeyboardInterrupt: ignored

In [None]:
import matplotlib.pyplot as plt


# Loss curves: training on the left, validation on the right.
# A single plt.subplots() call creates the figure; the earlier extra
# plt.figure() only produced an additional empty figure.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

ax1.plot(train_losses, color='orange', label='train loss')
# Label through the Axes objects: plt.xlabel()/plt.ylabel() act on the current
# axes, which after plt.subplots() is the right-hand panel, not ax1.
ax1.set_xlabel('Epochs')
ax1.set_ylabel('Loss')
ax1.legend()

ax2.plot(test_losses, color='red', label='validation loss')
ax2.set_xlabel('Epochs')
ax2.set_ylabel('Loss')
ax2.legend();

## Evaluate on test set 

In [None]:
# Load the separate test CSV and split it into inputs and (LONGITUDE, LATITUDE)
# targets, dropping the same non-feature columns as for the training CSV.
data_val_new = pd.read_csv(TEST_SET)

label_val_new = data_val_new[["LONGITUDE", "LATITUDE"]]
data_val_new = data_val_new.drop(
    ['TYPE', 'TIMESTAMP', 'LONGITUDE', 'LATITUDE', 'FLOOR_ID', 'FLOOR'],
    axis="columns",
)


def evaluate(test_input, test_label, model, loss_fn, xy_mean, xy_std):
    """Predict coordinates for ``test_input`` and score them in original units.

    The inputs are normalised with ``scale_data``, passed through ``model`` in
    eval mode, and the outputs are mapped back with ``rescale_xy`` before
    ``loss_fn`` compares them with ``test_label``.

    :param test_input: pandas DataFrame of raw RSS readings.
    :param test_label: array-like of ground-truth coordinates (unscaled).
    :param model: trained ``nn.Module``; switched to eval mode by this call.
    :param loss_fn: callable taking ``(prediction, target)`` tensors.
    :param xy_mean: per-column offsets used to rescale the predictions.
    :param xy_std: per-column scale factors used to rescale the predictions.
    :return: ``(loss as float, rescaled predictions as numpy.ndarray)``.
    """
    model.eval()

    # Run on the device the model actually lives on instead of relying on a
    # notebook-level ``device`` variable; fall back to CPU for a model
    # without parameters.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        pred = model(convert_data_to_tensor(scale_data(test_input), run_device))

    # Rescale once and reuse the result for both the loss and the return value.
    pred_xy = rescale_xy(pred.cpu().numpy(), xy_mean, xy_std)
    val_loss = loss_fn(
        convert_data_to_tensor(pred_xy, run_device),
        convert_data_to_tensor(test_label, run_device))
    return val_loss.item(), pred_xy

# Score the test CSV. `mean` / `std` are the statistics computed from y_train,
# so predictions are mapped back to the scale of the raw LONGITUDE / LATITUDE
# columns before `criterion` is applied.
validation_loss, validation_pred = evaluate(data_val_new,label_val_new.values, model, criterion, mean, std)


In [None]:
# Show the training-target statistics used for (re)scaling, one per line.
print(mean, std, sep="\n")

[30032.69349262 30317.32534712]
[22.18997454 36.97402449]


In [None]:
# Report the loss returned by evaluate(), rounded to two decimals.
print(f"Validation loss: {validation_loss:.2f}")

In [None]:
# Single-letter matplotlib colour codes. Plot2Dlocation uses index 0 for the
# ground truth and the following entries for the prediction sets.
GRAPH_COLOUR = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w']
# matplotlib marker codes; only MARKER[0] is used by the code visible here.
MARKER = ['o', '.', ',', 'x', '+', 'v', '^', '<', '>', 's', 'd']

# Scatter of the ground-truth test positions; title and axis labels make the
# figure readable on its own.
plt.figure(figsize=(10, 7))
x,y = label_val_new.values.T
plt.scatter(x,y)
plt.title("Ground-truth test locations")
plt.xlabel("LONGITUDE")
plt.ylabel("LATITUDE")
plt.show()

In [None]:
# Scatter of the predicted test positions (columns follow the same
# LONGITUDE, LATITUDE order as the targets).
plt.figure(figsize=(10, 7))
x,y = validation_pred.T
plt.scatter(x,y)
plt.title("Predicted test locations")
plt.xlabel("LONGITUDE")
plt.ylabel("LATITUDE")
plt.show()

In [None]:
# Euclidean distance between every prediction and its ground-truth position.
dist_list = [
    np.linalg.norm(pred_row - true_row)
    for pred_row, true_row in zip(validation_pred, label_val_new.values)
]

print(f"Validation loss: {validation_loss:.2f}")
print(f"Max: {max(dist_list):.2f} m")
print(f"Min: {min(dist_list):.2f} m")
print(f"Average: {np.average(dist_list):.2f} m")



In [None]:
def Plot2Dlocation(np_xy_truth, list_xy_preds,result_path, dataset_name, plot_name, connectDots=True,saveFlag=False):
    """
    Plot the 2D map with the ground truth and prediction results (list_xy_preds could hold 1 set of results or more)
    :param np_xy_truth (Numpy): ground-truth positions, indexed as [:, 0] (x) and [:, 1] (y)
    :param list_xy_preds (List<String,2d_np>): e.g. list_xy_preds=[["Euclidean",df_pred.values]]
    :param result_path (String): directory joined with the file name when saveFlag is True
    :param dataset_name (String): shown in the title and used in the saved file name
    :param plot_name (String): shown in the title and used in the saved file name
    :param connectDots (Bool): draw a faint line from each truth point to its prediction
    :param saveFlag (Bool): save the figure as "<dataset_name>_<plot_name>.png" in result_path
    :return: None
    """
    # One fresh figure per call. The previous plt.clf() acted on whatever
    # figure was current (creating an empty one if none existed) before
    # plt.subplots() opened yet another figure.
    fig, ax = plt.subplots(figsize=(16, 10))
    #Figure Info
    ax.set_title('{}\n({})'.format(plot_name,dataset_name))
    ax.set_xlabel('x')
    ax.set_ylabel('y')

    #plot groundtruth
    ax.plot(np_xy_truth[:,0], np_xy_truth[:,1], GRAPH_COLOUR[0]+MARKER[0],label="Ground Truth")

    # Colours for the prediction sets: everything after the ground-truth
    # colour, wrapping around instead of raising IndexError when there are
    # more sets than colours.
    pred_colours = GRAPH_COLOUR[1:]

    for caseNum, case in enumerate(list_xy_preds):
        case_label, xy_pred = case[0], case[1]
        colour = pred_colours[caseNum % len(pred_colours)]

        #plot pred point (scatter)
        ax.scatter(xy_pred[:, 0], xy_pred[:, 1], c=colour, s=5, label=case_label)

        #connect the points
        if connectDots:
            # Row 0 holds the truth coordinate, row 1 the prediction, so each
            # column is drawn as one truth -> prediction segment.
            x = np.array([np_xy_truth[:, 0], xy_pred[:, 0]])
            y = np.array([np_xy_truth[:, 1], xy_pred[:, 1]])
            ax.plot(x, y, colour, alpha=0.2)

    ax.legend()
    if saveFlag:
        saveName = join(result_path,dataset_name+"_"+plot_name+".png")
        fig.savefig(saveName)

    #endFunction Plot2Dlocation

In [None]:
# Collect the prediction rows into a DataFrame (one row per test sample).
temp_pred = list(validation_pred)
df_pred = pd.DataFrame(temp_pred)

In [None]:
# Show the ground-truth coordinates of the test set.
print(label_val_new)

In [None]:
# Overlay predictions on the ground truth; nothing is written to disk because
# saveFlag keeps its default.
Plot2Dlocation(
    np_xy_truth=label_val_new.values,
    list_xy_preds=[["Euclidean", df_pred.values]],
    result_path="test",
    dataset_name="National Gallery",
    plot_name="All floors combined",
)

In [None]:
# torch.save() needs a file path. MODEL_DIR is described as a directory in the
# config cell, so when it points at an existing directory a default file name
# is used inside it; otherwise it is treated as the full target path, as before.
model_path = join(MODEL_DIR, "model_state_dict.pth") if os.path.isdir(MODEL_DIR) else MODEL_DIR
torch.save(model.state_dict(), model_path)