In [2]:
# import
import csv
import math
import random
import sys
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image

from sklearn.neighbors import NearestNeighbors

import torch
import torch.nn as nn
import torch.nn.functional as F

%matplotlib inline

random.seed

<bound method Random.seed of <random.Random object at 0x10203ac18>>

In [None]:
def IoW(gt, coor, radius_gt, radius):
    ### IoW cross section calculation
    cross_list_0 = [(gt[0]-radius_gt, 0), (gt[0]+radius_gt, 0)]
    cross_list_1 = [(gt[1]-radius_gt, 0), (gt[1]+radius_gt, 0)]
    
    cross_list_0.append((coor[0]-radius, 1))
    cross_list_0.append((coor[0]+radius, 1))
    cross_list_1.append((coor[1]-radius, 1))
    cross_list_1.append((coor[1]+radius, 1))
                        
    cross_list_0.sort(key = lambda x : x[0])
    cross_list_1.sort(key = lambda x : x[0])

    if (cross_list_0[0][1] != cross_list_0[1][1] and cross_list_1[0][1] != cross_list_1[1][1]):
        return (cross_list_0[2][0] - cross_list_0[1][0]) * (cross_list_1[2][0] - cross_list_1[1][0]) / ((radius*2)**2)
    else:
        return 0.
    
    
def distance_progress(coor_gt, coor_cur, coor_next):
    dis_cur = math.sqrt((coor_gt[0]-coor_cur[0])**2 + (coor_gt[1]-coor_cur[1])**2)
    dis_next = math.sqrt((coor_gt[0]-coor_next[0])**2 + (coor_gt[1]-coor_next[1])**2)
    return dis_cur - dis_next
    
    
class Net(nn.Module):
    def __init__(self, n_states, n_actions, n_hidden):
        super(Net, self).__init__()

        # 輸入層 (state) 到隱藏層，隱藏層到輸出層 (action)
        self.fc1 = nn.Linear(n_states, n_hidden)
        self.fc2 = nn.Linear(n_hidden, n_hidden)
        self.out = nn.Linear(n_hidden, n_actions)

    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x) # ReLU # activation
        x = self.fc2(x)
        x = F.relu(x)
        actions_value = self.out(x)
        return actions_value


class DQN(object):
    def __init__(self, n_states, n_actions, n_hidden, batch_size, lr, gamma, target_replace_iter, memory_capacity):
        self.eval_net, self.target_net = Net(n_states, n_actions, n_hidden), Net(n_states, n_actions, n_hidden)

        self.memory = np.zeros((memory_capacity, n_states * 2 + 2)) # 每個 memory 中的 experience 大小為 (state + next state + reward + action)
        self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=lr)
        self.loss_func = nn.MSELoss()
        self.memory_counter = 0
        self.learn_step_counter = 0 # 讓 target network 知道什麼時候要更新

        self.n_states = n_states
        self.n_actions = n_actions
        self.n_hidden = n_hidden
        self.batch_size = batch_size
        self.lr = lr
        self.gamma = gamma
        self.target_replace_iter = target_replace_iter
        self.memory_capacity = memory_capacity

    def choose_action(self, state, epsilon):
        x = torch.unsqueeze(torch.FloatTensor(state), 0)

        # epsilon-greedy
        if (np.random.uniform() < epsilon):
            action = np.random.randint(0, self.n_actions)
        else: 
            actions_value = self.eval_net(x) # 以現有 eval net 得出各個 action 的分數
            action = torch.max(actions_value, 1)[1].data.numpy()[0] # 挑選最高分的 action

        return action
    

    def store_transition(self, state, action, reward, next_state):
        # 打包 experience
        transition = np.hstack((state, [action, reward], next_state))

        # 存進 memory；舊 memory 可能會被覆蓋
        index = self.memory_counter % self.memory_capacity
        self.memory[index, :] = transition
        self.memory_counter += 1
        
    def BP(self, state, next_state, reward):
        q_eval = self.eval_net(state)
        q_next = self.target_net(next_state).detach()
        q_target = reward + self.gamma * q_next
        
        
    def learn(self):
        # 隨機取樣 batch_size 個 experience
        sample_index = np.random.choice(self.memory_capacity, self.batch_size)
        b_memory = self.memory[sample_index, :]
        b_state = torch.FloatTensor(b_memory[:, :self.n_states])
        b_action = torch.LongTensor(b_memory[:, self.n_states:self.n_states+1].astype(int))
        b_reward = torch.FloatTensor(b_memory[:, self.n_states+1:self.n_states+2])
        b_next_state = torch.FloatTensor(b_memory[:, -self.n_states:])

        # 計算現有 eval net 和 target net 得出 Q value 的落差
        q_eval = self.eval_net(b_state).gather(1, b_action) # 重新計算這些 experience 當下 eval net 所得出的 Q value
        q_next = self.target_net(b_next_state).detach() # detach 才不會訓練到 target net
        q_target = b_reward + self.gamma * q_next.max(1)[0].view(self.batch_size, 1) # 計算這些 experience 當下 target net 所得出的 Q value
        loss = self.loss_func(q_eval, q_target)

        # Backpropagation
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # 每隔一段時間 (target_replace_iter), 更新 target net，即複製 eval net 到 target net
        self.learn_step_counter += 1
        if self.learn_step_counter % self.target_replace_iter == 0:
            self.target_net.load_state_dict(self.eval_net.state_dict())

In [3]:
# for floor in range(0,5):
#     for building in range(0,3):
for floor in range(2,3):
    for building in range(0,1):
        
        ## Count the number of data points in building id & floor id
        data_num = 0
        with open("1478167720_9233432_trainingData.csv", newline='') as csvfile:
            spamreader = csv.reader(csvfile, delimiter=',')
            for row in spamreader:
                if (row[523] == 'BUILDINGID'):
                    continue
                elif (int(row[523]) is not building or int(row[522]) is not floor):
                    continue
                data_num += 1
        print(data_num)
        ## if there are no data, continue to next floor 
        if (data_num == 0):
            continue
            
        ## Load data points in
        wifi_loc_time = np.zeros(shape = (data_num, 524))
        i=-1
        with open("1478167720_9233432_trainingData.csv", newline='') as csvfile:
            spamreader = csv.reader(csvfile, delimiter=',')
            for row in spamreader:
                if (row[523] == 'BUILDINGID'):
                    continue
                elif (int(row[523]) is not building or int(row[522]) is not floor):
                    continue
                i = i+1
                if (i > data_num):
                    break
                # wifi
                wifi_loc_time[i-1][:520] = np.array(row[:520])
                # location x, y
                wifi_loc_time[i-1][520:522] = np.array(row[520:522])
                # userID
                wifi_loc_time[i-1][522] = np.array(row[526])
                # time stamp
                wifi_loc_time[i-1][-1] = np.array(row[-1])
        
        ## Sort by time stamp
        wifi_loc_time = wifi_loc_time[wifi_loc_time[:,-1].argsort()]
        
        ## Map boundaries
        longitude_list = np.array([max(wifi_loc_time[:, 520]), (max(wifi_loc_time[:, 520])-min(wifi_loc_time[:, 520]))/grid_slice\
                                   , min(wifi_loc_time[:, 520])])
        latitude_list = np.array([max(wifi_loc_time[:, 521]), (max(wifi_loc_time[:, 521])-min(wifi_loc_time[:, 521]))/grid_slice\
                                   , min(wifi_loc_time[:, 521])])
        
        ## KNN initial calculation
        nbrs = NearestNeighbors(n_neighbors=3, algorithm='ball_tree').fit(wifi_loc_time[:,:520])
        distances, indices = nbrs.kneighbors(wifi_loc_time[:,:520])

        ## DQN's hyper para
        n_actions = 5
        # state: RSSI (520), coordinate (2), radius (1), history (50)
        n_states = 520 + 2 + 1 + 50 
        n_hidden = 512
        batch_size = 100
        gamma = 0.1 # reward discount factor
        target_replace_iter = 100
        memory_capacity = 2000
        n_episodes = 10000
        lr = 0.7
        eps = 1.
        max_search_steps = 10
        
        radius_gt = 0.5
        
        dqn = DQN(n_states, n_actions, n_hidden, batch_size, lr, gamma, target_replace_iter, memory_capacity)

        ## DQN training
        for k in range(n_episodes):
            
            for i in range(len(wifi_loc_time)):
                # some important variables used for training
                Rewards = 0
                Goal = False
                alpha = 0.7
                delta = 0.7
                next_coordinate = np.array([0, 0])
                next_radius = 0
                
                ## 1. KNN locates initial coordinates and radius
                Hx = 0.
                Hy = 0.
                for m in range(3):
                    Hx += wifi_loc_time[indices[i, m], 520]
                    Hy += wifi_loc_time[indices[i, m], 521]
                Hx /= 3.
                Hy /= 3.
                coordinate = np.array([Hx, Hy])
                radius = 10.
                # initial history, 5n vector
                history = np.zeros(shape=(5*max_search_steps,), dtype=int)

                
                ## 2. Check initial KNN IoW
                while True:
                    IoW = IoW(wifi_loc_time[i, 520:522], coordinate, radius_gt, radius)
                    if (IoW == 0):
                        radius *= 1.5
                    elif (IoW > delta):
                        print("Precise location!")
                        Goal = True
                        break
                    else:
                        print("Start to searching!")
                        break
                # initial state: RSSI (520), coordinate (2), radius (1), history (50)
                state = np.concatenate((wifi_loc_time[i, :520], coordinate, np.array([radius]), history), axis=0)
                
                    
                ## 3. Searching starts
                for t in range(max_search_steps):
                    ## Plot gt region
                    if (k % 500 == 0 and i == 0 and j == 0 ):
                        fig = plt.figure()
                        ax = fig.add_subplot(111)
                        plt.xlim(longitude_list[0], longitude_list[2])
                        plt.ylim(latitude_list[0], latitude_list[2])
                        rect0 = plt.Rectangle((wifi_loc_timefi_loc_time[i,520]-radius_gt, wifi_loc_time[i,521]-radius_gt), 2*radius_gt, 2*radius_gt, alpha=0.9)
                    while True:
                        ### (1) select an action
                        action = dqn.choose_action(state, eps)
                        ### (1) - 1. New Center
                        ### 0 -> "Up Left"
                        ### 1 -> "Up Right"
                        ### 2 -> "Down Left"
                        ### 3 -> "Down Right"
                        ### 4 -> "Center"
                        if (action == 0):
                            next_coordinate[0] -= radius/2.
                            next_coordinate[1] += radius/2.
                        elif (action == 1):
                            next_coordinate[0] += radius/2.
                            next_coordinate[1] -= radius/2.
                        elif (action == 2):
                            next_coordinate[0] -= radius/2.
                            next_coordinate[1] -= radius/2.
                        elif (action == 3):
                            next_coordinate[0] += radius/2.
                            next_coordinate[1] -= radius/2.
                        else:
                            next_coordinate = coordinate
                        ### (1) - 2. New radius
                        next_radius = radius * alpha
                        ### (1) - 3. New IoW
                        next_IoW = IoW(wifi_loc_time[i, 520:522], next_coordinate, radius_gt, next_radius)
                        
                        if (next_IoW > delta):
                            print("Precise location!")
                            Goal = True
                            break
                        elif (next_IoW > IoW):
                            print("continue next round of searching")
                            # close score
                            reward = distances_progress(wifi_loc_time[i, 520:522], coordinate, next_coordinate)
                            IoW = next_IoW
                            radius = next_radius
                            coordinate = next_coordinate
                            one_hot = t*5 + action
                            history[one_hot] = 1
                            next_state = np.concatenate((wifi_loc_time[i,:520], coordinate, np.array([radius]), history), axis=0)
                            dqn.store_transition(state, action, reward, next_state)
                            Rewards += reward
                            break
                        else:
                            reward = distances_progress(wifi_loc_time[i, 520:522], coordinate, next_coordinate)
                            Rewards += reward
                            # back_propagation
                            
                            print("Repeat this round")
                        
                            
                            
                    
                        
                        
                        

1443
-7645.519433815032 -7653.343301445246 4864921.386717163 4864920.963251494
-7646.986467147867 -7653.343301445246 4864924.647117165 4864920.963251494
-7643.859667148441 -7653.343301445246 4864913.9872171655 4864920.963251494
-7646.986467147867 -7653.343301445246 4864924.647117165 4864920.963251494
-7646.2360004807515 -7653.343301445246 4864923.886517164 4864920.963251494
-7646.1416671487195 -7653.343301445246 4864917.367250499 4864920.963251494
-7653.361433815211 -7653.343301445246 4864928.570250499 4864920.963251494
-7654.391367630412 -7653.343301445246 4864926.672000997 4864920.963251494
-7643.359967148553 -7653.343301445246 4864913.853183831 4864920.963251494
-7647.888167147835 -7653.343301445246 4864927.792883831 4864920.963251494
-7634.05126666526 -7633.236699998379 4864965.525299999 4864965.063299999
-7632.350533332676 -7633.236699998379 4864969.511266666 4864965.063299999
-7638.33459999909 -7633.236699998379 4864957.644233334 4864965.063299999
-7633.236699998379 -7633.2366999

-7595.05896666646 -7594.264100000262 4864982.755633335 4864982.456100002
-7590.142933332671 -7594.264100000262 4864984.692066667 4864982.456100002
-7597.361166667193 -7594.264100000262 4864981.5684 4864982.456100002
-7590.284433332582 -7587.102299999446 4864985.178899999 4864986.090999998
-7597.029899999499 -7587.102299999446 4864982.718033333 4864986.090999998
-7587.102299999446 -7587.102299999446 4864986.090999998 4864986.090999998
-7587.102299999446 -7587.102299999446 4864986.090999998 4864986.090999998
-7587.755666665733 -7587.102299999446 4864985.903699999 4864986.090999998
-7597.656099999945 -7587.102299999446 4864982.538466667 4864986.090999998
-7590.300566665828 -7587.102299999446 4864985.174166667 4864986.090999998
-7597.029899999499 -7587.102299999446 4864982.718033333 4864986.090999998
-7587.102299999446 -7587.102299999446 4864986.090999998 4864986.090999998
-7598.081233332555 -7589.062399998307 4864981.889366667 4864985.529100001
-7587.755666665733 -7589.062399998307 486498

-7639.63830000038 -7637.5564999990165 4864911.510200001 4864907.466200002
-7639.63830000038 -7637.5564999990165 4864911.510200001 4864907.466200002
-7637.245199999462 -7637.5564999990165 4864904.784000002 4864907.466200002
-7640.706266666452 -7637.5564999990165 4864915.000666668 4864907.466200002
-7638.979499999434 -7637.5564999990165 4864912.4306 4864907.466200002
-7637.426866667966 -7637.002300001681 4864905.41726667 4864905.532300003
-7637.300300000857 -7637.002300001681 4864903.379700002 4864905.532300003
-7637.912966667364 -7637.002300001681 4864908.709533334 4864905.532300003
-7640.698866666605 -7637.002300001681 4864914.9750000015 4864905.532300003
-7638.101366667698 -7637.002300001681 4864906.1613666685 4864905.532300003
-7638.500433333218 -7637.002300001681 4864909.1630666675 4864905.532300003
-7637.300300000857 -7637.002300001681 4864903.379700002 4864905.532300003
-7637.300300000857 -7637.002300001681 4864903.379700002 4864905.532300003
-7638.32310000062 -7637.002300001681 4

-7683.412799999118 -7683.855900000781 4864931.012100001 4864930.357900001
-7678.623733333002 -7683.855900000781 4864932.384866667 4864930.357900001
-7683.265399999917 -7683.855900000781 4864930.527066668 4864930.357900001
-7683.855900000781 -7683.855900000781 4864930.357900001 4864930.357900001
-7688.235899999738 -7683.855900000781 4864929.1023 4864930.357900001
-7683.265399999917 -7683.855900000781 4864930.527066668 4864930.357900001
-7683.388866666704 -7683.855900000781 4864931.018999998 4864930.357900001
-7676.618933333705 -7683.855900000781 4864932.394500002 4864930.357900001
-7674.117166666935 -7683.855900000781 4864934.203833334 4864930.357900001
-7678.986533333858 -7683.855900000781 4864932.280866668 4864930.357900001
-7680.584733332197 -7682.084399998188 4864931.822700001 4864930.865400001
-7675.434833332896 -7682.084399998188 4864933.826133333 4864930.865400001
-7680.451599999021 -7682.084399998188 4864932.3882 4864930.865400001
-7683.412799999118 -7682.084399998188 4864931.01

-7676.643599998206 -7676.643599998206 4864934.006999999 4864934.006999999
-7676.643599998206 -7676.643599998206 4864934.006999999 4864934.006999999
-7676.643599998206 -7676.643599998206 4864934.006999999 4864934.006999999
-7676.643599998206 -7676.643599998206 4864934.006999999 4864934.006999999
-7676.643599998206 -7676.643599998206 4864934.006999999 4864934.006999999
-7676.643599998206 -7676.643599998206 4864934.006999999 4864934.006999999
-7678.615433331579 -7676.643599998206 4864933.441866666 4864934.006999999
-7678.615433331579 -7676.643599998206 4864933.441866666 4864934.006999999
-7678.615433331579 -7682.559099998325 4864933.441866666 4864932.3116
-7682.559099998325 -7682.559099998325 4864932.3116 4864932.3116
-7682.559099998325 -7682.559099998325 4864932.3116 4864932.3116
-7682.559099998325 -7682.559099998325 4864932.3116 4864932.3116
-7682.559099998325 -7682.559099998325 4864932.3116 4864932.3116
-7682.559099998325 -7682.559099998325 4864932.3116 4864932.3116
-7682.559099998325 

-7589.062399998307 -7589.062399998307 4864985.529100001 4864985.529100001
-7589.062399998307 -7589.062399998307 4864985.529100001 4864985.529100001
-7589.062399998307 -7589.062399998307 4864985.529100001 4864985.529100001
-7589.062399998307 -7589.062399998307 4864985.529100001 4864985.529100001
-7589.062399998307 -7589.062399998307 4864985.529100001 4864985.529100001
-7589.062399998307 -7589.062399998307 4864985.529100001 4864985.529100001
-7589.062399998307 -7589.062399998307 4864985.529100001 4864985.529100001
-7589.062399998307 -7589.062399998307 4864985.529100001 4864985.529100001
-7589.062399998307 -7589.062399998307 4864985.529100001 4864985.529100001
-7590.953933332115 -7594.736999999732 4864984.986866668 4864983.902400002
-7594.736999999732 -7594.736999999732 4864983.902400002 4864983.902400002
-7594.736999999732 -7594.736999999732 4864983.902400002 4864983.902400002
-7594.736999999732 -7594.736999999732 4864983.902400002 4864983.902400002
-7594.736999999732 -7594.736999999732 

-7632.1528999991715 -7632.1528999991715 4864981.863899998 4864981.863899998
-7632.1528999991715 -7632.1528999991715 4864981.863899998 4864981.863899998
-7632.1528999991715 -7632.1528999991715 4864981.863899998 4864981.863899998
-7632.1528999991715 -7632.1528999991715 4864981.863899998 4864981.863899998
-7632.1528999991715 -7632.1528999991715 4864981.863899998 4864981.863899998
-7632.1528999991715 -7632.1528999991715 4864981.863899998 4864981.863899998
-7632.1528999991715 -7632.1528999991715 4864981.863899998 4864981.863899998
-7631.815499998629 -7631.815499998629 4864988.132799998 4864988.132799998
-7632.1528999991715 -7632.1528999991715 4864981.863899998 4864981.863899998
-7632.1528999991715 -7632.1528999991715 4864981.863899998 4864981.863899998
-7631.815499998629 -7631.815499998629 4864988.132799998 4864988.132799998
-7622.0546000003815 -7622.0546000003815 4864976.294200003 4864976.294200003
-7613.437466667344 -7622.0546000003815 4864978.16286667 4864976.294200003
-7622.054600000381