In [1]:
#!/usr/bin/env python3 

In [1]:
# Importing the necessary modules
import folium 
from IPython.display import display, clear_output 
import numpy as np 
import time 

In [2]:
class MapEnv:
    """Navigation environment that moves an agent over (lat, lng) coordinates.

    The agent starts at (startLat, startLng) and takes discrete steps of
    ``stepSize`` along one axis at a time. After each step the reward is the
    negative Euclidean distance (in raw coordinate space) to the goal, and the
    episode ends once that distance drops below ``goalTolerance``.

    Args:
        startLat, startLng: Coordinates the agent is placed at on reset().
        goalLat, goalLng: Coordinates of the goal.
        stepSize: Amount added to / subtracted from lat or lng per action.
        goalTolerance: Distance below which the goal counts as reached.
        renderDelay: Seconds render() pauses after displaying the map;
            0 disables the pause.
    """

    def __init__(self, startLat, startLng, goalLat, goalLng, stepSize=0.0005,
                 goalTolerance=0.0003, renderDelay=0.5):
        self.startLat = startLat
        self.startLng = startLng
        self.goalLat = goalLat
        self.goalLng = goalLng
        self.stepSize = stepSize
        self.goalTolerance = goalTolerance
        self.renderDelay = renderDelay
        # reset() creates lat, lng, done and agentPath.
        self.reset()

    def reset(self):
        """Move the agent back to the start and return its (lat, lng) tuple."""
        self.lat = self.startLat
        self.lng = self.startLng
        self.done = False
        # Every visited position, in order; drawn as a polyline by render().
        self.agentPath = [(self.lat, self.lng)]
        return (self.lat, self.lng)

    def getDistanceToGoal(self):
        """Return the Euclidean distance between the agent and the goal."""
        return np.sqrt((self.lat - self.goalLat)**2 + (self.lng - self.goalLng)**2)

    def step(self, action):
        """Apply one action and return ((lat, lng), reward, done).

        Actions: 0=N (lat + stepSize), 1=S (lat - stepSize),
        2=E (lng + stepSize), 3=W (lng - stepSize).

        Once the episode is done, further calls leave the state untouched and
        return a reward of 0.

        Raises:
            ValueError: if ``action`` is not one of 0, 1, 2, 3. The position
                and the recorded path are left unchanged in that case.
        """
        if self.done:
            return (self.lat, self.lng), 0, self.done

        # Actions: 0=N, 1=S, 2=E, 3=W
        if action == 0:
            self.lat += self.stepSize
        elif action == 1:
            self.lat -= self.stepSize
        elif action == 2:
            self.lng += self.stepSize
        elif action == 3:
            self.lng -= self.stepSize
        else:
            # Previously an unknown action was recorded as a move-less step,
            # which hid caller bugs behind a plausible-looking reward.
            raise ValueError(f"Unknown action {action!r}; expected 0 (N), 1 (S), 2 (E) or 3 (W)")

        self.agentPath.append((self.lat, self.lng))
        distance = self.getDistanceToGoal()
        reward = -distance
        # bool() keeps `done` a plain Python bool rather than a numpy scalar.
        self.done = bool(distance < self.goalTolerance)

        return (self.lat, self.lng), reward, self.done

    def render(self):
        """Display a folium map of the start, goal, travelled path and agent.

        The previous cell output is cleared first so successive calls replace
        each other, then execution pauses for ``renderDelay`` seconds.
        """
        mapView = folium.Map(location=[self.lat, self.lng], zoom_start=18)

        # Start & Goal markers
        folium.Marker([self.startLat, self.startLng], popup='Start', icon=folium.Icon(color='blue')).add_to(mapView)
        folium.Marker([self.goalLat, self.goalLng], popup='Goal', icon=folium.Icon(color='green')).add_to(mapView)

        # Draw path
        folium.PolyLine(self.agentPath, color='red', weight=5, opacity=0.8).add_to(mapView)

        # Current position
        folium.CircleMarker(location=[self.lat, self.lng], radius=6, color='red', fill=True).add_to(mapView)

        clear_output(wait=True)
        display(mapView)
        if self.renderDelay > 0:
            time.sleep(self.renderDelay)

In [3]:
def randomAgentDemo(maxSteps=500):
    """Drive a MapEnv with uniformly random actions, rendering every step.

    Args:
        maxSteps: Upper bound on the number of random steps. A random walk has
            no guarantee of ever hitting the goal, so without a bound this
            loop could run (and re-render) indefinitely. Pass None to restore
            the unbounded behaviour.
    """
    # Start and goal coordinates
    startLat, startLng = 6.5244, 3.3792
    goalLat, goalLng = 6.5254, 3.3805

    env = MapEnv(startLat, startLng, goalLat, goalLng)
    env.reset()
    env.render()

    step = 0
    done = False

    while not done and (maxSteps is None or step < maxSteps):
        action = np.random.choice([0, 1, 2, 3])  # N, S, E, W
        nextState, reward, done = env.step(action)
        step += 1
        print(f"Step {step} | Action: {action} | State: {nextState} | Reward: {reward:.5f}")
        env.render()

    # Only claim success when the environment actually reported it.
    if done:
        print("Agent reached the goal!")
    else:
        print(f"Agent did not reach the goal within {step} steps.")

In [4]:
#randomAgentDemo()

<h2>Second Part</h2>

In [5]:
import numpy as np
import random
import tensorflow as tf
from collections import deque
import matplotlib.pyplot as plt

# Deep Q-Network Agent
class DQNAgent:
    """Epsilon-greedy Deep Q-Network agent backed by a small Keras MLP.

    Transitions are kept in a bounded replay memory; replay() samples a
    minibatch from it and fits the network towards the one-step Q-learning
    target ``reward + gamma * max_a Q(nextState, a)``.

    Args:
        stateSize: Number of features in a state vector.
        actionSize: Number of discrete actions (network outputs).
        memorySize: Maximum number of transitions kept; oldest are dropped.
        gamma: Discount rate applied to the bootstrapped next-state value.
        epsilon: Initial exploration rate.
        epsilonMin: Lower bound epsilon is never decayed below.
        epsilonDecay: Multiplicative decay applied once per replay() call.
        learningRate: Adam learning rate.
    """

    def __init__(self, stateSize, actionSize, memorySize=2000, gamma=0.95,
                 epsilon=1.0, epsilonMin=0.01, epsilonDecay=0.995,
                 learningRate=0.001):
        self.stateSize = stateSize
        self.actionSize = actionSize
        self.memory = deque(maxlen=memorySize)
        self.gamma = gamma                # discount rate
        self.epsilon = epsilon            # exploration rate
        self.epsilonMin = epsilonMin
        self.epsilonDecay = epsilonDecay
        self.learningRate = learningRate
        self.model = self._buildModel()

    def _buildModel(self):
        """Build and compile the Q-network (64-64-120 ReLU, linear output)."""
        # An explicit Input object replaces the deprecated `input_dim` kwarg.
        model = tf.keras.Sequential([
            tf.keras.Input(shape=(self.stateSize,)),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(120, activation='relu'),
            tf.keras.layers.Dense(self.actionSize, activation='linear'),
        ])
        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=self.learningRate))
        return model

    def remember(self, state, action, reward, nextState, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, nextState, done))

    def act(self, state, explore=True):
        """Return an action index for ``state``.

        Args:
            state: Array passed straight to ``model.predict``; the callers in
                this notebook pass shape (1, stateSize).
            explore: When True, pick a random action with probability epsilon.
                Pass False for evaluation to always take the greedy action.

        Returns:
            The chosen action as a plain int.
        """
        # Strict `<` so that epsilon == 0 can never trigger exploration.
        if explore and np.random.rand() < self.epsilon:
            return random.randrange(self.actionSize)
        actValues = self.model.predict(state, verbose=0)
        return int(np.argmax(actValues[0]))

    def replay(self, batchSize=32):
        """Train on up to ``batchSize`` sampled transitions, then decay epsilon.

        Does nothing when the memory is empty, so epsilon is not decayed for a
        call that could not learn anything.
        """
        if not self.memory:
            return

        minibatch = random.sample(self.memory, min(len(self.memory), batchSize))
        # NOTE(review): one predict/fit round-trip per sample is slow; it is
        # kept because batching would change the number of gradient steps.
        for state, action, reward, nextState, done in minibatch:
            target = reward
            if not done:
                target += self.gamma * np.amax(self.model.predict(nextState, verbose=0)[0])
            targetQ = self.model.predict(state, verbose=0)
            # Only the taken action's Q-value is moved towards the target.
            targetQ[0][action] = target
            self.model.fit(state, targetQ, epochs=1, verbose=0)

        if self.epsilon > self.epsilonMin:
            # Clamp so the last decay step cannot undershoot the floor.
            self.epsilon = max(self.epsilonMin, self.epsilon * self.epsilonDecay)


2025-08-12 19:41:10.231292: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
def trainDeepRlAgent(episodes=300, startLat=6.5244, startLng=3.3792,
                     goalLat=6.5254, goalLng=3.3805, maxSteps=200, batchSize=32):
    """Train a DQNAgent on a MapEnv and return the trained agent.

    Each episode runs until the environment reports done or ``maxSteps`` is
    hit, storing every transition in the agent's memory; the agent then
    performs a single replay() pass before the next episode starts.

    Args:
        episodes: Number of training episodes.
        startLat, startLng: Start coordinates handed to MapEnv.
        goalLat, goalLng: Goal coordinates handed to MapEnv and appended to
            every state vector.
        maxSteps: Per-episode step cap.
        batchSize: Minibatch size forwarded to agent.replay().

    Returns:
        The trained DQNAgent.
    """
    env = MapEnv(startLat, startLng, goalLat, goalLng)
    stateSize = 4  # [lat, lng, goal_lat, goal_lng]
    actionSize = 4  # N, S, E, W

    agent = DQNAgent(stateSize, actionSize)

    def toState(position):
        # Network input: current position followed by the goal, shape (1, 4).
        return np.array([*position, goalLat, goalLng]).reshape(1, -1)

    for episode in range(episodes):
        state = toState(env.reset())
        totalReward = 0
        step = 0
        done = False

        while not done and step < maxSteps:
            action = agent.act(state)
            nextPosition, reward, done = env.step(action)
            nextState = toState(nextPosition)

            agent.remember(state, action, reward, nextState, done)
            state = nextState
            totalReward += reward
            step += 1

        # One learning pass per episode (this also decays epsilon).
        agent.replay(batchSize)

        print(f"Episode {episode+1}/{episodes}, Steps: {step}, TotalReward: {totalReward:.4f}, Epsilon: {agent.epsilon:.2f}")

    return agent


In [7]:
def testAgent(agent, maxSteps=200):
    """Run one rendered evaluation episode with the agent acting greedily.

    The agent's epsilon is set to 0 for the duration of the episode so that
    the learned policy is evaluated rather than the exploration noise left
    over from training; the original value is restored afterwards, even if
    the episode raises.

    Args:
        agent: Object exposing ``epsilon`` and ``act(state)``.
        maxSteps: Step cap; a greedy policy in this deterministic environment
            can cycle forever, so the episode must be bounded.
    """
    startLat, startLng = 6.5244, 3.3792
    goalLat, goalLng = 6.5254, 3.3805

    env = MapEnv(startLat, startLng, goalLat, goalLng)
    state = np.array([*env.reset(), goalLat, goalLng]).reshape(1, -1)
    done = False
    steps = 0

    savedEpsilon = agent.epsilon
    agent.epsilon = 0.0  # evaluation: no random exploration
    try:
        env.render()

        while not done and steps < maxSteps:
            action = agent.act(state)
            nextStateCoord, reward, done = env.step(action)
            state = np.array([*nextStateCoord, goalLat, goalLng]).reshape(1, -1)
            steps += 1
            print(f"Step {steps}: Action={action}, Reward={reward:.3f}")
            env.render()
    finally:
        agent.epsilon = savedEpsilon

    print("Agent reached the goal!" if done else "Agent failed to reach the goal.")


In [8]:
# Train for 50 episodes (the function's default is 300), then run a single
# rendered test episode with the resulting agent.
trainedAgent = trainDeepRlAgent(episodes=50)
testAgent(trainedAgent) 

Agent failed to reach the goal.


In [13]:
# Persist the agent's Keras model to "model.h5" (path is relative to the
# kernel's working directory). Requires `trainedAgent` from an earlier cell.
trainedAgent.model.save("model.h5")

In [9]:
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

# Environment Class
class MapEnv:
    """Text-rendered navigation environment with goal-relative observations.

    The observation is the float32 vector ``[agentLat - goalLat,
    agentLng - goalLng]``, so the goal always sits at the origin. Reaching the
    goal (observation norm below ``goalTolerance``) yields ``goalReward``;
    every other step yields ``-distance * distancePenalty``.

    Args:
        startLat, startLng: Coordinates the agent is placed at on reset().
        goalLat, goalLng: Coordinates of the goal.
        stepSize: Amount added to / subtracted from lat or lng per action.
        goalTolerance: Distance below which the goal counts as reached.
        goalReward: Reward returned on the step that reaches the goal.
        distancePenalty: Scale applied to the distance for non-goal steps.
    """

    def __init__(self, startLat, startLng, goalLat, goalLng, stepSize=0.0001,
                 goalTolerance=0.0002, goalReward=10.0, distancePenalty=10.0):
        self.startLat = startLat
        self.startLng = startLng
        self.goalLat = goalLat
        self.goalLng = goalLng
        self.stepSize = stepSize
        self.goalTolerance = goalTolerance
        self.goalReward = goalReward
        self.distancePenalty = distancePenalty
        self.reset()

    def reset(self):
        """Move the agent back to the start and return the observation."""
        self.agentLat = self.startLat
        self.agentLng = self.startLng
        return self._getState()

    def _getState(self):
        """Return the agent's offset from the goal as a float32 array of 2."""
        # Normalize to center around 0
        return np.array([
            self.agentLat - self.goalLat,
            self.agentLng - self.goalLng
        ], dtype=np.float32)

    def step(self, action):
        """Apply one action and return (nextState, reward, done).

        Actions: 0=N (lat + stepSize), 1=S (lat - stepSize),
        2=E (lng + stepSize), 3=W (lng - stepSize).

        Raises:
            ValueError: if ``action`` is not one of 0, 1, 2, 3; the agent's
                position is left unchanged in that case.
        """
        # 0=N, 1=S, 2=E, 3=W
        if action == 0:
            self.agentLat += self.stepSize
        elif action == 1:
            self.agentLat -= self.stepSize
        elif action == 2:
            self.agentLng += self.stepSize
        elif action == 3:
            self.agentLng -= self.stepSize
        else:
            # An unknown action used to be scored as a move-less step.
            raise ValueError(f"Unknown action {action!r}; expected 0 (N), 1 (S), 2 (E) or 3 (W)")

        nextState = self._getState()
        dist = np.linalg.norm(nextState)
        # bool() keeps `done` a plain Python bool rather than a numpy scalar.
        done = bool(dist < self.goalTolerance)
        reward = self.goalReward if done else -dist * self.distancePenalty  # Reward closer steps

        return nextState, reward, done

    def render(self):
        """Print the agent's current coordinates with six decimals."""
        print(f"Agent at: ({self.agentLat:.6f}, {self.agentLng:.6f})")

# DQN Agent
class DQNAgent:
    """Epsilon-greedy Deep Q-Network agent operating on 1-D state vectors.

    States are stored unbatched; a leading batch axis is added whenever they
    are handed to the Keras model.

    Args:
        stateSize: Number of features in a state vector.
        actionSize: Number of discrete actions (network outputs).
        memorySize: Maximum number of transitions kept; oldest are dropped.
        gamma: Discount rate applied to the bootstrapped next-state value.
        epsilon: Initial exploration rate.
        epsilonMin: Lower bound epsilon is never decayed below.
        epsilonDecay: Multiplicative decay applied once per learning replay().
        learningRate: Adam learning rate.
    """

    def __init__(self, stateSize, actionSize, memorySize=2000, gamma=0.95,
                 epsilon=1.0, epsilonMin=0.01, epsilonDecay=0.995,
                 learningRate=0.001):
        # Imported here because this cell's import block does not include it.
        from collections import deque

        self.stateSize = stateSize
        self.actionSize = actionSize
        # A bounded deque evicts the oldest transition in O(1); the previous
        # list + pop(0) shifted every remaining element on each eviction.
        self.memory = deque(maxlen=memorySize)
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilonMin = epsilonMin
        self.epsilonDecay = epsilonDecay
        self.learningRate = learningRate
        self.model = self._buildModel()

    def _buildModel(self):
        """Build and compile the Q-network (64-64 ReLU, linear output)."""
        # An explicit Input object replaces the deprecated `input_dim` kwarg.
        model = Sequential([
            tf.keras.Input(shape=(self.stateSize,)),
            Dense(64, activation='relu'),
            Dense(64, activation='relu'),
            Dense(self.actionSize, activation='linear'),
        ])
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learningRate))
        return model

    def remember(self, state, action, reward, nextState, done):
        """Append one transition; the deque drops the oldest when full."""
        self.memory.append((state, action, reward, nextState, done))

    def act(self, state, explore=True):
        """Return an action index for the 1-D ``state`` vector.

        Args:
            state: Array of shape (stateSize,).
            explore: When True, pick a random action with probability epsilon.
                Pass False for evaluation to always take the greedy action.

        Returns:
            The chosen action as a plain int.
        """
        # Strict `<` so that epsilon == 0 can never trigger exploration.
        if explore and np.random.rand() < self.epsilon:
            return random.randrange(self.actionSize)
        qValues = self.model.predict(state[np.newaxis, :], verbose=0)
        return int(np.argmax(qValues[0]))

    def replay(self, batchSize=32):
        """Train on ``batchSize`` sampled transitions, then decay epsilon.

        Returns immediately (without decaying epsilon) while fewer than
        ``batchSize`` transitions are stored.
        """
        if len(self.memory) < batchSize:
            return
        minibatch = random.sample(self.memory, batchSize)
        # NOTE(review): one predict/fit round-trip per sample is slow; it is
        # kept because batching would change the number of gradient steps.
        for state, action, reward, nextState, done in minibatch:
            target = reward
            if not done:
                target += self.gamma * np.amax(self.model.predict(nextState[np.newaxis, :], verbose=0)[0])
            targetF = self.model.predict(state[np.newaxis, :], verbose=0)
            # Only the taken action's Q-value is moved towards the target.
            targetF[0][action] = target
            self.model.fit(state[np.newaxis, :], targetF, epochs=1, verbose=0)

        if self.epsilon > self.epsilonMin:
            # Clamp so the last decay step cannot undershoot the floor.
            self.epsilon = max(self.epsilonMin, self.epsilon * self.epsilonDecay)

# Train function
def trainAgent(episodes=300):
    """Train a 2-input / 4-action DQNAgent on the fixed start-to-goal route.

    Every episode runs for at most 100 environment steps, records each
    transition in the agent's memory, and is followed by one replay() call.
    After the final episode the model is written to "geoNavigationModel.h5".

    Args:
        episodes: Number of training episodes to run.

    Returns:
        The trained DQNAgent.
    """
    origin = (6.5244, 3.3792)
    destination = (6.5254, 3.3805)
    env = MapEnv(*origin, *destination)
    agent = DQNAgent(stateSize=2, actionSize=4)

    for episodeIndex in range(episodes):
        observation = env.reset()
        episodeReturn = 0

        # Bounded rollout: stop early as soon as the environment reports done.
        for _ in range(100):
            chosen = agent.act(observation)
            followUp, gain, finished = env.step(chosen)
            agent.remember(observation, chosen, gain, followUp, finished)
            observation = followUp
            episodeReturn += gain
            if finished:
                break

        agent.replay()
        print(f"Episode {episodeIndex+1}: Reward={episodeReturn:.2f} Epsilon={agent.epsilon:.2f}")

    agent.model.save("geoNavigationModel.h5")
    return agent

# Test function
def testAgent(agent, maxSteps=100):
    """Run one evaluation episode with the agent acting greedily.

    The agent's epsilon is set to 0 for the duration of the episode so that
    the learned policy is evaluated rather than the exploration noise left
    over from training; the original value is restored afterwards, even if
    the episode raises.

    Args:
        agent: Object exposing ``epsilon`` and ``act(state)``.
        maxSteps: Step cap; a greedy policy in this deterministic environment
            can cycle forever, so the episode must be bounded.
    """
    env = MapEnv(6.5244, 3.3792, 6.5254, 3.3805)
    state = env.reset()
    done = False
    step = 0
    print("Starting Test...")

    savedEpsilon = agent.epsilon
    agent.epsilon = 0.0  # evaluation: no random exploration
    try:
        while not done and step < maxSteps:
            action = agent.act(state)
            nextState, reward, done = env.step(action)
            state = nextState
            env.render()
            step += 1
    finally:
        agent.epsilon = savedEpsilon

    print("✅ Goal reached!" if done else "❌ Failed to reach goal.")



In [10]:
# --- RUN THE AGENT ---
# Full-length run, left disabled in favour of the shorter run below:
# trainedAgent = trainAgent(episodes=300)
# Train for 50 episodes (this also saves the model), then evaluate once.
trainedAgent = trainAgent(episodes=50) 
testAgent(trainedAgent)

Episode 1: Reward=-2.68 Epsilon=0.99
Episode 2: Reward=-1.10 Epsilon=0.99
Episode 3: Reward=-1.80 Epsilon=0.99
Episode 4: Reward=-1.60 Epsilon=0.98
Episode 5: Reward=-1.30 Epsilon=0.98
Episode 6: Reward=-1.06 Epsilon=0.97
Episode 7: Reward=-1.99 Epsilon=0.97
Episode 8: Reward=-2.77 Epsilon=0.96
Episode 9: Reward=-1.27 Epsilon=0.96
Episode 10: Reward=-1.86 Epsilon=0.95
Episode 11: Reward=-1.00 Epsilon=0.95
Episode 12: Reward=-1.37 Epsilon=0.94
Episode 13: Reward=-1.35 Epsilon=0.94
Episode 14: Reward=-1.19 Epsilon=0.93
Episode 15: Reward=-2.19 Epsilon=0.93
Episode 16: Reward=-1.21 Epsilon=0.92
Episode 17: Reward=-1.43 Epsilon=0.92
Episode 18: Reward=-1.57 Epsilon=0.91
Episode 19: Reward=-1.66 Epsilon=0.91
Episode 20: Reward=-1.02 Epsilon=0.90
Episode 21: Reward=-2.53 Epsilon=0.90
Episode 22: Reward=-1.77 Epsilon=0.90
Episode 23: Reward=-1.74 Epsilon=0.89
Episode 24: Reward=-2.81 Epsilon=0.89
Episode 25: Reward=-1.44 Epsilon=0.88
Episode 26: Reward=-1.04 Epsilon=0.88
Episode 27: Reward=-2