In [1]:
!pip install googlemaps pandas


Collecting googlemaps
  Downloading googlemaps-4.10.0.tar.gz (33 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: googlemaps
  Building wheel for googlemaps (setup.py) ... [?25l[?25hdone
  Created wheel for googlemaps: filename=googlemaps-4.10.0-py3-none-any.whl size=40715 sha256=eca711806e088c8816dea5e6ef9272594bb59929aceee468578f0f44e6761b22
  Stored in directory: /root/.cache/pip/wheels/17/f8/79/999d5d37118fd35d7219ef57933eb9d09886c4c4503a800f84
Successfully built googlemaps
Installing collected packages: googlemaps
Successfully installed googlemaps-4.10.0


In [21]:
import googlemaps
import pandas as pd

# Initialize Google Maps Client
gmaps = googlemaps.Client(key="SECRET_API_KEY")

locations = [
    "San Francisco, CA",
    "Berkeley, CA",
    "Oakland, CA",
    "Palo Alto, CA",
    "Mountain View, CA",
    "Santa Clara, CA",
    "Napa, CA",
    "Sausalito, CA",
    "Half Moon Bay, CA",
    "Berkeley Hills, CA"
]

# Get Distance Matrix
matrix = gmaps.distance_matrix(origins=locations, destinations=locations, mode="driving")

# Check the response from Google Maps API
print("Google Maps API response:", matrix)

# Check if there are any errors or missing data
if matrix['status'] == 'OK':
    # Parse the distance matrix into a DataFrame
    distance_matrix = pd.DataFrame(
        [[entry['distance']['value'] if entry['status'] == 'OK' else None for entry in row['elements']]
         for row in matrix['rows']],
        index=locations,
        columns=locations
    )
    print("Distance Matrix:")
    print(distance_matrix)
else:
    print("Error in Distance Matrix API request:", matrix['status'])


Google Maps API response: {'destination_addresses': ['San Francisco, CA, USA', 'Berkeley, CA, USA', 'Oakland, CA, USA', 'Palo Alto, CA, USA', 'Mountain View, CA, USA', 'Santa Clara, CA, USA', 'Napa, CA, USA', 'Sausalito, CA 94965, USA', 'Half Moon Bay, CA, USA', 'Berkeley Hills, Berkeley, CA, USA'], 'origin_addresses': ['San Francisco, CA, USA', 'Berkeley, CA, USA', 'Oakland, CA, USA', 'Palo Alto, CA, USA', 'Mountain View, CA, USA', 'Santa Clara, CA, USA', 'Napa, CA, USA', 'Sausalito, CA 94965, USA', 'Half Moon Bay, CA, USA', 'Berkeley Hills, Berkeley, CA, USA'], 'rows': [{'elements': [{'distance': {'text': '1 m', 'value': 0}, 'duration': {'text': '1 min', 'value': 0}, 'status': 'OK'}, {'distance': {'text': '21.8 km', 'value': 21836}, 'duration': {'text': '22 mins', 'value': 1306}, 'status': 'OK'}, {'distance': {'text': '19.8 km', 'value': 19820}, 'duration': {'text': '18 mins', 'value': 1098}, 'status': 'OK'}, {'distance': {'text': '53.5 km', 'value': 53538}, 'duration': {'text': '37 

In [22]:
import networkx as nx

# Create Graph
G = nx.Graph()

# Add Edges
for i, source in enumerate(locations):
    for j, target in enumerate(locations):
        if i != j:  # Avoid self-loops
            G.add_edge(source, target, weight=distance_matrix.iloc[i, j])

print("Graph Representation:", G.edges(data=True))


Graph Representation: [('San Francisco, CA', 'Berkeley, CA', {'weight': 22276}), ('San Francisco, CA', 'Oakland, CA', {'weight': 19928}), ('San Francisco, CA', 'Palo Alto, CA', {'weight': 54259}), ('San Francisco, CA', 'Mountain View, CA', {'weight': 60857}), ('San Francisco, CA', 'Santa Clara, CA', {'weight': 72831}), ('San Francisco, CA', 'Napa, CA', {'weight': 77818}), ('San Francisco, CA', 'Sausalito, CA', {'weight': 15764}), ('San Francisco, CA', 'Half Moon Bay, CA', {'weight': 47830}), ('San Francisco, CA', 'Berkeley Hills, CA', {'weight': 25553}), ('Berkeley, CA', 'Oakland, CA', {'weight': 8203}), ('Berkeley, CA', 'Palo Alto, CA', {'weight': 71973}), ('Berkeley, CA', 'Mountain View, CA', {'weight': 78571}), ('Berkeley, CA', 'Santa Clara, CA', {'weight': 77040}), ('Berkeley, CA', 'Napa, CA', {'weight': 61732}), ('Berkeley, CA', 'Sausalito, CA', {'weight': 41932}), ('Berkeley, CA', 'Half Moon Bay, CA', {'weight': 65544}), ('Berkeley, CA', 'Berkeley Hills, CA', {'weight': 2977}), (

In [23]:
import random
import numpy as np

# Define Reward Function (Shorter distance = higher reward)
def reward_function(current_state, next_state, graph):
    # Distance is negative because we want to minimize distance (shorter = better)
    distance = graph[current_state][next_state]['weight']
    return -distance  # Penalize longer distances

# Define the Q-learning Environment
class RouteOptimizationEnv:
    def __init__(self, graph):
        self.graph = graph
        self.locations = list(graph.nodes)
        self.state = random.choice(self.locations)  # Start from a random location

    def reset(self):
        # Reset the environment to a random start state
        self.state = random.choice(self.locations)
        return self.state

    def step(self, action):
        # Transition from current state to the next based on action (state to state)
        next_state = action
        # Calculate the reward based on the reward function
        reward = reward_function(self.state, next_state, self.graph)
        # Move to the next state
        self.state = next_state
        return next_state, reward

    def available_actions(self):
        # Available actions are the neighboring nodes (locations) in the graph
        return [node for node in self.graph.neighbors(self.state)]

# Initialize Environment
env = RouteOptimizationEnv(G)

# Q-learning setup
q_table = np.zeros((len(locations), len(locations)))  # Initialize Q-table for state-action pairs

# Q-Learning parameters
alpha = 0.1  # Learning rate
gamma = 0.9  # Discount factor
epsilon = 0.1  # Exploration rate

# Convert location names to indices for easier Q-table access
location_index = {location: i for i, location in enumerate(locations)}

# Train the RL agent using Q-learning
for episode in range(1000):  # Number of episodes
    state = env.reset()  # Reset the environment
    total_reward = 0

    for t in range(100):  # Max time steps per episode
        current_state_index = location_index[state]

        # Exploration vs Exploitation: Choose action
        if random.uniform(0, 1) < epsilon:
            # Exploration: Choose a random action (neighboring location)
            possible_actions = env.available_actions()
            action = random.choice(possible_actions)
        else:
            # Exploitation: Choose the action with the highest Q-value
            possible_actions = env.available_actions()
            action_indices = [location_index[action] for action in possible_actions]
            action = possible_actions[np.argmax(q_table[current_state_index, action_indices])]

        # Take the action and observe the next state and reward
        next_state, reward = env.step(action)

        # Update Q-table using the Q-learning formula
        next_state_index = location_index[next_state]
        q_table[current_state_index, location_index[action]] = q_table[current_state_index, location_index[action]] + \
            alpha * (reward + gamma * np.max(q_table[next_state_index, :]) - q_table[current_state_index, location_index[action]])

        total_reward += reward
        state = next_state  # Move to the next state

    print(f"Episode {episode + 1}: Total Reward = {total_reward}")

# Example: Check the learned Q-table (showing optimal actions)
print("Q-table after training:")
print(q_table)

Episode 1: Total Reward = -5580201
Episode 2: Total Reward = -1646858
Episode 3: Total Reward = -1599043
Episode 4: Total Reward = -1394756
Episode 5: Total Reward = -2078755
Episode 6: Total Reward = -1426733
Episode 7: Total Reward = -1223661
Episode 8: Total Reward = -1477661
Episode 9: Total Reward = -1790811
Episode 10: Total Reward = -1910866
Episode 11: Total Reward = -1423594
Episode 12: Total Reward = -1226143
Episode 13: Total Reward = -1711954
Episode 14: Total Reward = -1137264
Episode 15: Total Reward = -1450974
Episode 16: Total Reward = -1724873
Episode 17: Total Reward = -1717653
Episode 18: Total Reward = -1666909
Episode 19: Total Reward = -732506
Episode 20: Total Reward = -1224431
Episode 21: Total Reward = -1022479
Episode 22: Total Reward = -1398392
Episode 23: Total Reward = -1505542
Episode 24: Total Reward = -1418834
Episode 25: Total Reward = -1200760
Episode 26: Total Reward = -1366843
Episode 27: Total Reward = -1523647
Episode 28: Total Reward = -2156931
Ep

In [27]:
# ... (your Q-learning training code from ipython-input-23-7345fad2823f) ...

# Save the trained Q-table to a file
import pickle

with open('q_table.pkl', 'wb') as f:
    pickle.dump(q_table, f)

print("Q-table saved to 'q_table.pkl'")

Q-table saved to 'q_table.pkl'
