In [23]:
pip install tensorflow numpy



In [22]:
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras import layers, models

# Confirm GPU
print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))

ROWS, COLS = 5, 5
FRUIT = 'f'
HARVESTED = 'h'
EMPTY = '.'
GARDENER = 'G'

actions = ['up', 'down', 'left', 'right']

def create_garden():
    garden = [[EMPTY for _ in range(COLS)] for _ in range(ROWS)]
    for r in range(ROWS):
        for c in range(COLS):
            if random.random() < 0.2:
                garden[r][c] = FRUIT
    return garden

def print_garden(garden, pos):
    for r in range(ROWS):
        row_str = ''
        for c in range(COLS):
            if pos == [r, c]:
                row_str += GARDENER
            else:
                row_str += garden[r][c]
        print(row_str)
    print()

def valid_move(r, c):
    return 0 <= r < ROWS and 0 <= c < COLS

def move_agent(pos, action):
    r, c = pos
    if action == 'up' and valid_move(r - 1, c):
        return [r - 1, c]
    elif action == 'down' and valid_move(r + 1, c):
        return [r + 1, c]
    elif action == 'left' and valid_move(r, c - 1):
        return [r, c - 1]
    elif action == 'right' and valid_move(r, c + 1):
        return [r, c + 1]
    return pos

class DQNAgent:
    def __init__(self):
        self.model = self.build_model()
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.05
        self.epsilon_decay = 0.99  # slower decay for longer exploration
        self.gamma = 0.95
        self.memory = []
        self.batch_size = 32

    def build_model(self):
        model = models.Sequential()
        model.add(layers.Flatten(input_shape=(ROWS, COLS, 1)))
        model.add(layers.Dense(64, activation='relu'))
        model.add(layers.Dense(32, activation='relu'))
        model.add(layers.Dense(len(actions), activation='linear'))
        model.compile(optimizer='adam', loss='mse')
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))
        if len(self.memory) > 1000:
            self.memory.pop(0)

    def act(self, state):
        if np.random.rand() < self.epsilon:
            return random.randrange(len(actions))
        q_values = self.model.predict(state[np.newaxis], verbose=0)[0]
        return np.argmax(q_values)

    def replay(self):
        if len(self.memory) < self.batch_size:
            return
        minibatch = random.sample(self.memory, self.batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target += self.gamma * np.amax(self.model.predict(next_state[np.newaxis], verbose=0)[0])
            target_f = self.model.predict(state[np.newaxis], verbose=0)
            target_f[0][action] = target
            self.model.fit(state[np.newaxis], target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

def encode_state(garden, pos):
    mapping = {EMPTY: 0, FRUIT: 1, HARVESTED: 2}
    state = np.zeros((ROWS, COLS), dtype=np.float32)
    for r in range(ROWS):
        for c in range(COLS):
            state[r][c] = mapping[garden[r][c]]
    r, c = pos
    state[r][c] = 3  # Gardener mark
    return state.reshape((ROWS, COLS, 1))

episodes = 20  # increased episodes for better training
agent = DQNAgent()

for e in range(1, episodes + 1):
    garden = create_garden()
    pos = [0, 0]  # Gardener starts at top-left corner, no fruit here
    total_harvested = 0

    max_steps = 25
    steps = 0

    print(f"Episode {e} starting:")
    print_garden(garden, pos)

    done = False
    while not done and steps < max_steps:
        state = encode_state(garden, pos)
        action_idx = agent.act(state)
        action = actions[action_idx]

        new_pos = move_agent(pos, action)
        r, c = new_pos

        # Step penalty to encourage efficient movement
        reward = -0.01

        if garden[r][c] == FRUIT:
            garden[r][c] = HARVESTED
            total_harvested += 1
            reward = 1
            print(f"Harvested fruit at ({r},{c})")

        pos = new_pos
        next_state = encode_state(garden, pos)
        done = not any(FRUIT in row for row in garden)

        agent.remember(state, action_idx, reward, next_state, done)
        agent.replay()

        steps += 1
        print(f"Step {steps}: Gardener at {pos}, action: {action}")
        print_garden(garden, pos)

        # Safety break to avoid infinite loops
        if steps > 200:
            print("Max steps reached, ending episode early.")
            break

    print(f"Episode {e} finished in {steps} steps. Total harvested: {total_harvested}\n{'='*30}\n")


Num GPUs Available: 1
Episode 1 starting:
G....
.f..f
f.ff.
..fff
..f..

Harvested fruit at (0,0)
Step 1: Gardener at [0, 0], action: up
G....
.f..f
f.ff.
..fff
..f..

Step 2: Gardener at [0, 1], action: right
hG...
.f..f
f.ff.
..fff
..f..

Step 3: Gardener at [0, 2], action: right
h.G..
.f..f
f.ff.
..fff
..f..

Step 4: Gardener at [0, 2], action: up
h.G..
.f..f
f.ff.
..fff
..f..

Step 5: Gardener at [0, 2], action: up
h.G..
.f..f
f.ff.
..fff
..f..

Step 6: Gardener at [0, 1], action: left
hG...
.f..f
f.ff.
..fff
..f..

Step 7: Gardener at [0, 0], action: left
G....
.f..f
f.ff.
..fff
..f..

Step 8: Gardener at [0, 0], action: up
G....
.f..f
f.ff.
..fff
..f..

Step 9: Gardener at [0, 0], action: left
G....
.f..f
f.ff.
..fff
..f..

Step 10: Gardener at [1, 0], action: down
h....
Gf..f
f.ff.
..fff
..f..

Harvested fruit at (2,0)
Step 11: Gardener at [2, 0], action: down
h....
.f..f
G.ff.
..fff
..f..

Step 12: Gardener at [3, 0], action: down
h....
.f..f
h.ff.
G.fff
..f..

Step 13: Gardene

In [None]:
!git init