In [1]:
import random


# Simplified Grid World Example (Text-based)
class GridWorld:
    """Minimal text-rendered square grid environment.

    The agent starts in the top-left cell ``(0, 0)`` and an episode
    finishes when it lands on ``goal``. Positions are ``(row, col)`` tuples.
    Actions are integer codes: 0 = up, 1 = down, 2 = left, 3 = right.
    """

    def __init__(self, size=4, goal=(3, 3)):
        """Create a ``size`` x ``size`` grid with the agent at ``(0, 0)``.

        Args:
            size: Number of rows (and columns) of the grid.
            goal: ``(row, col)`` cell that terminates the episode.
        """
        self.size = size
        self.goal = goal
        # ``state`` is simply an alias of the agent position.
        self.agent_pos = self.state = (0, 0)

    def reset(self):
        """Move the agent back to ``(0, 0)`` and return that state."""
        self.agent_pos = self.state = (0, 0)
        return self.state

    def step(self, action):
        """Apply one move and report the outcome.

        A move that would leave the grid is clamped at the edge, so the agent
        stays put. An action code other than 0-3 leaves the position
        unchanged but is still scored like any other step.

        Args:
            action: Integer action code (0 up, 1 down, 2 left, 3 right).

        Returns:
            ``(state, reward, done)`` where reward is 10 and done is True on
            reaching the goal, otherwise reward is -1 and done is False.
        """
        row, col = self.agent_pos
        if action == 0:  # up: towards row 0
            row = max(0, row - 1)
        elif action == 1:  # down: towards the last row
            row = min(self.size - 1, row + 1)
        elif action == 2:  # left: towards column 0
            col = max(0, col - 1)
        elif action == 3:  # right: towards the last column
            col = min(self.size - 1, col + 1)
        self.agent_pos = self.state = (row, col)

        if self.agent_pos == self.goal:
            return self.state, 10, True
        # Every non-terminal step carries a small penalty.
        return self.state, -1, False

    def display(self):
        """Print the grid, one text row per grid row.

        ``A`` marks the agent, ``G`` the goal and ``.`` an empty cell; the
        agent marker wins when the agent is standing on the goal.
        """
        for r in range(self.size):
            cells = []
            for c in range(self.size):
                here = (r, c)
                if here == self.agent_pos:
                    cells.append("A ")
                elif here == self.goal:
                    cells.append("G ")
                else:
                    cells.append(". ")
            print("".join(cells))

In [7]:
# Agent (Simple Random Agent)

class RandomAgent:
    """Policy that picks every action uniformly at random."""

    def __init__(self, action_space):
        """Remember how many discrete actions exist.

        Args:
            action_space: Number of actions; valid codes are
                ``0 .. action_space - 1``.
        """
        self.action_space = action_space

    def choose_action(self, state):
        """Return a random action code; ``state`` is accepted but never read."""
        candidate_actions = range(self.action_space)
        return random.choice(candidate_actions)

        
# Main Execution
# Run one episode of the random agent in the grid world, logging each step.
env = GridWorld()
agent = RandomAgent(action_space=4) # 4 actions: up, down, left, right
episode = 0 # only run episode 0
state = env.reset()
done = False
total_reward = 0
print(f"\nEpisode {episode + 1} - Agent positions and Rewards:")
while not done:
    action = agent.choose_action(state)
    next_state, reward, done = env.step(action)
    total_reward += reward
    print(f" Agent position: {env.agent_pos}, Reward: {reward}")
    state = next_state

# Report with the same 1-based episode number as the header printed above,
# so both lines refer to the same episode.
print(f"Episode {episode + 1}, Total Reward: {total_reward}")


Episode 1 - Agent positions and Rewards:
 Agent position: (0, 1), Reward: -1
 Agent position: (1, 1), Reward: -1
 Agent position: (1, 2), Reward: -1
 Agent position: (1, 1), Reward: -1
 Agent position: (0, 1), Reward: -1
 Agent position: (1, 1), Reward: -1
 Agent position: (2, 1), Reward: -1
 Agent position: (1, 1), Reward: -1
 Agent position: (1, 0), Reward: -1
 Agent position: (2, 0), Reward: -1
 Agent position: (2, 1), Reward: -1
 Agent position: (2, 2), Reward: -1
 Agent position: (2, 3), Reward: -1
 Agent position: (2, 3), Reward: -1
 Agent position: (1, 3), Reward: -1
 Agent position: (1, 3), Reward: -1
 Agent position: (0, 3), Reward: -1
 Agent position: (0, 3), Reward: -1
 Agent position: (0, 3), Reward: -1
 Agent position: (0, 2), Reward: -1
 Agent position: (0, 3), Reward: -1
 Agent position: (0, 3), Reward: -1
 Agent position: (0, 3), Reward: -1
 Agent position: (0, 2), Reward: -1
 Agent position: (0, 2), Reward: -1
 Agent position: (0, 3), Reward: -1
 Agent position: (1, 3

In [9]:
import numpy as np
import random
# Simplified Auto Insurance Environment
class AutoInsuranceEnv:
    """Toy auto-insurance pricing environment.

    The state is a length-4 integer array
    ``[speeding_frequency, hard_braking_frequency, mileage, time_of_day_driving]``
    with every feature kept in the range 0..9.
    """

    def __init__(self):
        """Draw a random initial state and set the model constants."""
        self.state = np.random.randint(0, 10, 4) # Initialize with random state
        self.premium_base = 100 # Base premium (stored; not read by step())
        self.risk_factors = [1.5, 1.2, 0.8, 1.1] # weight on each state feature.

    def reset(self):
        """Draw a fresh random state and return it."""
        self.state = np.random.randint(0, 10, 4)
        return self.state

    def step(self, action):
        """Offer a premium, simulate the customer's response and advance the state.

        Args:
            action: Indexable whose first element is the premium offered.

        Returns:
            ``(state, reward, renewal)``: the new state array, the reward for
            this step and whether the customer renewed. Callers use
            ``renewal`` to decide whether the episode continues.
        """
        premium = action[0] # The action is the premium offered
        risk_score = np.dot(self.state, self.risk_factors)

        # Simulate customer response and claims.
        # Logistic in (risk_score - premium / 10): a higher risk score lowers
        # the renewal probability.
        renewal_probability = 1.0 / (1 + np.exp(risk_score - premium / 10))
        # Despite the name this is only a scale factor for the mean claim; it
        # is not bounded to [0, 1] (risk_score can reach 9 * 4.6 = 41.4).
        claim_probability = risk_score / 20.0
        renewal = random.random() < renewal_probability
        # Mean claim is proportional to risk; the normal draw may be negative.
        claim_amount = np.random.normal(claim_probability * 1000, 200)

        # Calculate reward
        if renewal:
            reward = premium - claim_amount
        else:
            # If the customer does not renew, the reward is negative.
            reward = -premium - claim_amount

        # Update state (customer's behavior changes slightly: -1, 0 or +1 per
        # feature). Build a NEW array rather than using `+=`: the previous
        # state array has already been handed to the caller, and an in-place
        # update would silently change it (and leave unclipped values in it).
        drift = np.random.randint(-1, 2, 4)
        self.state = np.clip(self.state + drift, 0, 9) # Keep state within bounds
        return self.state, reward, renewal # renewal is used to determine if the episode ends.


# Simple Agent (Random Premium)
class InsuranceAgent:
    """Baseline pricing policy that quotes a premium at random."""

    def choose_action(self, state):
        """Return a one-element array holding a random premium.

        ``state`` is accepted but not read. The premium is an integer drawn
        uniformly from 50 to 200, both ends included.
        """
        return np.array([random.randint(50, 200)])
        
# Main Execution
# Run several episodes of the random-premium agent and print each total reward.
env = AutoInsuranceEnv()
agent = InsuranceAgent()
episodes = 5
max_steps = 10 # hard cap on the number of steps in one episode

for episode in range(episodes):
    state = env.reset()
    total_reward = 0
    done = False
    step = 0
    while not done:
        action = agent.choose_action(state)
        next_state, reward, renewal = env.step(action)
        total_reward += reward
        state = next_state
        step += 1
        # The episode ends if the customer doesn't renew, or after max_steps
        # steps. `step` was already incremented above, so `>=` stops after
        # exactly max_steps steps (`>` would let an extra step through).
        if not renewal or step >= max_steps:
            done = True
    print(f"Episode {episode + 1}, Total Reward: {total_reward}")

Episode 1, Total Reward: -1265.5383232377465
Episode 2, Total Reward: -1044.4008589364726
Episode 3, Total Reward: -890.576635135134
Episode 4, Total Reward: -1142.3275643558204
Episode 5, Total Reward: -459.783226944564
