In [1]:
import numpy as np
import gym
from gym import spaces
import matplotlib.pyplot as plt

class BasketballEnv(gym.Env):
    """Single-possession shot-selection environment.

    Every step is one shot attempt from a chosen court area. The attempt
    succeeds with the probability listed for that area in ``player_stats``;
    a make yields reward ``+1`` and a miss yields ``-1``. The observation is
    the (constant) vector of per-area make probabilities, and an episode ends
    once ``shot_clock`` attempts have been taken.

    Args:
        player_stats: Mapping ``area name -> make probability``. Action ``i``
            refers to the ``i``-th key in the mapping's iteration order.
        shot_clock: Number of steps per episode (defaults to 24).

    Raises:
        ValueError: If ``player_stats`` is empty or ``shot_clock`` is not
            positive.
    """

    def __init__(self, player_stats, shot_clock=24):
        super().__init__()
        if len(player_stats) == 0:
            raise ValueError("player_stats must contain at least one shot area")
        if shot_clock <= 0:
            raise ValueError("shot_clock must be positive")
        self.player_stats = player_stats
        self.shot_clock = shot_clock
        self.action_space = spaces.Discrete(len(player_stats))  # Number of shooting areas
        self.observation_space = spaces.Box(low=0, high=1, shape=(len(player_stats),), dtype=np.float32)
        self.shot_areas = list(player_stats.keys())
        self.state = None
        self.time_left = None
        self.shots_taken = []

    def reset(self):
        """Start a new episode and return the initial observation.

        Returns:
            A float32 array holding the make probability of every shot area,
            ordered like ``self.shot_areas``.
        """
        # Use float32 so the observation matches the declared observation_space.
        self.state = np.array(
            [self.player_stats[area] for area in self.shot_areas],
            dtype=np.float32,
        )
        self.time_left = self.shot_clock  # Shot clock reset
        self.shots_taken = []
        return self.state

    def step(self, action):
        """Attempt one shot from the area indexed by ``action``.

        Args:
            action: Index into ``self.shot_areas``.

        Returns:
            Tuple ``(observation, reward, done, info)`` where ``reward`` is
            ``1`` for a made shot and ``-1`` for a miss, ``done`` becomes
            ``True`` once the shot clock has run out, and ``info`` is an
            empty dict.

        Raises:
            RuntimeError: If called before ``reset()``.
            ValueError: If ``action`` is not a valid area index.
        """
        if self.state is None or self.time_left is None:
            raise RuntimeError("reset() must be called before step()")
        # Reject out-of-range indices explicitly; a negative index would
        # otherwise silently select an area counted from the end.
        if not 0 <= action < len(self.shot_areas):
            raise ValueError(
                f"invalid action {action!r}; expected an index in [0, {len(self.shot_areas)})"
            )

        shot_percentage = self.state[action]
        reward = 1 if np.random.rand() < shot_percentage else -1

        self.shots_taken.append((action, reward))
        self.time_left -= 1
        done = self.time_left <= 0

        return self.state, reward, done, {}

    def _shot_counts(self):
        """Return ``(made, missed)`` integer arrays with one count per shot area."""
        made = np.zeros(len(self.shot_areas), dtype=int)
        missed = np.zeros(len(self.shot_areas), dtype=int)
        for action, reward in self.shots_taken:
            if reward == 1:
                made[action] += 1
            else:
                missed[action] += 1
        return made, missed

    def render(self, mode='human'):
        """Plot made and missed shot counts per area for the current episode.

        Does nothing when no shots have been taken yet. ``mode`` is accepted
        for API compatibility and is not used.
        """
        if not self.shots_taken:
            return

        made, missed = self._shot_counts()
        positions = np.arange(len(self.shot_areas))
        bar_width = 0.4

        # Grouped bars whose heights are real counts: drawing one height-1 bar
        # per shot would stack them invisibly on top of each other.
        fig, ax = plt.subplots(figsize=(12, 6))
        ax.bar(positions - bar_width / 2, made, bar_width, color='g', label='Made Shots')
        ax.bar(positions + bar_width / 2, missed, bar_width, color='r', label='Missed Shots')
        ax.set_xticks(positions)
        ax.set_xticklabels(self.shot_areas, rotation=45, ha='right')
        ax.set_xlabel('Shot Areas')
        ax.set_ylabel('Number of Shots')
        ax.set_title('Agent Shot Selection')
        ax.legend()
        fig.tight_layout()
        plt.show()

    def close(self):
        """Release resources; this environment holds none."""
        pass


ModuleNotFoundError: No module named 'gym'

In [None]:
import random
from collections import defaultdict
import matplotlib.pyplot as plt

class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy behaviour policy.

    Q-values are stored per state in a ``defaultdict`` keyed by
    ``tuple(state)``; unseen states start with all-zero action values.

    Args:
        n_actions: Number of discrete actions.
        learning_rate: Step size applied to the TD error.
        discount_factor: Discount applied to the bootstrapped next-state value.
        epsilon: Probability of picking a uniformly random action.
    """

    def __init__(self, n_actions, learning_rate=0.1, discount_factor=0.99, epsilon=0.1):
        self.q_table = defaultdict(lambda: np.zeros(n_actions))
        self.lr = learning_rate
        self.gamma = discount_factor
        self.epsilon = epsilon
        self.n_actions = n_actions

    def choose_action(self, state):
        """Pick an action for ``state`` using epsilon-greedy selection.

        Returns:
            A plain ``int`` action index on both the exploratory and the
            greedy branch.
        """
        if random.random() < self.epsilon:
            return random.randrange(self.n_actions)
        # np.argmax yields a NumPy integer; convert so both branches agree.
        return int(np.argmax(self.q_table[tuple(state)]))

    def update(self, state, action, reward, next_state, done=False):
        """Apply one Q-learning update for the observed transition.

        Args:
            state: State in which ``action`` was taken.
            action: Index of the action taken.
            reward: Reward received for the transition.
            next_state: State reached after the transition.
            done: When ``True`` the transition is terminal and no future
                value is bootstrapped from ``next_state``. Defaults to
                ``False``, which bootstraps from ``next_state``.
        """
        if done:
            # Nothing follows a terminal transition, so its future value is 0.
            future_value = 0.0
        else:
            future_value = float(np.max(self.q_table[tuple(next_state)]))

        action_values = self.q_table[tuple(state)]
        td_target = reward + self.gamma * future_value
        td_error = td_target - action_values[action]
        action_values[action] += self.lr * td_error

# Player shooting statistics: shot area -> make probability.
# The order of the entries is significant because the environment maps
# action indices onto the keys in iteration order.
player_stats = dict([
    # Three-point areas
    ('Left Corner 3', 0.35),
    ('Left Wing 3', 0.37),
    ('Top of Key 3', 0.40),
    ('Right Wing 3', 0.38),
    ('Right Corner 3', 0.34),
    # Mid-range areas
    ('Left Baseline Mid-range', 0.45),
    ('Left Elbow Mid-range', 0.44),
    ('Center Mid-range', 0.50),
    ('Right Elbow Mid-range', 0.43),
    ('Right Baseline Mid-range', 0.46),
    # Close to the basket
    ('Paint', 0.65),
])

# Training the Q-Learning agent
# Seed both random sources so a fresh Restart & Run All reproduces the same
# learning curve: the environment samples shot outcomes with np.random and
# the agent explores with the stdlib random module.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

env = BasketballEnv(player_stats)
agent = QLearningAgent(n_actions=env.action_space.n)

n_episodes = 1000
cumulative_rewards = []  # total reward collected in each episode

for episode in range(n_episodes):
    state = env.reset()
    done = False
    total_reward = 0
    while not done:
        action = agent.choose_action(state)
        next_state, reward, done, _ = env.step(action)
        agent.update(state, action, reward, next_state)
        state = next_state
        total_reward += reward

    cumulative_rewards.append(total_reward)

# Plot the cumulative rewards (one point per training episode)
reward_axes = plt.gca()
reward_axes.plot(range(n_episodes), cumulative_rewards)
reward_axes.set_xlabel('Episodes')
reward_axes.set_ylabel('Cumulative Reward')
reward_axes.set_title('Cumulative Reward Over Time')
plt.show()

# Example of using the trained agent and visualizing its shots
# Evaluate the learned policy greedily: with exploration left on, a share of
# the plotted shots would be random picks rather than the agent's choices.
training_epsilon = agent.epsilon
agent.epsilon = 0.0
try:
    state = env.reset()
    done = False
    while not done:
        action = agent.choose_action(state)
        next_state, reward, done, _ = env.step(action)
        state = next_state
finally:
    # Restore exploration so the agent can be trained further afterwards.
    agent.epsilon = training_epsilon

env.render()


: 