In [1]:
import numpy as np
import random
import csv
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist

# 데이터 로드 함수
def load_data(filepath):
    with open(filepath, mode='r', newline='') as file:
        data = [list(map(float, row)) for row in csv.reader(file)]
    return np.array(data)

# 경로 시각화 함수
def plot_path(path, coordinates):
    path = np.array(path + [path[0]])  # 경로 마감
    plt.figure(figsize=(10, 10))
    plt.plot(coordinates[path, 0], coordinates[path, 1], 'o-')
    plt.xlabel('X coordinate')
    plt.ylabel('Y coordinate')
    plt.title('Optimized Path')
    plt.show()

# 거리 계산 함수
def calculate_cost(path, coordinates):
    cost = 0
    for i in range(len(path) - 1):
        cost += np.linalg.norm(coordinates[path[i]] - coordinates[path[i + 1]])
    return cost

# 초기 Q-테이블 생성 함수
def initialize_q_table(num_cities, value_table):
    q_table = np.zeros((num_cities, num_cities))
    for i in range(num_cities):
        for j in range(num_cities):
            if i != j:
                q_table[i][j] = -value_table[i][j]
    return q_table

# Q-러닝 알고리즘
#새로운 Q-러닝 알고리즘
def q_learning(coordinates, q_table, num_episodes, alpha, gamma, epsilon):
    num_cities = len(coordinates)

    for episode in range(num_episodes):
        state = random.randint(0, num_cities - 1)
        visited = set([state])
        path = [state]

        while len(visited) < num_cities:
            if random.random() < epsilon:
                next_state = random.choice([city for city in range(num_cities) if city not in visited])
            else:
                next_state_values = q_table[state, :]
                next_state_values = np.ma.masked_array(next_state_values, mask=[i in visited for i in range(num_cities)])
                next_state = np.argmax(next_state_values)

            reward = -np.linalg.norm(coordinates[state] - coordinates[next_state])
            old_q_value = q_table[state, next_state]
            future_q_max = np.max(q_table[next_state, :])

            # TD 업데이트
            q_table[state, next_state] = old_q_value + alpha * (reward + gamma * future_q_max - old_q_value)

            state = next_state
            visited.add(state)
            path.append(state)

        path.append(path[0])
        episode_cost = calculate_cost(path, coordinates)

        # MC 업데이트: 경로상의 각 state-action 쌍을 업데이트
        for i in range(len(path) - 1):
            start, end = path[i], path[i + 1]
            reward = -np.linalg.norm(coordinates[start] - coordinates[end])
            q_table[start, end] = q_table[start, end] + alpha * (reward - q_table[start, end])

    return q_table, path


# 데이터 로드
#coordinates = load_data('2024_AI_TSP.csv')
num_cities = 998 #len(coordinates)

# 초기화된 value_table을 가져오는 부분 (예시로 랜덤 값 사용)
value_table = np.random.rand(num_cities, num_cities)

# 기존 Value Table을 기반으로 Q-테이블 초기화
q_table = initialize_q_table(num_cities, value_table)

# Q-러닝 수행
q_table, best_path = q_learning(coordinates, q_table, num_episodes=1000, alpha=0.1, gamma=0.9, epsilon=0.1)
best_cost = calculate_cost(best_path, coordinates)

# 결과 출력 및 경로 시각화
print("최적 비용:", best_cost)
print("경로:", best_path)
print("Q-테이블:\n", q_table)
plot_path(best_path, coordinates)


NameError: name 'coordinates' is not defined