In [None]:
import numpy as np
import torch
import random
import csv
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist

# Data loading function
def load_data(filepath):
    """Read a headerless numeric CSV file into a float32 tensor.

    Every non-blank line is parsed as one row of floats. Blank lines are
    skipped: ``csv.reader`` yields an empty list for them, which would
    otherwise make the row list ragged and cause ``torch.tensor`` to fail.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        A ``torch.float32`` tensor with one row per non-blank CSV line.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to ``float``.
    """
    with open(filepath, mode='r', newline='') as file:
        rows = [[float(field) for field in row] for row in csv.reader(file) if row]
    return torch.tensor(rows, dtype=torch.float32)

# Path visualization function
def plot_path(path, coordinates):
    """Plot a tour as a closed polyline with a marker on every city.

    Args:
        path: List of city indices in visiting order. The first index is
            appended again at the end so the drawn line returns to the start.
        coordinates: Object exposing ``.numpy()`` that returns a 2-D array;
            column 0 is used as x and column 1 as y.
    """
    points = coordinates.numpy()
    closed_loop = np.array(path + [path[0]])

    plt.figure(figsize=(10, 10))
    xs = points[closed_loop, 0]
    ys = points[closed_loop, 1]
    plt.plot(xs, ys, 'o-')

    plt.xlabel('X coordinate')
    plt.ylabel('Y coordinate')
    plt.title('Optimized Path')
    plt.show()

# Distance function
def distance(city1, city2):
    """Return the Euclidean (L2) norm of the difference between two points."""
    offset = city1 - city2
    return np.linalg.norm(offset)

# Cost function
def calculate_cost(path, coordinates):
    """Return the total length of ``path``.

    Sums the distance between each pair of consecutive entries of ``path``.
    No leg from the last entry back to the first is added, so a round trip is
    only measured when ``path`` itself ends on its starting city.

    Args:
        path: Sequence of city indices in visiting order.
        coordinates: Indexable collection of city positions.
    """
    legs = zip(path[:-1], path[1:])
    return np.sum([distance(coordinates[src], coordinates[dst]) for src, dst in legs])

# Initial value table and policy
def initialize_value_policy(num_cities):
    """Create an all-zero value table and an all-zero policy.

    Args:
        num_cities: Number of cities.

    Returns:
        Tuple ``(value_table, policy)``: a ``num_cities x num_cities`` float
        array and a length-``num_cities`` integer array, both filled with 0.
    """
    table_shape = (num_cities, num_cities)
    return np.zeros(table_shape), np.zeros(num_cities, dtype=int)

# Value table and policy update
def update_value_policy(value_table, policy, path, coordinates):
    """Record the legs of ``path`` in the value table and the policy.

    For each of the first ``len(coordinates)`` positions of ``path``, the
    distance from that city to its successor is written to
    ``value_table[city][successor]`` and ``policy[city]`` is set to the
    successor. The successor index wraps around modulo ``len(coordinates)``.
    Both containers are modified in place and also returned.

    Args:
        value_table: 2-D table indexed as ``[from_city][to_city]``.
        policy: Per-city successor array.
        path: Sequence of city indices with at least ``len(coordinates)`` entries.
        coordinates: Indexable collection of city positions.

    Returns:
        Tuple ``(value_table, policy)``.
    """
    city_count = len(coordinates)
    for step in range(city_count):
        src = path[step]
        dst = path[(step + 1) % city_count]
        value_table[src][dst] = distance(coordinates[src], coordinates[dst])
        policy[src] = dst
    return value_table, policy

# Initial tour construction with the nearest-neighbor heuristic
def nearest_neighbor(cities):
    """Build a round trip greedily, always moving to the closest unvisited city.

    The tour starts at city 0, visits every other index exactly once and ends
    with 0 again, so the returned list has ``len(cities) + 1`` entries when
    ``cities`` is non-empty.

    Args:
        cities: Indexable collection of city positions.

    Returns:
        List of city indices beginning and ending with 0.
    """
    remaining = set(range(1, len(cities)))
    tour = [0]
    while remaining:
        here = tour[-1]
        closest = min(remaining, key=lambda candidate: distance(cities[here], cities[candidate]))
        remaining.remove(closest)
        tour.append(closest)
    tour.append(0)
    return tour

# Mutation operator of the genetic algorithm
def mutate(path, mutation_rate):
    """Return a copy of ``path`` with random pairs of interior cities swapped.

    The first and last positions are never touched. The number of swaps is
    ``int((len(path) - 2) * mutation_rate)``; each swap picks two distinct
    interior positions with ``random.sample``.

    Args:
        path: List of city indices; it is not modified.
        mutation_rate: Swaps to perform per interior position.

    Returns:
        A new list containing the mutated path.
    """
    mutated = path[:]
    length = len(path)
    interior_positions = range(1, length - 1)
    swap_count = int((length - 2) * mutation_rate)
    for _ in range(swap_count):
        first, second = random.sample(interior_positions, 2)
        mutated[first], mutated[second] = mutated[second], mutated[first]
    return mutated

# Crossover operator of the genetic algorithm
def crossover(parent1, parent2):
    """Combine two parents with an order-preserving crossover.

    A random slice ``parent1[start:end]`` is copied to the same positions of
    the child. The remaining positions are filled, starting at ``end`` and
    wrapping around to index 0, with the cities of ``parent2`` that are not
    already in the child, in the order they appear in ``parent2``.

    Both parents are expected to be permutations of the same hashable city
    labels; membership is tracked in a set so the fill step is linear instead
    of scanning the child list for every city.

    Args:
        parent1: Sequence of cities supplying the copied slice.
        parent2: Sequence of cities supplying the fill order.

    Returns:
        A new list of ``len(parent1)`` entries. Parents with fewer than two
        entries have no slice to exchange, so a copy of ``parent1`` is
        returned.
    """
    size = len(parent1)
    if size < 2:
        # random.sample cannot draw two cut points from fewer than two slots.
        return list(parent1)

    start, end = sorted(random.sample(range(size), 2))
    child = [None] * size
    child[start:end] = parent1[start:end]

    placed = set(parent1[start:end])
    ptr = end
    for city in parent2:
        if city in placed:
            continue
        if ptr >= size:
            ptr = 0  # wrap around to fill the positions before the slice
        child[ptr] = city
        placed.add(city)
        ptr += 1
    return child

# Genetic algorithm
def genetic_algorithm(coordinates, max_iterations, num_expansions, mutation_rate):
    """Improve a nearest-neighbor tour by repeatedly mutating the best path.

    Starting from the tour returned by ``nearest_neighbor``, the function
    performs ``max_iterations * num_expansions`` trials. Each trial mutates the
    current best path and keeps the mutant only when its cost is strictly
    lower. There is no population and no crossover step; only the single best
    path is carried forward.

    Args:
        coordinates: Indexable collection of city positions.
        max_iterations: Number of outer rounds.
        num_expansions: Mutants evaluated per round.
        mutation_rate: Passed through to ``mutate``.

    Returns:
        Tuple ``(best_cost, best_path)``.
    """
    best_path = nearest_neighbor(coordinates)
    best_cost = calculate_cost(best_path, coordinates)

    for _ in range(max_iterations):
        for _ in range(num_expansions):
            candidate = mutate(best_path, mutation_rate)
            candidate_cost = calculate_cost(candidate, coordinates)
            if candidate_cost < best_cost:
                best_path, best_cost = candidate, candidate_cost

    return best_cost, best_path

# Value iteration
def value_iteration(distance_matrix, gamma=0.999, theta=1e-7):
    """Run in-place value iteration over a city-to-city cost matrix.

    Each city is a state and moving to any *other* city is an action whose
    cost is the corresponding distance. Every sweep applies

        V[s] = min over s' != s of (distance_matrix[s][s'] + gamma * V[s'])

    updating ``V`` in place, so later states in a sweep already see the new
    values of earlier ones. Sweeps repeat until the largest change of any
    ``V[s]`` within a sweep is below ``theta``. The state holds no record of
    visited cities, so the resulting policy is a per-city "best next city"
    map, not necessarily a tour.

    Args:
        distance_matrix: Square array-like of pairwise costs.
        gamma: Discount factor. It should be below 1; otherwise the values
            can keep growing and the loop may never terminate.
        theta: Convergence threshold on the per-sweep maximum change.

    Returns:
        Tuple ``(V, policy)``: ``V`` is a float array of state values and
        ``policy[s]`` is the index of the city that minimises the update for
        ``s``. With fewer than two cities there is no move to evaluate and
        both arrays are returned as zeros.
    """
    dist = np.asarray(distance_matrix, dtype=float)
    num_cities = len(dist)
    V = np.zeros(num_cities)
    policy = np.zeros(num_cities, dtype=int)
    if num_cities < 2:
        return V, policy

    while True:
        delta = 0.0
        for state in range(num_cities):
            q_values = dist[state] + gamma * V
            # Staying in place is not an action. Masking it (rather than
            # filtering it out) keeps argmin's index equal to the city index.
            q_values[state] = np.inf
            best_next = int(np.argmin(q_values))
            new_value = q_values[best_next]
            delta = max(delta, abs(new_value - V[state]))
            V[state] = new_value
            policy[state] = best_next
        if delta < theta:
            break

    return V, policy

# Load the city coordinates from the CSV file
coordinates = load_data('../2024_AI_TSP.csv')
num_cities = len(coordinates)

# Create the initial (all-zero) value table and policy
value_table, policy = initialize_value_policy(num_cities)

# Build and improve a tour with the genetic algorithm
best_cost, best_path = genetic_algorithm(coordinates, max_iterations=1000, num_expansions=5, mutation_rate=0.1)



In [None]:
# Update the value table and policy from the best path found
value_table, policy = update_value_policy(value_table, policy, best_path, coordinates)

# Recompute the value table and policy with value iteration
# NOTE(review): the assignment below rebinds value_table and policy to the
# arrays returned by value_iteration, so the result of update_value_policy
# above is not used afterwards -- confirm this is intended.
distance_matrix = cdist(coordinates.numpy(), coordinates.numpy())
value_table, policy = value_iteration(distance_matrix)
# Print the results (the path itself is plotted in a later cell)
print("Value Table:\n", value_table)
print("Policy:\n", policy)

In [None]:
def generate_path_from_policy(policy):
    """Turn a per-city successor map into a path that visits every city once.

    Starting at city 0, the path follows ``policy[current]``. When the
    suggested successor has already been visited, an arbitrary unvisited city
    (whatever ``set.pop()`` yields) is taken instead. The walk stops once
    ``len(policy)`` distinct cities have been collected; the start city is not
    appended again at the end.

    Args:
        policy: Sequence where ``policy[c]`` is the preferred successor of ``c``.

    Returns:
        List of city indices beginning with 0.
    """
    city_count = len(policy)
    here = 0
    route = [here]
    seen = set(route)

    while len(seen) < city_count:
        candidate = policy[here]
        if candidate in seen:
            # The policy points back into the path: fall back to any city
            # that has not been visited yet.
            leftovers = set(range(city_count)) - seen
            if not leftovers:
                break
            candidate = leftovers.pop()
        route.append(candidate)
        seen.add(candidate)
        here = candidate

    return route
# Rebuild a path from the policy, print its cost and plot it.
# NOTE(review): this path does not end on its start city, and calculate_cost
# only sums consecutive legs, so the printed cost has no return leg, unlike
# the nearest_neighbor tour, which ends with city 0 -- confirm the two costs
# are meant to be compared.
path = generate_path_from_policy(policy)
print('after update : ',calculate_cost(path, coordinates))
plot_path(path, coordinates)