In [385]:
import json

def read_game_data(file_path):
    """Load game states from a text file holding one JSON document per line.

    Args:
        file_path: Path of the file to read.

    Returns:
        A list with the decoded value of every line that parses as JSON, in
        file order. Lines that fail to decode (blank lines included) are
        dropped without any message.
    """
    states = []
    with open(file_path, 'r') as handle:
        for raw_line in handle:
            text = raw_line.strip()
            try:
                decoded = json.loads(text)
            except json.JSONDecodeError:
                # Not valid JSON: ignore this line and keep reading
                continue
            states.append(decoded)
    return states


In [386]:
def extract_features(game_data):
    """Project every game state onto the fixed set of feature keys.

    Args:
        game_data: Iterable of dict-like game states.

    Returns:
        A list with one new dict per input state, containing exactly the keys
        listed below in that order. Values are copied by reference, not
        duplicated.

    Raises:
        KeyError: If a state lacks one of the listed keys.
    """
    feature_keys = (
        "turn_number",
        "current_player",
        "num_players",
        "cards_in_hands",
        "playable_cards",
        "known_opponent_cards",
        "num_cards_in_hands",
        "top_card",
        "cards_in_pile",
        "face_up_cards",
        "num_face_up_cards",
        "num_face_down_cards",
        "play_direction",
        "num_cards_in_draw_pile",
    )
    return [{key: state[key] for key in feature_keys} for state in game_data]


In [387]:
# Example usage: read the logged states from disk, keep only the feature keys,
# and print the complete result. The names assigned here (file_path, game_data,
# extracted_data) are reused by the example cells further down.
file_path = 'game_data.txt'
game_data = read_game_data(file_path)
extracted_data = extract_features(game_data)
print(extracted_data)


[{'turn_number': 0, 'current_player': 5, 'num_players': 7, 'cards_in_hands': [['Unknown', 'Unknown', 'Unknown'], ['Unknown', 'Unknown', 'Unknown'], ['Unknown', 'Unknown', 'Unknown'], ['Unknown', 'Unknown', 'Unknown'], ['5 Clubs', '5 Hearts', '8 Clubs'], ['Unknown', 'Unknown', 'Unknown'], ['Unknown', 'Unknown', 'Unknown']], 'playable_cards': '[[5 Clubs, 5 Hearts, 8 Clubs]]', 'known_opponent_cards': [[], [], [], [], [], [], []], 'num_cards_in_hands': [3, 3, 3, 3, 3, 3, 3], 'top_card': None, 'cards_in_pile': [], 'face_up_cards': [['13 Spades', '10 Clubs', '2 Clubs'], ['14 Diamonds', '14 Hearts', '2 Clubs'], ['11 Spades', '11 Diamonds', 'Joker'], ['13 Diamonds', '14 Hearts', '2 Diamonds'], ['9 Spades', '11 Hearts', 'Joker'], ['8 Diamonds', '13 Hearts', '2 Spades'], ['9 Diamonds', '11 Spades', '2 Diamonds']], 'num_face_up_cards': [3, 3, 3, 3, 3, 3, 3], 'num_face_down_cards': [3, 3, 3, 3, 3, 3, 3], 'play_direction': 1, 'num_cards_in_draw_pile': 45}]


In [388]:
def get_line_from_extracted_data(extracted_data, line_number):
    """Fetch one extracted state by zero-based position.

    Args:
        extracted_data: Sequence of extracted states.
        line_number: Zero-based index of the wanted state. Negative values
            are treated as out of range rather than counting from the end.

    Returns:
        The state at ``line_number``, or the string
        ``"Line number out of range"`` when the index is invalid. Callers
        must check for that string before treating the result as a state.
    """
    past_either_end = line_number < 0 or line_number >= len(extracted_data)
    return "Line number out of range" if past_either_end else extracted_data[line_number]

# Example usage
# line_number is a zero-based index into extracted_data (index 0 is the first state).
line_number = 102  # Requests index 102, not the first line
line_data = get_line_from_extracted_data(extracted_data, line_number)
print(line_data)

Line number out of range


In [389]:
def get_selected_features(extracted_data, line_number):
    """Summarise the state at ``line_number`` for the player named one state earlier.

    The player is taken from the *previous* state's ``current_player`` value
    minus one, and that index is used to look into the per-player lists of the
    state at ``line_number``.

    Args:
        extracted_data: Sequence of extracted states.
        line_number: Zero-based index of the state to summarise; must be at
            least 1 because the preceding state is also read.

    Returns:
        A dict with ``top_card``, ``num_cards_in_hand`` and
        ``num_face_up_cards``, or the string ``"Line number out of range"``
        when ``line_number`` is below 1 or past the end.
    """
    if line_number < 1 or line_number >= len(extracted_data):
        return "Line number out of range"

    # The acting player comes from the state before the one being summarised
    player_idx = extracted_data[line_number - 1]["current_player"] - 1
    target = extracted_data[line_number]
    return {
        "top_card": target["top_card"],
        "num_cards_in_hand": target["num_cards_in_hands"][player_idx],
        "num_face_up_cards": target["num_face_up_cards"][player_idx],
    }

# Example usage
# get_selected_features rejects line_number below 1 because it also reads the previous state.
line_number = 102  # Requests index 102, not the second line
selected_data = get_selected_features(extracted_data, line_number)
print(selected_data)


Line number out of range


In [390]:
def convert_data_to_index(data):
    """Map an extracted game state to a stable integer usable as a Q-table key.

    The feature values are serialised, in a fixed order, to a compact JSON
    array and hashed with SHA-256; the first eight digest bytes are returned
    as a signed integer.

    Two properties matter for the Q-table, which is pickled between runs:

    * The result is identical in every Python process. The built-in
      ``hash()`` on strings is salted per process, so indices produced with
      it do not match after a kernel restart.
    * Field boundaries are kept by the JSON encoding, so states such as
      (turn 1, player 12) and (turn 11, player 2) no longer serialise to the
      same text.

    Indices stored by earlier versions that used ``hash()`` will not match
    the values returned here.

    Args:
        data: Dict holding the feature keys read below. ``playable_cards``
            may be the raw string or a parsed list; the two forms produce
            different indices, so callers must use one form consistently.

    Returns:
        A signed 64-bit-range ``int``.

    Raises:
        KeyError: If a required key is missing.
        TypeError: If ``data`` is not subscriptable by string keys (for
            example an error string passed in place of a state).
    """
    import hashlib
    import json

    ordered_fields = [
        data["turn_number"],
        data["current_player"],
        data["num_players"],
        data["cards_in_hands"],
        data["playable_cards"],
        data["known_opponent_cards"],
        data["num_cards_in_hands"],
        # A missing/empty top card is normalised to the text "None"
        data["top_card"] if data["top_card"] else "None",
        data["cards_in_pile"],
        data["face_up_cards"],
        data["num_face_up_cards"],
        data["num_face_down_cards"],
        data["play_direction"],
        data["num_cards_in_draw_pile"],
    ]

    # default=str keeps unusual value types serialisable instead of failing
    data_string = json.dumps(ordered_fields, separators=(",", ":"), default=str)

    digest = hashlib.sha256(data_string.encode("utf-8")).digest()
    data_index = int.from_bytes(digest[:8], "big", signed=True)
    return data_index

# NOTE(review): line_data is whatever get_line_from_extracted_data returned in
# the earlier example cell. When that lookup yields the
# "Line number out of range" string instead of a state dict, this call raises
# the TypeError recorded below.
convert_data_to_index(line_data)


TypeError: string indices must be integers

In [None]:
def parse_playable_cards(card_string):
    # Remove the brackets and split the string by commas
    card_list = card_string.strip("[]").split(", ")
    # Strip extra whitespace and add quotes around each element
    return [card.strip().replace(" ", "_") for card in card_list]

In [None]:
import numpy as np
import pickle
import os

def Q_learning(file_path='game_data.txt', data_file='q_learning_data.pkl', epsilon=0.1):
    """Pick an action for the most recent logged game state (epsilon-greedy).

    Loads the persisted Q-table and histories from ``data_file`` when it
    exists (otherwise starts with empty ones), reads the game log, takes the
    last extracted state and chooses one of its playable cards: a uniformly
    random one with probability ``epsilon``, otherwise the one with the
    highest stored Q-value. The state index and the chosen action index are
    appended to the histories and everything is pickled back to ``data_file``.

    Args:
        file_path: Game log with one JSON state per line.
        data_file: Pickle file holding ``q_table``, ``state_index_history``
            and ``action_index_history``.
        epsilon: Exploration rate in [0, 1].

    Returns:
        The selected entry of the state's parsed ``playable_cards`` list.
        Because ``parse_playable_cards`` yields ``['']`` for an empty card
        list, the return value is ``''`` when nothing is playable.

    Raises:
        ValueError: If no game state could be read from ``file_path``.
    """
    if os.path.exists(data_file):
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # data_file at a file written by this notebook.
        with open(data_file, 'rb') as file:
            data = pickle.load(file)
        q_table = data['q_table']
        state_index_history = data['state_index_history']
        action_index_history = data['action_index_history']
    else:
        # First run: nothing learned or recorded yet
        q_table = {}
        state_index_history = []
        action_index_history = []

    game_data = read_game_data(file_path)
    extracted_data = extract_features(game_data)
    if not extracted_data:
        # Without this guard the lookup below returns an error *string*, which
        # would then be indexed like a state and fail with an obscure TypeError.
        raise ValueError(f"No game states could be read from {file_path!r}")

    # Work on the latest state in the log
    state = get_line_from_extracted_data(extracted_data, len(extracted_data) - 1)
    state['playable_cards'] = parse_playable_cards(state['playable_cards'])
    state_index = convert_data_to_index(state)
    num_actions = len(state['playable_cards'])  # one action per playable card

    # Unseen state: start all its action values at zero
    if state_index not in q_table:
        q_table[state_index] = np.zeros(num_actions)

    if np.random.uniform(0, 1) < epsilon:
        action_index = np.random.choice(range(num_actions))  # explore
    else:
        action_index = np.argmax(q_table[state_index])  # exploit

    # Remember what was done so a later training step can credit it
    state_index_history.append(state_index)
    action_index_history.append(action_index)

    data = {
        'q_table': q_table,
        'state_index_history': state_index_history,
        'action_index_history': action_index_history
    }
    with open(data_file, 'wb') as file:
        pickle.dump(data, file)

    return state['playable_cards'][action_index]



# Run one action-selection step; as the last expression of the cell, the
# returned card name is what the cell displays.
Q_learning()
    


''

In [None]:
def calculate_reward(old_state, new_state):
    """Score a move by how many cards the acting player got rid of.

    The acting player is ``old_state['current_player'] - 1``. The reward is
    the drop in that player's face-up card count plus the drop in their hand
    size between ``old_state`` and ``new_state``; it is negative when the
    player ended up with more cards.

    Args:
        old_state: State before the move.
        new_state: State after the move.

    Returns:
        The summed decrease of the two counts.
    """
    mover = old_state['current_player'] - 1
    # Face-up cards first, then hand cards: each contributes (before - after)
    counted_piles = ('num_face_up_cards', 'num_cards_in_hands')
    return sum(old_state[pile][mover] - new_state[pile][mover] for pile in counted_piles)

0

In [391]:
import pickle

def train_Q_model(file_path='game_data.txt', data_file='q_learning_data.pkl', alpha=0.1, gamma=0.99):
    """Apply one Q-learning update for the most recently recorded action.

    The transition is taken from the last two states in the game log: the
    second-to-last is the state before the move and the last is the state
    after it. The Q-value updated is the one stored under the last entries of
    ``state_index_history`` / ``action_index_history``:

        Q(s, a) <- Q(s, a) + alpha * (reward + gamma * max Q(s', .) - Q(s, a))

    NOTE(review): this assumes the last recorded state index belongs to the
    second-to-last logged state — confirm against how the game loop
    alternates ``Q_learning`` and this function.

    Args:
        file_path: Game log with one JSON state per line.
        data_file: Pickle file holding ``q_table``, ``state_index_history``
            and ``action_index_history``.
        alpha: Learning rate.
        gamma: Discount factor.

    Returns:
        None. Prints a message and returns early when the log has fewer than
        two states or no action has been recorded yet.
    """
    if os.path.exists(data_file):
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # data_file at a file written by this notebook.
        with open(data_file, 'rb') as file:
            data = pickle.load(file)
        q_table = data['q_table']
        state_index_history = data['state_index_history']
        action_index_history = data['action_index_history']
    else:
        # Nothing persisted yet
        q_table = {}
        state_index_history = []
        action_index_history = []

    game_data = read_game_data(file_path)
    extracted_data = extract_features(game_data)
    if len(extracted_data) < 2:
        # A transition needs both a "before" and an "after" state
        print("Only one line" if len(extracted_data) == 1 else "No game data")
        return
    if not state_index_history or not action_index_history:
        print("No recorded action to train on")
        return

    # Valid indices run 0..len-1, so the latest transition is (len-2) -> (len-1).
    # Asking for index len would return the out-of-range error string.
    old_state = get_line_from_extracted_data(extracted_data, line_number=len(extracted_data) - 2)
    new_state = get_line_from_extracted_data(extracted_data, line_number=len(extracted_data) - 1)
    reward = calculate_reward(old_state, new_state)

    # Parse the card string exactly as Q_learning does, so this state maps to
    # the same Q-table key and the action count is a number of cards rather
    # than the length of the raw string.
    new_state['playable_cards'] = parse_playable_cards(new_state['playable_cards'])
    new_state_index = convert_data_to_index(new_state)
    new_num_actions = len(new_state['playable_cards'])

    # Unseen state: start all its action values at zero
    if new_state_index not in q_table:
        q_table[new_state_index] = np.zeros(new_num_actions)

    last_state_index = state_index_history[-1]
    action_index = action_index_history[-1]

    # Temporal-difference update toward reward + discounted best next value
    current_q = q_table[last_state_index][action_index]
    best_next_q = np.max(q_table[new_state_index])
    q_table[last_state_index][action_index] = current_q + alpha * (reward + gamma * best_next_q - current_q)

    data = {
        'q_table': q_table,
        'state_index_history': state_index_history,
        'action_index_history': action_index_history
    }
    with open(data_file, 'wb') as file:
        pickle.dump(data, file)

# Run one training step against the current game log and saved Q-table.
train_Q_model()


Only one line


In [None]:
# NOTE(review): in the cells shown, q_table is only ever assigned as a local
# inside Q_learning / train_Q_model. This presumably relies on a global left
# over from an earlier session — confirm, or load it from q_learning_data.pkl
# before printing.
print(q_table)

{1581664960681904165: array([-2., -2.,  0.]), -4500153054193776778: array([0., 0.]), 8221874073243824000: array([0., 0., 0.]), 7003414756375877080: array([0., 0., 0.]), -5645696580594727924: array([0., 0., 0.]), -4606296050214782123: array([0., 0., 0.]), 6538114144556302867: array([0., 0., 0.]), -5438063989234695115: array([0., 0., 0.]), 7289699047892737939: array([0., 0., 0.]), -4639093716540390959: array([0.]), -3018947662364402942: array([0., 0., 0.]), -9120601883240926354: array([0., 0., 0.]), -4547814260440546445: array([0., 0., 0.]), 8346437109265019977: array([0.]), -7852571337383494282: array([0., 0.]), 5891555846141916084: array([0.]), 8261123817876077038: array([0.]), -9073923416428301325: array([0., 0., 0.]), 4598791378723180436: array([0., 0., 0.]), 4613699287674183368: array([0., 0., 0., 0., 0., 0.]), 5990329351020088770: array([0., 0., 0., 0., 0., 0., 0., 0., 0.]), 5630264422828600923: array([0., 0.]), -5175769196293510746: array([0., 0., 0., 0., 0., 0., 0.]), 73411102269