In [55]:
import gym
from gym import spaces
import random
import sys

#PyGame screen sizes
WIDTH,HEIGHT = 800, 600



class MangalaEnv(gym.Env):

    def convert_move(self, move, player):
        """
        Girdilerin pocket karşılıklarına çevrilmesi
        """
        if player == 1:
            return move
        if player == 2:
            return move+7
        return False

    def valid_move(self, pocket_position, player):
        """
        Hamle yapılıcak taş var mı diye kontrol ediliyor
        """
        player_1_side = (0 <= pocket_position <= 5)
        player_2_side = (7 <= pocket_position <= 12)

        if self.pockets[pocket_position] > 0:
            if player_1_side and player==1:
                return True
            if player_2_side and player==2:
                return True

        return

    def initialize_board(self):
        """
        Tahtanın doğru degerlerde başlatılması
        """
        num_stones_on_start = 4
        pockets = [num_stones_on_start]*14
        pockets[6] = 0
        pockets[13] = 0

        return pockets

    def check_game_over(self):
        """ Bir tarafın kuyusunda taş kalmaması durumunda oyunun bitmesi ve karşı
        tarafta taş kalması durumunda taşlarını bitiren oyuncunun hazinesinde toplanması
        """

        game_over = False

        empty_player_1 = sum(self.pockets[:6]) == 0
        empty_player_2 = sum(self.pockets[7:13]) == 0

        if empty_player_2:
            self.pockets[13] += sum(self.pockets[:6])
            self.pockets[:6] = [0]*6
            game_over = True

        if empty_player_1:
            self.pockets[6] += sum(self.pockets[7:13])
            self.pockets[7:13] = [0]*6
            game_over = True

        return game_over

    def determine_winner(self):
        """ Hazine durumlarına göre kazananın belirlenmesi
        """
        if self.pockets[13]>self.pockets[6]:
            return "Player 2"
        elif self.pockets[13]<self.pockets[6]:
            return "Player 1"
        return "Draw"

    def switch_player(self, player):
        """
        Tur geçişlerinde oyuncu geçişi
        """

        if player == 1:
            return 2
        return 1

    def capture(self, pocket_position, mangala_pocket,turn):
        """ pocket position : oynanan hamle
            mangala pocket : aktif oyuncunun hazinesin indexi

            eger oynanan son taş kendi boş kuyusuna denk geliyorsa
            ve de kuyunun karşıt kuyusu doluysa hem kendi kuyusundaki hemde
            karşıt kuyudaki taşları hazinesine eklenmesi
        """
        
        reward = 0
        opposite_pocket_dict = {0: 12, 1:11, 2:10, 3:9, 4:8, 5:7,
                                7:5, 8:4, 9:3, 10:2, 11:1, 12:0}
        if(self.pockets[opposite_pocket_dict[pocket_position]] != 0):
            opposite_pocket = opposite_pocket_dict[pocket_position]
            if(self.pockets[pocket_position] == 0 and self.pockets[opposite_pocket] == 0):
                return False,0
            self.pockets[mangala_pocket] += self.pockets[pocket_position]
            reward += self.pockets[pocket_position]
            self.pockets[pocket_position] = 0
            

            self.pockets[mangala_pocket] += self.pockets[opposite_pocket]
            reward += self.pockets[pocket_position]
            self.pockets[opposite_pocket] = 0

        return True,reward
    
    def capture_even(self,pocket_position,mangala_pocket,turn):
        """
        Eger oyuncunun son taşı rakibin kuyusundaki taşların toplamını çift yapıyorsa
        o kuyudaki tüm taşları hazinesine katar.
        """
        reward = 0
        if(self.pockets[pocket_position] == 0):
            return False,0
        
        self.pockets[mangala_pocket] += self.pockets[pocket_position]
        reward = self.pockets[pocket_position]
        self.pockets[pocket_position] = 0
        

        return True,reward

    def simulate_move(self, pocket_position, player):
        """
        Seçilen hamlenin oynanması
        """
        reward = 0
        capture_amnt_p1 = 0
        capture_amnt_p2 = 0
        go_again_amnt = 0
        pockets = self.pockets
        #Geriye bir taş bırakma ve eger hazinede tek taş varsa sağa taşıma
        stones_drawn = pockets[pocket_position] #eldeki taş sayısı
        if(stones_drawn == 1):
            pockets[pocket_position] = 0
        elif(stones_drawn != 0):
            pockets[pocket_position] = 1
            stones_drawn -= 1

        # Hamle yapılan kuynun sağa doğru birer birer bırakılarak ilerlenmesi
        while stones_drawn > 0:
            pocket_position += 1

            if pocket_position > len(pockets)-1:
                pocket_position = 0


            #Karşıdakinin kuyusuna gelinmesi durumunda bir sonraki kuyuya geçme
            mangala_1_position = pocket_position==6
            mangala_2_position = pocket_position==13
            player_1 = player == 1
            player_2 = player == 2
            player1_capture_true = False
            player2_capture_true = False
            if mangala_1_position and player_2:
                continue
            if mangala_2_position and player_1:
                continue

            # Taşları bırakma
            pockets[pocket_position] += 1
            stones_drawn -= 1

        #çift sayı yaptı mı
        end_with_even = pockets[pocket_position] % 2 == 0

        # Son hamlenin kendi kuyusunda bitirilmesi
        end_on_player_1_side = (0 <= pocket_position <= 5)
        end_on_player_2_side = (7 <= pocket_position <= 12)

        #son taşa gelinmesi durumu kontrolü
        stone_was_empty = pockets[pocket_position] == 1

        #boş kuyuya gelinmesi durumunda capture
        # Player 1 capture
        if player_1 and end_on_player_1_side and stone_was_empty:
            player1_capture_true,rwrd = self.capture(pocket_position, 6,"1")
            if(rwrd != 0):
                reward += 0.01
        # Player 2 capture
        if player_2 and end_on_player_2_side and stone_was_empty:
            player2_capture_true,rwrd = self.capture(pocket_position, 13,"2")
            if(rwrd != 0):
                reward -= 0.01
            
        if player1_capture_true:
            capture_amnt_p1 +=1
        if player2_capture_true:
            capture_amnt_p2 +=1

        #çift yapma durumuna göre captur
        if player_1 and end_on_player_2_side and end_with_even:
            player1_capture_true,rwrd =self.capture_even(pocket_position,6,"1")
            if(rwrd != 0):
                reward += 0.01
        if player_2 and end_on_player_1_side and end_with_even:
            player2_capture_true,rwrd = self.capture_even(pocket_position,13,"2")
            if(rwrd != 0):
                reward -= 0.01
        if(player1_capture_true):
            capture_amnt_p1 +=1
        if(player2_capture_true):
            capture_amnt_p2 +=1


        # son taşın kendi kuyusuna gelme durumu göz önünde bulundurarak hamlenin kime gecicegi
        if mangala_1_position and player_1:
            next_player = player
            go_again_amnt +=1
            reward += 0.01
        elif mangala_2_position and player_2:
            next_player = player
        else:
            next_player = self.switch_player(player)

        game_over = self.check_game_over()

        return next_player, game_over ,capture_amnt_p1,capture_amnt_p2,go_again_amnt,reward


    def turn_table(self,pockets):
        return pockets[7:]+pockets[:7]
    
    def show_end_game_alert(self,winner):
        """
        Render mode human iken oyunun bitmesi durumunda kimin kazandıgını gösteren ekran ve sistemin kapatılması
        """
#         if(self.render_mode == "human"):
#             pygame.quit()  
#             pygame.init()
            
#             screen = pygame.display.set_mode((400, 100))
#             pygame.display.set_caption("Mancala Game End")
#             screen.fill((255, 253, 208))
#             font = pygame.font.Font(None, 30)
#             text = font.render(f"Player {winner} wins! Press 'Esc' to exit.", True, "black")
#             screen.blit(text, (30, 30))
#             pygame.display.flip()    
#             waiting = True
#             while waiting:
#                 for event in pygame.event.get():
#                     if (event.type == pygame.KEYDOWN and event.key == pygame.K_ESCAPE) or event.type == pygame.QUIT:
#                         pygame.quit()
#                         sys.exit()

    


    #GYM ENV

    def __init__(self):
        super(MangalaEnv, self).__init__()

        self.prev_mov1 = -1
        self.prev_mov2 = -1
        self.render_mode = "train"
        # Define action and observation space
        self.action_space = spaces.Discrete(6)  # 6 possible pockets
        self.observation_space = spaces.Box(low=0, high=48, shape=(14,), dtype=int)

        # Initialize the board
        self.pockets = self.initialize_board()
        self.player_turn = random.randint(1, 2)
        self.game_over = False
        
        self.capture_amount = 0
        self.capture_amount_p1 = 0
        self.capture_amount_p2 = 0
    def reset(self):
        # Reset the board

        self.pockets = self.initialize_board()
        self.player_turn = random.randint(1, 2)
        self.game_over = False
        self.capture_amount_p1 = 0
        self.capture_amount_p2 = 0
        self.go_again_amount = 0
        
#         if self.render_mode == 'human':
#             pygame.init()
#             self.screen = pygame.display.set_mode((WIDTH, HEIGHT))
#             pygame.display.set_caption("Mancala Game")
#             self.clock = pygame.time.Clock()
#             self.font = pygame.font.Font(None, 36)
#             self.render()



        return self.get_observation()

    def step(self, action):
        reward = 0
        move = self.convert_move(action, self.player_turn)
        if not self.valid_move(move, self.player_turn):
            #print("INVALID MOVE")
            return self.get_observation(), 0, False, {}


        if(self.player_turn == 1):
            self.prev_mov1 = move+1
           #print("prev_mov1",self.prev_mov1)
        if(self.player_turn == 2):
            self.prev_mov2 = move-7+1
            #print("prev_mov2",self.prev_mov2)


        next_player, game_over,capture_amnt_p1,capture_amnt_p2,go_again_amnt,rwrd = self.simulate_move(move, self.player_turn)
        
        
        self.capture_amount_p1 += capture_amnt_p1
        self.capture_amount_p2 += capture_amnt_p2
        self.go_again_amount += go_again_amnt
        
        self.player_turn = next_player
        self.game_over = game_over
        reward += rwrd
        if game_over:
            winner = self.determine_winner()
            if winner == "Player 1":
                self.show_end_game_alert("1")
                reward += 10
                
                #print("Player 1 Win"+"n"+"n"*5)
            elif winner == "Player 2":
                self.show_end_game_alert("2")
                reward += -10
                #print("Player 2 Win"+"n"+"n"*5)

        if self.render_mode == 'human':
            self.render(move=move)

        return self.get_observation(), reward, game_over, {}

    def render(self,move="none"):
        pass
#         if(self.render_mode == "human"):
#             #print("prev1->",self.prev_mov1,"prev2->",self.prev_mov2)
#                      # array to pocket res move 
#             upper_pockets = self.pockets[:6]
#             lower_pockets = self.pockets[7:-1]
    
#             upper_pockets_marker = [6, 5, 4, 3, 2, 1]
#             lower_pockets_marker = [1, 2, 3, 4, 5, 6]
#             mancala_1 = self.pockets[-1]
#             mancala_2 = self.pockets[6]
#             self.screen.fill("white")
#             # Oyuncu turn yazısı
#             turn_text = self.font.render("Player {}'s Turn".format(self.player_turn), True, "black")
#             self.screen.blit(turn_text, (10, 10))
#             # Tahta çizimi
#             pygame.draw.rect(self.screen, "black", (50, 50, 700, 500), 2)
#             # Pockets çizimi
            
#             color1,color2 = "black","black"
#             for i in range(6):
#                 if(i == self.prev_mov1-1):
#                     color1 = "blue"
#                 else:
#                     color1 = "black"
#                 if(i == self.prev_mov2-1):
#                     color2 = "red"
#                 else:
#                     color2 = "black"
                    
#                 pygame.draw.rect(self.screen, "black", (100 + i * 100, 100, 100, 100), 2)
#                 pygame.draw.rect(self.screen, "black", (100 + i * 100, 300, 100, 100), 2)
#                 pygame.draw.rect(self.screen, "black", (100 + i * 100, 100, 100, 200), 2)
#                 upper_stones_marker = self.font.render(str(upper_pockets_marker[i]), True, "gray")
#                 lower_stones_marker = self.font.render(str(lower_pockets_marker[i]), True, "gray")
#                 self.screen.blit(upper_stones_marker, (140 + i * 100, 80))
#                 self.screen.blit(lower_stones_marker, (140 + i * 100, 400))
#                 upper_stones = self.font.render(str(upper_pockets[i]), True, color1)
#                 lower_stones = self.font.render(str(lower_pockets[i]), True, color2)
#                 self.screen.blit(upper_stones, (640 - i * 100, 140))
#                 self.screen.blit(lower_stones, (140 + i * 100, 340))
#             # Mancala çizimi
#             mancala_1_text = self.font.render(str(mancala_1), True, "black")
#             mancala_2_text = self.font.render(str(mancala_2), True, "black")
#             self.screen.blit(mancala_1_text, (730, 250))
#             self.screen.blit(mancala_2_text, (55, 250))
#             pygame.display.flip()
            

    def close(self):
        pass
#         if(self.render_mode == "human"):
#              pygame.quit()
#              sys.exit()


    def get_observation(self):
        return self.pockets


 #3.29

In [None]:
import time

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class DQN(nn.Module):

    def __init__(self, n_observations, n_actions):
        super(DQN, self).__init__()
        self.layer1 = nn.Linear(n_observations, 128)
        self.layer2 = nn.Linear(128, 128)
        self.layer3 = nn.Linear(128, n_actions)

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        return self.layer3(x)

env = MangalaEnv()
    
input_size = len(env.get_observation())
output_size = env.action_space.n

models = []
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        models.extend([os.path.join(dirname, filename)])

#9 model var
import pandas as pd 

df = pd.DataFrame(models,columns=["model_name"])
df["score"] = 0

df["name_sort"]= df.model_name.apply(lambda x :x[40:])
import random
x1 = df.model_name.values
x2 = df.model_name.values
    
for i in range(0,1000000):
    selection_1 = 0
    selection_2 = 0

    selection_1 = random.choice(x1)
    selection_2 = random.choice(x2)

   
        
    loaded_model = DQN(input_size, output_size)

    loaded_model.load_state_dict(torch.load(selection_1)) 
    loaded_model.eval() 

    loaded_model2 = DQN(input_size, output_size)
    loaded_model2.load_state_dict(torch.load(selection_2)) 
    loaded_model2.eval()

    state = torch.tensor([env.reset()], dtype=torch.float32)
    while True:    
        if(env.player_turn == 1):
            raw_act = loaded_model(state) 
            enum_raw_act = list(enumerate(raw_act[0].detach().numpy()))
            sorted_enum_raw_act = list(reversed(sorted(enum_raw_act, key=lambda x: x[1])))
            action2 = "none"
            state_np = state.detach().numpy()[0]
            enum_raw_act = list(enumerate(raw_act[0].detach().numpy()))
            sorted_enum_raw_act = list(reversed(sorted(enum_raw_act, key=lambda x: x[1])))

            action = "none"
            state_np = state.detach().numpy()[0]
            state_pos = 0
            for i,_ in sorted_enum_raw_act:
                state_pos = i
                if(state_np[state_pos] != 0):
                    action = i
                    break        
            next_state, reward, done, _ = env.step(action)
            state = torch.tensor([next_state], dtype=torch.float32)
            #print("observation->",state,"action->",action,"reward->",reward)

        else:
            raw_act = loaded_model2(torch.tensor([env.turn_table(env.get_observation())], dtype=torch.float32)) 
            enum_raw_act = list(enumerate(raw_act[0].detach().numpy()))
            sorted_enum_raw_act = list(reversed(sorted(enum_raw_act, key=lambda x: x[1])))
            action2 = "none"
            state_np = state.detach().numpy()[0]
            enum_raw_act = list(enumerate(raw_act[0].detach().numpy()))
            sorted_enum_raw_act = list(reversed(sorted(enum_raw_act, key=lambda x: x[1])))

            action = "none"
            state_np = state.detach().numpy()[0]
            state_pos = 0
            for i,_ in sorted_enum_raw_act:
                state_pos = i+7
                if(state_np[state_pos] != 0):
                    action = i
                    break        

            next_state, reward, done, _ = env.step(action)
            state = torch.tensor([next_state], dtype=torch.float32)
            #print("observation->",state,env.turn_table(env.get_observation()),"action->",action,"reward->",reward)
        if done:
            #print("done")
            state = env.get_observation()
            if(state[13]> state[6]):
                df.loc[df.model_name==selection_2,["score"]] += 3
                df.loc[df.model_name==selection_1,["score"]] -= 3
            elif(state[13]<state[6]):
                df.loc[df.model_name==selection_1,["score"]] += 3
                df.loc[df.model_name==selection_2,["score"]] -= 3
            break
    if(i%500 == 0):
        print(df.iloc[:,1:].sort_values(by=["score"],ascending=False)[:5])

print(f"Total Test Reward: {total_reward}")




    score                            name_sort
11      6   /260000-trained_model-17-15_18.pth
13      3   /180000-trained_model-17-17_03.pth
23      3  /1670000-trained_model-17-00_11.pth
3       3   /230000-trained_model-13-12_37.pth
22      3   /830000-trained_model-14-04_41.pth
    score                           name_sort
14      9  /160000-trained_model-17-16_12.pth
3       6  /230000-trained_model-13-12_37.pth
22      6  /830000-trained_model-14-04_41.pth
16      6  /340000-trained_model-17-17_08.pth
11      6  /260000-trained_model-17-15_18.pth
    score                           name_sort
3      18  /230000-trained_model-13-12_37.pth
16     18  /340000-trained_model-17-17_08.pth
14     15  /160000-trained_model-17-16_12.pth
10     12  /130000-trained_model-13-09_42.pth
0       6  /330000-trained_model-13-16_13.pth
    score                           name_sort
14     30  /160000-trained_model-17-16_12.pth
16     21  /340000-trained_model-17-17_08.pth
3      18  /230000-trained_m

In [None]:
# 4   130000-trained_model-13-09_42.pth    918
# 5   130000-trained_model-17-00_44.pth    897
# 8   160000-trained_model-17-16_12.pth    771
# 10  230000-trained_model-13-12_37.pth    738
# 14  330000-trained_model-13-16_13.pth    468

In [None]:
x = np.array([0,0,0,0,0])
x

/kaggle/input/mangala-models-firstnewdqn/330000-trained_model-13-16_13.pth
/kaggle/input/mangala-models-firstnewdqn/130000-trained_model-17-00_44.pth
/kaggle/input/mangala-models-firstnewdqn/410000-trained_model-17-07_25.pth
/kaggle/input/mangala-models-firstnewdqn/230000-trained_model-13-12_37.pth
/kaggle/input/mangala-models-firstnewdqn/30000-trained_model-13-07_19.pth
/kaggle/input/mangala-models-firstnewdqn/140000-trained_model-17-15_07.pth
/kaggle/input/mangala-models-firstnewdqn/630000-trained_model-13-23_37.pth
/kaggle/input/mangala-models-firstnewdqn/1010000-trained_model-14-10_57.pth
/kaggle/input/mangala-models-firstnewdqn/320000-trained_model-17-16_41.pth
/kaggle/input/mangala-models-firstnewdqn/730000-trained_model-14-01_40.pth
/kaggle/input/mangala-models-firstnewdqn/130000-trained_model-13-09_42.pth
/kaggle/input/mangala-models-firstnewdqn/260000-trained_model-17-15_18.pth
/kaggle/input/mangala-models-firstnewdqn/530000-trained_model-13-21_08.pth
/kaggle/input/mangala-mod