In [1]:
import numpy as np
import random
from tqdm.notebook import tqdm
# import gym

In [2]:
%run tetris-field.ipynb
%run tetris-bag.ipynb

In [4]:
# Reward table used by TetrisEnv when it scores a candidate placement.
SCORING = dict(
    # Base points of a regular placement, keyed by the number of cleared lines (0-4).
    NORMAL=dict(enumerate((0, 50, 150, 300, 800))),

    # Base points of a placement flagged as a t-spin, keyed the same way.
    TSPIN=dict(enumerate((50, 600, 1200, 1600, 2600))),

    # Factor applied to the base points of a line clear while the back-to-back streak is above 1.
    BACKTOBACK_MULTIPLIER=1.5,
    # Bonus added once per step of the running combo.
    COMBO=50,
    # Bonus added when the placement is flagged as an all-clear.
    ALL_CLEAR=3500,

    # Score given to a follow state whose clear info is None (treated as game over).
    GAME_OVER=-5000,
)

In [90]:
class TetrisEnv():
    """Tetris environment driven by choosing among pre-computed follow states.

    Typical use: call `get_next_states()` to obtain every candidate placement
    of the current piece together with its score and game-over flag, pick one
    candidate, and hand its state, clear info and score to `step()`.

    Attributes:
        field: the `Field` holding the board.
        bag: the `Bag` that supplies pieces.
        current_piece: the piece that will be placed next.
        current_combo: combo streak counter; 1 means no running combo.
        current_btb: back-to-back streak counter; 1 means no running streak.
        score, moves, clears, tspins, all_clears: statistics of the current run.
            `clears[i]` counts placements that cleared `i + 1` rows.
    """

    def __init__(self):
        self.field = Field()
        self.bag = Bag()

        self._start_new_game()

    def _start_new_game(self):
        """Draw the first piece and zero all streaks and statistics.

        Shared by `__init__` and `reset` so both always start from the same state.
        """
        self.current_piece = self.bag.next_piece()

        self.current_combo = 1
        self.current_btb = 1

        # game statistics
        self.score = 0
        self.moves = 0
        self.clears = [0, 0, 0, 0]
        self.tspins = 0
        self.all_clears = 0

    def get_next_states(self):
        """Return every follow state reachable by placing the current piece.

        The candidates come from `Field.get_follow_states`, which covers
        (nearly) all possible positions of the piece.

        Returns:
            A tuple `(next_states, scores, next_clears, dones)` of parallel
            sequences. A clear entry of `None` marks a game-over placement:
            its score is `SCORING['GAME_OVER']` and its `dones` entry is True.
            All other scores are computed from the clear info and the streaks
            as they are *before* the move.
        """
        next_states, next_clears = self.field.get_follow_states(Piece(self.current_piece))

        scores = []
        dones = []
        for clear in next_clears:
            game_over = clear is None
            dones.append(game_over)
            if game_over:
                scores.append(SCORING['GAME_OVER'])
            else:
                scores.append(self._calculate_score(*clear, self.current_combo, self.current_btb))

        return (next_states, scores, next_clears, dones)

    def get_current_state(self):
        """Return the field's current state."""
        return self.field.get_current_state()

    def get_next_queue(self, length=5):
        """Return a preview of the next `length` pieces without drawing them."""
        return self.bag.peek_pieces(length)

    def step(self, next_state, next_clear, next_score):
        """Apply one chosen candidate from `get_next_states()`.

        Args:
            next_state: board data to install as the new field contents.
            next_clear: `(cleared_rows, is_tspin, is_all_clear)` of the candidate.
            next_score: score of the candidate; added to the run's score.

        Raises:
            TypeError: if `next_clear` is None, i.e. the candidate is a
                game-over placement. Nothing is modified in that case; the
                caller should check `dones` and call `reset()` instead.
        """
        # Validate before touching the board or the bag, so a rejected call
        # leaves the environment exactly as it was.
        if next_clear is None:
            raise TypeError(
                'step() received next_clear=None (a game-over placement); '
                'check the dones flags from get_next_states() and call reset() instead'
            )

        cleared_rows = next_clear[0]
        is_tspin = next_clear[1]
        is_all_clear = next_clear[2]

        self.field.field_data = next_state
        self.current_piece = self.bag.next_piece()

        # update back-to-back: t-spins and 4-row clears extend the streak,
        # any other line clear resets it, a placement without clear keeps it
        if is_tspin or cleared_rows == 4:
            self.current_btb += 1
        elif cleared_rows > 0:
            self.current_btb = 1

        # update combo: every line clear extends it, anything else resets it
        if cleared_rows > 0:
            self.current_combo += 1
            self.clears[cleared_rows - 1] += 1
        else:
            self.current_combo = 1

        # update this run's score
        self.score += next_score

        if is_tspin:
            self.tspins += 1
        if is_all_clear:
            self.all_clears += 1

        self.moves += 1

    def _calculate_score(self, cleared_lines, t_spin, all_clear, combo, btb):
        """Score a placement from its clear info and the current streaks.

        Args:
            cleared_lines: number of cleared lines; used as key into the score table.
            t_spin: whether to use the 'TSPIN' table instead of 'NORMAL'.
            all_clear: whether to add the 'ALL_CLEAR' bonus.
            combo: combo streak counter; values above 1 add a bonus.
            btb: back-to-back streak counter; values above 1 scale line clears.

        Returns:
            The score; a float whenever the back-to-back multiplier was applied.
        """
        # get base score
        score = SCORING['TSPIN' if t_spin else 'NORMAL'][cleared_lines]

        # The factor grows with the streak: multiplier * (btb / 2).
        # NOTE(review): this applies to every line clear while btb > 1, including
        # clears that step() treats as resetting the streak - confirm this is intended.
        if cleared_lines > 0 and btb > 1:
            score *= SCORING['BACKTOBACK_MULTIPLIER'] * (btb / 2)

        # NOTE(review): the combo bonus is also added when cleared_lines == 0
        # (the first non-clearing placement after a clear) - confirm this is intended.
        if combo > 1:
            score += SCORING['COMBO'] * (combo - 1)

        if all_clear:
            score += SCORING['ALL_CLEAR']

        return score

    def _print_state(self, s):
        """Print the board state `s` using the field's board printer."""
        self.field._print_board(s)

    def _print_current_state(self):
        """Print the field's current state."""
        self._print_state(self.get_current_state())

    def observation(self):
        """Placeholder; not implemented."""
        pass

    def reset(self):
        """Reset field and bag, then start a fresh run with zeroed statistics."""
        self.field.reset()
        self.bag.reset()

        self._start_new_game()

    def close(self):
        """Placeholder; not implemented."""
        pass

    def render(self, mode=None):
        """Placeholder; not implemented."""
        pass
        

In [88]:
# env = TetrisEnv()

# print('Current piece:', env.current_piece)
# env._print_current_state()

# scored_points = False
# counter = 0
# scs = []

# with tqdm(total=100000) as pbar:
#     while not scored_points:
#         for i in range(40):
#             states, scores, clears, dones = env.get_next_states()

#             chosen_index = random.randint(0, len(states) - 1)

#             if dones[chosen_index]:
#                 if counter % 5 == 1:
#                     print(f'Score: {env.score}, #: {len(scs)}, min: {np.min(scs)}, max: {np.max(scs)}, avg: {np.average(scs)}')
                
#                 # clear_output(wait=True)
#                 # print(env.score)
#                 scs.append(env.score)
#                 env.reset()
#                 break
#             else:
#                 env.step(states[chosen_index], clears[chosen_index], scores[chosen_index])
            
#             if clears[chosen_index][0] > 1:
#                 scored_points = True
#                 print('YOOOOO', clears[chosen_index][0])
#                 break

           
#         counter += 1
#         pbar.update(1)

    
# print('took ', counter)

Current piece: 1
          
          
          
          
          
          
          
          
          
          
          
          
          
          
          
          
          
          
          
          


  0%|          | 0/100000 [00:00<?, ?it/s]

Score: 0, #: 1, min: 0, max: 0, avg: 0.0
Score: 0, #: 6, min: 0, max: 0, avg: 0.0
Score: 0, #: 11, min: 0, max: 0, avg: 0.0
Score: 0, #: 16, min: 0, max: 0, avg: 0.0
Score: 0, #: 21, min: 0, max: 0, avg: 0.0
Score: 0, #: 26, min: 0, max: 0, avg: 0.0
Score: 0, #: 31, min: 0, max: 0, avg: 0.0
Score: 0, #: 36, min: 0, max: 0, avg: 0.0
Score: 0, #: 41, min: 0, max: 0, avg: 0.0
Score: 0, #: 46, min: 0, max: 100, avg: 2.1739130434782608
Score: 0, #: 51, min: 0, max: 100, avg: 1.9607843137254901
Score: 0, #: 56, min: 0, max: 100, avg: 1.7857142857142858
Score: 0, #: 61, min: 0, max: 100, avg: 1.639344262295082
Score: 0, #: 66, min: 0, max: 100, avg: 3.0303030303030303
Score: 0, #: 71, min: 0, max: 100, avg: 2.816901408450704
Score: 0, #: 76, min: 0, max: 100, avg: 2.6315789473684212
Score: 400, #: 81, min: 0, max: 100, avg: 2.4691358024691357
Score: 0, #: 86, min: 0, max: 400, avg: 6.976744186046512
Score: 0, #: 91, min: 0, max: 400, avg: 6.593406593406593
Score: 0, #: 96, min: 0, max: 400, a

KeyboardInterrupt: 