# Get training data

In [1]:
import pandas as pd

### 1. Define a Board Object

In [2]:
import pandas as pd 

class Board:

    def __init__(self, x_range, y_range, filler):
        assert x_range == y_range
        self.x_range = x_range
        self.y_range = y_range
        self.filler = filler
        b = self.create_board()
        self.board = b
        
    def is_hit(self, x, y):
        return self.board.loc[y, x] == 'S'
        
    def print_board(self):
        for row in self.board.values():
            print (row)  

    def create_board(self):
        rt = []
        
        for x in range(self.y_range+1):
            new_row = []
            for y in range(self.x_range+1):
                new_row.append(self.filler)
            rt.append(new_row)
            
        df = pd.DataFrame(rt)
            
        index = []
        columns = []

        for i in range(10):
            base_x = 'x_{}'.format(i)
            base_y = 'y_{}'.format(i)
            index.append(base_y)
            columns.append(base_x)

        index.reverse()

        df.index = index
        df.columns = columns
        
        return df

    def get_board(self):
        return self.board
    
    def get_ship_dimentions(self, ship_type, x, y, direction):
        x2, y2 = x, y

        ships = {'carrier':5, 'battleship':4, 'cruiser':3, 'submarine':3, 'destroyer':2}
        assert ship_type in list(ships.keys())
        assert direction in ['up', 'right']

        length = ships[ship_type]

        if direction == 'up':
            y2 += length - 1
        elif direction == 'right':
            x2 += length - 1

        return x, x2, y, y2
    
    def add_ship(self, ship_type, x, y, direction ):
        '''
        Make sure this is done from a 0 index space
        '''
        
        board = self.board

        x1, x2, y1, y2 = self.get_ship_dimentions(ship_type, x, y, direction)

        up = (x1 == x2)

        # direction is up, select a column
        if up:
            rows = []
            col = 'x_{}'.format(x1)
            for y in range(y1, y2+1):
                rows.append('y_{}'.format(y))
            space = board.loc[rows, col]

        # direction is right, select a row
        elif y1 == y2:
            row = 'y_{}'.format(y1)
            col_1 = 'x_{}'.format(x1)
            col_2 = 'x_{}'.format(x2)
            space = board.loc[row, col_1:col_2]

        for each in space.values:
            if each != 0:
                b.print_board()
                return 'Please try a different spot! This one is taken.'

        if up:
            board.loc[rows, col] = 'S'

        else:

            board.loc[row, col_1:col_2] = 'S'
            
        self.board = board

        return 

In [62]:
class TrainingSet:
      
    def __init__(self, board):
        self.board = board
        
        # extracts features from the board 
        self.training_set = self.create_features(board.get_board().values)
        
        # labeles the newly created set based on labels in the acutal board
        self.create_training_set()
        
    def get_set(self):
        return self.training_set
    
    # updates training set within the object every time it is called
    def add_value(self, x, y, column):
        ts_df = self.training_set
        query = ts_df[ (ts_df[x] == 1) & (ts_df[y] == 1)]
        idx = int(query.index.values[0])
        ts_df.at[idx, column] = 1
        self.training_set = ts_df
        
#         return list(ts_df.iloc[idx, :].values)[:]
       
    def create_training_set(self):
        '''
        Walks through a board, finds the points where there would be a hit, and labels it. 
        '''
        b = self.board
        df = b.get_board()
                        
        for y, row in df.iterrows():
    
            for x, col in row.iteritems():

                # check x and y
                if b.is_hit(x, y):

                    self.add_value(x, y, column='Hit')

                else:

                    self.add_value(x, y, column = 'Miss')
                    
                break
                
        return 
        
    def create_features(self, board):
        '''
        Given a square board, generates features from the board. Returns a pandas data frame
        '''
        n_columns = len(board) + len(board[0])

        rt = []

        for y, row in enumerate(board):

            for x, col in enumerate(row):

                # add one extra column for the label 
                new_row = [0 for i in range(n_columns)]

                new_row[x] = 1
                new_row[y + len(board)] = 1

                rt.append(new_row)
                
        df = pd.DataFrame(rt)

        columns = []

        for c in range(n_columns):
            if c < len(board):
                str_base = "x_{}"
            else:
                str_base = 'y_{}'
                c -= len(board)
            columns.append(str_base.format(c))

        df = pd.DataFrame(rt, columns = columns)
        df.insert(0, 'Hit', [0 for i in range(df.shape[0])])
        df['Miss'] = 0

        return df


### Use the Board and Training Set classes to create a training set

In [16]:
b = Board(9,9,0)

b.add_ship('carrier', 5, 2, 'up')
b.add_ship('destroyer', 0,0,'right')
b.add_ship('submarine', 1,1,'up')
b.add_ship('cruiser', 8,0,'up')

board_df = b.get_board()

This is what your board is going to look like. You have 10 rows, 10 columns.

In [17]:
board_df.head()

Unnamed: 0,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,x_8,x_9
y_9,0,0,0,0,0,0,0,0,0,0
y_8,0,0,0,0,0,0,0,0,0,0
y_7,0,0,0,0,0,0,0,0,0,0
y_6,0,0,0,0,0,S,0,0,0,0
y_5,0,0,0,0,0,S,0,0,0,0


Now, you need to turn this board into a set of features, a training set. The class here is one way of doing this, we're considering each x-y coordinate in the board as a single datapoint. It has its own row. That point was either a hit, a miss, or not yet attempted. 

In [63]:
ts = TrainingSet(b)

In [64]:
training_df = ts.get_set()

In [65]:
training_df.head()

Unnamed: 0,Hit,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,x_8,...,y_1,y_2,y_3,y_4,y_5,y_6,y_7,y_8,y_9,Miss
0,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


This is a feature-ized representation of that model. Each point in the board has been converted to its own row. It has an X and Y coordinate from the game board, now represented as columns. The first column is the label: whether or not the row was a hit.

This is a single conversion of the board into feature vectors. Now, if you want a robust data set, you might want to to generate a large number of boards, with different ships loaded into these, and train a single model on that larger data set.

# Define a machine learning model

In [68]:
from sklearn import linear_model

X_train = training_df.drop('Hit', axis=1, inplace=False)
y_train = training_df['Hit']

clf = linear_model.LinearRegression()

clf.fit(X_train, y_train)

In [83]:
predictions = pd.DataFrame(clf.predict(X_train))

In [87]:
X_train['predicted_hit'] = predictions[0].round(3)

In [88]:
X_train.head()

Unnamed: 0,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,x_8,x_9,...,y_2,y_3,y_4,y_5,y_6,y_7,y_8,y_9,Miss,predicted_hit
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1.0
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,-0.0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,-0.0
3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,-0.0
4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0.0


# Push the model to SageBuild

### Move the training data
1. Now, you need to upload your training data to an S3 bucket that SageBuild can read from. 

### Copy your ETL + training / inference code
1. Next, copy the code that you are using to transform your data and to train your model into:
    /ml-gameday-tutorial/containers/Layout/train.py

2. Last, copy the ETL + inference code that you need into
    /ml-gameday-tutorial/containers/Layout/host.py