# Meaningful Play Score Assigner V1
Accepts a topology of choices in the form of several weighted adjacency matrices, each with a 1 for any viable connection and a 999 for the desired ending. It then runs Q-learning on the topology and outputs a meaningfulness score: the average of the pairwise weighted Minkowski distances between the resulting Q-tables.

In [185]:
from math import *
from decimal import Decimal
import numpy as np
import xlsxwriter

### Define Q-Learning Class

Define the functions for q-learning and for outputting the optimal path and q-tables.

In [186]:
class QAgent():
    """Tabular Q-learning agent over a graph of named locations.

    States are integer indices into the reward matrix and the Q-table;
    ``location_to_state`` and ``state_to_location`` translate between those
    indices and location names.
    """

    def __init__(self, alpha, gamma, location_to_state, rewards, state_to_location, Q):
        """Store the learning parameters, state mappings, rewards and Q-table.

        Args:
            alpha: Learning rate applied to every temporal-difference update.
            gamma: Discount factor applied to the best Q-value of the next state.
            location_to_state: Mapping from location name to state index.
            rewards: Reward matrix indexed as ``rewards[state, next_state]``;
                an entry greater than 0 marks a playable action.
            state_to_location: Mapping from state index to location name.
            Q: Initial Q-table. It is updated in place by ``training``.
        """
        self.gamma = gamma
        self.alpha = alpha

        self.location_to_state = location_to_state
        self.rewards = rewards
        self.state_to_location = state_to_location

        self.Q = Q

    def training(self, start_location, end_location, iterations):
        """Run Q-learning updates, then return the greedy route to the ending.

        Args:
            start_location: Name of the location the returned route starts at.
            end_location: Name of the location the route must reach.
            iterations: Number of randomly sampled states to observe.

        Returns:
            List of location names from ``start_location`` to ``end_location``.

        Raises:
            RuntimeError: If the learned Q-table does not lead to
                ``end_location`` (see ``get_optimal_route``).
        """
        # Work on a copy so the caller's reward matrix is never modified
        rewards_new = np.copy(self.rewards)

        #set reward for end state to 999 to incentivize reaching desired end
        ending_state = self.location_to_state[end_location]
        rewards_new[ending_state, ending_state] = 999

        state_count = len(self.rewards)

        #Loop for iterations
        for _ in range(iterations):
            #Randomly pick a state to observe
            current_state = np.random.randint(0, state_count)

            #Indices of every action with a positive reward from this state
            playable_actions = np.flatnonzero(rewards_new[current_state, :state_count] > 0)

            #Only run updates if observed state has performable actions
            if playable_actions.size == 0:
                continue

            next_state = np.random.choice(playable_actions)

            #Calculate temporal difference
            TD = (rewards_new[current_state, next_state]
                  + self.gamma * np.max(self.Q[next_state])
                  - self.Q[current_state, next_state])

            #updates Q-value using Bellman equation
            self.Q[current_state, next_state] += self.alpha * TD

        # Get the route
        return self.get_optimal_route(start_location, end_location,
                                      start_location, [start_location], self.Q)

    # Get the optimal route
    def get_optimal_route(self, start_location, end_location, next_location, route, Q):
        """Follow the highest Q-value from each state until the ending is reached.

        Args:
            start_location: Name of the location to start walking from.
            end_location: Name of the location at which the walk stops.
            next_location: Current position of the walk; the loop runs while
                this differs from ``end_location``.
            route: List the visited location names are appended to.
            Q: Q-table used to pick the next state with ``np.argmax``.

        Returns:
            ``route``, extended in place with every location walked through.

        Raises:
            RuntimeError: If the greedy walk comes back to a location it has
                already left without having reached ``end_location``. The walk
                is deterministic, so it would otherwise repeat forever.
        """
        visited = {start_location}

        while next_location != end_location:
            starting_state = self.location_to_state[start_location]
            next_state = np.argmax(Q[starting_state,])
            next_location = self.state_to_location[next_state]
            route.append(next_location)

            # Revisiting a location means the greedy policy is stuck in a cycle
            if next_location != end_location and next_location in visited:
                raise RuntimeError(
                    "Greedy route revisited {!r} without reaching {!r}; "
                    "the Q-table does not lead to the requested ending".format(
                        next_location, end_location))

            visited.add(next_location)
            start_location = next_location

        return route

### Q-Learning

Read the input, then run the Q-learning algorithm to produce the Q-tables.

In [187]:
#topology txt file to score
filename = input("Please input the name of the topology .txt file you want to score: ")
# Both counts are parsed to int right away so an invalid entry is rejected
# before the Q-learning runs start rather than after them
layers = int(input("Please input the number of non-ending layers your topology has: "))
endingCount = int(input("Please input number of endings in your topology: "))

#get topology from file: one comma-separated row of integers per line
with open(filename) as textFile:
    rewards = np.array([
        [int(digit) for digit in line.strip().split(",")]
        for line in textFile
        # skip blank lines (e.g. a trailing newline), which int('') would reject
        if line.strip()
    ])

#list of averaged q-tables to take minkowski differences of
averaged_tables = []

# Define the states: each location name maps to its position in this list
location_to_state = {
    name: index
    for index, name in enumerate([
        'Start',
        '1D', '1N1', '1N2', '1C',
        '2D', '2N1', '2N2', '2C',
        '3D', '3N1', '3N2', '3C',
        '4D', '4N1', '4N2', '4C',
        'E1', 'E2', 'E3', 'E4',
    ])
}

# Inverse lookup: state index back to location name
state_to_location = {state: location for location, state in location_to_state.items()}

# Q-learning hyper-parameters
gamma = 0.75  # Discount factor (scales the best Q-value of the next state)
alpha = 0.9  # Learning rate

#generates excel spreadsheet containing all q-tables in a given path
def to_excel(paths_taken, qtables, final_state):
    """Write the routes and Q-tables of one ending to an .xlsx workbook.

    The workbook name is the global ``filename`` followed by ``final_state``
    and ``.xlsx``. The first worksheet gets one route per row; every Q-table
    then gets a worksheet of its own, one table row per sheet row.

    Args:
        paths_taken: Sequence of routes, each a sequence of cell values.
        qtables: Sequence of 2-D tables to dump, one worksheet each.
        final_state: Ending name appended to the workbook file name.
    """
    first_column = 0
    workbook = xlsxwriter.Workbook(filename + final_state + '.xlsx')

    #first worksheet: every path taken, one per row
    paths_sheet = workbook.add_worksheet()
    row_index = 0
    for path in paths_taken:
        paths_sheet.write_row(row_index, first_column, path)
        row_index += 1

    #one further worksheet per q-table
    for qtable in qtables:
        table_sheet = workbook.add_worksheet()
        for table_row_index, table_row in enumerate(qtable):
            table_sheet.write_row(table_row_index, first_column, table_row)

    workbook.close()
    
#Take set of q-tables and average them into one q-table
def qaverage(table_set):
    """Return the element-wise mean of a sequence of equally shaped tables.

    Args:
        table_set: Non-empty sequence of 2-D tables. The first one is copied
            with ``.copy()`` to create the result, so the inputs are left
            untouched when that copy owns its rows (as a NumPy array does).

    Returns:
        A copy of ``table_set[0]`` in which every cell holds the mean of that
        cell across all tables.
    """
    table_count = len(table_set)
    first_table = table_set[0]
    averaged = first_table.copy()

    for row in range(len(first_table)):
        for column in range(len(first_table[0])):
            # accumulate in table order so the floating-point result is stable
            cell_total = 0
            for table in table_set:
                cell_total += table[row][column]
            averaged[row][column] = cell_total / table_count

    return averaged

#Handle all q-learning for a given topology
def qmaster(final_state, runs=100, iterations=1000):
    """Train fresh agents toward one ending, export them and store their average.

    Every run builds a new ``QAgent`` with an all-zero Q-table from the
    module-level ``alpha``, ``gamma``, ``rewards`` and state mappings, and
    trains it from ``'Start'`` to ``final_state``. The routes and Q-tables are
    then written out with ``to_excel`` and the averaged Q-table is appended to
    the module-level ``averaged_tables`` list.

    Args:
        final_state: Name of the ending location to train toward.
        runs: Number of independent agents to train. Must be at least 1,
            because ``qaverage`` reads the first table of the set.
        iterations: Number of Q-learning iterations per agent.
    """
    #final optimal path of each run
    paths_taken = []
    #final Q-table of each run
    qtables = []

    state_count = len(location_to_state)
    for _ in range(runs):
        qagent = QAgent(alpha, gamma, location_to_state, rewards, state_to_location,
                        np.zeros([state_count, state_count]))
        paths_taken.append(qagent.training('Start', final_state, iterations))
        qtables.append(qagent.Q)

    #output the current run to an excel file
    to_excel(paths_taken, qtables, final_state)
    averaged_tables.append(qaverage(qtables))


#run qmaster for each ending; endings are named E1, E2, ... up to endingCount
for ending_number in range(1, endingCount + 1):
    qmaster('E' + str(ending_number))

#show the averaged q-table collected for every ending
print(averaged_tables)

Please input the name of the topology .txt file you want to score: NoIntegrated.txt
Please input the number of non-ending layers your topology has: 5
Please input number of endings in your topology: 4
[array([[0.00000000e+00, 9.50438544e+02, 9.50470595e+02, 9.49974179e+02,
        9.50439638e+02, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 1.26558084e+03, 1.26606058e+03, 1.26670819e+03,
        1.26608286e+03, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00],
       [0.00000000e+00, 0.00

### Pairwise Minkowski Difference

Now that the averaged Q-tables are stored, we can calculate the Minkowski distance between each pair of tables, average those distances, and print the result.

In [188]:
#altered from
#https://www.geeksforgeeks.org/minkowski-distance-python/

#convert array into 1D vector for ease of manipulation
def vectorize(input_array):
    """Flatten a 2-D table into a flat list in row-major order.

    Each row is walked over its own length, so non-square tables are
    flattened completely instead of being cut to a square.

    Args:
        input_array: Iterable of rows, each an iterable of values.

    Returns:
        List holding every value of ``input_array``, row after row.
    """
    return [value for row in input_array for value in row]

def weight_calculator(layers):
    """Build ``layers`` linearly increasing weights that add up to 1.

    Neighbouring weights differ by ``1 / layers**2``; a constant offset is
    added to every weight so that the total equals 1.

    Args:
        layers: Number of weights to produce (a positive integer).

    Returns:
        List of ``layers`` floats in ascending order.
    """
    #distance between two neighbouring weights
    step = 1 / (layers * layers)

    #total of the un-shifted ramp 0, step, 2*step, ...
    ramp_total = 0
    for index in range(layers):
        ramp_total += index * step

    #equal share of the remainder that lifts the total to 1
    offset = (1 - ramp_total) / layers

    return [(index * step) + offset for index in range(layers)]

#Running tally of the row indices apply_weights has already processed
visited = []

#Apply weighting function to give high score to early states
def apply_weights_helper(array, layers):
    """Weight a Q-table by depth, starting the walk at row 0 and level 0.

    Clears the global ``visited`` tally, computes the layer weights and hands
    them to ``apply_weights`` in descending order, so level 0 receives the
    largest weight.

    Args:
        array: Table to weight; ``apply_weights`` modifies it in place.
        layers: Number of layers; converted with ``int`` before use.

    Returns:
        The table returned by ``apply_weights``.
    """
    global visited
    visited = []
    descending_weights = weight_calculator(int(layers))[::-1]
    return apply_weights(array, descending_weights, 0, 0)
    
def apply_weights(array, weights, x, level):
    """Scale row ``x`` by its level weight and recurse into the rows it points to.

    A row already listed in the global ``visited`` is left alone. Otherwise
    each of the first ``len(array)`` entries of the row is handled in turn:
    rows among the last ``endingCount`` (a global) are set to 0, all other
    rows are multiplied by ``weights[level]``. Whenever the updated entry is
    still positive, the row with that column's index is processed one level
    deeper.

    Args:
        array: Table to weight; modified in place.
        weights: Sequence of per-level multipliers.
        x: Index of the row to process.
        level: Index into ``weights`` used for this row.

    Returns:
        ``array`` after the in-place updates.
    """
    if x in visited:
        return array

    visited.append(x)
    size = len(array)
    is_ending_row = x >= size - endingCount

    for column in range(size):
        if is_ending_row:
            array[x][column] = 0
        else:
            array[x][column] = array[x][column] * weights[level]

        #a positive entry is followed to the row of the same index
        if array[x][column] > 0:
            array = apply_weights(array, weights, column, level + 1)

    return array
            
#Calculate Minkowski distance between arrays
  
# Take the root of an accumulated distance value
# (root is the p value of the Minkowski distance)
# and round it to three decimal places
def p_root(value, root):
    """Return ``value`` raised to ``1 / root`` as a Decimal rounded to 3 places.

    Args:
        value: Number to take the root of.
        root: Degree of the root; converted with ``float`` before inverting.

    Returns:
        ``Decimal`` rounded to three decimal places.
    """
    exponent = Decimal(1 / float(root))
    rooted = Decimal(value) ** exponent
    return round(rooted, 3)
  
def minkowski_distance(x, y, p_value):
    """Return the Minkowski distance of order ``p_value`` between two vectors.

    The vectors are paired with ``zip``, so any surplus elements of the
    longer one are ignored.

    Args:
        x: First vector of numbers.
        y: Second vector of numbers.
        p_value: Order of the distance, used as exponent and as root degree.

    Returns:
        The value produced by ``p_root`` for the summed powered differences.
    """
    # NOTE: with the file-level `from math import *`, `pow` is math.pow here
    powered_gaps = (pow(abs(a - b), p_value) for a, b in zip(x, y))
    return p_root(sum(powered_gaps), p_value)

#vectorize averaged tables and apply weights
# NOTE: the tables in averaged_tables are replaced by their weighted versions,
# so running this cell a second time would apply the weights again.
vectors = []
for i, table in enumerate(averaged_tables):
    averaged_tables[i] = apply_weights_helper(table, layers)
    vectors.append(vectorize(averaged_tables[i]))

#calculate minkowski distances in a pairwise fashion
distances = []
acc = 0
for i in range(len(vectors)):
    for j in range(i + 1, len(vectors)):
        distance = minkowski_distance(vectors[i], vectors[j], 1)
        distances.append(distance)
        acc += distance

if distances:
    #meaningfulness score: mean of all pairwise distances
    print(acc / len(distances))
    #distance between the first two endings, which is the first pair computed
    print(distances[0])
else:
    # fewer than two tables leaves no pair to compare; report it instead of
    # failing with a division by zero
    print("At least two endings are needed to compute a pairwise Minkowski distance.")

2885.060833333333333333333333
2881.697


In [189]:
# Inspect the fourth stored table (index 3); by this point the scoring cell
# has replaced it with its weighted version. Needs at least four endings.
print(averaged_tables[3])

[[0.00000000e+00 2.65993838e+02 2.65905816e+02 2.66025547e+02
  2.65927935e+02 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 3.03844112e+02 3.03848175e+02 3.03661721e+02
  3.03706482e+02 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 3.03406857e+02 3.03581293e+02 3.03974716e+02
  3.03901798e+02 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00

In [190]:
# Recompute the per-layer weights for inspection; the bare expression on the
# last line is what the notebook displays as the cell result.
weights = weight_calculator(int(layers))
weights

[0.12, 0.16, 0.2, 0.24, 0.28]