# Dependencies

In [5]:
import pandas as pd
import os
import json
from itertools import permutations

# 物件設定

In [6]:
def input_preprocess(path):
    """Parse an input-state text file into a dict keyed like the path table.

    Expected layout, one value per line (blank lines are ignored):

        <start>            integer
        <end>              integer
        <vehicle>          integer   \\ repeated once per other car,
        <path>             JSON list /  labelled vehicle2/path2, vehicle3/path3, ...

    Args:
        path: Location of the input text file.

    Returns:
        A dict with keys ``start``, ``end`` and one ``vehicleK``/``pathK``
        pair per car (K starts at 2). If ``path`` does not exist, a message
        is printed and an empty dict is returned.

    Raises:
        ValueError: If a value cannot be parsed, or a vehicle line has no
            matching path line.
    """
    if not os.path.exists(path):
        # Same outcome as before (message + empty state), just made explicit.
        print('Input path does not exist.')
        return {}

    with open(path) as f:
        # Whitespace-only lines (typically a trailing newline left by an
        # editor) carry no data, so they are dropped before parsing.
        lines = [line.strip() for line in f if line.strip()]

    try:
        input_state = dict(zip(('start', 'end'), (int(v) for v in lines[:2])))
        for offset in range(2, len(lines), 2):
            # Line offsets 2/3 describe car 2, offsets 4/5 car 3, and so on.
            car = offset // 2 + 1
            input_state['vehicle' + str(car)] = int(lines[offset])
            # IndexError here means a vehicle line without its path line.
            input_state['path' + str(car)] = json.loads(lines[offset + 1])
    except (ValueError, IndexError) as err:
        # json.JSONDecodeError is a ValueError subclass, so it lands here too.
        raise ValueError('Incorrect input format.') from err
    return input_state

def compare_paths(path1, path2):
    """Score how well ``path2`` (table) matches ``path1`` (input), edge by edge.

    Both paths are sequences of nodes and are compared as sets of directed
    edges (consecutive node pairs). The score is

        |shared edges| / n1  -  |edges only in path2| / n2 / n1

    where ``n1`` and ``n2`` are ``len(path) - 1`` for each path. The second
    term penalises edges of ``path2`` that ``path1`` does not contain.

    When either path has no edge at all (empty, ``None`` or a single node)
    the ratio is undefined; the result is then 1 if both paths are the same
    node sequence and 0 otherwise. This keeps the existing results for
    empty paths and avoids a division by zero for single-node paths.

    Args:
        path1: Node sequence from the input state.
        path2: Node sequence from the table.

    Returns:
        The similarity score: a float, or the int 0 or 1 in the edge-less
        cases described above.
    """
    nodes1 = list(path1) if path1 else []
    nodes2 = list(path2) if path2 else []

    if len(nodes1) < 2 or len(nodes2) < 2:
        return 1 if nodes1 == nodes2 else 0

    # Paths are given as nodes; n nodes make n - 1 edges.
    n_edges1 = len(nodes1) - 1
    n_edges2 = len(nodes2) - 1
    edges1 = set(zip(nodes1[:-1], nodes1[1:]))
    edges2 = set(zip(nodes2[:-1], nodes2[1:]))
    shared = edges1 & edges2

    # Share of path1's edges that path2 also travels.
    score = len(shared) / n_edges1

    # Penalty: edges of path2 that path1 does not have, scaled by both lengths.
    unwanted = edges2 - shared
    score -= len(unwanted) / n_edges2 / n_edges1
    return score

class State_Compare(object):
    """Find the table rows that best match an input state.

    Typical call order::

        read_table() -> preprocess() -> set_input()
                     -> filter_start_end() -> path_most_similar()
                     -> highest_reward()

    The table is expected to have ``start``, ``end``, ``reward`` and one
    ``pathK`` column per compared car (K = 2, 3, ...). It is held as a list
    of DataFrame chunks because it is read from disk in chunks.
    """

    def __init__(self):
        self.__full_table_list = None  # list of DataFrame chunks read from disk
        self.__df_dtype = None  # 'csv' or 'json', set by read_table()
        self.__preprocessed = None  # True once pathK columns hold real lists
        self.__ncars = None  # number of pathK pairs in the input, plus one
        self.__input_state = None  # dict given to set_input()
        self.__subset = None  # table rows sharing the input's start/end
        self.__rows_high_score = None  # subset rows with the best path score
        self.__high_score_and_reward = None  # best-score rows with max reward

    def _whole_table(self):
        """Return every loaded chunk as one DataFrame (empty if none)."""
        if not self.__full_table_list:
            return pd.DataFrame()
        return pd.concat(self.__full_table_list, ignore_index=True)

    def _table_path_columns(self):
        """Return the table's ``path<digits>`` column labels, in table order."""
        if not self.__full_table_list:
            return []
        return [col for col in self.__full_table_list[0].columns
                if str(col).startswith('path') and str(col)[4:].isdigit()]

    def read_table(self, path, output=False):
        """Load the path table from a CSV or line-delimited JSON file.

        Args:
            path: File location; the format is chosen by whether the path
                contains ``.csv`` or ``.json``.
            output: When true, return the whole table as one DataFrame.

        Returns:
            The concatenated table if ``output`` is true, otherwise ``None``.
            If ``path`` does not exist a message is printed and ``None`` is
            returned.

        Raises:
            ValueError: If the file exists but is neither CSV nor JSON.
        """
        if not os.path.exists(path):
            print('Path Table does not exist')
            return None

        chunk_size = 50000
        if '.csv' in path:
            with pd.read_csv(path, chunksize=chunk_size) as reader:
                chunks = list(reader)
            self.__df_dtype = 'csv'
            # CSV stores lists as strings; a flag left over from an earlier
            # JSON load must not make preprocess() skip the conversion.
            self.__preprocessed = None
        elif '.json' in path:
            with pd.read_json(path, lines=True, orient='records',
                              chunksize=chunk_size) as reader:
                chunks = list(reader)
            self.__df_dtype = 'json'
            # JSON already yields real lists, so no conversion is needed.
            self.__preprocessed = True
        else:
            raise ValueError('Unsupported table format (expected .csv or .json): '
                             + str(path))

        self.__full_table_list = chunks
        # Results derived from a previously loaded table are now stale.
        self.__subset = None
        self.__rows_high_score = None
        self.__high_score_and_reward = None

        if output:
            return self._whole_table()
        return None

    def set_ncars(self, ncars):
        """Set the number of cars the input state describes."""
        self.__ncars = ncars

    def set_input(self, input_state):
        """Store the input state and derive the car count from it.

        Args:
            input_state: Dict with ``start``, ``end`` and ``vehicleK``/``pathK``
                pairs, as produced by ``input_preprocess``.
        """
        self.__input_state = input_state
        # Two keys are start/end; every remaining pair is one other car.
        # The extra one is presumably the host car, whose path is not part
        # of the input.
        self.set_ncars((len(input_state) - 2) // 2 + 1)

    def preprocess(self, output=False):
        """Turn string-encoded ``pathK`` columns into Python lists.

        Only needed for CSV tables; a JSON table is already marked as
        preprocessed by ``read_table``. If ``set_input`` has been called,
        the table's car count is checked against the input's.

        Args:
            output: When true, return the whole table as one DataFrame.

        Returns:
            The concatenated table if ``output`` is true, otherwise ``None``.

        Raises:
            AttributeError: If no table has been read yet.
            ValueError: If the input and the table disagree on the car count.
        """
        if self.__preprocessed:
            return self._whole_table() if output else None

        if self.__full_table_list is None:
            raise AttributeError('Attributes is not defined. Run read_table() first.')

        path_cols = self._table_path_columns()
        if self.__ncars is not None:
            # Counted from the pathK labels so that extra columns (index
            # column, self_path, reward, ...) do not skew the comparison.
            ncars_table = len(path_cols) + 1
            if self.__ncars != ncars_table:
                raise ValueError(
                    'Ncars parameter is not aligned with columns in given table. '
                    f'Ncars: {self.__ncars}. Ncars in table: {ncars_table}.')

        for chunk in self.__full_table_list:
            for col in path_cols:
                # Values that are already lists are left untouched.
                chunk[col] = chunk[col].apply(
                    lambda cell: json.loads(cell) if isinstance(cell, str) else cell)
        self.__preprocessed = True

        return self._whole_table() if output else None

    def filter_start_end(self, output=False):
        """Keep only the table rows whose start and end match the input.

        Args:
            output: When true, return the filtered rows.

        Returns:
            The filtered DataFrame if ``output`` is true, otherwise ``None``.

        Raises:
            AttributeError: If the table is not preprocessed or no input
                state has been set.
        """
        if self.__preprocessed is None:
            raise AttributeError('Attribute preprocessed is not True. Run preprocess() first.')
        if self.__ncars is None or self.__input_state is None:
            raise AttributeError('Attributes is not defined. Run set_input() and read_table() first.')

        start = self.__input_state['start']
        end = self.__input_state['end']
        matches = [chunk[(chunk['start'] == start) & (chunk['end'] == end)]
                   for chunk in self.__full_table_list]

        # After filtering, the data no longer needs to be handled in chunks.
        self.__subset = (pd.concat(matches, ignore_index=True)
                         if matches else pd.DataFrame())
        if output:
            return self.__subset
        return None

    def path_most_similar(self, output=False):
        """Find the filtered rows whose paths best match the input paths.

        Because the cars are interchangeable, every assignment (permutation)
        of table path columns to input paths is scored with
        ``compare_paths``; a row's score for one assignment is the mean of
        its per-path scores. Rows reaching the overall best mean are kept,
        together with their ``path_scoreK`` and ``path_avg_score`` columns.

        Args:
            output: When true, return the best-matching rows.

        Returns:
            The best-matching rows if ``output`` is true, otherwise ``None``.

        Raises:
            AttributeError: If an earlier step has not been run.
        """
        if self.__preprocessed is None:
            raise AttributeError('Attribute preprocessed is not True. Run preprocess() first.')
        if self.__subset is None:
            raise AttributeError('Attribute subset is not defined. Run filter_start_end() first.')
        if self.__ncars is None or self.__input_state is None:
            raise AttributeError('Attributes is not defined. Run set_input() and read_table() first.')

        # Cars to compare = ncars - 1; their columns are path2 .. path<ncars>.
        n_compared = self.__ncars - 1
        path_cols = ['path' + str(k + 1) for k in range(1, self.__ncars)]
        score_cols = ['path_score' + str(k) for k in range(1, self.__ncars)]
        subset_path = self.__subset[path_cols]
        input_paths = [self.__input_state[key] for key in path_cols]

        per_permutation = []
        for assignment in permutations(range(n_compared)):
            current = pd.DataFrame(index=subset_path.index)
            for j, table_col in enumerate(assignment):
                # Input path j is compared with table path column `table_col`.
                current[score_cols[j]] = subset_path.iloc[:, table_col].apply(
                    lambda table_path, wanted=input_paths[j]:
                        compare_paths(wanted, table_path)
                ).astype(float)
            current['path_avg_score'] = current[score_cols].mean(axis=1)
            per_permutation.append(current)
        scores = pd.concat(per_permutation)

        # Rows reaching the best average score under any assignment.
        best = scores['path_avg_score'].max()
        max_score = scores[scores['path_avg_score'] == best]
        # A row can tie with itself under several assignments; keep one score
        # line per row, otherwise the column-wise concat below cannot align.
        max_score = max_score[~max_score.index.duplicated(keep='first')]

        rows_high_score = self.__subset[self.__subset.index.isin(max_score.index)]
        self.__rows_high_score = pd.concat([rows_high_score, max_score], axis=1)

        if output:
            return self.__rows_high_score
        return None

    def highest_reward(self, output=False):
        """Among the best-matching rows, keep those with the highest reward.

        Args:
            output: When true, return the selected rows.

        Returns:
            The selected rows if ``output`` is true, otherwise ``None``.

        Raises:
            AttributeError: If ``path_most_similar`` has not been run.
        """
        if self.__rows_high_score is None:
            raise AttributeError('Attribute rows_high_score is not defined. Run path_most_similar() first.')

        rows = self.__rows_high_score
        self.__high_score_and_reward = rows[rows['reward'] == rows['reward'].max()]

        if output:
            return self.__high_score_and_reward
        return None

# 使用者介面

## 讀取前面生成的表格資料

In [7]:
# Load the path/reward table once; the later cells reuse this `table` object.
# Author's note: most of the execution time is spent reading the table.
table = State_Compare()
table.read_table(path='state_path_reward.json')
# For a JSON table read_table() already marks the data as preprocessed,
# so this call returns immediately.
table.preprocess()

## 讀取 input、拿 input 比對資料後找出 reward 最高的主機路徑（self_path）

In [10]:
# Parse the input state file and hand it to the table object.
input_state = input_preprocess('input.txt')
table.set_input(input_state)
# Compare: keep rows with the same start/end, then the rows whose paths score
# highest against the input, then the highest-reward rows among those.
table.filter_start_end()
table.path_most_similar()
# Last expression of the cell, so the notebook displays the returned rows.
table.highest_reward(output=True)

Unnamed: 0,start,end,vehicle2,path2,vehicle3,path3,self_path,reward,path_score1,path_score2,path_avg_score
94567,1,3,1,"[3, 6, 5, 2, 1]",7,"[6, 5, 4, 7, 8, 9]","[1, 2, 3]",0.92623,0.866667,0.916667,0.891667
