"""
developed by: markmelnic
original repo: https://github.com/markmelnic/Scoring-Algorithm

Analyse data using a range based percentual proximity algorithm
and calculate the linear maximum likelihood estimation.
The basic principle is that all values supplied will be broken
down to a range from 0 to 1 and each column's score will be added
up to get the total score.

==========
Example for data of vehicles
price|mileage|registration_year
20k  |60k    |2012
22k  |50k    |2011
23k  |90k    |2015
16k  |210k   |2010

We want the vehicle with the lowest price,
lowest mileage but newest registration year.
Thus the weights for each column are as follows:
[0, 0, 1]

>>> procentual_proximity([[20, 60, 2012],[23, 90, 2015],[22, 50, 2011]], [0, 0, 1])
[[20, 60, 2012, 2.0], [23, 90, 2015, 1.0], [22, 50, 2011, 1.3333333333333335]]
"""


def _score_column(values: list, weight: int) -> list:
    """
    Scale one column of floats into the range 0..1.

    weight == 0: the lowest value scores 1 and the highest scores 0
    weight == 1: the highest value scores 1 and the lowest scores 0

    When every value in the column is identical there is no range to
    scale over; each entry then scores 1 for weight 0 and 0 for weight 1.

    Raises ValueError when weight is neither 0 nor 1.
    """
    if weight != 0 and weight != 1:
        # %r (not %f) so that a non-numeric weight still produces the
        # intended ValueError instead of a TypeError from the formatting
        raise ValueError("Invalid weight of %r provided" % (weight,))

    lowest = min(values)
    spread = max(values) - lowest

    # constant column: dividing by the spread would be a division by zero
    if spread == 0:
        return [1 if weight == 0 else 0] * len(values)

    # for weight 0 score is 1 - actual score
    if weight == 0:
        return [1 - ((value - lowest) / spread) for value in values]

    return [(value - lowest) / spread for value in values]


def procentual_proximity(source_data: list, weights: list) -> list:
    """
    Append a total proximity score to every row of source_data.

    source_data - list of rows, each row a list of values accepted by float()
                  rows are expected to all have the same number of columns
    weights - int list, one entry per column
        possible values - 0 / 1
        0 if lower values have higher weight in the data set
        1 if higher values have higher weight in the data set

    Each column is paired with the weight at the same position; columns
    without a matching weight (or weights without a matching column) are
    not scored.

    The score is appended to each row of source_data IN PLACE and the same
    list object is returned.  When there is nothing to score (no rows, no
    columns or no weights) every row receives a total of 0.

    Raises ValueError when a weight used for a column is not 0 or 1.

    >>> procentual_proximity([[1, 10], [3, 30]], [1, 0])
    [[1, 10, 1.0], [3, 30, 1.0]]
    >>> procentual_proximity([], [0, 1])
    []
    """
    # getting data: turn the rows into one list of floats per column
    data_lists = []
    for row in source_data:
        for index, value in enumerate(row):
            # generate corresponding number of lists
            if index == len(data_lists):
                data_lists.append([])
            data_lists[index].append(float(value))

    # calculating each score
    score_lists = [
        _score_column(column, weight)
        for column, weight in zip(data_lists, weights)
    ]

    if not score_lists:
        # nothing was scored, so every row totals 0
        final_scores = [0] * len(source_data)
    else:
        # initialize final scores
        final_scores = [0] * len(score_lists[0])

        # generate final scores, column by column
        for column_scores in score_lists:
            for row_index, value in enumerate(column_scores):
                final_scores[row_index] += value

    # append scores to source data
    for row, total in zip(source_data, final_scores):
        row.append(total)

    return source_data