# [不到100行代码实现一个简单的推荐系统](https://colobu.com/2015/04/16/recommandation-system-implement-in-100-lines/#more)

In [27]:
import math

In [28]:
def load_data(file_path):
    # Load a comma-separated score table from a file into a nested dic.
    # The first line is the header: its first cell is ignored and the
    # remaining cells name the scored columns. Every following line starts
    # with a row label followed by one score per header column. Scores are
    # kept as strings, so a missing score is the empty string "".
    # Blank lines are skipped; cells beyond the header width are ignored.
    # params:
    #   file_path: the file path to load the data
    # return:
    #   a dic containing the data, shaped like
    #   {user_1: {movie_1: score, movie_2: score}, user_2: {...}}

    score_dic = {}
    # the context manager closes the file even if parsing raises
    with open(file_path) as data_file:
        columns = data_file.readline().rstrip("\n").split(',')
        for line in data_file:
            line = line.rstrip("\n")
            if not line:
                # a blank (often trailing) line is not a data row
                continue
            scores = line.split(',')
            # pair each score with its header name, leaving out column 0
            # which holds the row label
            score_dic[scores[0]] = dict(zip(columns[1:], scores[1:]))

    return score_dic

In [29]:
def sim_distance(score_dic, ori_object, object):
    # Euclidean-distance based similarity between two entries of score_dic.
    # Only the items for which both entries hold a non-empty score take part.
    # params:
    #   score_dic: the dic containing the objects and their scores
    #   ori_object: the key of the reference object
    #   object: the key of the object compared with ori_object
    # return:
    #   1 / (1 + distance) over the shared items, or 0 when nothing is shared

    reference = score_dic[ori_object]

    # items scored (non-empty) by both sides, in the reference's order
    shared = [
        key
        for key, value in reference.items()
        if value and key in score_dic[object] and score_dic[object][key]
    ]
    if not shared:
        return 0

    squared_gap = sum(
        (float(reference[key]) - float(score_dic[object][key])) ** 2
        for key in shared
    )

    # identical scores give distance 0 and therefore similarity 1.0
    return 1.0 / (1.0 + math.sqrt(squared_gap))



In [33]:
def matrix_transform(score_dic):
    # Swap the rows and columns of score_dic.
    # The column names are taken from the first row only; every row is then
    # expected to hold a score for each of those names.
    # params:
    #   score_dic: the dic containing the objects and their scores
    # return:
    #   the transposed dic, shaped like {column: {row: score}}

    row_names = score_dic.keys()
    # an empty input has no first row, hence no columns
    first_row = next(iter(score_dic.values()), {})

    flipped = {}
    for column in first_row.keys():
        flipped[column] = {row: score_dic[row][column] for row in row_names}
    return flipped


In [34]:
def top_matches(score_dic, ori_object, similarity = sim_distance):
    # Rank every object of score_dic by its similarity to ori_object.
    # ori_object itself is scored as well and appears in the result.
    # params:
    #   score_dic: the dic containing the objects and their scores
    #   ori_object: the key of the reference object
    #   similarity: the function computing the similarity of two objects
    # return:
    #   a list of (similarity, object) tuples, highest similarity first

    ranked = []
    for candidate in score_dic:
        ranked.append((similarity(score_dic, ori_object, candidate), candidate))

    # ascending sort, then flipped so the best match comes first
    ranked.sort()
    return ranked[::-1]


In [35]:
if __name__ == "__main__":
    # load the score table and show the scores of every user
    file_path = "train.csv"
    score_dic = load_data(file_path)
    for item in score_dic:
        user_scores = str(score_dic[item])
        print("{'" + item + "':" + user_scores + "}")

    # rank all users by their similarity to one person
    print("user similarity:")
    person = "Kai Zhou"
    for item in top_matches(score_dic, person):
        print(item)

{'Kai Zhou':{'Friends': '4', 'Bedtime Stories': '3', 'Dawn of the Planet of the Apes': '5', 'RoboCop': '', 'Fargo': '1', 'Cougar Town': '2'}}
{'Shuai Ge':{'Friends': '', 'Bedtime Stories': '3.5', 'Dawn of the Planet of the Apes': '3', 'RoboCop': '4', 'Fargo': '2.5', 'Cougar Town': '4.5'}}
{'Mei Nv':{'Friends': '3', 'Bedtime Stories': '4', 'Dawn of the Planet of the Apes': '2', 'RoboCop': '3', 'Fargo': '2', 'Cougar Town': '3'}}
{'xiaoxianrou':{'Friends': '2.5', 'Bedtime Stories': '3.5', 'Dawn of the Planet of the Apes': '3', 'RoboCop': '3.5', 'Fargo': '2.5', 'Cougar Town': '3'}}
{'fengzhi':{'Friends': '3', 'Bedtime Stories': '4', 'Dawn of the Planet of the Apes': '', 'RoboCop': '5', 'Fargo': '3.5', 'Cougar Town': '3'}}
{'meinv':{'Friends': '', 'Bedtime Stories': '4.5', 'Dawn of the Planet of the Apes': '', 'RoboCop': '4', 'Fargo': '1', 'Cougar Town': ''}}
{'mincat':{'Friends': '3', 'Bedtime Stories': '3.5', 'Dawn of the Planet of the Apes': '1.5', 'RoboCop': '5', 'Fargo': '3.5', 'Cougar

In [37]:
def get_recommendation(score_dic, ori_object, similarity = sim_distance):
    # Rank the columns of score_dic for ori_object by the similarity-weighted
    # average of the scores given by all the other rows.
    # A column for which no other row contributed a score (empty scores only,
    # or every other row has a similarity <= 0) cannot be averaged and is
    # left out of the result.
    # params:
    #   score_dic: the dic containing the objects and their scores
    #   ori_object: the key of the object to get recommendations for
    #   similarity: the function computing the similarity of two objects
    # return:
    #   a list of (weighted average score, column) tuples, highest first

    rows = score_dic.keys()
    # the column names are taken from the first row; an empty dic has none
    columns = next(iter(score_dic.values()), {}).keys()

    sum_of_column_sim = {}
    sum_of_column = {}

    for row in rows:
        if row == ori_object:
            # only rows distinct from ori_object are compared
            continue
        # similarity between ori_object and this row
        sim = similarity(score_dic, ori_object, row)
        if sim <= 0:
            # a row with nothing in common carries no weight
            continue

        for column in columns:
            raw_score = score_dic[row][column]
            if raw_score == "":
                # only columns this row actually scored contribute
                continue

            # total weight collected by the column
            sum_of_column_sim[column] = sum_of_column_sim.get(column, 0) + sim
            # total of similarity * score collected by the column
            sum_of_column[column] = sum_of_column.get(column, 0) + float(raw_score) * sim

    # Columns without any contribution have no entry in the sum dics;
    # indexing them would raise KeyError, so they are filtered out here.
    scores = [
        (sum_of_column[column] / sum_of_column_sim[column], column)
        for column in columns
        if column in sum_of_column_sim
    ]
    scores.sort()
    scores.reverse()
    return scores


In [38]:
if __name__ == "__main__":
    # load the score table and show the scores of every user
    file_path = "train.csv"
    score_dic = load_data(file_path)
    for item in score_dic:
        user_scores = str(score_dic[item])
        print("{'" + item + "':" + user_scores + "}")

    # transpose the table so that movies become the rows, then rank the
    # people for one movie
    movie = "Friends"
    trans_score_dic = matrix_transform(score_dic)
    print("find the person who likes " + movie)
    people_score = get_recommendation(trans_score_dic, movie)
    for item in people_score:
        print(item)


{'Kai Zhou':{'Friends': '4', 'Bedtime Stories': '3', 'Dawn of the Planet of the Apes': '5', 'RoboCop': '', 'Fargo': '1', 'Cougar Town': '2'}}
{'Shuai Ge':{'Friends': '', 'Bedtime Stories': '3.5', 'Dawn of the Planet of the Apes': '3', 'RoboCop': '4', 'Fargo': '2.5', 'Cougar Town': '4.5'}}
{'Mei Nv':{'Friends': '3', 'Bedtime Stories': '4', 'Dawn of the Planet of the Apes': '2', 'RoboCop': '3', 'Fargo': '2', 'Cougar Town': '3'}}
{'xiaoxianrou':{'Friends': '2.5', 'Bedtime Stories': '3.5', 'Dawn of the Planet of the Apes': '3', 'RoboCop': '3.5', 'Fargo': '2.5', 'Cougar Town': '3'}}
{'fengzhi':{'Friends': '3', 'Bedtime Stories': '4', 'Dawn of the Planet of the Apes': '', 'RoboCop': '5', 'Fargo': '3.5', 'Cougar Town': '3'}}
{'meinv':{'Friends': '', 'Bedtime Stories': '4.5', 'Dawn of the Planet of the Apes': '', 'RoboCop': '4', 'Fargo': '1', 'Cougar Town': ''}}
{'mincat':{'Friends': '3', 'Bedtime Stories': '3.5', 'Dawn of the Planet of the Apes': '1.5', 'RoboCop': '5', 'Fargo': '3.5', 'Cougar