In [None]:
pip install -qr requirements.txt

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import json
from copy import deepcopy as copy

In [None]:
# Expert survey: one row per expert, one column per assessed value.
df = pd.read_csv('data/expert_opinions.csv')

experts_count = len(df)
# The column headers are the numeric values the experts were asked to label.
types = [int(column) for column in df.keys()]

# expert row number -> the answers of that expert, in column order
experts_answers = {
    expert: [df[column][expert] for column in df]
    for expert in range(experts_count)
}
# Every distinct answer that occurs anywhere in the survey.
options = {answer for answers in experts_answers.values() for answer in answers}

print(f'Множество значений: {types}')
print(f'Термы: {options}')

In [None]:
# Header: the assessed values, each left-aligned in a 4-character column.
print(''.join(f'{value:<3} ' for value in types))
separator = '----' * len(types)
print(separator)

# One section per expert; inside it one line per term with a '+' under every
# value that this expert labelled with that term.
for answers in experts_answers.values():
    for term in options:
        marks = ''.join('+   ' if term == answer else '    ' for answer in answers)
        print(marks, term, sep='')
    print(separator)

In [None]:
# term -> number of experts who chose that term, per assessed value (column order)
membership_table = {term: [0] * len(types) for term in options}
for answers in experts_answers.values():
    for position, answer in enumerate(answers):
        membership_table[answer][position] += 1

# Header with the assessed values, then for every term two lines:
# the raw vote counts and the share of experts (count / experts_count).
print(''.join(f'{value:<5} ' for value in types))
rule = '------' * len(types)
print(rule)
for term, votes in membership_table.items():
    print(''.join(f'{count:<5} ' for count in votes))
    shares = ''.join(f'{count / experts_count:<5} ' for count in votes)
    print(shares, term, sep='')
    print(rule)

# One curve per term: share of experts who picked the term for each value.
for term, votes in membership_table.items():
    shares = [count / experts_count for count in votes]
    plt.plot(types, shares, label=term)
plt.legend()
plt.show()


In [None]:
# Read the category hierarchy. The context manager closes the file handle
# deterministically, and the explicit encoding keeps non-ASCII category names
# readable regardless of the platform's default locale.
with open('data/category_tree.json', encoding='utf-8') as tree_file:
    category_tree = json.load(tree_file)

def print_category_tree(Node=category_tree, prefix=''):
    """Print a nested mapping of categories as a box-drawing tree.

    Args:
        Node: mapping of category name -> sub-mapping of its children;
            defaults to the ``category_tree`` loaded when this cell was run.
        prefix: text printed before every line of this level (the vertical
            guides inherited from the parent levels).
    """
    last_position = len(Node) - 1
    for position, category in enumerate(Node):
        if position == last_position:
            # Last child: close the branch and leave the column below it empty.
            branch, child_prefix = '└──', f'{prefix}   '
        else:
            # More siblings follow: keep the vertical guide for the children.
            branch, child_prefix = '├──', f'{prefix}│  '

        print(f'{prefix}{branch}{category}')
        print_category_tree(Node[category], child_prefix)


print_category_tree()

In [None]:
def find_categories_proximity(lhs, rhs):
    """Proximity of two categories derived from their distance in ``category_tree``.

    Both categories are located in the global ``category_tree`` (first match of
    a depth-first search). The proximity is ``1 / (d + 1)`` where ``d`` is the
    number of steps from one category up to the deepest common ancestor and
    down to the other one. Identical categories therefore give 1, and a
    category compared with its own ancestor gives ``1 / (depth difference + 1)``.

    Args:
        lhs: name of the first category.
        rhs: name of the second category.

    Returns:
        A float in ``(0, 1]``.

    Raises:
        ValueError: if either category does not occur in ``category_tree``.
    """
    def find_category(category, Node=category_tree):
        """Return the keys from the root down to ``category``, or None if absent."""
        assert Node is not None

        for name in Node:
            if category == name:
                return [name]
            in_sub_tree = find_category(category, Node[name])
            if in_sub_tree is not None:
                return [name] + in_sub_tree

        return None

    lhs_path = find_category(lhs)
    rhs_path = find_category(rhs)
    for name, path in ((lhs, lhs_path), (rhs, rhs_path)):
        if path is None:
            raise ValueError(f'category {name!r} not found in category_tree')

    # Length of the shared prefix of both paths = depth of the deepest common
    # ancestor. zip() stops at the shorter path, so an ancestor/descendant pair
    # ends up with the whole shorter path counted as common.
    common = 0
    for left, right in zip(lhs_path, rhs_path):
        if left != right:
            break
        common += 1

    return 1 / (len(lhs_path) - common + len(rhs_path) - common + 1)

def find_cals_proximity(lhs, rhs):
    """Ratio-based proximity of two calorie values.

    Returns ``min / max`` of the two values: equal values give 1 and the result
    shrinks towards 0 as the values diverge. Two zero values are treated as
    identical (proximity 1) instead of failing on a division by zero.

    Intended for non-negative numbers; negative input is not validated.
    """
    smaller, larger = min(lhs, rhs), max(lhs, rhs)
    if smaller == 0 and larger == 0:
        # 0 / 0 is undefined, but two equal values are as close as it gets.
        return 1.0
    return smaller / larger

def find_fat_proximity(lhs, rhs):
    """Ratio-based proximity of two fat-content values.

    Returns ``min / max`` of the two values: equal values give 1 and the result
    shrinks towards 0 as the values diverge. Two fat-free dishes (both 0) are
    treated as identical (proximity 1) instead of failing on a division by zero.

    Intended for non-negative numbers; negative input is not validated.
    """
    smaller, larger = min(lhs, rhs), max(lhs, rhs)
    if smaller == 0 and larger == 0:
        # 0 / 0 is undefined, but two equal values are as close as it gets.
        return 1.0
    return smaller / larger

def find_is_vegan_proximity(lhs, rhs):
    """Return 1 when both vegetarian flags are equal, otherwise 0."""
    if lhs == rhs:
        return 1
    return 0

def find_raw_proximity(lhs, rhs):
    """Return 1 when both cooking methods fall on the same side of raw / processed.

    Only the distinction "equals 'Без обработки'" versus "anything else"
    matters: two different processing methods still count as a match.
    """
    raw = 'Без обработки'
    lhs_is_raw = lhs == raw
    rhs_is_raw = rhs == raw
    return 1 if rhs_is_raw == lhs_is_raw else 0

def find_taste_proximity(lhs, rhs):
    """Jaccard similarity of two whitespace-separated taste descriptions.

    Each description is split into a set of taste words and the result is
    ``|common words| / |all distinct words|``. Working on sets on both sides
    keeps repeated words and stray extra spaces from skewing the score.

    Args:
        lhs: taste words of the first dish, separated by whitespace.
        rhs: taste words of the second dish, separated by whitespace.

    Returns:
        A float in ``[0, 1]``; 1.0 when neither description has any word.
    """
    lhs_terms = set(lhs.split())
    rhs_terms = set(rhs.split())

    all_terms = lhs_terms | rhs_terms
    if not all_terms:
        # Neither dish lists a taste: nothing tells them apart.
        return 1.0

    return len(lhs_terms & rhs_terms) / len(all_terms)

def clac_proximity(df, func):
    """Build the pairwise proximity matrix of the values in ``df``.

    Args:
        df: iterable of values; it is traversed once for the rows and once
            more for every row to produce the columns.
        func: callable ``func(row_value, column_value)`` returning the proximity.

    Returns:
        A list of lists where ``matrix[r][c] == func(value_r, value_c)``.
    """
    return [[func(row_value, column_value) for column_value in df] for row_value in df]

def merge_matrix(lhs, rhs, *other):
    """Add ``rhs`` and every matrix in ``other`` element-wise into ``lhs``.

    ``lhs`` is modified in place and also returned. Its shape drives the
    iteration, so each added matrix must provide at least the same cells.
    """
    for addend in (rhs, *other):
        for row_number, row in enumerate(lhs):
            for column in range(len(row)):
                row[column] += addend[row_number][column]

    return lhs

def down_matrix(matrix, coef=0.5):
    """Return a deep copy of ``matrix`` with every element multiplied by ``coef``.

    The input matrix is left unchanged.
    """
    scaled = copy(matrix)

    for row in scaled:
        for column, value in enumerate(row):
            row[column] = value * coef

    return scaled

In [None]:
# Load the dish catalogue (this rebinds `df`).
df = pd.read_csv('data/dishes.csv')
# Tastes are stored ';'-separated; turn the separators into spaces so the
# value can later be split on ' '.
taste = [value.replace(';', ' ') for value in df['Вкус']]
df['Вкус'] = taste
df

In [None]:
# Pairwise taste proximity of all dishes, shown as a heat map.
matrix = clac_proximity(df['Вкус'], find_taste_proximity)

heatmap = plt.matshow(matrix, interpolation='nearest', cmap='hot')
plt.colorbar(heatmap)
plt.show()

In [None]:
# (column, proximity function, weight) for every feature that contributes to
# the overall dish-to-dish proximity; the weights add up to 1.
feature_weights = [
    ('Приготовления', find_raw_proximity, 0.1),
    ('Вегетарианское', find_is_vegan_proximity, 0.1),
    ('Категория', find_categories_proximity, 0.3),
    ('Жирность', find_fat_proximity, 0.2),
    ('Калорийность', find_cals_proximity, 0.2),
    ('Вкус', find_taste_proximity, 0.1),
]

# Weighted sum of the per-feature proximity matrices, added in the order above.
matrix_for_recomendation = merge_matrix(*(
    down_matrix(clac_proximity(df[column], proximity), coef=weight)
    for column, proximity, weight in feature_weights
))

heatmap = plt.matshow(matrix_for_recomendation, interpolation='nearest', cmap='hot')
plt.colorbar(heatmap)
plt.show()

In [None]:
# Pairwise category proximity of all dishes, shown as a heat map.
matrix = clac_proximity(df['Категория'], find_categories_proximity)

heatmap = plt.matshow(matrix, interpolation='nearest', cmap='hot')
plt.colorbar(heatmap)
plt.show()

In [None]:
def get_recomendation(likes, dislikes):
    """Rank every dish by how well it matches the user's likes and dislikes.

    Every dish starts with a neutral score of 0.5. Each liked dish raises the
    score of the remaining dishes by ``proximity / len(likes) * 0.5`` and each
    disliked dish lowers it by ``proximity / len(dislikes) * 0.5``, where
    ``proximity`` is read from the global ``matrix_for_recomendation``. Dishes
    named in ``likes`` or ``dislikes`` themselves get no score (``None``).

    Args:
        likes: names (values of ``df['Название']``) of the liked dishes.
        dislikes: names of the disliked dishes.

    Returns:
        A copy of the global ``df`` with an added ``'Рекомендация'`` column,
        sorted by that column in descending order. ``df`` itself is not modified.

    Raises:
        IndexError: if a name does not occur in ``df['Название']``.
    """
    names = list(df['Название'])

    def find(name):
        """Row position of the first dish called ``name``."""
        # The proximity matrix and the score list are positional, so look up
        # the position rather than the index label.
        try:
            return names.index(name)
        except ValueError:
            raise IndexError(f"dish {name!r} not found in df['Название']") from None

    recomendation = [0.5] * len(df)

    def shift_scores(dishes, sign):
        """Move every still-scored dish towards (+1) or away from (-1) ``dishes``."""
        for dish in dishes:
            dish_i = find(dish)
            # A dish the user already rated is not recommended again.
            recomendation[dish_i] = None
            for j, proximity in enumerate(matrix_for_recomendation[dish_i]):
                if recomendation[j] is None:
                    continue

                recomendation[j] += sign * (proximity / len(dishes) * 0.5)

    shift_scores(likes, 1)
    shift_scores(dislikes, -1)

    # assign() works on a copy, so the shared dish table keeps its columns.
    result = df.assign(**{'Рекомендация': recomendation})
    return result.sort_values(by=['Рекомендация'], ascending=False)

In [None]:
def do_fliter(recomendation, category=None, cals=None, fat=None, taste=None, is_vegan=None, method=None):
    """Keep only the rows of ``recomendation`` that satisfy every given criterion.

    Criteria left as ``None`` are skipped; the others are applied one after
    another in the order category, calories, fat, taste, vegetarian, method.

    Args:
        recomendation: DataFrame with the columns 'Категория', 'Калорийность',
            'Жирность', 'Вкус', 'Вегетарианское' and 'Приготовления' (only the
            columns of the criteria actually used are read).
        category: exact category name to keep.
        cals: calorie bound. A plain value is an upper bound; a tuple
            ``(value, 'max')`` is an upper bound as well, and a tuple with any
            other second element is a lower bound. Bounds are inclusive.
        fat: one of the fat terms 'Не жирное', 'Слегка жирное', 'Жирное',
            'Очень жирное'; the fat value is converted with ``int()`` before
            it is compared with the term's range.
        taste: a single taste word that must occur in the space-separated
            'Вкус' value.
        is_vegan: required value of the 'Вегетарианское' column.
        method: required value of the 'Приготовления' column.

    Returns:
        The filtered DataFrame (all columns kept, possibly zero rows).

    Raises:
        KeyError: if ``fat`` is not one of the known terms.
    """
    # Fat terms mapped to value ranges; the original note points at the expert
    # survey as the source (presumably data/expert_opinions.csv - verify).
    term_to_fat = {
        'Не жирное': lambda x: int(x) <= 5,
        'Слегка жирное': lambda x: 5 < int(x) <= 10,
        'Жирное': lambda x: 10 < int(x) <= 18,
        'Очень жирное': lambda x: int(x) > 18,
    }

    def keep(frame, column, predicate):
        """Rows of ``frame`` whose ``column`` value satisfies ``predicate``."""
        # apply() on an empty column returns the column's own dtype instead of
        # bool; without the cast such a mask would be read as a (empty) list of
        # column labels and strip every column from the frame.
        mask = frame[column].apply(predicate).astype(bool)
        return frame[mask]

    if category is not None:
        recomendation = keep(recomendation, 'Категория', lambda x: x == category)

    if cals is not None:
        if isinstance(cals, tuple):
            limit, is_max = cals[0], cals[1] == 'max'
        else:
            limit, is_max = cals, True

        if is_max:
            recomendation = keep(recomendation, 'Калорийность', lambda x: x <= limit)
        else:
            recomendation = keep(recomendation, 'Калорийность', lambda x: x >= limit)

    if fat is not None:
        recomendation = keep(recomendation, 'Жирность', term_to_fat[fat])

    if taste is not None:
        recomendation = keep(recomendation, 'Вкус', lambda x: taste in x.split(' '))

    if is_vegan is not None:
        recomendation = keep(recomendation, 'Вегетарианское', lambda x: x == is_vegan)

    if method is not None:
        recomendation = keep(recomendation, 'Приготовления', lambda x: x == method)

    return recomendation

In [None]:
# Example request: two liked dishes and one disliked dish.
liked_dishes = ['Фруктовый салат', 'Манго с лаймовым сиропом']
disliked_dishes = ['Рататуй']
result = get_recomendation(liked_dishes, disliked_dishes)
result

In [None]:
# Narrow the ranked list down to the dishes that fall under the chosen fat term.
fat_term = 'Жирное'
result = do_fliter(result, fat=fat_term)
result