In [None]:
# pip install -qr requirements.txt

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import json
from copy import deepcopy as copy
import re

In [None]:
# Expert survey: one row per expert, one column per assessed numeric value.
df = pd.read_csv('data/expert_opinions.csv')

experts_count = len(df)

# Column headers are the numeric values the experts were asked to label.
types = [int(column) for column in df.keys()]

# expert row number -> list of that expert's answers, in column order
experts_answers = {
    expert: [df[column][expert] for column in df]
    for expert in range(experts_count)
}

# Every distinct answer (linguistic term) that occurs anywhere in the survey.
options = {
    answer
    for answers in experts_answers.values()
    for answer in answers
}

print(f'Множество значений: {types}')
print(f'Термы: {options}')

In [None]:
# Text table: for every expert, one row per term with a '+' under each
# value the expert labelled with that term.
separator = '----' * len(types)

print(''.join(f'{value:<3} ' for value in types))
print(separator)
for answers in experts_answers.values():
    for term in options:
        marks = ''.join('+   ' if term == given else '    ' for given in answers)
        print(marks, term, sep='')
    print(separator)

In [None]:
# term -> per-value vote counts (how many experts chose the term for each value)
membership_table = {term: [0] * len(types) for term in options}
for answers in experts_answers.values():
    for position, answer in enumerate(answers):
        membership_table[answer][position] += 1

# Text table: raw counts followed by the share of experts, one pair of rows per term.
wide_separator = '------' * len(types)

print(''.join(f'{value:<5} ' for value in types))
print(wide_separator)
for term, counts in membership_table.items():
    print(''.join(f'{votes:<5} ' for votes in counts))
    shares = ''.join(f'{votes / experts_count:<5} ' for votes in counts)
    print(shares, term, sep='')
    print(wide_separator)

# Share of experts per value, one line per term.
for term, counts in membership_table.items():
    plt.plot(
        types,
        [votes / experts_count for votes in counts],
        label=term
    )
plt.legend()
plt.show()


In [None]:
# Nested mapping: category name -> mapping of its sub-categories.
# The context manager closes the file handle deterministically; JSON text is
# read as UTF-8 instead of whatever the platform locale happens to be.
with open('data/category_tree.json', encoding='utf-8') as tree_file:
    category_tree = json.load(tree_file)


def print_category_tree(Node=category_tree, prefix=''):
    """Print the category hierarchy as an indented tree with box-drawing characters.

    Args:
        Node: mapping of category name -> sub-tree; defaults to the whole tree.
        prefix: text printed before every line of this level (used by the recursion).
    """
    last_position = len(Node) - 1
    for position, category in enumerate(Node):
        is_last = position == last_position

        # The last child closes the branch; the others keep the vertical rail.
        angle = '└──' if is_last else '├──'
        new_prefix = prefix + ('   ' if is_last else '│  ')

        print(f'{prefix}{angle}{category}')
        print_category_tree(Node[category], new_prefix)


print_category_tree()

In [None]:
def find_categories_proximity(lhs, rhs):
    """Proximity of two categories based on their position in ``category_tree``.

    Both categories are located in the tree and their root-to-node paths are
    compared. The result is ``1 / (steps + 1)`` where ``steps`` is the number of
    path elements that are not shared by the two paths, so identical categories
    give 1, a parent and its child give 1/2, two siblings give 1/3, and so on.

    Args:
        lhs: name of the first category.
        rhs: name of the second category.

    Returns:
        A number in (0, 1]; 1 only when both names resolve to the same path.

    Raises:
        AssertionError: if either category is not present in the tree.
    """
    def path_to(category, node):
        # Depth-first search returning the list of names from the top level
        # down to ``category``, or None when it is not in this sub-tree.
        assert node is not None

        for name in node:
            if name == category:
                return [name]
            below = path_to(category, node[name])
            if below is not None:
                return [name] + below

        return None

    lhs_path = path_to(lhs, category_tree)
    rhs_path = path_to(rhs, category_tree)

    assert lhs_path is not None, f'unknown category: {lhs!r}'
    assert rhs_path is not None, f'unknown category: {rhs!r}'

    # Length of the common prefix. zip() stops at the shorter path, which is
    # what the previous lexicographic min() of the two lists only achieved
    # by accident.
    shared = 0
    for left, right in zip(lhs_path, rhs_path):
        if left != right:
            break
        shared += 1

    if shared == len(lhs_path) == len(rhs_path):
        return 1

    # Also covers the ancestor/descendant case, which previously fell through
    # to "return 1" and was indistinguishable from identical categories.
    return 1 / (len(lhs_path) - shared + len(rhs_path) - shared + 1)

def find_cals_proximity(lhs, rhs):
    """Ratio of the smaller calorie value to the larger one.

    Equal values (including two zeros, which used to divide 0 by 0) give 1.0.
    A zero compared with a non-zero value gives 0.0.
    """
    if lhs == rhs:
        return 1.0

    larger = max(lhs, rhs)
    if larger == 0:
        # Only reachable with a negative operand; avoid dividing by zero.
        return 0.0
    return min(lhs, rhs) / larger

def find_fat_proximity(lhs, rhs):
    """Ratio of the smaller fat value to the larger one.

    Equal values (including two zeros, which used to divide 0 by 0) give 1.0.
    A zero compared with a non-zero value gives 0.0.
    """
    if lhs == rhs:
        return 1.0

    larger = max(lhs, rhs)
    if larger == 0:
        # Only reachable with a negative operand; avoid dividing by zero.
        return 0.0
    return min(lhs, rhs) / larger

def find_is_vegan_proximity(lhs, rhs):
    """Return 1 when both vegetarian flags are equal, otherwise 0."""
    if lhs == rhs:
        return 1
    return 0

def find_raw_proximity(lhs, rhs):
    """Return 1 when both dishes are raw or both are processed, otherwise 0.

    Only the distinction "no processing" versus "any processing" matters;
    two different cooking methods still count as a match.
    """
    raw_label = 'Без обработки'
    rhs_is_raw = rhs == raw_label
    lhs_is_raw = lhs == raw_label
    return 1 if rhs_is_raw == lhs_is_raw else 0

def find_taste_proximity(lhs, rhs):
    """Jaccard similarity of two whitespace-separated taste descriptions.

    Tokens are compared as sets, so repeated words and extra spaces do not
    lower the score of otherwise identical descriptions.

    Args:
        lhs: taste words of the first dish, separated by whitespace.
        rhs: taste words of the second dish, separated by whitespace.

    Returns:
        ``|common| / |union|`` as a float in [0, 1]; 1.0 when both are empty.
    """
    lhs_tokens = set(lhs.split())
    rhs_tokens = set(rhs.split())

    union = lhs_tokens | rhs_tokens
    if not union:
        # Two empty descriptions are identical.
        return 1.0

    return len(lhs_tokens & rhs_tokens) / len(union)

def clac_proximity(df, func):
    """Build the square matrix of ``func(a, b)`` for every ordered pair of items.

    Args:
        df: re-iterable collection of values (iterated once per row and once overall).
        func: callable taking two items and returning their proximity.

    Returns:
        List of rows; ``result[r][c]`` is ``func`` of the r-th and c-th items.
    """
    return [[func(row_item, col_item) for col_item in df] for row_item in df]

def merge_matrix(lhs, rhs, *other):
    """Add ``rhs`` and every matrix in ``other`` element-wise into ``lhs``.

    ``lhs`` is modified in place and also returned. All matrices are indexed
    with the row and column positions of ``lhs``.
    """
    for addend in (rhs, *other):
        for row_index, row in enumerate(lhs):
            for col_index in range(len(row)):
                row[col_index] += addend[row_index][col_index]

    return lhs

def down_matrix(matrix, coef=0.5):
    """Return a deep copy of ``matrix`` with every element multiplied by ``coef``.

    The input matrix is left untouched.
    """
    scaled = copy(matrix)

    for row_index in range(len(scaled)):
        row = scaled[row_index]
        for col_index in range(len(row)):
            scaled[row_index][col_index] = row[col_index] * coef

    return scaled

In [None]:
# Dish catalogue; from here on `df` refers to dishes, not to the expert survey.
df = pd.read_csv('data/dishes.csv')

# Taste words are ';'-separated in the file; the rest of the notebook splits on spaces.
taste = [flavours.replace(';', ' ') for flavours in df['Вкус']]
df['Вкус'] = taste

# Dish names are matched against lower-cased user input later on.
df['Название'] = df['Название'].str.lower()

df

In [None]:
# Pairwise taste proximity between all dishes, drawn as a heat map.
matrix = clac_proximity(df['Вкус'], find_taste_proximity)

plt.matshow(matrix, cmap='hot', interpolation='nearest')
plt.colorbar()
plt.show()

In [None]:
# (column, proximity function, weight) for every attribute that contributes to
# the overall dish-to-dish proximity. The weights add up to 1.
weighted_features = [
    ('Приготовления', find_raw_proximity, 0.1),
    ('Вегетарианское', find_is_vegan_proximity, 0.1),
    ('Категория', find_categories_proximity, 0.3),
    ('Жирность', find_fat_proximity, 0.2),
    ('Калорийность', find_cals_proximity, 0.2),
    ('Вкус', find_taste_proximity, 0.1),
]

# Weighted sum of the per-attribute proximity matrices.
matrix_for_recomendation = merge_matrix(*[
    down_matrix(clac_proximity(df[column], proximity), coef=weight)
    for column, proximity, weight in weighted_features
])

plt.matshow(matrix_for_recomendation, cmap='hot', interpolation='nearest')
plt.colorbar()
plt.show()

In [None]:
# Pairwise category proximity between all dishes, drawn as a heat map.
# Note: this rebinds `matrix`, which previously held the taste proximities.
matrix = clac_proximity(df['Категория'], find_categories_proximity)

plt.matshow(matrix, cmap='hot', interpolation='nearest')
plt.colorbar()
plt.show()

In [None]:
def get_recomendation(likes, dislikes):
    """Score every dish by its proximity to liked and disliked dishes.

    Every dish starts at 0.5. Each liked dish adds, and each disliked dish
    subtracts, ``proximity / len(list) * 0.5`` for all other dishes, using
    ``matrix_for_recomendation``. The liked/disliked dishes themselves get no
    score (missing value) and therefore sort last.

    Args:
        likes: iterable of dish names the user likes, or None.
        dislikes: iterable of dish names the user dislikes, or None.
            Names are matched case-insensitively; unknown names are ignored.

    Returns:
        A copy of the dish table with an extra 'Рекомендация' column, sorted by
        that column in descending order. The global ``df`` is not modified.
    """
    # A request may carry no likes and/or no dislikes at all.
    likes = [] if likes is None else list(likes)
    dislikes = [] if dislikes is None else list(dislikes)

    known_names = [str(name).lower() for name in df['Название']]

    def find(name):
        # Row position of the first dish with this name, or None.
        if not isinstance(name, str):
            return None
        wanted = name.strip().lower()
        for position, known in enumerate(known_names):
            if known == wanted:
                return position
        return None

    scores = [0.5 for _ in range(len(df))]

    def apply_preferences(names, sign):
        # Shift the score of every still-scored dish towards (+1) or away
        # from (-1) each named dish.
        for name in names:
            position = find(name)
            if position is None:
                continue

            scores[position] = None
            for other, proximity in enumerate(matrix_for_recomendation[position]):
                if scores[other] is None:
                    continue

                scores[other] += sign * (proximity / len(names) * 0.5)

    apply_preferences(likes, 1)
    apply_preferences(dislikes, -1)

    # Work on a copy so repeated calls do not keep mutating the shared table.
    result = df.copy()
    result['Рекомендация'] = scores
    return result.sort_values(by=['Рекомендация'], ascending=False)

In [None]:
def do_fliter(recomendation, category=None, cals=None, fat=None, taste=None, is_vegan=None, method=None):
    """Keep only the dishes that satisfy every given criterion.

    Args:
        recomendation: dish table (DataFrame) to filter.
        category: exact value of the 'Категория' column.
        cals: calorie limit; a bare number is an upper bound, a tuple
            ``(value, 'max')`` is an upper bound and ``(value, <anything else>)``
            a lower bound (both inclusive).
        fat: one of the fat terms defined below ('Не жирное', ...).
        taste: a single taste word that must be among the dish's taste words.
        is_vegan: exact value of the 'Вегетарианское' column.
        method: exact value of the 'Приготовления' column.

    Returns:
        The filtered table; criteria left as None are not applied.

    Raises:
        KeyError: if ``fat`` is not a known term.
    """
    # Thresholds per fat term. The original note said they come from
    # "expert_options.csv" — presumably the expert opinions data; TODO confirm.
    term_to_fat = {
        'Не жирное': lambda x: int(x) <= 5,
        'Слегка жирное': lambda x: 5 < int(x) <= 10,
        'Жирное': lambda x: 10 < int(x) <= 18,
        'Очень жирное': lambda x: int(x) > 18,
    }

    def keep(frame, column, predicate):
        # .apply() on an empty column does not yield a boolean mask, and
        # indexing with it would be read as a column selection and drop every
        # column. An empty frame has nothing left to filter.
        if frame.empty:
            return frame
        return frame[frame[column].apply(predicate)]

    if category is not None:
        recomendation = recomendation[recomendation['Категория'] == category]

    if cals is not None:
        if isinstance(cals, tuple):
            is_max = cals[1] == 'max'
            cals = cals[0]
        else:
            is_max = True

        if is_max:
            recomendation = recomendation[recomendation['Калорийность'] <= cals]
        else:
            recomendation = recomendation[recomendation['Калорийность'] >= cals]

    if fat is not None:
        # Look the term up first so an unknown term fails even on an empty table.
        recomendation = keep(recomendation, 'Жирность', term_to_fat[fat])

    if taste is not None:
        recomendation = keep(recomendation, 'Вкус', lambda x: taste in x.split(' '))

    if is_vegan is not None:
        recomendation = recomendation[recomendation['Вегетарианское'] == is_vegan]

    if method is not None:
        recomendation = recomendation[recomendation['Приготовления'] == method]

    return recomendation

In [None]:
def do_fliter_2(recomendation, filters: list):
    """Apply a sequence of ``(kind, predicate)`` filters to the dish table.

    Args:
        recomendation: dish table (DataFrame) to filter.
        filters: sequence whose items hold a kind at index 0 and a predicate at
            index 1. Kind 'taste' keeps a dish when the predicate is true for at
            least one of its space-separated taste words; kind 'category' keeps
            a dish when the predicate is true for its category. Other kinds are
            ignored.

    Returns:
        The table with all filters applied in order.
    """
    for entry in filters:
        kind, predicate = entry[0], entry[1]

        # .apply() on an empty column does not yield a boolean mask; indexing
        # with it would be read as a column selection and drop every column.
        # Nothing is left to filter anyway.
        if recomendation.empty:
            break

        if kind == 'taste':
            def taste_filter(value, accepts=predicate):
                return any(accepts(word) for word in value.split(' '))

            recomendation = recomendation[recomendation['Вкус'].apply(taste_filter)]
        elif kind == 'category':
            recomendation = recomendation[recomendation['Категория'].apply(predicate)]

    return recomendation

In [None]:
from tkinter import N
from turtle import goto


class Request:
    """A parsed user request.

    Attributes:
        like: list of liked dish names, or None when the user gave none.
        dislike: list of disliked dish names, or None when the user gave none.
        filters: list of ``(kind, predicate)`` tuples as produced by
            ``resolve_filters`` (kind is 'taste' or 'category').
        count: how many dishes to show; None means all.
    """
    like = None
    dislike = None

    # Class-level default kept for backward compatibility; every instance gets
    # its own list in __init__ so requests never share filter state.
    filters = []

    count = None  # None means "all"

    # Taste adjectives in plural or neuter singular form.
    _TASTE_PATTERN = re.compile(
        r'('
        r'сладк(ие|ое)|'
        r'солен(ые|ое)|'
        r'кисл(ые|ое)|'
        r'остр(ые|ое)|'
        r'нейтральн(ые|ое)|'
        r'горьк(ие|ое)'
        r')',
        re.IGNORECASE
    )

    # Genitive form as typed by the user -> category name used in the data.
    _CATEGORY_FORMS = {
        'мяса': 'мясо',
        'говядины': 'говядина',
        'свинины': 'свинина',
        'баранины': 'баранина',
        'рыбы': 'рыба',
        'белой рыбы': 'белая рыба',
        'красной рыбы': 'красная рыба',
        'морепродуктов': 'морепродукты',
        'птицы': 'птица',
        'индейки': 'индейка',
        'курицы': 'курица',
        'овощей': 'овощи',
        'фруктов': 'фрукты',
    }

    # "из <category>". There is deliberately no empty alternative: "из" followed
    # by an unknown word must not produce a filter that matches nothing.
    _CATEGORY_PATTERN = re.compile(
        r'из\s(' + '|'.join(map(re.escape, _CATEGORY_FORMS)) + r')',
        re.IGNORECASE
    )

    def __init__(self):
        self.filters = []

    @staticmethod
    def resolve_filters(filters_str: str) -> list:
        """Turn a free-text filter phrase into a list of ``(kind, predicate)`` tuples.

        Recognised pieces, scanned left to right:
          * taste adjectives -> ``('taste', predicate)``;
          * "из <category>" -> ``('category', predicate)``;
          * "не " negates the next recognised taste or category;
          * "и " ORs the next taste with the previous filter if that is a taste;
          * whitespace, commas and unknown words are skipped.

        Predicates take a string and compare it case-insensitively.

        Args:
            filters_str: the part of the user phrase that describes the dishes.

        Returns:
            List of filters in the order they were recognised.
        """
        filters = []

        neg = False        # set by "не", consumed by the next recognised term
        with_prev = False  # set by "и", consumed by the next recognised term

        def norm_taste(taste):
            # Plural adjective endings are mapped to the neuter singular form.
            taste = taste.lower()
            if taste.endswith(('ие', 'ые')):
                taste = taste[:-2] + 'ое'
            return taste

        def norm_category(category):
            return Request._CATEGORY_FORMS.get(category.lower())

        while len(filters_str) != 0:
            # whitespace and commas carry no meaning
            separator = re.match(r'\s+|,', filters_str)
            if separator is not None:
                filters_str = filters_str[separator.end():]
                continue

            # "и": combine the next taste with the previous one
            conjunction = re.match(r'и\s', filters_str, re.IGNORECASE)
            if conjunction is not None:
                filters_str = filters_str[conjunction.end():]
                with_prev = True
                continue

            # "не": negate the next recognised term
            inverse = re.match(r'не\s', filters_str, re.IGNORECASE)
            if inverse is not None:
                filters_str = filters_str[inverse.end():]
                neg = True
                continue

            # taste
            taste = Request._TASTE_PATTERN.match(filters_str)
            if taste is not None:
                filters_str = filters_str[taste.end():]

                value = norm_taste(taste.group(1))

                f = lambda x, v=value: x.lower() == v

                if neg:
                    neg = False
                    f = lambda x, r=f: not r(x)

                if with_prev:
                    with_prev = False

                    if filters and filters[-1][0] == 'taste':
                        # Bind the previous predicate as a default argument: a
                        # plain closure variable would be rebound by the next
                        # "и" and make the predicate call itself forever.
                        previous = filters.pop()[1]
                        f = lambda x, r=f, p=previous: r(x) or p(x)

                filters.append(('taste', f))
                continue

            # category
            category = Request._CATEGORY_PATTERN.match(filters_str)
            if category is not None:
                filters_str = filters_str[category.end():]

                value = norm_category(category.group(1))

                f = lambda x, v=value: x.lower() == v

                if neg:
                    neg = False
                    f = lambda x, r=f: not r(x)

                if with_prev:
                    with_prev = False

                filters.append(('category', f))
                continue

            # unrecognised word: skip this one token only, so that recognised
            # terms later in the phrase are still parsed
            unknown = re.match(r'[^\s,]+', filters_str)
            filters_str = filters_str[unknown.end() if unknown is not None else 1:]

        return filters

    @staticmethod
    def resolve_count(count_str):
        """Convert the quantity part of a request into a number.

        Args:
            count_str: '' (no limit), 'одно', 'пару' or a string of digits.

        Returns:
            None for '', 1 for 'одно', 2 for 'пару', otherwise ``int(count_str)``.

        Raises:
            ValueError: if the text is none of the above.
        """
        count_str = count_str.strip().lower()
        if count_str == '':
            return None
        if count_str == 'одно':
            return 1
        if count_str == 'пару':
            return 2
        return int(count_str)

# Example phrases (kept in Russian because the parser only understands Russian):
# user_text = 'Покажи все сладкие и соленые, не из фруктов блюда'
# user_text = 'порекомендуй одно блюдо как котлета по киевски'
user_text = 'Я люблю острые блюда'
# user_text = 'Порекомендуй острые блюда'
# user_text = 'Хочу что то соленое'
# user_text = 'Хочу что то как рататуй'


def _parse_request(text):
    """Match ``text`` against the supported phrasings.

    Returns a filled ``Request``, or None when no phrasing matches. The text is
    stripped and lower-cased first because dish names are stored in lower case.
    """
    text = text.strip().lower()

    # "порекомендуй [N|пару|одно] блюд(о|а) [не] [как] <dish>"
    # Tried before the generic "... блюда" phrasing below, which would
    # otherwise match first and silently drop the dish name.
    result = re.match(r'(порекомендуй)\s(\d+(\s)|пару(\s)|одно(\s)|)(блюд(о|а|))(\Z|\s((не(\s)|)(как\s|)(.*)))', text, re.IGNORECASE)
    if result:
        parsed = Request()
        parsed.count = Request.resolve_count(result.group(2).strip())

        # Groups 10 and 13 are None when the phrase ends right after "блюдо".
        is_dislike = (result.group(10) or '').strip() == 'не'
        dish = (result.group(13) or '').strip()
        if dish:
            if is_dislike:
                parsed.dislike = [dish]
            else:
                parsed.like = [dish]
        return parsed

    # "порекомендуй / покажи все / выдай все / какие есть <filters> блюда"
    result = re.match(r'(порекомендуй|покажи\sвсе|выдай\sвсе|какие\sесть)\s(.*(\s)|)(блюда)', text, re.IGNORECASE)
    if result:
        parsed = Request()
        parsed.filters = Request.resolve_filters(result.group(2))
        return parsed

    # "мне нравится/нравятся <filters> блюда", "я люблю <filters> блюда"
    result = re.match(r'(мне\sнрав(ится|ятся)|я\sлюблю)\s(.*)\s(блюд(о|а|))', text, re.IGNORECASE)
    if result:
        parsed = Request()
        parsed.filters = Request.resolve_filters(result.group(3))
        return parsed

    # "хочу / порекомендуй [что-то] [не] (как <dish> | <filters>)"
    result = re.match(r'(хочу|порекомендуй)\s(что-то\s|что то\s|)(не(\s)|)((как (.*))|(.*))', text, re.IGNORECASE)
    if result:
        parsed = Request()

        is_dislike = result.group(3).strip() == 'не'
        dish = result.group(7)
        filters_str = result.group(8)

        if filters_str is not None:
            if is_dislike:
                # Hand the negation to the filter parser instead of dropping it.
                filters_str = 'не ' + filters_str
            parsed.filters = Request.resolve_filters(filters_str)

        if dish is not None and dish.strip():
            if is_dislike:
                parsed.dislike = [dish.strip()]
            else:
                parsed.like = [dish.strip()]
        return parsed

    return None


def _parse_preference(text):
    """Parse an answer such as "мне не нравится борщ" or "не борщ".

    Returns ``(dish_name, is_dislike)``; ``dish_name`` may be empty.
    """
    text = text.strip().lower()
    # Every "не" must be followed by whitespace so that a dish name that merely
    # starts with "не" is not read as a negation.
    result = re.match(
        r'(?:мне\s+(?P<neg_a>не\s+)?нрав(?:ится|ятся)\s+'
        r'|я\s+(?P<neg_b>не\s+)?люблю\s+'
        r'|(?P<neg_c>не\s+))?'
        r'(?P<dish>.*)',
        text
    )
    is_dislike = any(result.group(name) for name in ('neg_a', 'neg_b', 'neg_c'))
    return result.group('dish').strip(), is_dislike


request = _parse_request(user_text)
while request is None:
    print(
        'К сожалению я вас не понял, попробуйте переформулировать запрос, например:\n'
        'Я люблю острые блюда\n'
        'Порекомендуй блюда как котлета по киевски'
    )

    # Ask again; without new input the same phrase would be rejected forever.
    user_text = input()
    request = _parse_request(user_text)

# The recommendation needs at least one reference dish unless the user opts out.
while request.like is None and request.dislike is None:
    print('какое вам нравится или не нравится из блюдо')
    user_text = input()

    # "не знаю" / "ничего" / "пропуск" / "пропусти": continue without a reference dish
    result = re.match(r'(не\sзнаю|ничего|пропус(к|ти))', user_text.strip(), re.IGNORECASE)
    if result:
        break

    dish_name, is_dislike_answer = _parse_preference(user_text)
    if not dish_name:
        continue

    if is_dislike_answer:
        request.dislike = [dish_name]
    else:
        request.like = [dish_name]

# Downstream code iterates over both lists, so replace "not given" with empty lists.
request.like = request.like or []
request.dislike = request.dislike or []

print(
    f'[DEBUG]: request: filters count: {len(request.filters)}, '
    f'like: {request.like} '
    f'dislike: {request.dislike} '
    f'count: {request.count}'
)

In [None]:
# Score all dishes against the user's likes/dislikes, then apply the parsed filters.
recomendation = get_recomendation(request.like, request.dislike)
recomendation = do_fliter_2(recomendation, request.filters)

# Honour the requested number of dishes; None means "show everything".
if request.count is not None:
    recomendation = recomendation.head(request.count)

recomendation