In [None]:
import numpy as np
import pandas as pd
from typing import Dict, Tuple
from scipy import stats
from IPython.display import Image
from IPython.display import Image
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.datasets import load_iris, load_boston
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, balanced_accuracy_score
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, export_graphviz
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.ensemble import ExtraTreesClassifier, ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_squared_log_error, median_absolute_error, r2_score 
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances, manhattan_distances
from collections import defaultdict
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
%matplotlib inline
# NOTE(review): `quoting` is assigned here but never read in the visible cells; it looks
# like a leftover `read_csv(..., quoting=3)` argument — confirm before removing.
quoting=3 
# Apply seaborn's "ticks" style to the plots.
sns.set(style="ticks")

In [None]:
# Load the wine reviews CSV from the working directory and preview the first rows.
data = pd.read_csv('winemag-data-130k-v2.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks


In [None]:
# (rows, columns) of the loaded frame
data.shape

(129971, 14)

In [None]:
# Keep only the reviews that have a description text, then report the resulting shape.
description_data = data.loc[data['description'].notna()]
description_data.shape

(129971, 14)

In [None]:
# NOTE: despite its name, `title` holds the 'designation' column (NaN where it is missing),
# not the dataset's 'title' column.
title = description_data['designation'].values
title[0:5]

array(['Vulkà Bianco', 'Avidagos', nan, 'Reserve Late Harvest',
       "Vintner's Reserve Wild Child Block"], dtype=object)

In [None]:
# Review texts as an array; these are the documents vectorised with TF-IDF further down.
descriptions = description_data['description'].values
descriptions[0:5]

array(["Aromas include tropical fruit, broom, brimstone and dried herb. The palate isn't overly expressive, offering unripened apple, citrus and dried sage alongside brisk acidity.",
       "This is ripe and fruity, a wine that is smooth while still structured. Firm tannins are filled out with juicy red berry fruits and freshened with acidity. It's  already drinkable, although it will certainly be better from 2016.",
       'Tart and snappy, the flavors of lime flesh and rind dominate. Some green pineapple pokes through, with crisp acidity underscoring the flavors. The wine was all stainless-steel fermented.',
       'Pineapple rind, lemon pith and orange blossom start off the aromas. The palate is a bit more opulent, with notes of honey-drizzled guava and mango giving way to a slightly astringent, semidry finish.',
       "Much like the regular bottling from 2012, this comes across as rather rough and tannic, with rustic, earthy, herbal characteristics. Nonetheless, if you think of it

In [None]:
# Column labels of the filtered frame
description_data.keys()

Index(['Unnamed: 0', 'country', 'description', 'designation', 'points',
       'price', 'province', 'region_1', 'region_2', 'taster_name',
       'taster_twitter_handle', 'title', 'variety', 'winery'],
      dtype='object')

In [None]:
# Object identifiers are taken from the 'Unnamed: 0' column of the CSV.
wine_ids = description_data['Unnamed: 0'].values
wine_ids

array([     0,      1,      2, ..., 129968, 129969, 129970])

In [None]:
%%time
# Fit a TF-IDF vectorizer with default settings on all descriptions;
# description_matrix has one row per review.
tfidf = TfidfVectorizer()
description_matrix = tfidf.fit_transform(descriptions)
description_matrix

CPU times: user 4.56 s, sys: 0 ns, total: 4.56 s
Wall time: 4.56 s


In [None]:
# Show the summary repr of the TF-IDF matrix
description_matrix

<129971x31275 sparse matrix of type '<class 'numpy.float64'>'
	with 4475479 stored elements in Compressed Sparse Row format>

**Фильтрация на основе содержания. Метод k-ближайших соседей**

In [None]:
class SimplerKnnRecomender:
    """
    Brute-force nearest-neighbour recommender over an object-feature matrix.

    The whole training matrix is compared with a single query row using cosine
    similarity, Euclidean distance or Manhattan distance, and the K best
    matching objects are returned as a DataFrame.
    """

    # Raw similarity / distance values are multiplied by this factor before
    # they are stored in the 'dist' column of the result.
    _SCALE = 1000000
    # Absolute tolerance (on the unscaled metric) used to recognise the query
    # object itself: floating-point rounding can make its self-similarity
    # differ slightly from 1 and its self-distance differ slightly from 0.
    _SELF_TOLERANCE = 1e-6

    def __init__(self, X_matrix, X_ids, X_title, X_overview):
        """
        Parameters:
        X_matrix - training set (object-feature matrix), one row per object
        X_ids - array of object identifiers
        X_title - array of object titles (stored in the 'description' column)
        X_overview - array of object descriptions (stored in the 'overview' column)
        """
        # Keep the feature matrix and a metadata frame aligned row by row.
        self._X_matrix = X_matrix
        self.df = pd.DataFrame(
            {'id': pd.Series(X_ids, dtype='int'),
             'description': pd.Series(X_title, dtype='str'),
             'overview': pd.Series(X_overview, dtype='str'),
             # Placeholder only: per-query values are attached to a copy of
             # the frame, so this column stays NaN on the instance.
             'dist': pd.Series([], dtype='float')})

    def recommend_for_single_object(self, K: int,
                X_matrix_object, cos_flag = True, manh_flag = False):
        """
        Build recommendations for a single object.

        Parameters:
        K - number of neighbours to return
        X_matrix_object - row of the object-feature matrix describing the query object
        cos_flag - if True, rank by cosine similarity (largest first)
        manh_flag - used only when cos_flag is False: if True, rank by Manhattan
                    distance, otherwise by Euclidean distance (smallest first)

        Returns: DataFrame with the K best neighbours; the 'dist' column holds
        the similarity / distance multiplied by 1,000,000. Objects whose metric
        value marks them as identical to the query (similarity ~ 1 or
        distance ~ 0) are left out, which removes the query object itself.
        """
        if cos_flag:
            raw = cosine_similarity(self._X_matrix, X_matrix_object)
        elif manh_flag:
            raw = manhattan_distances(self._X_matrix, X_matrix_object)
        else:
            raw = euclidean_distances(self._X_matrix, X_matrix_object)

        # The metric functions return an (n_objects, 1) array; flatten it and
        # attach it to a copy so that repeated queries do not share state.
        scaled = np.asarray(raw, dtype=float).ravel() * self._SCALE
        scored = self.df.assign(dist=scaled)
        margin = self._SELF_TOLERANCE * self._SCALE

        if cos_flag:
            ranked = scored.sort_values(by='dist', ascending=False)
            # Similarity of (almost) exactly one means the query object itself.
            ranked = ranked[ranked['dist'] < self._SCALE - margin]
        else:
            ranked = scored.sort_values(by='dist', ascending=True)
            # Distance of (almost) exactly zero means the query object itself.
            ranked = ranked[ranked['dist'] > margin]

        # Keep the K best recommendations.
        return ranked.head(K)


In [None]:
# Position of the wine used as the query object in the content-based examples.
test_id = 11
# Print its designation (NaN when missing) and its review text.
print(title[test_id])
print(descriptions[test_id])

nan
This is a dry wine, very spicy, with a tight, taut texture and strongly mineral character layered with citrus as well as pepper. It's a food wine with its almost crisp aftertaste.


In [None]:
# TF-IDF row of the query wine
test_matrix = description_matrix[test_id]
test_matrix

<1x31275 sparse matrix of type '<class 'numpy.float64'>'
	with 25 stored elements in Compressed Sparse Row format>

In [None]:
# Recommender over all wines: designations are passed as titles, review texts as overviews.
skr1 = SimplerKnnRecomender(description_matrix, wine_ids, title, descriptions)

In [None]:
# Up to 15 wines most similar to the query wine (test_id), in order of
# decreasing cosine similarity; 'dist' holds the similarity scaled by 1e6.
rec1 = skr1.recommend_for_single_object(15, test_matrix)
rec1

Unnamed: 0,id,description,overview,dist
24045,24045,,The wine is textured and tight with crisp acid...,633624.990866
90700,90700,Morgeot Premier Cru,This wine is still tight and crisp. It has ple...,442624.176096
58330,58330,Chartron la Fleur,"The wine is tight and nervy, very fresh, crisp...",432556.705703
66081,66081,,This taut and structured wine has weight as we...,430242.028148
78572,78572,,"This wine is tight, structured and taut. Still...",428504.458538
105230,105230,Pierre de Lune,This rich and ripe wine is full of apricot and...,425886.605501
25907,25907,,"Tight and structured, this wine has minerality...",424385.444731
99011,99011,Les Clos,This crisp wine offers plenty of acidity as we...,423757.52556
5406,5406,,Ripe Alvarinho gives a wine that is rich as we...,421592.5297
22652,22652,Léo de la Gaffelière,"Very herbaceous in character, this is a wine t...",418388.507228


In [None]:
# Searching with Euclidean distance returns the same wines in the same order.
# This is expected if the TF-IDF rows are L2-normalised (the TfidfVectorizer default):
# for unit vectors the Euclidean distance is a decreasing function of cosine similarity.
rec2 = skr1.recommend_for_single_object(15, test_matrix, cos_flag = False)
rec2

Unnamed: 0,id,description,overview,dist
24045,24045,,The wine is textured and tight with crisp acid...,856008.2
90700,90700,Morgeot Premier Cru,This wine is still tight and crisp. It has ple...,1055818.0
58330,58330,Chartron la Fleur,"The wine is tight and nervy, very fresh, crisp...",1065311.0
66081,66081,,This taut and structured wine has weight as we...,1067481.0
78572,78572,,"This wine is tight, structured and taut. Still...",1069108.0
105230,105230,Pierre de Lune,This rich and ripe wine is full of apricot and...,1071553.0
25907,25907,,"Tight and structured, this wine has minerality...",1072953.0
99011,99011,Les Clos,This crisp wine offers plenty of acidity as we...,1073539.0
5406,5406,,Ripe Alvarinho gives a wine that is rich as we...,1075553.0
22652,22652,Léo de la Gaffelière,"Very herbaceous in character, this is a wine t...",1078528.0


In [None]:
# Manhattan distance produces a different ranking: several wines are shared with the
# cosine / Euclidean results, but their order changes and new wines appear.
rec3 = skr1.recommend_for_single_object(15, test_matrix, 
                                        cos_flag = False, manh_flag = True)
rec3

Unnamed: 0,id,description,overview,dist
24045,24045,,The wine is textured and tight with crisp acid...,3865262.0
22652,22652,Léo de la Gaffelière,"Very herbaceous in character, this is a wine t...",5251729.0
35502,35502,Perles,"Tight and sharp, this is an herbaceous wine wi...",5312967.0
58330,58330,Chartron la Fleur,"The wine is tight and nervy, very fresh, crisp...",5316624.0
25907,25907,,"Tight and structured, this wine has minerality...",5354298.0
21920,21920,Frais et Délicat,"This is crisp, fruity with apple and citrus fl...",5452536.0
97201,97201,Domaine la Rabiotte,"Tight, zingy and crisp, this wine has fresh, c...",5535851.0
70762,70762,Domaine du Seuil,The wine is tight and mineral in character. It...,5564448.0
128577,128577,Domaine Bel Eouve,"This is a tangy, spicy wine, a character that ...",5628584.0
78572,78572,,"This wine is tight, structured and taut. Still...",5644448.0


**Коллаборативная фильтрация. Метод на основе сингулярного разложения**

In [None]:
# Preview the raw data again before building the interaction matrix
data.head()

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks


In [None]:
# Work on a 25,000-row slice of the data (row positions 30000..54999).
data3 = data[30000:55000]
# Number of unique wineries in the slice
len(data3['winery'].unique())

9080

In [None]:
# Number of unique wines (designations) in the slice; unique() keeps a missing
# designation (NaN) as one of the values, so it is included in the count.
len(data3['designation'].unique())

11639

In [None]:
# Build the interaction (utility) matrix from the review scores.
# Based on the idea from https://towardsdatascience.com/beginners-guide-to-creating-an-svd-recommender-system-1fd7326d1f65
def create_utility_matrix(data):
    """
    Build a dense user-item matrix from a frame of reviews.

    Wineries ('winery') act as users, wine designations ('designation') act as
    items and review scores ('points') are the interaction values.

    Parameters:
    data - DataFrame with the columns 'winery', 'designation' and 'points'

    Returns a tuple (X, users_index, items_index):
    X - DataFrame with one row per winery and one column per designation;
        cells without a review are 0.0; if a (winery, designation) pair occurs
        several times, the last row of `data` wins
    users_index - dict mapping winery label -> row position in X
    items_index - dict mapping designation label -> column position in X

    Rows and columns follow the order of first appearance in `data`, so the
    layout is identical on every run (iterating a set of strings is not).
    Missing labels are collapsed into a single NaN row / column.
    """
    itemField = 'designation'
    userField = 'winery'
    valueField = 'points'

    def _normalise(labels):
        # NaN != NaN, so distinct NaN objects would become distinct dict keys;
        # map every missing label onto the one np.nan object instead.
        return [np.nan if pd.isna(label) else label for label in labels]

    userList = _normalise(data[userField].tolist())
    itemList = _normalise(data[itemField].tolist())
    valueList = data[valueField].tolist()

    # dict.fromkeys drops repeats while keeping first-seen order.
    users = list(dict.fromkeys(userList))
    items = list(dict.fromkeys(itemList))

    users_index = {user: position for position, user in enumerate(users)}
    items_index = {item: position for position, item in enumerate(items)}

    # Fill one preallocated float array instead of a Python list per item.
    matrix = np.zeros((len(users), len(items)), dtype=float)
    for user, item, value in zip(userList, itemList, valueList):
        matrix[users_index[user], items_index[item]] = value

    X = pd.DataFrame(matrix, index=users, columns=items)

    return X, users_index, items_index

In [None]:
%%time
# Build the winery x designation matrix of review points for the data slice.
user_item_matrix, users_index, items_index = create_utility_matrix(data3)

CPU times: user 18.7 s, sys: 1.77 s, total: 20.5 s
Wall time: 20.1 s


In [None]:
# Display the interaction matrix (rows: wineries, columns: designations)
user_item_matrix

Unnamed: 0,NaN,Château du Grand Vernay,Silhouette,Jenkins Ranch,Edles Tal,OSU Woodhall III,Grillo Parlante,Glintzberg,Blanc de Noirs Méthode Champenoise,Käferberg Reserve,...,Fleur de Fonplégade,Classico,Money Road Cuvée,Companhia das Lezírias Tyto Alba,Sweet Claire Late Harvest,Silvaspoons Vineyard,Andión,Rive di Refrontolo Millesimato Extra Dry,Abtsberg Spätlese Grosse Lage,Dry
Soos Creek,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Domaine Rieflé-Landmann,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Sheldrake Point,87.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,86.0
Monte del Frà,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Recorba,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Equinox,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
My Essential,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Feliz Noche,89.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ervideira,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Select the test row: the ratings of the winery 'San Giuseppe', kept as a one-row frame
user_item_matrix__test = user_item_matrix.loc[['San Giuseppe']]
user_item_matrix__test

Unnamed: 0,NaN,Château du Grand Vernay,Silhouette,Jenkins Ranch,Edles Tal,OSU Woodhall III,Grillo Parlante,Glintzberg,Blanc de Noirs Méthode Champenoise,Käferberg Reserve,...,Fleur de Fonplégade,Classico,Money Road Cuvée,Companhia das Lezírias Tyto Alba,Sweet Claire Late Harvest,Silvaspoons Vineyard,Andión,Rive di Refrontolo Millesimato Extra Dry,Abtsberg Spätlese Grosse Lage,Dry
San Giuseppe,86.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Labels of the training rows: the unique wineries of the slice with two entries removed
# by position (index 0 first, then index 7 of the remaining array).
# NOTE(review): the variable is called `taster_names` but holds winery names. The
# positional deletes presumably drop the test winery and one other entry — verify,
# because they silently depend on the row order of `data3`.
taster_names = np.delete(data3['winery'].unique(), 0)
taster_names = np.delete(taster_names, 7)
taster_names

array(['1000 Stories', 'Aresti', 'Bonny Doon', ..., 'Philippe Fontaine',
       'Pont de Chevalier', 'Perelada'], dtype=object)

In [None]:
# Remaining part of the matrix, used for training (rows follow the order of taster_names)
user_item_matrix__train = user_item_matrix.loc[taster_names]
user_item_matrix__train

Unnamed: 0,NaN,Château du Grand Vernay,Silhouette,Jenkins Ranch,Edles Tal,OSU Woodhall III,Grillo Parlante,Glintzberg,Blanc de Noirs Méthode Champenoise,Käferberg Reserve,...,Fleur de Fonplégade,Classico,Money Road Cuvée,Companhia das Lezírias Tyto Alba,Sweet Claire Late Harvest,Silvaspoons Vineyard,Andión,Rive di Refrontolo Millesimato Extra Dry,Abtsberg Spätlese Grosse Lage,Dry
1000 Stories,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Aresti,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bonny Doon,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Brian Carter Cellars,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Château du Tertre,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Château Franc Pipeau,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ferghettina,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Philippe Fontaine,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Pont de Chevalier,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
%%time
# Decompose the transposed (designation x winery) training matrix.
# full_matrices=False computes the reduced SVD: U gets min(M, N) columns instead of
# M, which avoids building the square U matrix. Only the leading r columns of U and V
# are used afterwards, and those are the same in the full and the reduced form.
U, S, VT = np.linalg.svd(user_item_matrix__train.T, full_matrices=False)
V = VT.T

CPU times: user 24min 5s, sys: 40.2 s, total: 24min 45s
Wall time: 12min 54s


In [None]:
# Left singular vectors of the transposed (designation x winery) training matrix:
# each row relates one designation (item) to the latent factors.
U.shape

(11639, 11639)

In [None]:
# Right singular vectors: each row relates one training winery (user) to the latent factors.
V.shape

(9078, 9078)

In [None]:
# Singular values are returned as a 1-D array
S.shape

(9078,)

In [None]:
# Expand the singular values into a square diagonal matrix.
Sigma = np.diag(S)
Sigma.shape

(9078, 9078)

In [None]:
# Diagonal matrix of singular values
Sigma

array([[5.78498871e+03, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 1.38995848e+03, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 1.11925590e+03, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        2.55550452e-14, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 2.15947495e-14, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 3.83921822e-15]])

In [None]:
# Keep the first r = 3 singular values and the matching singular vectors
r=3
Ur = U[:, :r]
Sr = Sigma[:r, :r]
Vr = V[:, :r]
# Rating row of the test winery as a 1 x n_items matrix.
# np.asmatrix replaces the np.mat alias, which was removed in NumPy 2.0; the matrix
# type is kept because `*` is later used on it as matrix multiplication.
test_winery = np.asmatrix(user_item_matrix__test.values)
test_winery.shape, test_winery

((1, 11639), matrix([[86.,  0.,  0., ...,  0.,  0.,  0.]]))

In [None]:
# Project the test winery's rating row onto the r latent factors: row * Ur * Sr^-1.
# `*` is matrix multiplication here because test_winery is a NumPy matrix.
tmp = test_winery * Ur * np.linalg.inv(Sr)
tmp

matrix([[ 0.01536747, -0.05943863, -0.00133909]])

In [None]:
# Flatten the 1 x r projection into a plain 1-D array. ravel() works for any r,
# whereas picking tmp[0,0], tmp[0,1], tmp[0,2] by hand only works for r == 3.
test_winery_result = np.asarray(tmp).ravel()
test_winery_result

array([ 0.01536747, -0.05943863, -0.00133909])

In [None]:
# Cosine similarity between the test winery's latent vector
# and the latent vectors of the training wineries (rows of Vr)
cos_sim = cosine_similarity(Vr, test_winery_result.reshape(1, -1))
cos_sim[:10]

array([[ 2.92109874e-20],
       [-2.62468151e-02],
       [-1.25002698e-18],
       [ 7.98440559e-19],
       [ 3.05655190e-17],
       [-6.43563605e-16],
       [ 1.00596290e-01],
       [ 9.17641541e-02],
       [ 9.81399704e-02],
       [-1.21050794e-18]])

In [None]:
# Flatten the (n, 1) similarity column into a 1-D array of n values
cos_sim_list = cos_sim.ravel()
cos_sim_list[:10]

array([ 2.92109874e-20, -2.62468151e-02, -1.25002698e-18,  7.98440559e-19,
        3.05655190e-17, -6.43563605e-16,  1.00596290e-01,  9.17641541e-02,
        9.81399704e-02, -1.21050794e-18])

In [None]:
# Position of the most similar winery: first index after sorting by decreasing similarity.
recommended_winery_id = np.argsort(-cos_sim_list)[0]
recommended_winery_id

163

In [None]:
# Show the test winery's rating row again
test_winery

matrix([[86.,  0.,  0., ...,  0.,  0.,  0.]])

In [None]:
# Column labels (designations) in matrix column order; used to turn a column
# position back into a wine name.
wine_list = list(user_item_matrix.columns)
def film_name_by_movieid(ind, wines=None):
    """
    Return the wine (designation) label stored at position `ind`.

    The function name is a leftover from a movie-recommender example; here it
    maps a column position of the interaction matrix to a wine name.

    Parameters:
    ind - position of the wine in the list of labels
    wines - optional sequence of labels to look up; defaults to the
            notebook-level `wine_list`

    Returns the label, or '' when `ind` is out of range or is not a valid index.
    """
    if wines is None:
        wines = wine_list
    try:
        return wines[ind]
    except (IndexError, TypeError):
        # Only "no such position" is treated as "no name"; any other error
        # (for example a missing wine_list) is allowed to surface.
        return ''

In [None]:
# Wines of the current (test) winery: print at most 20 entries with a positive score
shown = 0
for idx, item in enumerate(np.asarray(test_winery).ravel()):
    if not item > 0:
        continue
    wine_title = film_name_by_movieid(idx)
    print('{} - {} - {}'.format(idx, wine_title, item))
    shown += 1
    if shown == 20:
        break

0 - nan - 86.0
8589 - Reserve - 87.0


In [None]:
# Wines of the winery most similar to the test winery (at most 20 are printed).
# Entries of cos_sim_list follow the rows of Vr, which follow the rows of the training
# matrix, so the winery label is looked up there instead of being hard-coded.
recommended_winery = user_item_matrix__train.index[recommended_winery_id]
recommended_user_item_matrix = user_item_matrix.loc[[recommended_winery]]
i=1
for idx, item in enumerate(np.ndarray.flatten(np.array(recommended_user_item_matrix))):
    if item > 0:
        wine_title = film_name_by_movieid(idx)
        print('{} - {} - {}'.format(idx, wine_title, item))
        if i==20:
            break
        else:
            i+=1

0 - nan - 91.0
8461 - Bussia Soprana - 91.0
9424 - Rocche di Castiglione - 89.0
