#Первый подход

## Модуль импорта

In [1]:
import pandas as pd
import numpy as np

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors

## Прочтение CSV

In [2]:
meta_data = pd.read_csv('Data/out_content.zip')
meta_data.head()

Unnamed: 0,article_id,product_group_name_Accessories,product_group_name_Bags,product_group_name_Cosmetic,product_group_name_Fun,product_group_name_Furniture,product_group_name_Garment Full body,product_group_name_Garment Lower body,product_group_name_Garment Upper body,product_group_name_Garment and Shoe care,...,garment_group_name_Shorts,garment_group_name_Skirts,garment_group_name_Socks and Tights,garment_group_name_Special Offers,garment_group_name_Swimwear,garment_group_name_Trousers,garment_group_name_Trousers Denim,"garment_group_name_Under-, Nightwear",garment_group_name_Unknown,garment_group_name_Woven/Jersey/Knitted mix Baby
0,108775015,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,108775044,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,108775051,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,110065001,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,110065002,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [3]:
meta_data['article_id'][1]

108775044

In [4]:
articles_df = pd.read_csv('Data/articles.csv.zip')
articles_df.head(3)

Unnamed: 0,article_id,product_code,prod_name,product_type_no,product_type_name,product_group_name,graphical_appearance_no,graphical_appearance_name,colour_group_code,colour_group_name,...,department_name,index_code,index_name,index_group_no,index_group_name,section_no,section_name,garment_group_no,garment_group_name,detail_desc
0,108775015,108775,Strap top,253,Vest top,Garment Upper body,1010016,Solid,9,Black,...,Jersey Basic,A,Ladieswear,1,Ladieswear,16,Womens Everyday Basics,1002,Jersey Basic,Jersey top with narrow shoulder straps.
1,108775044,108775,Strap top,253,Vest top,Garment Upper body,1010016,Solid,10,White,...,Jersey Basic,A,Ladieswear,1,Ladieswear,16,Womens Everyday Basics,1002,Jersey Basic,Jersey top with narrow shoulder straps.
2,108775051,108775,Strap top (1),253,Vest top,Garment Upper body,1010017,Stripe,11,Off White,...,Jersey Basic,A,Ladieswear,1,Ladieswear,16,Womens Everyday Basics,1002,Jersey Basic,Jersey top with narrow shoulder straps.


## Функция рекомендаций

> созданная функция принимает `article ID` и `n` количество рекомендаций, предоставленных клиентом, и возвращает лучшие рекомендаций по товарам. Как и ожидалось от системы, основанной на контенте, возвращаемые рекомендации следуют строго по строке описания статьи. Ввод ID статьи используется для индексации dataframe на основе уникального ID статьи ('article_id'). Чтобы избежать большой нагрузки на систему, было принято решение немного оптимизировать работу модели. Так как полная матрица косинусного подобия сжирает очень много ресурсов, то была использована индексированная строка книги (y) для вычисления баллов сходства между всеми остальными строками по одному, создавая массив, который обозначает сходство только для этой конкретной записи. Эта оптимизация позволяет создавать функцию, которая вычисляет оценки только для заданной статьи, а не для всего набора данных. Полученные результаты сортируются по убыванию баллов и используются для индексации в кадре метаданных, чтобы вернуть информацию, относящуюся к лучшим рекомендациям.

### Модуль сборки

In [8]:
articles_df.shape

(105542, 25)

In [9]:
# Input Вход для идентификатора статьи, который возвращает номер индекса 'article_id' для статьи,
# который будет использоваться для вызова dataframe
articleid = input('Article ID: ')
article = articles_df.index[articles_df['article_id'] == int(articleid)]
article

Article ID: 110065011


Int64Index([5], dtype='int64')

In [10]:
# Извлечение отдельной строки, проиндексированной по ID статьи ('article_id'),
# можно использовать переменную 'article', заданную выше
y = np.array(meta_data.loc[article])
# делаем замену формы, для передачи в функцию cos подобия
y = y.reshape(1, -1)
y

array([[110065011,         0,         0,         0,         0,         0,
                0,         0,         0,         0,         0,         0,
                0,         0,         0,         0,         0,         1,
                0,         0,         0,         0,         1,         0,
                0,         0,         0,         0,         0,         0,
                0,         0,         0,         0,         0,         0,
                0,         0,         0,         0,         0,         0,
                0,         1,         0,         0]])

In [11]:
# Используем cos подобие для получения сходства с помощью cos расстояния
cos_sim = cosine_similarity(meta_data, y)
# Создаем dataframe с оценками схожести с ID статьи ('article_id') в качестве индекса 
cos_sim = pd.DataFrame(data=cos_sim, index=meta_data.index)
cos_sim.head()

Unnamed: 0,0
0,1.0
1,1.0
2,1.0
3,1.0
4,1.0


In [13]:
# Input используемый для запроса количества рекомендаций, которые пользователь хотел бы получить.
n_recs = int(input('How many recommendations? '))
# оценки cos подобие сортируем в порядке убывания
cos_sim.sort_values(by = 0, ascending = False, inplace=True)
results = cos_sim.index.values[1:n_recs+1]
results

How many recommendations? 10


array([70358, 70368, 70367, 70366, 70365, 70364, 70363, 70362, 70361,
       70360])

> Ниже представлена тестовая версия с алгоритмов KNN (метод ближайших соседей) в качестве эксперемента

In [14]:
knn = NearestNeighbors(n_neighbors=5)
knn.fit(meta_data)
index2 = knn.kneighbors(X=y, n_neighbors=n_recs+1, return_distance=False).flatten()
results2 = articles_df.iloc[index2].index.values[1:]
results2



array([ 4,  3,  2,  1,  0,  6,  7,  8,  9, 10])

In [15]:
# Используя переменную returned results, проиндексируем исходный фрейм метаданных,
# чтобы вернуть соответствующую информацию для каждой статьи
results_df = articles_df.loc[results2]
results_df.reset_index(inplace=True)
results_df.rename(columns={'prod_name':'Product Name', 'author':'Author',
                               'product_type_name':'Product Type Name', 'product_group_name':'Product Group Name',
                               'index_group_name':'Index Group Name', 'garment_group_name ':'Garment Group Name'}, inplace=True)
results_df

Unnamed: 0,index,article_id,product_code,Product Name,product_type_no,Product Type Name,Product Group Name,graphical_appearance_no,graphical_appearance_name,colour_group_code,...,department_name,index_code,index_name,index_group_no,Index Group Name,section_no,section_name,garment_group_no,garment_group_name,detail_desc
0,4,110065002,110065,OP T-shirt (Idro),306,Bra,Underwear,1010016,Solid,10,...,Clean Lingerie,B,Lingeries/Tights,1,Ladieswear,61,Womens Lingerie,1017,"Under-, Nightwear","Microfibre T-shirt bra with underwired, moulded, lightly padded cups that shape the bust and provide good support. Narrow adjustable shoulder straps and a narrow hook-and-eye fastening at the back. Without visible seams for greater comfort."
1,3,110065001,110065,OP T-shirt (Idro),306,Bra,Underwear,1010016,Solid,9,...,Clean Lingerie,B,Lingeries/Tights,1,Ladieswear,61,Womens Lingerie,1017,"Under-, Nightwear","Microfibre T-shirt bra with underwired, moulded, lightly padded cups that shape the bust and provide good support. Narrow adjustable shoulder straps and a narrow hook-and-eye fastening at the back. Without visible seams for greater comfort."
2,2,108775051,108775,Strap top (1),253,Vest top,Garment Upper body,1010017,Stripe,11,...,Jersey Basic,A,Ladieswear,1,Ladieswear,16,Womens Everyday Basics,1002,Jersey Basic,Jersey top with narrow shoulder straps.
3,1,108775044,108775,Strap top,253,Vest top,Garment Upper body,1010016,Solid,10,...,Jersey Basic,A,Ladieswear,1,Ladieswear,16,Womens Everyday Basics,1002,Jersey Basic,Jersey top with narrow shoulder straps.
4,0,108775015,108775,Strap top,253,Vest top,Garment Upper body,1010016,Solid,9,...,Jersey Basic,A,Ladieswear,1,Ladieswear,16,Womens Everyday Basics,1002,Jersey Basic,Jersey top with narrow shoulder straps.
5,6,111565001,111565,20 den 1p Stockings,304,Underwear Tights,Socks & Tights,1010016,Solid,9,...,Tights basic,B,Lingeries/Tights,1,Ladieswear,62,"Womens Nightwear, Socks & Tigh",1021,Socks and Tights,"Semi shiny nylon stockings with a wide, reinforced trim at the top. Use with a suspender belt. 20 denier."
6,7,111565003,111565,20 den 1p Stockings,302,Socks,Socks & Tights,1010016,Solid,13,...,Tights basic,B,Lingeries/Tights,1,Ladieswear,62,"Womens Nightwear, Socks & Tigh",1021,Socks and Tights,"Semi shiny nylon stockings with a wide, reinforced trim at the top. Use with a suspender belt. 20 denier."
7,8,111586001,111586,Shape Up 30 den 1p Tights,273,Leggings/Tights,Garment Lower body,1010016,Solid,9,...,Tights basic,B,Lingeries/Tights,1,Ladieswear,62,"Womens Nightwear, Socks & Tigh",1021,Socks and Tights,Tights with built-in support to lift the bottom. Black in 30 denier and light amber in 15 denier.
8,9,111593001,111593,Support 40 den 1p Tights,304,Underwear Tights,Socks & Tights,1010016,Solid,9,...,Tights basic,B,Lingeries/Tights,1,Ladieswear,62,"Womens Nightwear, Socks & Tigh",1021,Socks and Tights,"Semi shiny tights that shape the tummy, thighs and calves while also encouraging blood circulation in the legs. Elasticated waist."
9,10,111609001,111609,200 den 1p Tights,304,Underwear Tights,Socks & Tights,1010016,Solid,9,...,Tights basic,B,Lingeries/Tights,1,Ladieswear,62,"Womens Nightwear, Socks & Tigh",1021,Socks and Tights,Opaque matt tights. 200 denier.


In [16]:
print(f'The returned article index results for Cosine Similarity: {results}')
print(f'The returned book index results for K-Nearest Neighbors: {results2}')
print(results == results2)

The returned article index results for Cosine Similarity: [70358 70368 70367 70366 70365 70364 70363 70362 70361 70360]
The returned book index results for K-Nearest Neighbors: [ 4  3  2  1  0  6  7  8  9 10]
[False False False False False False False False False False]


## Функция cos подобия

In [17]:
meta_data

Unnamed: 0,article_id,product_group_name_Accessories,product_group_name_Bags,product_group_name_Cosmetic,product_group_name_Fun,product_group_name_Furniture,product_group_name_Garment Full body,product_group_name_Garment Lower body,product_group_name_Garment Upper body,product_group_name_Garment and Shoe care,...,garment_group_name_Shorts,garment_group_name_Skirts,garment_group_name_Socks and Tights,garment_group_name_Special Offers,garment_group_name_Swimwear,garment_group_name_Trousers,garment_group_name_Trousers Denim,"garment_group_name_Under-, Nightwear",garment_group_name_Unknown,garment_group_name_Woven/Jersey/Knitted mix Baby
0,108775015,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,108775044,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,108775051,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,110065001,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,110065002,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105537,953450001,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
105538,953763001,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
105539,956217002,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
105540,957375001,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
# Функция, которая делает рекомендацию пользователю с помощью cos подобия, на вход принимает 
# внутри нужно руками указать id статьи и она вернет n-рекомендаций
def article_recommend():
    
    title = input('Article ID: ')
    article = articles_df.index[articles_df['article_id'] == int(articleid)]
    n_recs = int(input('How many recommendations? '))
    
    y = np.array(meta_data.loc[article]).reshape(1, -1)
    cos_sim = cosine_similarity(meta_data, y)
    cos_sim = pd.DataFrame(data=cos_sim, index=meta_data.index)
    cos_sim.sort_values(by = 0, ascending = False, inplace=True)
    results = cos_sim.index.values[1:n_recs+1]
    results_df = articles_df.loc[results]
    results_df.reset_index(inplace=True)
    results_df.rename(columns={'prod_name':'Product Name','product_type_name':'Product Type Name', 'product_group_name':'Product Group Name',
                               'index_group_name':'Index Group Name', 'garment_group_name':'Garment Group Name'}, inplace=True)
    results_df = results_df[['article_id', 'Product Name', 'Product Type Name', 'Product Group Name', 'Index Group Name', 'Garment Group Name']]
    return results_df

## Модуль оценки результатов

In [25]:
pd.set_option('display.max_colwidth', None)

In [26]:
article_recommend()

Article ID: 760158001
How many recommendations? 10


Unnamed: 0,article_id,Product Name,Product Type Name,Product Group Name,Index Group Name,Garment Group Name
0,760158001,DIV Rachel denim,Trousers,Garment Lower body,Divided,Unknown
1,760214002,Semide tie dress,Dress,Garment Full body,Ladieswear,Dresses Ladies
2,760208001,Class Cleo bracelet,Bracelet,Accessories,Ladieswear,Accessories
3,760195006,FLORA turtle neck,T-shirt,Garment Upper body,Baby/Children,Jersey Basic
4,760195005,FLORA turtle neck,T-shirt,Garment Upper body,Baby/Children,Jersey Basic
5,760195004,FLORA turtle neck,T-shirt,Garment Upper body,Baby/Children,Jersey Basic
6,760195003,FLORA turtle neck,T-shirt,Garment Upper body,Baby/Children,Jersey Basic
7,760195002,FLORA turtle neck,T-shirt,Garment Upper body,Baby/Children,Jersey Basic
8,760195001,FLORA turtle neck,Top,Garment Upper body,Baby/Children,Jersey Basic
9,760184002,Attila Boot SB,Boots,Shoes,Baby/Children,Shoes


In [27]:
article_recommend()

Article ID: 953763001
How many recommendations? 10


Unnamed: 0,article_id,Product Name,Product Type Name,Product Group Name,Index Group Name,Garment Group Name
0,760158001,DIV Rachel denim,Trousers,Garment Lower body,Divided,Unknown
1,760214002,Semide tie dress,Dress,Garment Full body,Ladieswear,Dresses Ladies
2,760208001,Class Cleo bracelet,Bracelet,Accessories,Ladieswear,Accessories
3,760195006,FLORA turtle neck,T-shirt,Garment Upper body,Baby/Children,Jersey Basic
4,760195005,FLORA turtle neck,T-shirt,Garment Upper body,Baby/Children,Jersey Basic
5,760195004,FLORA turtle neck,T-shirt,Garment Upper body,Baby/Children,Jersey Basic
6,760195003,FLORA turtle neck,T-shirt,Garment Upper body,Baby/Children,Jersey Basic
7,760195002,FLORA turtle neck,T-shirt,Garment Upper body,Baby/Children,Jersey Basic
8,760195001,FLORA turtle neck,Top,Garment Upper body,Baby/Children,Jersey Basic
9,760184002,Attila Boot SB,Boots,Shoes,Baby/Children,Shoes
