# Data Gathering

In [1]:
from   farfetch import Farfetch
import pandas   as pd

In [2]:
# create the project's Farfetch helper (imported from the local `farfetch`
# module); constructing it prints the rating / customer / product totals
# shown in this cell's output
ff = Farfetch()

Total number of product ratings:   21641
Total number of in-stock ratings:  16105
Total number of unique customers:  9667
Total number of unique products:   13518


## Farfetch Customer Reviews

In [3]:
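# NOTE: the calls below are left commented out. Judging by their names they
# wipe the stored reviews and re-scrape the website, so they are presumably a
# one-off data-collection step (confirm in farfetch.py before enabling).

# delete customer reviews from the existing Mongo NoSQL database
# ff.clear_review_collection()

# collect 100,000 customer reviews from the website
# ff.parse_site_reviews(100000)
# ff.save_reviews_to_json('../Data/farfetch_customer_reviews.json')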

In [4]:
# check number of total documents in review collection
# (the empty filter {} matches every document)
ff.review_collection.count_documents({})

100000

In [5]:
# examples of customer reviews
# Ask the cursor for the first five documents only (limit) instead of loading
# the whole collection into a Python list and slicing it afterwards.
# The projection {'_id': 0} leaves Mongo's internal id out of the result.
list(ff.review_collection.find({}, {'_id': 0}).limit(5))

[{'Date': 'Today',
  'Rating': 4.0,
  'Pieces': [{'Description': 'Sunnei, white Sabot slip-on suede low-top sneakers',
    'URL': 'https://www.farfetch.com/shopping/women/sunnei-white-sabot-slip-on-suede-low-top-sneakers-item-13571910.aspx'}],
  'Ordered From': "O'",
  'Reviewed by': 'JI SULIM'},
 {'Date': 'Today',
  'Rating': 5.0,
  'Pieces': [{'Description': 'Valentino, Valentino Garavani Open sneakers',
    'URL': 'https://www.farfetch.com/shopping/women/valentino-valentino-garavani-open-sneakers-item-13563167.aspx'}],
  'Ordered From': 'BIONDINI PARIS',
  'Reviewed by': 'Pitchanun',
  'Review': 'The service was superb!'},
 {'Date': 'Today',
  'Rating': 5.0,
  'Pieces': [{'Description': 'Dolce & Gabbana, DOLCE & GABBANA GH570AFUFFJ B0074 ??? Natural (Vegetable)->Cotton',
    'URL': 'https://www.farfetch.com/shopping/women/dolce-gabbana-dolce-gabbana-gh570afuffj-b0074-natural-vegetable-cotton-item-11487905.aspx'}],
  'Ordered From': 'Boutique',
  'Reviewed by': 'Marcio AlexandreWust'

In [6]:
# item information starts being collected around March 28, 2019
# Fetch only documents 14400-14404 by skipping/limiting on the cursor rather
# than materialising every review in memory just to slice five of them.
# The projection {'_id': 0} leaves Mongo's internal id out of the result.
list(ff.review_collection.find({}, {'_id': 0}).skip(14400).limit(5))

[{'Date': '3/28/2019', 'Rating': 3.0, 'Pieces': []},
 {'Date': '3/28/2019', 'Rating': 5.0, 'Pieces': []},
 {'Date': '3/28/2019',
  'Rating': 5.0,
  'Pieces': [{'Description': 'Jil Sander, zip pocket shirt',
    'URL': 'https://www.farfetch.com/shopping/women/jil-sander-zip-pocket-shirt-item-13399429.aspx'}],
  'Ordered From': 'DELIBERTI',
  'Reviewed by': 'x'},
 {'Date': '3/28/2019', 'Rating': 5.0, 'Pieces': []},
 {'Date': '3/28/2019', 'Rating': 5.0, 'Pieces': []}]

## Farfetch Product Details

In [7]:
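# NOTE: the calls below are left commented out. Judging by their names they
# wipe the stored product details and re-scrape the website, so they are
# presumably a one-off data-collection step (confirm in farfetch.py before
# enabling).

# delete product details from the existing Mongo NoSQL database
# ff.clear_product_collection()

# collect 18,504 product details from the website
# ff.parse_site_products(0)
# ff.save_products_to_json('../Data/farfetch_product_details.json')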

In [8]:
# check number of total documents in product collection
# (the empty filter {} matches every document)
ff.product_collection.count_documents({})

18504

In [9]:
# examples of product details
# Ask the cursor for the first five documents only (limit) instead of loading
# the whole collection into a Python list and slicing it afterwards.
# The projection {'_id': 0} leaves Mongo's internal id out of the result.
list(ff.product_collection.find({}, {'_id': 0}).limit(5))

[{'URL': 'https://www.farfetch.com/shopping/women/sunnei-white-sabot-slip-on-suede-low-top-sneakers-item-13571910.aspx',
  'Out of Stock': True},
 {'URL': 'https://www.farfetch.com/shopping/women/valentino-valentino-garavani-open-sneakers-item-13563167.aspx',
  'Original': '$695',
  'Designer': 'Valentino',
  'Product': 'Valentino Garavani Open sneakers',
  'Gender': 'Women',
  'Category': 'Shoes',
  'Subcategory': 'Sneakers',
  'Style': 'RW2S0781BLU',
  'Color': 'JN5 BIANCO BLUE',
  'Made In': 'Made in Italy'},
 {'URL': 'https://www.farfetch.com/shopping/women/dolce-gabbana-dolce-gabbana-gh570afuffj-b0074-natural-vegetable-cotton-item-11487905.aspx',
  'Out of Stock': True},
 {'URL': 'https://www.farfetch.com/shopping/women/joseph-slim-fit-trousers-item-13558158.aspx',
  'Original': '$252',
  'Discount': '50% Off',
  'On Sale': '$126',
  'Designer': 'Joseph',
  'Product': 'slim-fit trousers',
  'Gender': 'Women',
  'Category': 'Clothing',
  'Subcategory': 'Slim Pants',
  'Style': 'JP0

## Data Cleaning

In [10]:
# check utility matrix for recommender systems
# get_utility_matrix() returns four objects and prints the data-set totals;
# the displayed utility matrix has one row per rating (User, URL, Rating).
# NOTE(review): rows 0 and 1 of the output look like the same
# (User, URL, Rating) triple — check whether duplicate ratings should be
# dropped before modelling.
utility_matrix, in_stock_reviews, users, items = ff.get_utility_matrix()
utility_matrix.head()

Total number of product ratings:   21641
Total number of in-stock ratings:  16105
Total number of unique customers:  9667
Total number of unique products:   13518


Unnamed: 0,User,URL,Rating
0,Pitchanun,https://www.farfetch.com/shopping/women/valent...,5.0
1,Pitchanun,https://www.farfetch.com/shopping/women/valent...,5.0
2,Marcel G.Torres,https://www.farfetch.com/shopping/women/valent...,4.0
3,Ebba Hellberg,https://www.farfetch.com/shopping/women/valent...,5.0
4,林寶鳳,https://www.farfetch.com/shopping/women/joseph...,5.0


In [11]:
# count missing values per column, then preview the first five rows
print(in_stock_reviews.isna().sum())
in_stock_reviews.head(5)

User           0
Item           0
URL            0
Rating         0
Category       0
Color          0
Designer       0
Discount       0
Gender         0
Made In        0
On Sale        0
Original       0
Product        0
Style          0
Subcategory    0
dtype: int64


Unnamed: 0,User,Item,URL,Rating,Category,Color,Designer,Discount,Gender,Made In,On Sale,Original,Product,Style,Subcategory
0,Pitchanun,"Valentino, Valentino Garavani Open sneakers",https://www.farfetch.com/shopping/women/valent...,5.0,Shoes,JN5 BIANCO BLUE,Valentino,0,Women,Made in Italy,695,695,Valentino Garavani Open sneakers,RW2S0781BLU,Sneakers
1,Pitchanun,"Valentino, Valentino Garavani Open sneakers",https://www.farfetch.com/shopping/women/valent...,5.0,Shoes,JN5 BIANCO BLUE,Valentino,0,Women,Made in Italy,695,695,Valentino Garavani Open sneakers,RW2S0781BLU,Sneakers
2,Marcel G.Torres,"Valentino, Valentino Garavani Open sneakers",https://www.farfetch.com/shopping/women/valent...,4.0,Shoes,JN5 BIANCO BLUE,Valentino,0,Women,Made in Italy,695,695,Valentino Garavani Open sneakers,RW2S0781BLU,Sneakers
3,Ebba Hellberg,"Valentino, Valentino Garavani Open sneakers",https://www.farfetch.com/shopping/women/valent...,5.0,Shoes,JN5 BIANCO BLUE,Valentino,0,Women,Made in Italy,695,695,Valentino Garavani Open sneakers,RW2S0781BLU,Sneakers
4,林寶鳳,"Joseph, slim-fit trousers",https://www.farfetch.com/shopping/women/joseph...,5.0,Clothing,0211 SMOKE,Joseph,50,Women,Imported,126,252,slim-fit trousers,JP000631,Slim Pants


In [12]:
# example of unique users
# (the index labels in the output are non-consecutive — 0, 2, 3, 4, 6 — so
# they were presumably carried over from the de-duplicated review rows
# rather than reset; verify in get_utility_matrix)
users.head()

Unnamed: 0,User
0,Pitchanun
2,Marcel G.Torres
3,Ebba Hellberg
4,林寶鳳
6,VIVIANE KAZMIERCZAK


In [13]:
# example of unique in-stock items
# (one row per item with its product attributes; like `users`, the index
# labels shown in the output are non-consecutive)
items.head()

Unnamed: 0,Item,URL,Category,Color,Designer,Discount,Gender,Made In,On Sale,Original,Product,Style,Subcategory
0,"Valentino, Valentino Garavani Open sneakers",https://www.farfetch.com/shopping/women/valent...,Shoes,JN5 BIANCO BLUE,Valentino,0,Women,Made in Italy,695,695,Valentino Garavani Open sneakers,RW2S0781BLU,Sneakers
4,"Joseph, slim-fit trousers",https://www.farfetch.com/shopping/women/joseph...,Clothing,0211 SMOKE,Joseph,50,Women,Imported,126,252,slim-fit trousers,JP000631,Slim Pants
7,"Joseph, slim-fit tank top",https://www.farfetch.com/shopping/women/joseph...,Clothing,0020 WHITE,Joseph,40,Women,Imported,155,259,slim-fit tank top,JF003206,Vests & Tank Tops
9,"Joseph, scoop neck vest top",https://www.farfetch.com/shopping/women/joseph...,Clothing,0358 CIEL,Joseph,40,Women,Imported,155,259,scoop neck vest top,JF003206,Vests & Tank Tops
11,"Lela Rose, bow neck short dress",https://www.farfetch.com/shopping/women/lela-r...,Clothing,LAVENDER,Lela Rose,30,Women,Made in United States,1043,1490,bow neck short dress,R199233,Day Dresses


## Product Similarity

In [14]:
# compute the content-based product similarity and cache both results on disk
# so the following cells can reload them without recomputing.
# product_similarity() also prints the data-set totals and the per-column
# unique counts shown in this cell's output.
similarity_features, similarity_matrix = ff.product_similarity()
similarity_features.to_pickle("../Data/content_similarity_features_GMC.pkl")
similarity_matrix.to_pickle(  "../Data/content_similarity_matrix_GMC.pkl")

Total number of product ratings:   21641
Total number of in-stock ratings:  16105
Total number of unique customers:  9667
Total number of unique products:   13518
URL            13518
Category          29
Color           7788
Designer        1184
Discount          16
Gender             3
Made In            5
Original        1300
Subcategory      374
dtype: int64


In [15]:
# reload the cached feature table and inspect it
# (pickle files can execute arbitrary code when loaded — only read files
# written by this notebook / a trusted source)
# NOTE: this rebinds `similarity_features` to the frame read from disk.
similarity_features = pd.read_pickle("../Data/content_similarity_features_GMC.pkl")
# number of distinct values per feature column
print(similarity_features.nunique())
similarity_features.head()

Original    1300
Discount      16
Gender         3
Made In        5
Category      29
dtype: int64


Unnamed: 0_level_0,Original,Discount,Gender,Made In,Category
URL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
https://www.farfetch.com/shopping/women/valentino-valentino-garavani-open-sneakers-item-13563167.aspx,695,0,Women,Made in Italy,Shoes
https://www.farfetch.com/shopping/women/joseph-slim-fit-trousers-item-13558158.aspx,252,50,Women,Imported,Clothing
https://www.farfetch.com/shopping/women/joseph-slim-fit-tank-top-item-13814471.aspx,259,40,Women,Imported,Clothing
https://www.farfetch.com/shopping/women/joseph-scoop-neck-vest-top-item-13814472.aspx,259,40,Women,Imported,Clothing
https://www.farfetch.com/shopping/women/lela-rose-bow-neck-short-dress-item-13500873.aspx,1490,30,Women,Made in United States,Clothing


In [16]:
# reload the cached item-by-item similarity matrix and inspect it
# (pickle files can execute arbitrary code when loaded — only read files
# written by this notebook / a trusted source)
# NOTE: this rebinds `similarity_matrix` to the frame read from disk.
similarity_matrix = pd.read_pickle("../Data/content_similarity_matrix_GMC.pkl")
# square matrix: one row and one column per product URL
print(similarity_matrix.shape)
similarity_matrix

(13518, 13518)


URL,https://www.farfetch.com/shopping/women/valentino-valentino-garavani-open-sneakers-item-13563167.aspx,https://www.farfetch.com/shopping/women/joseph-slim-fit-trousers-item-13558158.aspx,https://www.farfetch.com/shopping/women/joseph-slim-fit-tank-top-item-13814471.aspx,https://www.farfetch.com/shopping/women/joseph-scoop-neck-vest-top-item-13814472.aspx,https://www.farfetch.com/shopping/women/lela-rose-bow-neck-short-dress-item-13500873.aspx,https://www.farfetch.com/shopping/women/versace-il-tempio-printed-shirt-item-13577269.aspx,https://www.farfetch.com/shopping/women/gucci-gg-marmont-matelasse-leather-super-mini-bag-item-12301550.aspx,https://www.farfetch.com/shopping/women/uma-wang-reconstructed-dress-item-13937869.aspx,https://www.farfetch.com/shopping/women/dolce-gabbana-love-beanie-hat-item-13274741.aspx,https://www.farfetch.com/shopping/women/amapo-flared-jeans-item-12656634.aspx,...,https://www.farfetch.com/shopping/women/gucci-kids-childrens-original-gg-canvas-hat-item-13681559.aspx,https://www.farfetch.com/shopping/women/zanellato-medium-postina-tote-item-12280563.aspx,https://www.farfetch.com/shopping/women/emporio-armani-logo-windbreaker-item-13857873.aspx,https://www.farfetch.com/shopping/women/buffalo-tan-1339-classic-platform-sneakers-item-12608291.aspx,https://www.farfetch.com/shopping/women/prada-turtleneck-fitted-jumper-item-13418237.aspx,https://www.farfetch.com/shopping/women/max-mara-one-shoulder-jumpsuit-item-13851424.aspx,https://www.farfetch.com/shopping/women/yuul-yie-mi-pump-slingback-strap-pumps-item-13063614.aspx,https://www.farfetch.com/shopping/women/saint-laurent-monogram-wallet-on-chain-item-12332221.aspx,https://www.farfetch.com/shopping/women/marsell-paraccia-hi-top-sneakers-item-13007717.aspx,https://www.farfetch.com/shopping/women/steffen-schraut-flared-denim-skirt-item-13700157.aspx
URL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
https://www.farfetch.com/shopping/women/valentino-valentino-garavani-open-sneakers-item-13563167.aspx,1.000000,0.980703,0.988200,0.988200,0.999793,0.999379,0.999998,0.999310,0.995137,0.990253,...,0.999974,0.999997,0.999959,0.999977,0.999998,0.999866,0.994510,0.999998,0.998605,0.990342
https://www.farfetch.com/shopping/women/joseph-slim-fit-trousers-item-13558158.aspx,0.980703,1.000000,0.999075,0.999075,0.984452,0.986965,0.980708,0.987279,0.995173,0.998368,...,0.980622,0.980694,0.980645,0.980650,0.980715,0.983745,0.995769,0.980716,0.989642,0.998331
https://www.farfetch.com/shopping/women/joseph-slim-fit-tank-top-item-13814471.aspx,0.988200,0.999075,1.000000,1.000000,0.991092,0.992969,0.988204,0.993200,0.998463,0.999899,...,0.988130,0.988193,0.988154,0.988158,0.988210,0.990555,0.998794,0.988210,0.994894,0.999890
https://www.farfetch.com/shopping/women/joseph-scoop-neck-vest-top-item-13814472.aspx,0.988200,0.999075,1.000000,1.000000,0.991092,0.992969,0.988204,0.993200,0.998463,0.999899,...,0.988130,0.988193,0.988154,0.988158,0.988210,0.990555,0.998794,0.988210,0.994894,0.999890
https://www.farfetch.com/shopping/women/lela-rose-bow-neck-short-dress-item-13500873.aspx,0.999793,0.984452,0.991092,0.991092,1.000000,0.999887,0.999795,0.999857,0.996920,0.992862,...,0.999762,0.999790,0.999756,0.999760,0.999796,0.999990,0.996416,0.999797,0.999467,0.992939
https://www.farfetch.com/shopping/women/versace-il-tempio-printed-shirt-item-13577269.aspx,0.999379,0.986965,0.992969,0.992969,0.999887,1.000000,0.999381,0.999998,0.997980,0.994529,...,0.999347,0.999376,0.999342,0.999339,0.999383,0.999818,0.997566,0.999384,0.999844,0.994596
https://www.farfetch.com/shopping/women/gucci-gg-marmont-matelasse-leather-super-mini-bag-item-12301550.aspx,0.999998,0.980708,0.988204,0.988204,0.999795,0.999381,1.000000,0.999312,0.995140,0.990256,...,0.999974,0.999999,0.999961,0.999970,0.999999,0.999867,0.994509,1.000000,0.998606,0.990346
https://www.farfetch.com/shopping/women/uma-wang-reconstructed-dress-item-13937869.aspx,0.999310,0.987279,0.993200,0.993200,0.999857,0.999998,0.999312,1.000000,0.998103,0.994733,...,0.999277,0.999307,0.999267,0.999271,0.999313,0.999780,0.997702,0.999314,0.999875,0.994799
https://www.farfetch.com/shopping/women/dolce-gabbana-love-beanie-hat-item-13274741.aspx,0.995137,0.995173,0.998463,0.998463,0.996920,0.997980,0.995140,0.998103,1.000000,0.999132,...,0.995091,0.995133,0.995067,0.995084,0.995141,0.996601,0.999968,0.995143,0.998942,0.999159
https://www.farfetch.com/shopping/women/amapo-flared-jeans-item-12656634.aspx,0.990253,0.998368,0.999899,0.999899,0.992862,0.994529,0.990256,0.994733,0.999132,1.000000,...,0.990184,0.990247,0.990217,0.990221,0.990263,0.992383,0.999378,0.990262,0.996206,1.000000


In [17]:
# per-column summary statistics of the similarity scores
# NOTE(review): in the output every column has a mean around 0.99 and a
# minimum around 0.4-0.56, i.e. almost all products look near-identical to
# each other. This may indicate that one large-magnitude feature (possibly
# the unscaled `Original` price) dominates the score — confirm how
# ff.product_similarity() scales its inputs.
similarity_matrix.describe()

URL,https://www.farfetch.com/shopping/women/valentino-valentino-garavani-open-sneakers-item-13563167.aspx,https://www.farfetch.com/shopping/women/joseph-slim-fit-trousers-item-13558158.aspx,https://www.farfetch.com/shopping/women/joseph-slim-fit-tank-top-item-13814471.aspx,https://www.farfetch.com/shopping/women/joseph-scoop-neck-vest-top-item-13814472.aspx,https://www.farfetch.com/shopping/women/lela-rose-bow-neck-short-dress-item-13500873.aspx,https://www.farfetch.com/shopping/women/versace-il-tempio-printed-shirt-item-13577269.aspx,https://www.farfetch.com/shopping/women/gucci-gg-marmont-matelasse-leather-super-mini-bag-item-12301550.aspx,https://www.farfetch.com/shopping/women/uma-wang-reconstructed-dress-item-13937869.aspx,https://www.farfetch.com/shopping/women/dolce-gabbana-love-beanie-hat-item-13274741.aspx,https://www.farfetch.com/shopping/women/amapo-flared-jeans-item-12656634.aspx,...,https://www.farfetch.com/shopping/women/gucci-kids-childrens-original-gg-canvas-hat-item-13681559.aspx,https://www.farfetch.com/shopping/women/zanellato-medium-postina-tote-item-12280563.aspx,https://www.farfetch.com/shopping/women/emporio-armani-logo-windbreaker-item-13857873.aspx,https://www.farfetch.com/shopping/women/buffalo-tan-1339-classic-platform-sneakers-item-12608291.aspx,https://www.farfetch.com/shopping/women/prada-turtleneck-fitted-jumper-item-13418237.aspx,https://www.farfetch.com/shopping/women/max-mara-one-shoulder-jumpsuit-item-13851424.aspx,https://www.farfetch.com/shopping/women/yuul-yie-mi-pump-slingback-strap-pumps-item-13063614.aspx,https://www.farfetch.com/shopping/women/saint-laurent-monogram-wallet-on-chain-item-12332221.aspx,https://www.farfetch.com/shopping/women/marsell-paraccia-hi-top-sneakers-item-13007717.aspx,https://www.farfetch.com/shopping/women/steffen-schraut-flared-denim-skirt-item-13700157.aspx
count,13518.0,13518.0,13518.0,13518.0,13518.0,13518.0,13518.0,13518.0,13518.0,13518.0,...,13518.0,13518.0,13518.0,13518.0,13518.0,13518.0,13518.0,13518.0,13518.0,13518.0
mean,0.987041,0.986106,0.989585,0.989585,0.988709,0.989686,0.987043,0.989796,0.991355,0.990323,...,0.986993,0.987035,0.987012,0.987009,0.987046,0.988413,0.991313,0.987047,0.99055,0.990352
std,0.035159,0.017494,0.020102,0.020102,0.032946,0.031332,0.035156,0.031123,0.02485,0.021196,...,0.035182,0.035163,0.035174,0.035178,0.035154,0.033381,0.02426,0.035152,0.029463,0.021249
min,0.392873,0.564953,0.528963,0.528963,0.411361,0.424941,0.392898,0.426701,0.481443,0.516945,...,0.392703,0.392833,0.392664,0.39268,0.392913,0.407705,0.48689,0.392939,0.440822,0.516395
25%,0.991012,0.980685,0.988177,0.988177,0.993503,0.995089,0.99101,0.99528,0.995052,0.990228,...,0.990951,0.990999,0.990977,0.990963,0.991012,0.99304,0.994449,0.991016,0.996604,0.990318
50%,0.99861,0.98739,0.99194,0.99194,0.99935,0.999296,0.998612,0.99924,0.99514,0.992755,...,0.998569,0.998606,0.998588,0.998597,0.998615,0.999249,0.994513,0.998615,0.998588,0.992819
75%,0.999974,0.995324,0.997554,0.997554,0.999785,0.999381,0.999973,0.999312,0.999072,0.998107,...,0.999946,0.999972,0.999957,0.99996,0.999974,0.999855,0.998993,0.999972,0.999274,0.998131
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
