# Project: Category prediction of IKEA furniture

## Data Visualization

In [68]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

# Draw matplotlib figures inline, directly beneath the cell that creates them.
%matplotlib inline
plt.style.use('ggplot') # emulate pretty R-style (ggplot2) plots

In [79]:
# Load the scraped catalogue, then discard its first column, which only holds
# the entry index and is not a feature.
IKEA_df = pd.read_csv('IKEA_SA_Furniture_Web_Scrapings_sss.csv')
IKEA_df = IKEA_df.drop(columns=IKEA_df.columns[0])

In [70]:
# Dimensions of the loaded frame as (rows, columns).
IKEA_df.shape

(3694, 13)

In [71]:
# Dtype pandas inferred for each column.
IKEA_df.dtypes

item_id                int64
name                  object
category              object
price                float64
old_price             object
sellable_online         bool
link                  object
other_colors          object
short_description     object
designer              object
depth                float64
height               float64
width                float64
dtype: object

In [72]:
# Non-null entries per column; a value below the row count marks missing data.
IKEA_df.count()

item_id              3694
name                 3694
category             3694
price                3694
old_price            3694
sellable_online      3694
link                 3694
other_colors         3694
short_description    3694
designer             3694
depth                2231
height               2706
width                3105
dtype: int64

In [73]:
# Summary statistics of the numeric columns, rounded to two decimals.
IKEA_df.describe().round(2)

Unnamed: 0,item_id,price,depth,height,width
count,3694.0,3694.0,2231.0,2706.0,3105.0
mean,48632396.79,1078.21,54.38,101.68,104.47
std,28887094.1,1374.65,29.96,61.1,71.13
min,58487.0,3.0,1.0,1.0,1.0
25%,20390574.0,180.9,38.0,67.0,60.0
50%,49288078.0,544.7,47.0,83.0,80.0
75%,70403572.75,1429.5,60.0,124.0,140.0
max,99932615.0,9585.0,257.0,700.0,420.0


In [91]:
# Print the distinct category labels, followed by a one-line total.
nun = IKEA_df['category'].nunique()
print(IKEA_df['category'].unique())
print(f'There are {nun} categories in total')

['Bar furniture' 'Beds' 'Bookcases & shelving units'
 'Cabinets & cupboards' 'Café furniture' 'Chairs'
 'Chests of drawers & drawer units' "Children's furniture"
 'Nursery furniture' 'Outdoor furniture' 'Room dividers'
 'Sideboards, buffets & console tables' 'Sofas & armchairs'
 'Tables & desks' 'Trolleys' 'TV & media furniture' 'Wardrobes']
There are 17 categories in total


In [131]:
# Report how many distinct values the 'designer' column holds.
nun = IKEA_df['designer'].nunique()
print(f'There are {nun} designers in total')

There are 381 designers in total


In [110]:
# Store the character length of each 'designer' string in a helper column,
# then summarise the distribution of those lengths.
IKEA_df['designer_len'] = IKEA_df['designer'].str.len() 
IKEA_df['designer_len'].describe()

In [134]:
# Size of the subset whose 'designer' text is longer than 50 characters.
IKEA_df.loc[IKEA_df['designer_len'].gt(50)].shape

(166, 14)

In [133]:
# Peek at the first two rows whose 'designer' text is longer than 50 characters.
IKEA_df.loc[IKEA_df['designer_len'].gt(50)].head(2)

Unnamed: 0,item_id,name,category,price,old_price,sellable_online,link,other_colors,short_description,designer,depth,height,width,designer_len
51,50468953,VATTVIKEN,Beds,995.0,No old price,True,https://www.ikea.com/sa/en/p/vattviken-armchai...,False,Armchair-bed,504.689.53 Small and easy-to-place chair-bed w...,83.0,86.0,92.0,403
77,90331091,RIDABU,Beds,100.0,SR 125,True,https://www.ikea.com/sa/en/p/ridabu-mirror-doo...,False,"Mirror door, 40x120 cm",903.310.91 The door can be hung with the openi...,,120.0,40.0,199


## Data Cleaning

In [81]:
# Convert the 'Yes'/'No' flags in 'other_colors' to booleans.
# Series.map yields NaN for every value missing from the mapping, so the
# booleans are mapped to themselves as well: re-running this cell on an
# already-converted column then leaves it unchanged instead of wiping it to NaN.
IKEA_df['other_colors'] = IKEA_df['other_colors'].map(
    {'Yes': True, 'No': False, True: True, False: False}
)

In [82]:
# Size of the subset of rows that have a missing value in at least one column.
IKEA_df.loc[IKEA_df.isna().any(axis='columns')].shape

(1795, 13)

In [86]:
# Rows that have a missing value in at least one column.
IKEA_df.loc[IKEA_df.isna().any(axis='columns')]

Unnamed: 0,item_id,name,category,price,old_price,sellable_online,link,other_colors,short_description,designer,depth,height,width
0,90420332,FREKVENS,Bar furniture,265.0,No old price,True,https://www.ikea.com/sa/en/p/frekvens-bar-tabl...,False,"Bar table, in/outdoor, 51x51 cm",Nicholai Wiig Hansen,,99.0,51.0
1,368814,NORDVIKEN,Bar furniture,995.0,No old price,False,https://www.ikea.com/sa/en/p/nordviken-bar-tab...,False,"Bar table, 140x80 cm",Francis Cayouette,,105.0,80.0
2,9333523,NORDVIKEN / NORDVIKEN,Bar furniture,2095.0,No old price,False,https://www.ikea.com/sa/en/p/nordviken-nordvik...,False,Bar table and 4 bar stools,Francis Cayouette,,,
7,60155602,DALFRED,Bar furniture,195.0,No old price,True,https://www.ikea.com/sa/en/p/dalfred-bar-stool...,False,"Bar stool, 63-74 cm",Sarah Fager,50.0,,50.0
9,69304221,EKEDALEN / EKEDALEN,Bar furniture,2176.0,"SR 2,375",True,https://www.ikea.com/sa/en/p/ekedalen-ekedalen...,False,Bar table and 4 bar stools,Ehlén Johansson,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3689,99157902,ELVARLI,Wardrobes,750.0,SR 820,True,https://www.ikea.com/sa/en/p/elvarli-1-section...,False,"1 section, 92x51x222-350 cm",Ehlén Johansson,50.0,,91.0
3690,9158152,ELVARLI,Wardrobes,1572.0,"SR 1,755",True,https://www.ikea.com/sa/en/p/elvarli-2-section...,False,"2 sections, 135x51x222-350 cm",Ehlén Johansson,50.0,,135.0
3691,59157541,ELVARLI,Wardrobes,924.0,"SR 1,050",True,https://www.ikea.com/sa/en/p/elvarli-2-section...,False,"2 sections, 175x51x222-350 cm",Ehlén Johansson,50.0,,175.0
3692,89157573,ELVARLI,Wardrobes,2745.0,"SR 3,130",True,https://www.ikea.com/sa/en/p/elvarli-3-section...,False,"3 sections, 178x51x222-350 cm",Ehlén Johansson,50.0,,178.0


In [83]:
# Rows in which all three dimension columns are missing at once.
size = ['depth', 'height', 'width']
IKEA_df.loc[IKEA_df[size].isna().all(axis='columns')]

Unnamed: 0,item_id,name,category,price,old_price,sellable_online,link,other_colors,short_description,designer,depth,height,width
2,9333523,NORDVIKEN / NORDVIKEN,Bar furniture,2095.0,No old price,False,https://www.ikea.com/sa/en/p/nordviken-nordvik...,False,Bar table and 4 bar stools,Francis Cayouette,,,
9,69304221,EKEDALEN / EKEDALEN,Bar furniture,2176.0,"SR 2,375",True,https://www.ikea.com/sa/en/p/ekedalen-ekedalen...,False,Bar table and 4 bar stools,Ehlén Johansson,,,
28,29297227,STENSELE / NORRARYD,Bar furniture,1340.0,No old price,True,https://www.ikea.com/sa/en/p/stensele-norraryd...,False,Bar table and 2 bar stools,Nike Karlsson/Maja Ganszyniec,,,
32,19297275,NORRÅKER / NORRÅKER,Bar furniture,1266.0,"SR 1,385",True,https://www.ikea.com/sa/en/p/norraker-norraker...,False,"Bar table and 2 bar stools, 7...",J Karlsson/N Karlsson,,,
34,20336619,HENRIKSDAL,Bar furniture,40.0,No old price,True,https://www.ikea.com/sa/en/p/henriksdal-cover-...,False,Cover for bar stool with backrest,IKEA of Sweden,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3446,30323598,BILLSBRO,TV & media furniture,49.0,No old price,True,https://www.ikea.com/sa/en/p/billsbro-handle-s...,False,"Handle, 1880 mm",H Preutz/A Fredriksson,,,
3447,80414256,VASSVIKEN,TV & media furniture,100.0,No old price,True,https://www.ikea.com/sa/en/p/vassviken-door-dr...,False,"Door/drawer front, 60x38 cm",IKEA of Sweden,,,
3450,60334312,BILLSBRO,TV & media furniture,35.0,No old price,True,https://www.ikea.com/sa/en/p/billsbro-handle-w...,False,"Handle, 320 mm",H Preutz/A Fredriksson,,,
3576,89325289,PLATSA,Wardrobes,450.0,SR 475,True,https://www.ikea.com/sa/en/p/platsa-open-shelv...,False,"Open shelving unit, 140x40x63 cm",Ola Wihlborg/IKEA of Sweden,,,
