In [1]:
import numpy as np
import pandas as pd
from copy import deepcopy
from nanoid import generate


def nano_id():
    return generate(
        '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz',
        size=12)

# Total Overview

In [2]:
data = pd.read_csv('../resource/product_data.csv', index_col=0)
data.shape

(10771, 5)

In [3]:
data.head(10)

Unnamed: 0,Product ID,Category,Sub-Category,Product Name,Unit Price
0,OFF-TEN-10000025,Office Supplies,Storage,"Tenex Lockers, Blue",204.15
1,OFF-SU-10000618,Office Supplies,Supplies,"Acme Trimmer, High Speed",44.58
2,OFF-TEN-10001585,Office Supplies,Storage,"Tenex Box, Single Width",16.53
3,OFF-PA-10001492,Office Supplies,Paper,"Enermax Note Cards, Premium",29.91
4,FUR-FU-10003447,Furniture,Furnishings,"Eldon Light Bulb, Duo Pack",25.26
5,OFF-PA-10001968,Office Supplies,Paper,"Eaton Computer Printout Paper, 8.5 x 11",30.69
6,TEC-CO-10002316,Technology,Copiers,"Brother Personal Copier, Laser",142.89
7,FUR-BO-10003966,Furniture,Bookcases,"Sauder Facets Collection Library, Sky Alder Fi...",170.98
8,OFF-FEL-10001541,Office Supplies,Storage,"Fellowes Lockers, Wire Frame",206.4
9,OFF-ST-10002161,Office Supplies,Storage,"Tenex Trays, Single Width",54.24


In [4]:
data = data.drop(['Product ID'], axis=1)
data = data.rename(columns={'Category': 'category_name', 'Sub-Category': 'second_category_name', 'Product Name': 'product_name', 'Unit Price': 'unit_price'})
data['category_id'] = ''
data['second_category_id'] = ''
data['third_category_id'] = ''
data['third_category_name'] = ''
data['product_id'] = ''
# data['image_url'] = ''
data = data[['product_id', 'product_name', 'category_id', 'category_name', 'second_category_id', 'second_category_name',
              'third_category_id', 'third_category_name', 'unit_price']]

In [5]:
data.head(10)

Unnamed: 0,product_id,product_name,category_id,category_name,second_category_id,second_category_name,third_category_id,third_category_name,unit_price
0,,"Tenex Lockers, Blue",,Office Supplies,,Storage,,,204.15
1,,"Acme Trimmer, High Speed",,Office Supplies,,Supplies,,,44.58
2,,"Tenex Box, Single Width",,Office Supplies,,Storage,,,16.53
3,,"Enermax Note Cards, Premium",,Office Supplies,,Paper,,,29.91
4,,"Eldon Light Bulb, Duo Pack",,Furniture,,Furnishings,,,25.26
5,,"Eaton Computer Printout Paper, 8.5 x 11",,Office Supplies,,Paper,,,30.69
6,,"Brother Personal Copier, Laser",,Technology,,Copiers,,,142.89
7,,"Sauder Facets Collection Library, Sky Alder Fi...",,Furniture,,Bookcases,,,170.98
8,,"Fellowes Lockers, Wire Frame",,Office Supplies,,Storage,,,206.4
9,,"Tenex Trays, Single Width",,Office Supplies,,Storage,,,54.24


In [6]:
data['category_name'].value_counts()

Office Supplies    5957
Technology         2484
Furniture          2330
Name: category_name, dtype: int64

# Office Supplies

In [7]:
office_supplies = deepcopy(data[data['category_name'] == 'Office Supplies'])
office_supplies.shape

(5957, 9)

In [8]:
office_supplies.head(10)

Unnamed: 0,product_id,product_name,category_id,category_name,second_category_id,second_category_name,third_category_id,third_category_name,unit_price
0,,"Tenex Lockers, Blue",,Office Supplies,,Storage,,,204.15
1,,"Acme Trimmer, High Speed",,Office Supplies,,Supplies,,,44.58
2,,"Tenex Box, Single Width",,Office Supplies,,Storage,,,16.53
3,,"Enermax Note Cards, Premium",,Office Supplies,,Paper,,,29.91
5,,"Eaton Computer Printout Paper, 8.5 x 11",,Office Supplies,,Paper,,,30.69
8,,"Fellowes Lockers, Wire Frame",,Office Supplies,,Storage,,,206.4
9,,"Tenex Trays, Single Width",,Office Supplies,,Storage,,,54.24
10,,"KitchenAid Coffee Grinder, Red",,Office Supplies,,Appliances,,,70.47
11,,"Hamilton Beach Refrigerator, Silver",,Office Supplies,,Appliances,,,333.92
13,,"Binney & Smith Pencil Sharpener, Easy-Erase",,Office Supplies,,Art,,,27.93


In [9]:
office_supplies['second_category_name'].value_counts()

Paper         825
Binders       781
Art           708
Storage       684
Envelopes     613
Labels        602
Appliances    587
Fasteners     583
Supplies      574
Name: second_category_name, dtype: int64

In [10]:
office_supplies.loc[:, 'category_id'] = nano_id()
office_supplies.loc[office_supplies['second_category_name'] == 'Paper', 'second_category_id'] = nano_id()
office_supplies.loc[office_supplies['second_category_name'] == 'Binders', 'second_category_id'] = nano_id()
office_supplies.loc[office_supplies['second_category_name'] == 'Art', 'second_category_id'] = nano_id()
office_supplies.loc[office_supplies['second_category_name'] == 'Storage', 'second_category_id'] = nano_id()
office_supplies.loc[office_supplies['second_category_name'] == 'Envelopes', 'second_category_id'] = nano_id()
office_supplies.loc[office_supplies['second_category_name'] == 'Labels', 'second_category_id'] = nano_id()
office_supplies.loc[office_supplies['second_category_name'] == 'Appliances', 'second_category_id'] = nano_id()
office_supplies.loc[office_supplies['second_category_name'] == 'Fasteners', 'second_category_id'] = nano_id()
office_supplies.loc[office_supplies['second_category_name'] == 'Supplies', 'second_category_id'] = nano_id()

In [11]:
office_supplies.head(10)

Unnamed: 0,product_id,product_name,category_id,category_name,second_category_id,second_category_name,third_category_id,third_category_name,unit_price
0,,"Tenex Lockers, Blue",O8xD8rO2X816,Office Supplies,e5a2vp9Bpczy,Storage,,,204.15
1,,"Acme Trimmer, High Speed",O8xD8rO2X816,Office Supplies,KVraCWQHdXOj,Supplies,,,44.58
2,,"Tenex Box, Single Width",O8xD8rO2X816,Office Supplies,e5a2vp9Bpczy,Storage,,,16.53
3,,"Enermax Note Cards, Premium",O8xD8rO2X816,Office Supplies,WtzhL5cVilg6,Paper,,,29.91
5,,"Eaton Computer Printout Paper, 8.5 x 11",O8xD8rO2X816,Office Supplies,WtzhL5cVilg6,Paper,,,30.69
8,,"Fellowes Lockers, Wire Frame",O8xD8rO2X816,Office Supplies,e5a2vp9Bpczy,Storage,,,206.4
9,,"Tenex Trays, Single Width",O8xD8rO2X816,Office Supplies,e5a2vp9Bpczy,Storage,,,54.24
10,,"KitchenAid Coffee Grinder, Red",O8xD8rO2X816,Office Supplies,hwpWTtn7Tl5o,Appliances,,,70.47
11,,"Hamilton Beach Refrigerator, Silver",O8xD8rO2X816,Office Supplies,hwpWTtn7Tl5o,Appliances,,,333.92
13,,"Binney & Smith Pencil Sharpener, Easy-Erase",O8xD8rO2X816,Office Supplies,iqGEIzIl1stw,Art,,,27.93


In [12]:
office_supplies['second_category_id'].value_counts()

WtzhL5cVilg6    825
N5ccdFcsnltk    781
iqGEIzIl1stw    708
e5a2vp9Bpczy    684
ZxKkOmS8NOKD    613
oCfS4bvy1LsH    602
hwpWTtn7Tl5o    587
C4AwzJFyQqMN    583
KVraCWQHdXOj    574
Name: second_category_id, dtype: int64

In [13]:
def random_assign_third_category(df, second_category_name, third_category_names, p):
    if (len(third_category_names) == 0 or len(p) == 0):
        raise ValueError('third_category_name and p must be non-empty lists')
    
    if (len(third_category_names) != len(p)):
        raise ValueError('Length of third_category_name and p must be equal')
    
    print(f'Assigning third category {third_category_names} for second category, {second_category_name}\n')

    row_len = len(df[df['second_category_name'] == second_category_name])

    print(f'Number of rows for {second_category_name}: {row_len}\n')

    random_third_category = np.random.choice(third_category_names, row_len, p=p)

    df.loc[df['second_category_name'] == second_category_name, 'third_category_name'] = random_third_category

    print(df[df['second_category_name'] == second_category_name].value_counts('third_category_name'))
    print('\n')

    for third_category_name in third_category_names:
        df.loc[(df['second_category_name'] == second_category_name) & 
                (df['third_category_name'] == third_category_name), 'third_category_id'] = nano_id()

    print(df[df['second_category_name'] == second_category_name].value_counts('third_category_id'))

## Paper

Paper has 4 third categories: Copy & Printer Paper, Photo Paper, Colored Paper and Large Quantity Paper.

In [14]:
random_assign_third_category(office_supplies, 
                            'Paper',
                            ['Copy & Printer Paper', 'Photo Paper', 'Colored Paper', 'Large Quantity Paper'], 
                            [0.5, 0.2, 0.2, 0.1] )

Assigning third category ['Copy & Printer Paper', 'Photo Paper', 'Colored Paper', 'Large Quantity Paper'] for second category, Paper

Number of rows for Paper: 825

third_category_name
Copy & Printer Paper    403
Colored Paper           174
Photo Paper             159
Large Quantity Paper     89
dtype: int64


third_category_id
0V6sLr6aGtuU    403
qqzbBdEGx2q0    174
MXZvWPGkLDmv    159
gi9OHivrGpvA     89
dtype: int64


## Binders

Binders has 3 third categories: A4 Binders, Pocket Binders and Mini Binders.

In [15]:
random_assign_third_category(office_supplies,
                            'Binders',
                            ['A4 Binders', 'Pocket Binders', 'Mini Binders'],
                            [0.5, 0.3, 0.2])

Assigning third category ['A4 Binders', 'Pocket Binders', 'Mini Binders'] for second category, Binders

Number of rows for Binders: 781

third_category_name
A4 Binders        409
Pocket Binders    224
Mini Binders      148
dtype: int64


third_category_id
gCtco1vmTVuQ    409
yqnCMUrXSiN3    224
38rg4WcUvmop    148
dtype: int64


## Art

Arts has 3 third categories: Oil Paints, Drawing Ink, Chalk

In [16]:
random_assign_third_category(office_supplies,
                            'Art',
                            ['Oil Paints', 'Drawing Ink', 'Chalk'],
                            [0.5, 0.3, 0.2])

Assigning third category ['Oil Paints', 'Drawing Ink', 'Chalk'] for second category, Art

Number of rows for Art: 708

third_category_name
Oil Paints     366
Drawing Ink    195
Chalk          147
dtype: int64


third_category_id
Tt8AQ7EawiV5    366
sEghhe5Z1x5Z    195
8rDeNB23qEHt    147
dtype: int64


## Storage

Storage has 3 third categories: Storage Bins & Boxes, File Folders, File Storage

In [17]:
random_assign_third_category(office_supplies,
                            'Storage',
                            ['Storage Bins & Boxes', 'File Folders', 'File Storage'],
                            [0.4, 0.3, 0.3])

Assigning third category ['Storage Bins & Boxes', 'File Folders', 'File Storage'] for second category, Storage

Number of rows for Storage: 684

third_category_name
Storage Bins & Boxes    270
File Folders            225
File Storage            189
dtype: int64


third_category_id
cGAY8PSjSybf    270
jS5eFcAuwcyS    225
gm6nCZ8aVa3g    189
dtype: int64


## Envelopes

Envelopes has 3 third categories: Business, Catalog, Security

In [18]:
random_assign_third_category(office_supplies,
                            'Envelopes',
                            ['Business', 'Catalog', 'Security'],
                            [0.2, 0.5, 0.3])

Assigning third category ['Business', 'Catalog', 'Security'] for second category, Envelopes

Number of rows for Envelopes: 613

third_category_name
Catalog     292
Security    185
Business    136
dtype: int64


third_category_id
yUwxpiq70mhM    292
HE4b01TkSJ1p    185
Hcw7hKx4Duob    136
dtype: int64


## Labels

Labels has 2 third categories: Address, Shipping, File

In [19]:
random_assign_third_category(office_supplies,
                            'Labels',
                            ['Address', 'Shipping', 'File'],
                            [0.6, 0.2, 0.2])

Assigning third category ['Address', 'Shipping', 'File'] for second category, Labels

Number of rows for Labels: 602

third_category_name
Address     375
File        117
Shipping    110
dtype: int64


third_category_id
SZidrffddp2x    375
4GP71kU2vHhr    117
1nVFuigFYNMQ    110
dtype: int64


## Appliances

Appliances has 1 third categories: Coffee, Water & Snacks

In [20]:
random_assign_third_category(office_supplies,
                            'Appliances',
                            ['Coffee, Water & Snacks'],                            
                            [1.0])

Assigning third category ['Coffee, Water & Snacks'] for second category, Appliances

Number of rows for Appliances: 587

third_category_name
Coffee, Water & Snacks    587
dtype: int64


third_category_id
LoI4lmPulhq4    587
dtype: int64


## Fasteners

Fasteners has 3 third categories: Clips & Fasteners, Hook and Loop Fasteners and Classification Folders

In [21]:
random_assign_third_category(office_supplies,
                            'Fasteners',
                            ['Clips & Fasteners', 'Hook and Loop Fasteners', 'Classification Folders'],
                            [0.3, 0.2, 0.5])

Assigning third category ['Clips & Fasteners', 'Hook and Loop Fasteners', 'Classification Folders'] for second category, Fasteners

Number of rows for Fasteners: 583

third_category_name
Classification Folders     287
Clips & Fasteners          180
Hook and Loop Fasteners    116
dtype: int64


third_category_id
rQn4fOmbQYV6    287
3u6ZiMAsyzWf    180
uQ4KU2vUp8RA    116
dtype: int64


## Supplies

Supplies has 3 third categories: Glue & Glue Sticks, Scissors and Decorative Tapes

In [22]:
random_assign_third_category(office_supplies,
                            'Supplies',
                            ['Glue & Glue Sticks', 'Scissors','Decorative Tapes'],
                            [0.3, 0.3, 0.4])

Assigning third category ['Glue & Glue Sticks', 'Scissors', 'Decorative Tapes'] for second category, Supplies

Number of rows for Supplies: 574

third_category_name
Decorative Tapes      224
Scissors              180
Glue & Glue Sticks    170
dtype: int64


third_category_id
Oj06cWFVJmoH    224
21WujeVWiMhJ    180
AABy1Yj6edjQ    170
dtype: int64


In [23]:
office_supplies.head(60)

Unnamed: 0,product_id,product_name,category_id,category_name,second_category_id,second_category_name,third_category_id,third_category_name,unit_price
0,,"Tenex Lockers, Blue",O8xD8rO2X816,Office Supplies,e5a2vp9Bpczy,Storage,cGAY8PSjSybf,Storage Bins & Boxes,204.15
1,,"Acme Trimmer, High Speed",O8xD8rO2X816,Office Supplies,KVraCWQHdXOj,Supplies,AABy1Yj6edjQ,Glue & Glue Sticks,44.58
2,,"Tenex Box, Single Width",O8xD8rO2X816,Office Supplies,e5a2vp9Bpczy,Storage,cGAY8PSjSybf,Storage Bins & Boxes,16.53
3,,"Enermax Note Cards, Premium",O8xD8rO2X816,Office Supplies,WtzhL5cVilg6,Paper,0V6sLr6aGtuU,Copy & Printer Paper,29.91
5,,"Eaton Computer Printout Paper, 8.5 x 11",O8xD8rO2X816,Office Supplies,WtzhL5cVilg6,Paper,0V6sLr6aGtuU,Copy & Printer Paper,30.69
8,,"Fellowes Lockers, Wire Frame",O8xD8rO2X816,Office Supplies,e5a2vp9Bpczy,Storage,jS5eFcAuwcyS,File Folders,206.4
9,,"Tenex Trays, Single Width",O8xD8rO2X816,Office Supplies,e5a2vp9Bpczy,Storage,gm6nCZ8aVa3g,File Storage,54.24
10,,"KitchenAid Coffee Grinder, Red",O8xD8rO2X816,Office Supplies,hwpWTtn7Tl5o,Appliances,LoI4lmPulhq4,"Coffee, Water & Snacks",70.47
11,,"Hamilton Beach Refrigerator, Silver",O8xD8rO2X816,Office Supplies,hwpWTtn7Tl5o,Appliances,LoI4lmPulhq4,"Coffee, Water & Snacks",333.92
13,,"Binney & Smith Pencil Sharpener, Easy-Erase",O8xD8rO2X816,Office Supplies,iqGEIzIl1stw,Art,sEghhe5Z1x5Z,Drawing Ink,27.93


In [24]:
office_supplies.isnull().sum()

product_id              0
product_name            0
category_id             0
category_name           0
second_category_id      0
second_category_name    0
third_category_id       0
third_category_name     0
unit_price              0
dtype: int64

# Furniture

In [25]:
furniture = deepcopy(data[data['category_name'] == 'Furniture'])
furniture.shape

(2330, 9)

In [26]:
furniture.head(10)

Unnamed: 0,product_id,product_name,category_id,category_name,second_category_id,second_category_name,third_category_id,third_category_name,unit_price
4,,"Eldon Light Bulb, Duo Pack",,Furniture,,Furnishings,,,25.26
7,,"Sauder Facets Collection Library, Sky Alder Fi...",,Furniture,,Bookcases,,,170.98
12,,"Advantus Photo Frame, Erganomic",,Furniture,,Furnishings,,,51.6
24,,"Safco Classic Bookcase, Traditional",,Furniture,,Bookcases,,,438.15
27,,Novimex Swivel Fabric Task Chair,,Furniture,,Chairs,,,150.98
28,,"Harbour Creations Bag Chairs, Red",,Furniture,,Chairs,,,61.62
29,,"Office Star Rocking Chair, Red",,Furniture,,Chairs,,,140.91
32,,Global Deluxe High-Back Manager's Chair,,Furniture,,Chairs,,,285.98
34,,Bevis Round Conference Room Tables and Bases,,Furniture,,Tables,,,179.29
36,,Harbour Creations Steel Folding Chair,,Furniture,,Chairs,,,86.25


In [27]:
furniture['second_category_name'].value_counts()

Furnishings    719
Chairs         647
Bookcases      583
Tables         381
Name: second_category_name, dtype: int64

In [28]:
furniture.loc[:, 'category_id'] = nano_id()
furniture.loc[furniture['second_category_name'] == 'Furnishings', 'second_category_id'] = nano_id()
furniture.loc[furniture['second_category_name'] == 'Chairs', 'second_category_id'] = nano_id()
furniture.loc[furniture['second_category_name'] == 'Bookcases', 'second_category_id'] = nano_id()
furniture.loc[furniture['second_category_name'] == 'Tables', 'second_category_id'] = nano_id()


In [29]:
furniture.head(10)

Unnamed: 0,product_id,product_name,category_id,category_name,second_category_id,second_category_name,third_category_id,third_category_name,unit_price
4,,"Eldon Light Bulb, Duo Pack",TQMnS5aEiOTt,Furniture,uJujcjmijtWM,Furnishings,,,25.26
7,,"Sauder Facets Collection Library, Sky Alder Fi...",TQMnS5aEiOTt,Furniture,QJq4Lp1JUP97,Bookcases,,,170.98
12,,"Advantus Photo Frame, Erganomic",TQMnS5aEiOTt,Furniture,uJujcjmijtWM,Furnishings,,,51.6
24,,"Safco Classic Bookcase, Traditional",TQMnS5aEiOTt,Furniture,QJq4Lp1JUP97,Bookcases,,,438.15
27,,Novimex Swivel Fabric Task Chair,TQMnS5aEiOTt,Furniture,a90C0BfQbneg,Chairs,,,150.98
28,,"Harbour Creations Bag Chairs, Red",TQMnS5aEiOTt,Furniture,a90C0BfQbneg,Chairs,,,61.62
29,,"Office Star Rocking Chair, Red",TQMnS5aEiOTt,Furniture,a90C0BfQbneg,Chairs,,,140.91
32,,Global Deluxe High-Back Manager's Chair,TQMnS5aEiOTt,Furniture,a90C0BfQbneg,Chairs,,,285.98
34,,Bevis Round Conference Room Tables and Bases,TQMnS5aEiOTt,Furniture,E2zR8n3joH68,Tables,,,179.29
36,,Harbour Creations Steel Folding Chair,TQMnS5aEiOTt,Furniture,a90C0BfQbneg,Chairs,,,86.25


In [30]:
furniture['second_category_id'].value_counts()

uJujcjmijtWM    719
a90C0BfQbneg    647
QJq4Lp1JUP97    583
E2zR8n3joH68    381
Name: second_category_id, dtype: int64

## Furnishings

Furnishings has 4 third categories: File Cabinets, Magazine Holders, Shelving, Magazine Holders

In [31]:
random_assign_third_category(furniture, 
                            'Furnishings',
                            ['File Cabinets', 'Magazine Holders', 'Shelving','Magazine Holders'], 
                            [0.3, 0.4, 0.2, 0.1] )

Assigning third category ['File Cabinets', 'Magazine Holders', 'Shelving', 'Magazine Holders'] for second category, Furnishings

Number of rows for Furnishings: 719

third_category_name
Magazine Holders    368
File Cabinets       209
Shelving            142
dtype: int64


third_category_id
YK07LphIAao5    368
iihlE9DOaPPG    209
wrABaPay7bWc    142
dtype: int64


## Chairs

Chairs has 3 third categories: Office Chairs, Game Chairs, Stacking Chairs

In [32]:
random_assign_third_category(furniture, 
                            'Chairs',
                            ['Office Chairs', 'Game Chairs', 'Stacking Chairs'], 
                            [0.3, 0.5, 0.2] )

Assigning third category ['Office Chairs', 'Game Chairs', 'Stacking Chairs'] for second category, Chairs

Number of rows for Chairs: 647

third_category_name
Game Chairs        319
Office Chairs      214
Stacking Chairs    114
dtype: int64


third_category_id
42mepTquRuAo    319
JVAkT98IxHCr    214
cn2Ru6EyCcMS    114
dtype: int64


## Bookcases

Bookcases has 4 third categories: Card Files, Cases & Holders, Desktop Organizers, Carts & Stands

In [33]:
random_assign_third_category(furniture, 
                            'Bookcases',
                            ['Card Files', 'Cases & Holders', 'Desktop Organizers', 'Carts & Stands'], 
                            [0.2, 0.3, 0.3, 0.2] )

Assigning third category ['Card Files', 'Cases & Holders', 'Desktop Organizers', 'Carts & Stands'] for second category, Bookcases

Number of rows for Bookcases: 583

third_category_name
Desktop Organizers    172
Cases & Holders       167
Carts & Stands        130
Card Files            114
dtype: int64


third_category_id
F0RTA6ViSJS7    172
itKSfJOXb5ua    167
E5MdaVxCdRXK    130
gUgpubVoW8ew    114
dtype: int64


## Tables

Tables has 3 third categories: Folding Tables, Office Desks and Accent & Coffee Tables

In [34]:
random_assign_third_category(furniture, 
                            'Tables',
                            ['Folding Tables', 'Office Desks','Accent & Coffee Tables'], 
                            [0.4, 0.3, 0.3] )

Assigning third category ['Folding Tables', 'Office Desks', 'Accent & Coffee Tables'] for second category, Tables

Number of rows for Tables: 381

third_category_name
Folding Tables            148
Office Desks              128
Accent & Coffee Tables    105
dtype: int64


third_category_id
AyKLpwQdYbZp    148
dU9u9SAO1uPO    128
PfeYajBaxsKf    105
dtype: int64


In [35]:
furniture.head(10)

Unnamed: 0,product_id,product_name,category_id,category_name,second_category_id,second_category_name,third_category_id,third_category_name,unit_price
4,,"Eldon Light Bulb, Duo Pack",TQMnS5aEiOTt,Furniture,uJujcjmijtWM,Furnishings,wrABaPay7bWc,Shelving,25.26
7,,"Sauder Facets Collection Library, Sky Alder Fi...",TQMnS5aEiOTt,Furniture,QJq4Lp1JUP97,Bookcases,itKSfJOXb5ua,Cases & Holders,170.98
12,,"Advantus Photo Frame, Erganomic",TQMnS5aEiOTt,Furniture,uJujcjmijtWM,Furnishings,YK07LphIAao5,Magazine Holders,51.6
24,,"Safco Classic Bookcase, Traditional",TQMnS5aEiOTt,Furniture,QJq4Lp1JUP97,Bookcases,F0RTA6ViSJS7,Desktop Organizers,438.15
27,,Novimex Swivel Fabric Task Chair,TQMnS5aEiOTt,Furniture,a90C0BfQbneg,Chairs,42mepTquRuAo,Game Chairs,150.98
28,,"Harbour Creations Bag Chairs, Red",TQMnS5aEiOTt,Furniture,a90C0BfQbneg,Chairs,42mepTquRuAo,Game Chairs,61.62
29,,"Office Star Rocking Chair, Red",TQMnS5aEiOTt,Furniture,a90C0BfQbneg,Chairs,42mepTquRuAo,Game Chairs,140.91
32,,Global Deluxe High-Back Manager's Chair,TQMnS5aEiOTt,Furniture,a90C0BfQbneg,Chairs,JVAkT98IxHCr,Office Chairs,285.98
34,,Bevis Round Conference Room Tables and Bases,TQMnS5aEiOTt,Furniture,E2zR8n3joH68,Tables,AyKLpwQdYbZp,Folding Tables,179.29
36,,Harbour Creations Steel Folding Chair,TQMnS5aEiOTt,Furniture,a90C0BfQbneg,Chairs,42mepTquRuAo,Game Chairs,86.25


In [36]:
furniture.isnull().sum()

product_id              0
product_name            0
category_id             0
category_name           0
second_category_id      0
second_category_name    0
third_category_id       0
third_category_name     0
unit_price              0
dtype: int64

# Technology

In [37]:
technology = deepcopy(data[data['category_name'] == 'Technology'])
technology.shape

(2484, 9)

In [38]:
technology.head(10)

Unnamed: 0,product_id,product_name,category_id,category_name,second_category_id,second_category_name,third_category_id,third_category_name,unit_price
6,,"Brother Personal Copier, Laser",,Technology,,Copiers,,,142.89
15,,"Memorex Memory Card, USB",,Technology,,Accessories,,,68.32
25,,"Logitech Router, Erganomic",,Technology,,Accessories,,,245.13
31,,"Panasonic Receipt Printer, White",,Technology,,Machines,,,119.31
33,,"Sharp Fax Machine, High-Speed",,Technology,,Copiers,,,293.85
39,,"Apple Speaker Phone, with Caller ID",,Technology,,Phones,,,123.3
41,,"Konica Printer, White",,Technology,,Machines,,,266.25
45,,"Belkin Keyboard, USB",,Technology,,Accessories,,,84.09
46,,"Cisco Signal Booster, Full Size",,Technology,,Phones,,,153.12
54,,"StarTech Receipt Printer, Wireless",,Technology,,Machines,,,110.58


In [39]:
technology['second_category_name'].value_counts()

Phones         733
Accessories    700
Copiers        537
Machines       514
Name: second_category_name, dtype: int64

In [40]:
technology.loc[:, 'category_id'] = nano_id()
technology.loc[technology['second_category_name'] == 'Phones', 'second_category_id'] = nano_id()
technology.loc[technology['second_category_name'] == 'Accessories', 'second_category_id'] = nano_id()
technology.loc[technology['second_category_name'] == 'Copiers', 'second_category_id'] = nano_id()
technology.loc[technology['second_category_name'] == 'Machines', 'second_category_id'] = nano_id()

In [41]:
technology['second_category_id'].value_counts()

UdyjL8htbIem    733
Ia6gM6hxs1Ow    700
O6uSgzSb85HK    537
iXwl6cYOBkSZ    514
Name: second_category_id, dtype: int64

In [42]:
technology.head(10)

Unnamed: 0,product_id,product_name,category_id,category_name,second_category_id,second_category_name,third_category_id,third_category_name,unit_price
6,,"Brother Personal Copier, Laser",cI0l0hb5Knmv,Technology,O6uSgzSb85HK,Copiers,,,142.89
15,,"Memorex Memory Card, USB",cI0l0hb5Knmv,Technology,Ia6gM6hxs1Ow,Accessories,,,68.32
25,,"Logitech Router, Erganomic",cI0l0hb5Knmv,Technology,Ia6gM6hxs1Ow,Accessories,,,245.13
31,,"Panasonic Receipt Printer, White",cI0l0hb5Knmv,Technology,iXwl6cYOBkSZ,Machines,,,119.31
33,,"Sharp Fax Machine, High-Speed",cI0l0hb5Knmv,Technology,O6uSgzSb85HK,Copiers,,,293.85
39,,"Apple Speaker Phone, with Caller ID",cI0l0hb5Knmv,Technology,UdyjL8htbIem,Phones,,,123.3
41,,"Konica Printer, White",cI0l0hb5Knmv,Technology,iXwl6cYOBkSZ,Machines,,,266.25
45,,"Belkin Keyboard, USB",cI0l0hb5Knmv,Technology,Ia6gM6hxs1Ow,Accessories,,,84.09
46,,"Cisco Signal Booster, Full Size",cI0l0hb5Knmv,Technology,UdyjL8htbIem,Phones,,,153.12
54,,"StarTech Receipt Printer, Wireless",cI0l0hb5Knmv,Technology,iXwl6cYOBkSZ,Machines,,,110.58


## Phones

Phone has 4 third categories: Office Phones, Landline Phones, Mobile Phones, Smartphones

In [43]:
random_assign_third_category(technology, 
                            'Phones',
                            ['Office Phones', 'Landline Phones', 'Mobile Phones', 'Smartphones'], 
                            [0.3, 0.2, 0.3, 0.2] )

Assigning third category ['Office Phones', 'Landline Phones', 'Mobile Phones', 'Smartphones'] for second category, Phones

Number of rows for Phones: 733

third_category_name
Office Phones      226
Mobile Phones      214
Landline Phones    181
Smartphones        112
dtype: int64


third_category_id
LOYgbvPNm5UK    226
kiI3QVPZx20S    214
7I8BWW492grp    181
04Bmug672M1i    112
dtype: int64


## Accessories

Accessories has 3 third categories: Mobile Accessories, Metworking & Wi-Fi, Hard Drives & Data Storage

In [44]:
random_assign_third_category(technology, 
                            'Accessories',
                            ['Mobile Accessories', 'Metworking & Wi-Fi', 'Hard Drives & Data Storage'], 
                            [0.3, 0.4, 0.3] )

Assigning third category ['Mobile Accessories', 'Metworking & Wi-Fi', 'Hard Drives & Data Storage'] for second category, Accessories

Number of rows for Accessories: 700

third_category_name
Metworking & Wi-Fi            300
Hard Drives & Data Storage    219
Mobile Accessories            181
dtype: int64


third_category_id
zH25BJILUbiu    300
HEb20LvSltS6    219
uAWgBRLwIEl1    181
dtype: int64


## Copiers

Copiers has 3 third categories: All-in-One Printers, Laser Printers, Inkjet Printers

In [45]:
random_assign_third_category(technology, 
                            'Copiers',
                            ['All-in-One Printers', 'Laser Printers', 'Inkjet Printers'], 
                            [0.2, 0.5, 0.3] )

Assigning third category ['All-in-One Printers', 'Laser Printers', 'Inkjet Printers'] for second category, Copiers

Number of rows for Copiers: 537

third_category_name
Laser Printers         249
Inkjet Printers        177
All-in-One Printers    111
dtype: int64


third_category_id
muJZOG51p9Pv    249
cLhuwQSbME5n    177
R3QMOyrkSbft    111
dtype: int64


## Machines 

Machine has 3 third categories: Die Cutting, Fax Machines, Shredders

In [46]:
random_assign_third_category(technology, 
                            'Machines',
                            ['Die Cutting', 'Fax Machines', 'Shredders'], 
                            [0.2, 0.4, 0.4] )

Assigning third category ['Die Cutting', 'Fax Machines', 'Shredders'] for second category, Machines

Number of rows for Machines: 514

third_category_name
Fax Machines    210
Shredders       209
Die Cutting      95
dtype: int64


third_category_id
06uQsYBqsU57    210
kRNDYMvkwWTF    209
tURn4OznViZs     95
dtype: int64


In [47]:
technology.head(10)

Unnamed: 0,product_id,product_name,category_id,category_name,second_category_id,second_category_name,third_category_id,third_category_name,unit_price
6,,"Brother Personal Copier, Laser",cI0l0hb5Knmv,Technology,O6uSgzSb85HK,Copiers,muJZOG51p9Pv,Laser Printers,142.89
15,,"Memorex Memory Card, USB",cI0l0hb5Knmv,Technology,Ia6gM6hxs1Ow,Accessories,zH25BJILUbiu,Metworking & Wi-Fi,68.32
25,,"Logitech Router, Erganomic",cI0l0hb5Knmv,Technology,Ia6gM6hxs1Ow,Accessories,zH25BJILUbiu,Metworking & Wi-Fi,245.13
31,,"Panasonic Receipt Printer, White",cI0l0hb5Knmv,Technology,iXwl6cYOBkSZ,Machines,kRNDYMvkwWTF,Shredders,119.31
33,,"Sharp Fax Machine, High-Speed",cI0l0hb5Knmv,Technology,O6uSgzSb85HK,Copiers,muJZOG51p9Pv,Laser Printers,293.85
39,,"Apple Speaker Phone, with Caller ID",cI0l0hb5Knmv,Technology,UdyjL8htbIem,Phones,7I8BWW492grp,Landline Phones,123.3
41,,"Konica Printer, White",cI0l0hb5Knmv,Technology,iXwl6cYOBkSZ,Machines,tURn4OznViZs,Die Cutting,266.25
45,,"Belkin Keyboard, USB",cI0l0hb5Knmv,Technology,Ia6gM6hxs1Ow,Accessories,zH25BJILUbiu,Metworking & Wi-Fi,84.09
46,,"Cisco Signal Booster, Full Size",cI0l0hb5Knmv,Technology,UdyjL8htbIem,Phones,04Bmug672M1i,Smartphones,153.12
54,,"StarTech Receipt Printer, Wireless",cI0l0hb5Knmv,Technology,iXwl6cYOBkSZ,Machines,06uQsYBqsU57,Fax Machines,110.58


In [48]:
technology.isnull().sum()

product_id              0
product_name            0
category_id             0
category_name           0
second_category_id      0
second_category_name    0
third_category_id       0
third_category_name     0
unit_price              0
dtype: int64

In [49]:
result = pd.concat([office_supplies, furniture, technology])
result.shape

(10771, 9)

In [50]:
result.head(20)

Unnamed: 0,product_id,product_name,category_id,category_name,second_category_id,second_category_name,third_category_id,third_category_name,unit_price
0,,"Tenex Lockers, Blue",O8xD8rO2X816,Office Supplies,e5a2vp9Bpczy,Storage,cGAY8PSjSybf,Storage Bins & Boxes,204.15
1,,"Acme Trimmer, High Speed",O8xD8rO2X816,Office Supplies,KVraCWQHdXOj,Supplies,AABy1Yj6edjQ,Glue & Glue Sticks,44.58
2,,"Tenex Box, Single Width",O8xD8rO2X816,Office Supplies,e5a2vp9Bpczy,Storage,cGAY8PSjSybf,Storage Bins & Boxes,16.53
3,,"Enermax Note Cards, Premium",O8xD8rO2X816,Office Supplies,WtzhL5cVilg6,Paper,0V6sLr6aGtuU,Copy & Printer Paper,29.91
5,,"Eaton Computer Printout Paper, 8.5 x 11",O8xD8rO2X816,Office Supplies,WtzhL5cVilg6,Paper,0V6sLr6aGtuU,Copy & Printer Paper,30.69
8,,"Fellowes Lockers, Wire Frame",O8xD8rO2X816,Office Supplies,e5a2vp9Bpczy,Storage,jS5eFcAuwcyS,File Folders,206.4
9,,"Tenex Trays, Single Width",O8xD8rO2X816,Office Supplies,e5a2vp9Bpczy,Storage,gm6nCZ8aVa3g,File Storage,54.24
10,,"KitchenAid Coffee Grinder, Red",O8xD8rO2X816,Office Supplies,hwpWTtn7Tl5o,Appliances,LoI4lmPulhq4,"Coffee, Water & Snacks",70.47
11,,"Hamilton Beach Refrigerator, Silver",O8xD8rO2X816,Office Supplies,hwpWTtn7Tl5o,Appliances,LoI4lmPulhq4,"Coffee, Water & Snacks",333.92
13,,"Binney & Smith Pencil Sharpener, Easy-Erase",O8xD8rO2X816,Office Supplies,iqGEIzIl1stw,Art,sEghhe5Z1x5Z,Drawing Ink,27.93


In [51]:
product_random_ids = result['product_id'].apply(lambda x: nano_id())

In [52]:
result.loc[:, 'product_id'] = product_random_ids

In [53]:
result.head(20)

Unnamed: 0,product_id,product_name,category_id,category_name,second_category_id,second_category_name,third_category_id,third_category_name,unit_price
0,bguGYmIzBXeF,"Tenex Lockers, Blue",O8xD8rO2X816,Office Supplies,e5a2vp9Bpczy,Storage,cGAY8PSjSybf,Storage Bins & Boxes,204.15
1,bb4yPyycNt30,"Acme Trimmer, High Speed",O8xD8rO2X816,Office Supplies,KVraCWQHdXOj,Supplies,AABy1Yj6edjQ,Glue & Glue Sticks,44.58
2,6frP11X2Lk5L,"Tenex Box, Single Width",O8xD8rO2X816,Office Supplies,e5a2vp9Bpczy,Storage,cGAY8PSjSybf,Storage Bins & Boxes,16.53
3,EffF7kN7qGHd,"Enermax Note Cards, Premium",O8xD8rO2X816,Office Supplies,WtzhL5cVilg6,Paper,0V6sLr6aGtuU,Copy & Printer Paper,29.91
5,PlkqzI360PyP,"Eaton Computer Printout Paper, 8.5 x 11",O8xD8rO2X816,Office Supplies,WtzhL5cVilg6,Paper,0V6sLr6aGtuU,Copy & Printer Paper,30.69
8,gSawXTJpxRfr,"Fellowes Lockers, Wire Frame",O8xD8rO2X816,Office Supplies,e5a2vp9Bpczy,Storage,jS5eFcAuwcyS,File Folders,206.4
9,FNK4N0oBgIcn,"Tenex Trays, Single Width",O8xD8rO2X816,Office Supplies,e5a2vp9Bpczy,Storage,gm6nCZ8aVa3g,File Storage,54.24
10,a8qliXBl9LNF,"KitchenAid Coffee Grinder, Red",O8xD8rO2X816,Office Supplies,hwpWTtn7Tl5o,Appliances,LoI4lmPulhq4,"Coffee, Water & Snacks",70.47
11,9LzjfJNhM8t8,"Hamilton Beach Refrigerator, Silver",O8xD8rO2X816,Office Supplies,hwpWTtn7Tl5o,Appliances,LoI4lmPulhq4,"Coffee, Water & Snacks",333.92
13,PXkHzXOTvTmo,"Binney & Smith Pencil Sharpener, Easy-Erase",O8xD8rO2X816,Office Supplies,iqGEIzIl1stw,Art,sEghhe5Z1x5Z,Drawing Ink,27.93


In [54]:
result.to_csv('../resource/data/fake_product_data.csv', index=False)