## Data Loading

In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Read both source tables. Malformed rows in the styles file are skipped
# (on_bad_lines='skip') instead of aborting the load.
styles_csv = 'datasets/styles.csv'
images_csv = 'datasets/images.csv'
product_data = pd.read_csv(styles_csv, on_bad_lines='skip')
image_data = pd.read_csv(images_csv)

## Data Info

In [3]:
# Display basic information about the dataset.
# DataFrame.info() prints its report and returns None, so it is called on its
# own line rather than being passed to display() (which would render "None").
product_data.info()
display(product_data.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44424 entries, 0 to 44423
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   id                  44424 non-null  int64  
 1   gender              44424 non-null  object 
 2   masterCategory      44424 non-null  object 
 3   subCategory         44424 non-null  object 
 4   articleType         44424 non-null  object 
 5   baseColour          44409 non-null  object 
 6   season              44403 non-null  object 
 7   year                44423 non-null  float64
 8   usage               44107 non-null  object 
 9   productDisplayName  44417 non-null  object 
dtypes: float64(1), int64(1), object(8)
memory usage: 3.4+ MB


Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011.0,Casual,Turtle Check Men Navy Blue Shirt
1,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012.0,Casual,Peter England Men Party Blue Jeans
2,59263,Women,Accessories,Watches,Watches,Silver,Winter,2016.0,Casual,Titan Women Silver Watch
3,21379,Men,Apparel,Bottomwear,Track Pants,Black,Fall,2011.0,Casual,Manchester United Men Solid Black Track Pants
4,53759,Men,Apparel,Topwear,Tshirts,Grey,Summer,2012.0,Casual,Puma Men Grey T-shirt


None

In [4]:
# DataFrame.info() prints its report and returns None, so it is called on its
# own line rather than being passed to display() (which would render "None").
image_data.info()
display(image_data.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44446 entries, 0 to 44445
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  44446 non-null  object
 1   link      44446 non-null  object
dtypes: object(2)
memory usage: 694.6+ KB


Unnamed: 0,filename,link
0,15970.jpg,http://assets.myntassets.com/v1/images/style/p...
1,39386.jpg,http://assets.myntassets.com/v1/images/style/p...
2,59263.jpg,http://assets.myntassets.com/v1/images/style/p...
3,21379.jpg,http://assets.myntassets.com/v1/images/style/p...
4,53759.jpg,http://assets.myntassets.com/v1/images/style/p...


None

## Merge two tables

In [5]:

# Derive the numeric product id from the image filename ("15970.jpg" -> 15970).
# removesuffix strips only a trailing ".jpg"; str.replace would remove the
# substring anywhere in the name and, on older pandas, treat "." as a regex
# wildcard.
image_data['id'] = image_data['filename'].str.removesuffix('.jpg').astype(int)

# Left join: keep every product row and attach its image link where one exists.
data = pd.merge(product_data, image_data[['id', 'link']], on='id', how='left')

In [6]:
# DataFrame.info() prints its report and returns None, so it is called on its
# own line rather than being passed to display() (which would render "None").
data.info()
display(data.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44424 entries, 0 to 44423
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   id                  44424 non-null  int64  
 1   gender              44424 non-null  object 
 2   masterCategory      44424 non-null  object 
 3   subCategory         44424 non-null  object 
 4   articleType         44424 non-null  object 
 5   baseColour          44409 non-null  object 
 6   season              44403 non-null  object 
 7   year                44423 non-null  float64
 8   usage               44107 non-null  object 
 9   productDisplayName  44417 non-null  object 
 10  link                44424 non-null  object 
dtypes: float64(1), int64(1), object(9)
memory usage: 3.7+ MB


Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,link
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011.0,Casual,Turtle Check Men Navy Blue Shirt,http://assets.myntassets.com/v1/images/style/p...
1,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012.0,Casual,Peter England Men Party Blue Jeans,http://assets.myntassets.com/v1/images/style/p...
2,59263,Women,Accessories,Watches,Watches,Silver,Winter,2016.0,Casual,Titan Women Silver Watch,http://assets.myntassets.com/v1/images/style/p...
3,21379,Men,Apparel,Bottomwear,Track Pants,Black,Fall,2011.0,Casual,Manchester United Men Solid Black Track Pants,http://assets.myntassets.com/v1/images/style/p...
4,53759,Men,Apparel,Topwear,Tshirts,Grey,Summer,2012.0,Casual,Puma Men Grey T-shirt,http://assets.myntassets.com/v1/images/style/p...


None

## Are There Any Duplicate Links?

In [7]:
# Select every row whose link occurs more than once (all occurrences are kept).
shares_a_link = data['link'].duplicated(keep=False)
duplicate_links = data.loc[shares_a_link]

# How many rows share each repeated link.
duplicate_links_count = duplicate_links['link'].value_counts()

display(duplicate_links)

# Show the repeated links together with their counts.
print("Duplicate Links and Their Counts:")
print(duplicate_links_count)

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,link
6695,39403,Men,Apparel,Topwear,Shirts,Black,Summer,2014.0,Casual,U.S. Polo Assn. Men Black Tailored Fit Casual ...,undefined
12730,5359,Men,Footwear,Shoes,Sports Shoes,White,Summer,2011.0,Sports,Kipsta F300 Tf Sr Ah09,http://assets.myntassets.com/v1/images/style/p...
13313,5357,Men,Footwear,Shoes,Sports Shoes,Red,Fall,2011.0,Sports,Kipsta F300 Sala Shoes,http://assets.myntassets.com/v1/images/style/p...
15657,5356,Men,Footwear,Shoes,Sports Shoes,Red,Fall,2011.0,Sports,Kalenji Kapteren Aw Red 2010,http://assets.myntassets.com/v1/images/style/p...
16194,39410,Men,Apparel,Topwear,Shirts,Cream,Summer,2014.0,Casual,U.S. Polo Assn. Men Cream-Coloured Tailored Fi...,undefined
16670,5358,Men,Footwear,Shoes,Sports Shoes,White,Summer,2011.0,Sports,Kipsta Calcetto 5 Sr White,http://assets.myntassets.com/v1/images/style/p...
32309,39401,Men,Apparel,Bottomwear,Jeans,Blue,Winter,2016.0,Casual,U.S. Polo Assn. Denim Co. Men Blue Slim Straig...,undefined
36381,39425,Men,Apparel,Topwear,Tshirts,Red,Spring,2013.0,Casual,U.S. Polo Assn. Men Red Polo T-Shirt,undefined
40000,12347,Men,Apparel,Topwear,Suits,Red,Winter,2010.0,Casual,Fastrack Men Red Manhattan Regular Fit Solid F...,undefined


Duplicate Links and Their Counts:
link
undefined                                                                                              5
http://assets.myntassets.com/v1/images/style/properties/588ca3614eb1f2b165346029efb0ae80_images.jpg    2
http://assets.myntassets.com/v1/images/style/properties/f064e90e3d026a23298593e2607d93cf_images.jpg    2
Name: count, dtype: int64


In [8]:
# Drop rows whose link is the literal placeholder 'undefined'.
# Also drop product 5357 (Kipsta F300 Sala Shoes): its picture does not match
# the product itself.
has_real_link = data['link'] != 'undefined'
is_not_mismatched_product = data['id'] != 5357
data = data[has_real_link & is_not_mismatched_product]

In [9]:
# The rows that still share a link might be the same product, so they are kept.
data.loc[data['link'].duplicated(keep=False)]

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,link
12730,5359,Men,Footwear,Shoes,Sports Shoes,White,Summer,2011.0,Sports,Kipsta F300 Tf Sr Ah09,http://assets.myntassets.com/v1/images/style/p...
16670,5358,Men,Footwear,Shoes,Sports Shoes,White,Summer,2011.0,Sports,Kipsta Calcetto 5 Sr White,http://assets.myntassets.com/v1/images/style/p...


## Missing Values

In [10]:
# Missing-value summary: one row per column with the null count and the share
# of rows affected, formatted as a percentage string with three decimals.
missing_values = (
    data.isna()
    .sum()
    .rename_axis('Column')
    .reset_index(name='Missing Values')
    .assign(
        Percentage=lambda frame: (frame['Missing Values'] / len(data) * 100).map('{:.3f}%'.format)
    )
)
display(missing_values)

Unnamed: 0,Column,Missing Values,Percentage
0,id,0,0.000%
1,gender,0,0.000%
2,masterCategory,0,0.000%
3,subCategory,0,0.000%
4,articleType,0,0.000%
5,baseColour,15,0.034%
6,season,21,0.047%
7,year,1,0.002%
8,usage,317,0.714%
9,productDisplayName,7,0.016%


In [11]:
# Inspect the rows that have no baseColour.
data.loc[data['baseColour'].isna()]

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,link
6206,11293,Men,Apparel,Bottomwear,Jeans,,Fall,2016.0,Casual,Wrangler Men Blue Texas Jeans,http://assets.myntassets.com/v1/images/style/p...
9516,41347,Men,Apparel,Bottomwear,Jeans,,Fall,2016.0,Casual,Wrangler Men Black Texas Jeans,http://assets.myntassets.com/v1/images/style/p...
12763,11288,Men,Apparel,Bottomwear,Jeans,,Fall,2016.0,Casual,Wrangler Men Blue Texas Jeans,http://assets.myntassets.com/v1/images/style/p...
14291,30991,Women,Personal Care,Fragrance,Perfume and Body Mist,,Spring,2017.0,,,http://assets.myntassets.com/assets/images/309...
15526,57698,Women,Personal Care,Skin,Face Moisturisers,,Spring,2017.0,Casual,Lotus Herbals Quincenourish Quince Seed Nouris...,http://assets.myntassets.com/assets/images/576...
15708,41355,Men,Apparel,Bottomwear,Jeans,,Fall,2016.0,Casual,Wrangler Men Blue Millard Jeans,http://assets.myntassets.com/v1/images/style/p...
17268,43122,Men,Personal Care,Fragrance,Perfume and Body Mist,,Spring,2017.0,,GUESS Man Eau De Toilette 75 ml,http://assets.myntassets.com/assets/images/431...
17639,45687,Men,Personal Care,Fragrance,Perfume and Body Mist,,Spring,2017.0,,,http://assets.myntassets.com/assets/images/456...
20209,11309,Men,Apparel,Bottomwear,Jeans,,Fall,2016.0,Casual,Wrangler Men Blue Floyd Jeans,http://assets.myntassets.com/v1/images/style/p...
23484,45698,Women,Personal Care,Fragrance,Perfume and Body Mist,,Spring,2017.0,,,http://assets.myntassets.com/assets/images/456...


### Dropped Missing productDisplayName

In [12]:
# Drop rows without a productDisplayName.
# .copy() makes the result an independent frame: later cells assign into
# `data` with .loc, which pandas flags with SettingWithCopyWarning when the
# frame is still a filtered selection of another frame.
data = data.dropna(subset=['productDisplayName']).copy()

In [13]:
# Missing-value summary: one row per column with the null count and the share
# of rows affected, formatted as a percentage string with three decimals.
missing_values = (
    data.isna()
    .sum()
    .rename_axis('Column')
    .reset_index(name='Missing Values')
    .assign(
        Percentage=lambda frame: (frame['Missing Values'] / len(data) * 100).map('{:.3f}%'.format)
    )
)
display(missing_values)

Unnamed: 0,Column,Missing Values,Percentage
0,id,0,0.000%
1,gender,0,0.000%
2,masterCategory,0,0.000%
3,subCategory,0,0.000%
4,articleType,0,0.000%
5,baseColour,10,0.023%
6,season,21,0.047%
7,year,1,0.002%
8,usage,312,0.703%
9,productDisplayName,0,0.000%


### Filling Missing baseColour

In [14]:
# Rows that still have no baseColour after dropping unnamed products.
data.loc[data['baseColour'].isna()]

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,link
6206,11293,Men,Apparel,Bottomwear,Jeans,,Fall,2016.0,Casual,Wrangler Men Blue Texas Jeans,http://assets.myntassets.com/v1/images/style/p...
9516,41347,Men,Apparel,Bottomwear,Jeans,,Fall,2016.0,Casual,Wrangler Men Black Texas Jeans,http://assets.myntassets.com/v1/images/style/p...
12763,11288,Men,Apparel,Bottomwear,Jeans,,Fall,2016.0,Casual,Wrangler Men Blue Texas Jeans,http://assets.myntassets.com/v1/images/style/p...
15526,57698,Women,Personal Care,Skin,Face Moisturisers,,Spring,2017.0,Casual,Lotus Herbals Quincenourish Quince Seed Nouris...,http://assets.myntassets.com/assets/images/576...
15708,41355,Men,Apparel,Bottomwear,Jeans,,Fall,2016.0,Casual,Wrangler Men Blue Millard Jeans,http://assets.myntassets.com/v1/images/style/p...
17268,43122,Men,Personal Care,Fragrance,Perfume and Body Mist,,Spring,2017.0,,GUESS Man Eau De Toilette 75 ml,http://assets.myntassets.com/assets/images/431...
20209,11309,Men,Apparel,Bottomwear,Jeans,,Fall,2016.0,Casual,Wrangler Men Blue Floyd Jeans,http://assets.myntassets.com/v1/images/style/p...
34553,41357,Men,Apparel,Bottomwear,Jeans,,Fall,2016.0,Casual,Wrangler Men Blue Millard Jeans,http://assets.myntassets.com/v1/images/style/p...
43260,43116,Women,Personal Care,Fragrance,Perfume and Body Mist,,Spring,2017.0,,GUESS Seductive Women EDT Spray,http://assets.myntassets.com/assets/images/431...
44224,43120,Men,Personal Care,Fragrance,Perfume and Body Mist,,Spring,2017.0,,GUESS by Marciano Men Eau De Toilette 50 ml,http://assets.myntassets.com/assets/images/431...


In [15]:
# Product id -> colour to write into baseColour for that product.
fill_values = {
    11293: 'Blue',
    41347: 'Black',
    11288: 'Blue',
    41355: 'Blue',
    11309: 'Blue',
    41357: 'Blue',
}
id_list = list(fill_values)
colour_list = list(fill_values.values())

# Write all colours in one vectorised assignment instead of one pass per id.
is_listed_product = data['id'].isin(id_list)
data.loc[is_listed_product, 'baseColour'] = data.loc[is_listed_product, 'id'].map(fill_values)

# Rows that still have no baseColour.
data.loc[data['baseColour'].isna()]

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,link
15526,57698,Women,Personal Care,Skin,Face Moisturisers,,Spring,2017.0,Casual,Lotus Herbals Quincenourish Quince Seed Nouris...,http://assets.myntassets.com/assets/images/576...
17268,43122,Men,Personal Care,Fragrance,Perfume and Body Mist,,Spring,2017.0,,GUESS Man Eau De Toilette 75 ml,http://assets.myntassets.com/assets/images/431...
43260,43116,Women,Personal Care,Fragrance,Perfume and Body Mist,,Spring,2017.0,,GUESS Seductive Women EDT Spray,http://assets.myntassets.com/assets/images/431...
44224,43120,Men,Personal Care,Fragrance,Perfume and Body Mist,,Spring,2017.0,,GUESS by Marciano Men Eau De Toilette 50 ml,http://assets.myntassets.com/assets/images/431...


Perfumes shouldn't have a baseColour, which is why I will leave them as NaN.

### Filling Missing Season and Year

In [16]:
# Season distribution, with missing values counted as their own bucket.
season_column = data['season']
season_column.value_counts(dropna=False)

season
Summer    21468
Fall      11430
Winter     8515
Spring     2977
NaN          21
Name: count, dtype: int64

In [17]:
# Inspect the rows that have no season.
data.loc[data['season'].isna()]

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,link
282,5402,Women,Footwear,Shoes,Sports Shoes,Purple,,2011.0,Sports,Kalenji Ekiden 200 Wn Purple 2011,http://assets.myntassets.com/v1/images/style/p...
5997,5389,Unisex,Footwear,Shoes,Sports Shoes,Yellow,,2011.0,Sports,Kalenji Kapteren 200 Yellow Fw,http://assets.myntassets.com/v1/images/style/p...
6510,5380,Women,Footwear,Shoes,Sports Shoes,Purple,,2011.0,Sports,Quechua Forclaz 100 Lady Purple,http://assets.myntassets.com/v1/images/style/p...
12658,5392,Unisex,Footwear,Shoes,Sports Shoes,White,,2011.0,Sports,Kalenji Crossport 100 Whi/champ,http://assets.myntassets.com/v1/images/style/p...
12994,5361,Men,Footwear,Shoes,Sports Shoes,Black,,2011.0,Sports,Kipsta F300 Fg Sr Ah09,http://assets.myntassets.com/v1/images/style/p...
13523,5368,Men,Footwear,Shoes,Sports Shoes,Red,,2011.0,Sports,Kalenji Kiprun 1000 White Red Ss10,http://assets.myntassets.com/v1/images/style/p...
16042,5360,Women,Footwear,Shoes,Sports Shoes,Grey,,2011.0,Sports,Kalenji Kapteren 100 Blue/grey,http://assets.myntassets.com/v1/images/style/p...
21272,53781,Men,Apparel,Topwear,Tshirts,Blue,,,Sports,Puma Men Blue Sless Round Neck T-shirt,http://assets.myntassets.com/assets/images/537...
23835,5406,Women,Footwear,Shoes,Sports Shoes,Blue,,2011.0,Sports,Kalenji Ekiden Blue Scratch 2011,http://assets.myntassets.com/v1/images/style/p...
24198,5401,Women,Footwear,Shoes,Sports Shoes,Blue,,2011.0,Sports,Kalenji Ekiden 100 Lady Blue 2011,http://assets.myntassets.com/v1/images/style/p...


In [18]:
# Product 53781 (Puma Men Blue Sless Round Neck T-shirt) looks like a summer T-shirt.
is_puma_sleeveless_tee = data['id'].eq(53781)
data.loc[is_puma_sleeveless_tee, 'season'] = 'Summer'

In [19]:
# When I checked the product links, I decided to categorize them as "Spring".
# The label must match the existing 'Spring' value exactly: a lowercase
# 'spring' would become a separate category from 'Spring'.
data['season'] = data['season'].fillna('Spring')

In [20]:
# There is only one missing year; replace it with the mean year.
# The mean is fractional, and the column is cast to int64 later, which would
# silently truncate it; round to the nearest whole year first.
data['year'] = data['year'].fillna(round(data['year'].mean()))

### What about Usage?

In [21]:
# Missing-value summary: one row per column with the null count and the share
# of rows affected, formatted as a percentage string with three decimals.
missing_values = (
    data.isna()
    .sum()
    .rename_axis('Column')
    .reset_index(name='Missing Values')
    .assign(
        Percentage=lambda frame: (frame['Missing Values'] / len(data) * 100).map('{:.3f}%'.format)
    )
)
display(missing_values)

Unnamed: 0,Column,Missing Values,Percentage
0,id,0,0.000%
1,gender,0,0.000%
2,masterCategory,0,0.000%
3,subCategory,0,0.000%
4,articleType,0,0.000%
5,baseColour,4,0.009%
6,season,0,0.000%
7,year,0,0.000%
8,usage,312,0.703%
9,productDisplayName,0,0.000%


In [22]:
# Usage distribution, with missing values counted as their own bucket.
usage_column = data['usage']
usage_column.value_counts(dropna=False)

usage
Casual          34399
Sports           4024
Ethnic           3208
Formal           2345
NaN               312
Smart Casual       67
Party              29
Travel             26
Home                1
Name: count, dtype: int64

In [23]:
# Inspect the rows that have no usage.
data.loc[data['usage'].isna()]

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,link
87,56489,Women,Personal Care,Nails,Nail Polish,Bronze,Spring,2017.0,,Streetwear Ash Nail Polish # 31,http://assets.myntassets.com/v1/images/style/p...
92,52029,Unisex,Apparel,Topwear,Rain Jacket,Coffee Brown,Summer,2012.0,,Just Natural Unisex Charcoal Rain Jacket,http://assets.myntassets.com/v1/images/style/p...
292,55001,Women,Personal Care,Lips,Lipstick,Pink,Spring,2017.0,,Lakme Absolute Lip Last Day Kiss Lip Colour,http://assets.myntassets.com/assets/images/550...
479,57563,Women,Personal Care,Lips,Lipstick,Brown,Spring,2017.0,,Lotus Herbals Pure Colours Nutty Brown Lipstic...,http://assets.myntassets.com/assets/images/575...
511,55006,Women,Personal Care,Lips,Lip Gloss,Copper,Spring,2017.0,,Lakme Sheer Satin Lip Gloss 51,http://assets.myntassets.com/v1/images/style/p...
...,...,...,...,...,...,...,...,...,...,...,...
43633,56605,Women,Personal Care,Makeup,Kajal and Eyeliner,Black,Spring,2017.0,,Streetwear Black Eye Liner 01,http://assets.myntassets.com/v1/images/style/p...
44079,57715,Women,Personal Care,Lips,Lip Gloss,Red,Spring,2017.0,,Lotus Herbals Seduction Sappy Watermelon Lip G...,http://assets.myntassets.com/v1/images/style/p...
44224,43120,Men,Personal Care,Fragrance,Perfume and Body Mist,,Spring,2017.0,,GUESS by Marciano Men Eau De Toilette 50 ml,http://assets.myntassets.com/assets/images/431...
44227,55045,Women,Personal Care,Lips,Lipstick,Purple,Spring,2017.0,,Lakme Enrich Satins Lipstick 461,http://assets.myntassets.com/v1/images/style/p...


In [24]:
# Which sub-categories the rows without a usage belong to.
data.loc[data['usage'].isna(), 'subCategory'].value_counts()

subCategory
Lips                        102
Nails                        51
Makeup                       44
Skin Care                    28
Shoe Accessories             20
Skin                         15
Topwear                      14
Eyes                          9
Wallets                       8
Loungewear and Nightwear      6
Fragrance                     5
Bath and Body                 3
Sandal                        2
Innerwear                     2
Bottomwear                    2
Beauty Accessories            1
Name: count, dtype: int64

In [25]:
# Fill missing 'usage' values with the row's own 'subCategory'
# (fillna aligns the two columns on the row index).
data['usage'] = data['usage'].fillna(data['subCategory'])

### Checking Missing Values for the Last Time

In [26]:
# Missing-value summary: one row per column with the null count and the share
# of rows affected, formatted as a percentage string with three decimals.
missing_values = (
    data.isna()
    .sum()
    .rename_axis('Column')
    .reset_index(name='Missing Values')
    .assign(
        Percentage=lambda frame: (frame['Missing Values'] / len(data) * 100).map('{:.3f}%'.format)
    )
)
display(missing_values)

Unnamed: 0,Column,Missing Values,Percentage
0,id,0,0.000%
1,gender,0,0.000%
2,masterCategory,0,0.000%
3,subCategory,0,0.000%
4,articleType,0,0.000%
5,baseColour,4,0.009%
6,season,0,0.000%
7,year,0,0.000%
8,usage,0,0.000%
9,productDisplayName,0,0.000%


## Adjusting Data Types 

In [27]:
# DataFrame.info() prints its report and returns None, so it is called on its
# own line rather than being passed to display() (which would render "None").
data.info()
display(data.head())

<class 'pandas.core.frame.DataFrame'>
Index: 44411 entries, 0 to 44423
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   id                  44411 non-null  int64  
 1   gender              44411 non-null  object 
 2   masterCategory      44411 non-null  object 
 3   subCategory         44411 non-null  object 
 4   articleType         44411 non-null  object 
 5   baseColour          44407 non-null  object 
 6   season              44411 non-null  object 
 7   year                44411 non-null  float64
 8   usage               44411 non-null  object 
 9   productDisplayName  44411 non-null  object 
 10  link                44411 non-null  object 
dtypes: float64(1), int64(1), object(9)
memory usage: 5.1+ MB


Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,link
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011.0,Casual,Turtle Check Men Navy Blue Shirt,http://assets.myntassets.com/v1/images/style/p...
1,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012.0,Casual,Peter England Men Party Blue Jeans,http://assets.myntassets.com/v1/images/style/p...
2,59263,Women,Accessories,Watches,Watches,Silver,Winter,2016.0,Casual,Titan Women Silver Watch,http://assets.myntassets.com/v1/images/style/p...
3,21379,Men,Apparel,Bottomwear,Track Pants,Black,Fall,2011.0,Casual,Manchester United Men Solid Black Track Pants,http://assets.myntassets.com/v1/images/style/p...
4,53759,Men,Apparel,Topwear,Tshirts,Grey,Summer,2012.0,Casual,Puma Men Grey T-shirt,http://assets.myntassets.com/v1/images/style/p...


None

In [29]:
# Columns cast to pandas' categorical dtype; 'year' becomes an integer column.
categorical_columns = [
    'gender',
    'masterCategory',
    'subCategory',
    'articleType',
    'baseColour',
    'season',
    'usage',
]
dtype_by_column = {column: 'category' for column in categorical_columns}
dtype_by_column['year'] = 'int64'

# One astype call with a column -> dtype mapping converts everything at once.
data = data.astype(dtype_by_column)

data.dtypes

id                       int64
gender                category
masterCategory        category
subCategory           category
articleType           category
baseColour            category
season                category
year                     int64
usage                 category
productDisplayName      object
link                    object
dtype: object

In [30]:
# Preview the first five rows after the dtype conversion.
data.head(5)

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName,link
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011,Casual,Turtle Check Men Navy Blue Shirt,http://assets.myntassets.com/v1/images/style/p...
1,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012,Casual,Peter England Men Party Blue Jeans,http://assets.myntassets.com/v1/images/style/p...
2,59263,Women,Accessories,Watches,Watches,Silver,Winter,2016,Casual,Titan Women Silver Watch,http://assets.myntassets.com/v1/images/style/p...
3,21379,Men,Apparel,Bottomwear,Track Pants,Black,Fall,2011,Casual,Manchester United Men Solid Black Track Pants,http://assets.myntassets.com/v1/images/style/p...
4,53759,Men,Apparel,Topwear,Tshirts,Grey,Summer,2012,Casual,Puma Men Grey T-shirt,http://assets.myntassets.com/v1/images/style/p...
