## <span style='color:navy'> Prepping </span>

In [1]:
import pandas as pd

In [2]:
data = pd.read_csv('../data/perfumes_df_ready.csv')

In [3]:
def ready_df(df):
    """
    Specific to the 'perfumes_df_ready.csv'. 
    Cleans up the 'gender' variable.
    Adds dummies of 'designer', 'group', 'gender' to the dataframe, and drops original ones.
    Adds 'perfume_id' instead of 'perfume_name' to make all numeric dataframe
    """
    # cleaning up the gender column in data
    gender = []
    for k in df['perfume_name']:
        gender.append(k.split('for')[1].lower().strip())


    for n, k in enumerate(gender):
        if ('her' in k[:4]) or ('women' in k[:]) and ('women and men' not in k):
            gender[n]='women'
        elif ('him' in k[:4]) or ('men' in k[:4]):
            gender[n] = 'men'
        elif ('women and men' in k):
            gender[n] = 'women and men'
        else:
            gender[n] = 'unknown'
            
    
    # replace the old with the new
    df['gender'] = gender
    
    # get dummies 
    dummies = pd.get_dummies(df[['designer', 'group', 'gender']])
    
    # add them to the dataframe
    new_df = pd.concat([df, dummies], axis = 1, sort = False)
    
    # add a quick perfume_id instead of perfume_name
    new_df['perfume_id'] = df.index 
    new_df.drop(['all_notes','top_notes', 'middle_notes','base_notes', 'synopsis', 'main_accords', 'perfume_name',
                'designer', 'group', 'gender'], axis = 1, inplace = True)
    
    return new_df

In [4]:
data.shape

(4478, 105)

In [5]:
data = ready_df(data)

In [6]:
pd.set_option('max_columns', 110)

In [7]:
data.head()

Unnamed: 0,overall_rating,total_num_voters,longvity_poor,longvity_weak,longvity_moderate,longvity_long_lasting,longvity_very_long_lasting,sillage_soft,sillage_moderate,sillage_heavy,sillage_enormous,have_it,had_it,want_it,my_signature,love_it,like_it,dislike_it,spring,summer,fall,day,night,salty,smoky,Unnamed: 35,coconut,white floral,fresh spicy,white wine,vanilla,warm spicy,whiskey,ginger,conifer,caramel,woody,vinyl,tropical,musk,fruity,coca cola,powdery,floral,aromatic,milky,yellow floral,nutty,cacao,aquatic,camphor,red fruits,sweet,oud,almond,...,designer_Lacoste Fragrances,designer_Lancome,designer_Marc Jacobs,designer_Narciso Rodriguez,designer_Nina Ricci,designer_Paco Rabanne,designer_Prada,designer_Ralph Lauren,designer_Salvador Dali,designer_Serge Lutens,designer_Tom Ford,designer_Valentino,designer_Van Cleef & Arpels,designer_Versace,designer_Viktor&Rolf,designer_Yves Rocher,designer_Yves Saint Laurent,group_Aromatic,group_Aromatic Aquatic,group_Aromatic Fougere,group_Aromatic Fruity,group_Aromatic Green,group_Aromatic Spicy,group_Chypre,group_Chypre Floral,group_Chypre Fruity,group_Citrus,group_Citrus Aromatic,group_Citrus Gourmand,group_Floral,group_Floral Aldehyde,group_Floral Aquatic,group_Floral Fruity,group_Floral Fruity Gourmand,group_Floral Green,group_Floral Woody Musk,group_Leather,group_Oriental,group_Oriental Floral,group_Oriental Fougere,group_Oriental Spicy,group_Oriental Vanilla,group_Oriental Woody,group_Woody,group_Woody Aquatic,group_Woody Aromatic,group_Woody Chypre,group_Woody Floral Musk,group_Woody Spicy,group_not found,gender_men,gender_unknown,gender_women,gender_women and men,perfume_id
0,4.11,410.0,14,25,35,14,12,47,34,20,31,279,40,272,3,100,74,26,3,20,54,5,56,0.0,0.0,0.0,80.0,119.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,52.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,78.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
1,3.94,2311.0,33,39,173,402,97,87,285,366,111,2021,638,1252,64,100,70,37,55,41,12,58,41,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,96.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1
2,2.65,123.0,2,7,12,9,6,11,26,8,9,50,12,80,-1,44,45,100,22,35,13,28,44,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,87.0,0.0,87.0,0.0,65.0,0.0,0.0,0.0,0.0,0.0,0.0,130.0,...,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2
3,1.67,9.0,5,1,0,0,2,3,1,0,3,-1,-1,11,-1,20,1,100,40,40,40,80,40,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,3
4,4.44,47.0,1,0,0,2,0,2,2,4,5,21,6,189,1,100,50,10,25,15,5,25,40,0.0,0.0,0.0,0.0,130.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,66.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,4


In [8]:
# making sure we don't have any nulls in the main data frame
nulls = data.isnull().sum().to_frame()
nulls.loc[nulls[0] != 0, :]

Unnamed: 0,0


### <span style='color:navy'> Turning the prediction into a classification problem </span>
<span style='color:navy'> By making overall ratings into bins of 0.5 increments. And this is how, 

   for each 0.5 step, take the ceiling. e.g. 1.3 is 1.5; for 3.6 is 4 and so on
steps 
    
1. check if it falls between the number {1, 2, 3, 4} (with $>$) and the {same number + 0.5} (with $\leq$)
   if True $\Rightarrow$ it's the number + 0.5
2. if it falls between the {number + .5} (with $>$) and {number +1} (with $\leq$) $\Rightarrow$ it's the number +1
3. otherwise, it must be that it's a flat review score, keep it. 
note that int(y) is the integer part of the number, regardless of the value of the decimal 
 </span>

In [9]:
def classify_target(df):
    
    # make sure format is unified
    ratings_continuous = df['overall_rating'].map(lambda x: round(float(x), 2))
    
    # make target into bins of 0.5 width
    new_y = []
    for y in ratings_continuous:
        if (y > int(y)) & (y <= int(y) + .5):
            new_y.append(int(y) + .5)

        elif (y > int(y) + .5) & (y < int(y) + 1):
            new_y.append(int(y) + 1)

        elif (y == int(y)):
            new_y.append(y)
            
    # add the new ratings to the data frame; and remove old ones
    df['ratings_classes'] = new_y
    df.drop('overall_rating', axis = 1, inplace = True)
    
    return df

In [10]:
data = classify_target(data)

In [11]:
# how many classes we have now
data['ratings_classes'].value_counts().shape

(10,)

In [12]:
# size/percentages of each class
data['ratings_classes'].value_counts(normalize = True).map(lambda x: round(x, 2))

 4.0    0.43
 4.5    0.36
 3.5    0.11
 5.0    0.06
 3.0    0.02
-1.0    0.01
 2.5    0.01
 1.0    0.00
 2.0    0.00
 1.5    0.00
Name: ratings_classes, dtype: float64

In [13]:
data['ratings_classes'].value_counts()

 4.0    1916
 4.5    1633
 3.5     496
 5.0     268
 3.0      91
-1.0      34
 2.5      26
 1.0       9
 2.0       4
 1.5       1
Name: ratings_classes, dtype: int64

In [14]:
import warnings
warnings.filterwarnings('ignore')

In [15]:
## Making the testing and training data frames

# the data frame of perfumes not having overall rating, that I want to predict
test_df = data.loc[data['ratings_classes'] == -1, :]
test_df.drop(['ratings_classes'], axis = 1, inplace = True)

# The training data frame
df = data.loc[data['ratings_classes']!=-1, :]

print(df.shape, test_df.shape)

(4444, 179) (34, 178)


<span style='color:navy'> Things to keep in mind, 
- while analyzing the data in perfume_chars_analysis.ipynb, we found that the accuracy of the lasso model improved with adding polynomial features to the dataframe. in addition to selecting best 200 features.
- we have an imbalanced multi-class classification problem now. our aim next is to find a model that beats the baseline accuracy found in the cell above. </span>

Note: In order to run classfication models, we can't have the target type as float, it will be considered continuous and produce errors. we need to encode it explicitly into categories. <br />
to keep track of what's what, I am going to do it by hand. I am gonna use a dictionary for that, and map it to the target.

In [75]:
# to change it by hand, I need a dictionary, 
# they keys of the dictionary are the values:
print('sorted list ', sorted(list(data['ratings_classes'].value_counts().index)))

# and the values are numbers between 1 to 9 inclusive. these are gonna be the new classes (categories)

sorted list  [-1.0, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0]


In [17]:
def categorize_classes(df):
    """
    Run classify_target() before this one, or it won't work.
    """
    
    # make the dictionary to map the old values to the new categories
    classes_dict = {key: value for key, value in zip(sorted(list(df['ratings_classes'].value_counts().index)), 
                                             range(1,10))}
    
    # replace
    df['labels'] = df['ratings_classes'].map(classes_dict)
    
    return df

In [18]:
# and categorize the target in training df
df = categorize_classes(df)

In [19]:
print(data.shape, df.shape, test_df.shape)
# now df contains a 'labels' column and a 'ratings_classes' column. data contains only the latter. 

(4478, 179) (4444, 180) (34, 178)


***
## <span style='color:navy'> Modeling </span>

In [37]:
from sklearn.model_selection import train_test_split

X = df.drop(['ratings_classes', 'labels'], axis = 1)
y = df['labels']

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.2, random_state = 62019)

In [38]:
print(X_train.shape, X_test.shape)

(3555, 178) (889, 178)


In [39]:
# standarizing the data frames
def standarize_datasets(X_train = X_train, X_test = X_test):
    
    from sklearn.preprocessing import StandardScaler

    ss = StandardScaler()

    X_train = ss.fit_transform(X_train)
    X_test = ss.transform(X_test)
    
    return X_train, X_test

In [40]:
X_train = standarize_datasets()[0]
X_test = standarize_datasets()[1]

In [41]:
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.svm import SVC

In [42]:
def run_model(model, X_train = X_train, X_test = X_test):
    
    from sklearn.preprocessing import MinMaxScaler
    
    if model != MultinomialNB:
            
        model = model()
        model.fit(X_train, y_train)

        score = cross_val_score(model, X_test, y_test, cv = 5).mean()
    
    else:
        mmax = MinMaxScaler() # MinMaxScaler can't have negative values in training dataset
        X_tr_min = mmax.fit_transform(X_train)
        X_ts_min = mmax.transform(X_test)
        
        model = MultinomialNB()
        model.fit(X_tr_min, y_train)
        
        score = cross_val_score(model, X_ts_min, y_test, cv = 5).mean()
        
    
    return round(score, 2)    

In [43]:
# testing several models at once, in their default settings.
models_list = [RandomForestClassifier, AdaBoostClassifier, KNeighborsClassifier, 
              GaussianNB, MultinomialNB, SVC]

In [44]:
for m in models_list:
    print(m)
    print(run_model(m))
    print('------')

<class 'sklearn.ensemble.forest.RandomForestClassifier'>
0.71
------
<class 'sklearn.ensemble.weight_boosting.AdaBoostClassifier'>
0.61
------
<class 'sklearn.neighbors.classification.KNeighborsClassifier'>
0.49
------
<class 'sklearn.naive_bayes.GaussianNB'>
0.25
------
<class 'sklearn.naive_bayes.MultinomialNB'>
0.53
------
<class 'sklearn.svm.classes.SVC'>
0.61
------


<span style='color:navy'> Brining in the big hammer </span>

In [45]:
from xgboost import XGBClassifier
from xgboost import XGBRFClassifier

In [46]:
xg1 = XGBClassifier()
xg2 = XGBRFClassifier()

In [47]:
xg1.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bynode=1, colsample_bytree=1, gamma=0, learning_rate=0.1,
       max_delta_step=0, max_depth=3, min_child_weight=1, missing=None,
       n_estimators=100, n_jobs=1, nthread=None,
       objective='multi:softprob', random_state=0, reg_alpha=0,
       reg_lambda=1, scale_pos_weight=1, seed=None, silent=None,
       subsample=1, verbosity=1)

In [48]:
xg1.score(X_test, y_test)

0.9291338582677166

In [49]:
xg2.fit(X_train, y_train)

XGBRFClassifier(base_score=0.5, colsample_bylevel=1, colsample_bynode=0.8,
        colsample_bytree=1, gamma=0, learning_rate=1, max_delta_step=0,
        max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
        n_jobs=1, nthread=None, objective='multi:softprob', random_state=0,
        reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
        silent=None, subsample=0.8, verbosity=1)

In [50]:
xg2.score(X_test, y_test)

0.8695163104611924

<span style='color:navy'> To make sure this is the best model we can get. I'll do GridSearch over it. <br /> 
    By the way the GridSearching takes an hour to finish. It was not worth it, we got 0.0079 increase in accuracy </span>

In [55]:
# Grid searching over the XGBClassifier() 
from sklearn.model_selection import GridSearchCV

xg1_params = {
    'max_depth' : [3, 7, 15],
    'learning_rate' : [.1, .5, .9],
    'n_estimators' : [100, 300],
    'gamma' : [0, .5]
}

gs = GridSearchCV(xg1, xg1_params, cv = 5)

gs.fit(X_train, y_train)

gs.score(X_test, y_test)

0.937007874015748

In [58]:
# more accuracy we get is
0.937007874015748 - 0.9291338582677166

0.007874015748031482

In [59]:
gs.best_params_

{'gamma': 0.5, 'learning_rate': 0.1, 'max_depth': 15, 'n_estimators': 300}

<span style='color:navy'> Recall that defaults of the `XGBClassifier` are `{'gamma': 0, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators' = 100}` </span>

In [72]:
type(X_train)

numpy.ndarray

In [69]:
gs.predict(test_df.values)

array([7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 7, 7, 8, 8, 7, 7, 7, 8, 8, 7, 7, 7,
       8, 8, 8, 7, 7, 7, 8, 7, 7, 7, 8, 8])

In [70]:
test_df['labels'] = gs.predict(test_df.values)

In [71]:
test_df.head(2)

Unnamed: 0,total_num_voters,longvity_poor,longvity_weak,longvity_moderate,longvity_long_lasting,longvity_very_long_lasting,sillage_soft,sillage_moderate,sillage_heavy,sillage_enormous,have_it,had_it,want_it,my_signature,love_it,like_it,dislike_it,spring,summer,fall,day,night,salty,smoky,Unnamed: 35,coconut,white floral,fresh spicy,white wine,vanilla,warm spicy,whiskey,ginger,conifer,caramel,woody,vinyl,tropical,musk,fruity,coca cola,powdery,floral,aromatic,milky,yellow floral,nutty,cacao,aquatic,camphor,red fruits,sweet,oud,almond,animalic,...,designer_Lancome,designer_Marc Jacobs,designer_Narciso Rodriguez,designer_Nina Ricci,designer_Paco Rabanne,designer_Prada,designer_Ralph Lauren,designer_Salvador Dali,designer_Serge Lutens,designer_Tom Ford,designer_Valentino,designer_Van Cleef & Arpels,designer_Versace,designer_Viktor&Rolf,designer_Yves Rocher,designer_Yves Saint Laurent,group_Aromatic,group_Aromatic Aquatic,group_Aromatic Fougere,group_Aromatic Fruity,group_Aromatic Green,group_Aromatic Spicy,group_Chypre,group_Chypre Floral,group_Chypre Fruity,group_Citrus,group_Citrus Aromatic,group_Citrus Gourmand,group_Floral,group_Floral Aldehyde,group_Floral Aquatic,group_Floral Fruity,group_Floral Fruity Gourmand,group_Floral Green,group_Floral Woody Musk,group_Leather,group_Oriental,group_Oriental Floral,group_Oriental Fougere,group_Oriental Spicy,group_Oriental Vanilla,group_Oriental Woody,group_Woody,group_Woody Aquatic,group_Woody Aromatic,group_Woody Chypre,group_Woody Floral Musk,group_Woody Spicy,group_not found,gender_men,gender_unknown,gender_women,gender_women and men,perfume_id,labels
307,-1.0,0,0,0,0,0,1,0,0,2,-1,-1,11,-1,1,1,1,1,100,100,100,100,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,95.0,0.0,0.0,0.0,126.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,84.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,307,7
460,-1.0,0,0,0,0,0,1,0,1,0,-1,-1,24,-1,1,1,1,1,1,1,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,95.0,0.0,0.0,0.0,127.0,0.0,130.0,117.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,460,7


<span style='color:navy'> Reversing labels back to floats of reviews. </span>

In [83]:
{key: value for key, value in zip(sorted(list(df['ratings_classes'].value_counts().index)), 
                                             range(1,10))}

{1.0: 1, 1.5: 2, 2.0: 3, 2.5: 4, 3.0: 5, 3.5: 6, 4.0: 7, 4.5: 8, 5.0: 9}

In [84]:
# Earlier, we had this dictionary to connect the two 
{key: value for key, value in zip(sorted(list(df['ratings_classes'].value_counts().index)), 
                                             range(1,10))}

# for easier reading
for s in {key: value for key, value in zip(sorted(list(df['ratings_classes'].value_counts().index)), 
                                             range(1,10))}.items():
    print(s)

(1.0, 1)
(1.5, 2)
(2.0, 3)
(2.5, 4)
(3.0, 5)
(3.5, 6)
(4.0, 7)
(4.5, 8)
(5.0, 9)


In [94]:
# reversing the keys and values to assign it back to our predictions
temp_values = {key: value for key, value in zip(sorted(list(df['ratings_classes'].value_counts().index)), 
                                             range(1,10))}.keys()

temp_keys = {key: value for key, value in zip(sorted(list(df['ratings_classes'].value_counts().index)), 
                                             range(1,10))}.values()

In [95]:
reverse_dict = {key : value for key, value in zip(temp_keys, temp_values)}

In [97]:
for s in reverse_dict.items():
    print(s)

(1, 1.0)
(2, 1.5)
(3, 2.0)
(4, 2.5)
(5, 3.0)
(6, 3.5)
(7, 4.0)
(8, 4.5)
(9, 5.0)


In [98]:
test_df['predicted_ratings'] = test_df['labels'].map(reverse_dict)

In [101]:
test_df['predicted_ratings']

307     4.0
460     4.0
730     4.0
805     4.0
1083    4.0
1104    4.0
1119    4.0
1263    4.0
1338    4.0
1384    4.5
1501    4.0
1511    4.0
1614    4.5
1684    4.5
1695    4.0
2141    4.0
2571    4.0
2659    4.5
2708    4.5
2803    4.0
2815    4.0
2925    4.0
3095    4.5
3138    4.5
3197    4.5
3325    4.0
3370    4.0
3467    4.0
3500    4.5
3598    4.0
3653    4.0
3671    4.0
3777    4.5
3878    4.5
Name: predicted_ratings, dtype: float64

In [111]:
df['ratings_classes'].value_counts()

4.0    1916
4.5    1633
3.5     496
5.0     268
3.0      91
2.5      26
1.0       9
2.0       4
1.5       1
Name: ratings_classes, dtype: int64

<span style='color:navy'> It makes sense that we had all 4.0 or 4.5 because these are the most popular category of ratings. It is trusted well enough though, because we have 0.937 accuracy model.  </span>

In [114]:
# reminder of base line accuracy
df['ratings_classes'].value_counts(normalize = True).round(2)

4.0    0.43
4.5    0.37
3.5    0.11
5.0    0.06
3.0    0.02
2.5    0.01
1.0    0.00
2.0    0.00
1.5    0.00
Name: ratings_classes, dtype: float64

In [118]:
test_df.head()

Unnamed: 0,total_num_voters,longvity_poor,longvity_weak,longvity_moderate,longvity_long_lasting,longvity_very_long_lasting,sillage_soft,sillage_moderate,sillage_heavy,sillage_enormous,have_it,had_it,want_it,my_signature,love_it,like_it,dislike_it,spring,summer,fall,day,night,salty,smoky,Unnamed: 35,coconut,white floral,fresh spicy,white wine,vanilla,warm spicy,whiskey,ginger,conifer,caramel,woody,vinyl,tropical,musk,fruity,coca cola,powdery,floral,aromatic,milky,yellow floral,nutty,cacao,aquatic,camphor,red fruits,sweet,oud,almond,animalic,...,designer_Marc Jacobs,designer_Narciso Rodriguez,designer_Nina Ricci,designer_Paco Rabanne,designer_Prada,designer_Ralph Lauren,designer_Salvador Dali,designer_Serge Lutens,designer_Tom Ford,designer_Valentino,designer_Van Cleef & Arpels,designer_Versace,designer_Viktor&Rolf,designer_Yves Rocher,designer_Yves Saint Laurent,group_Aromatic,group_Aromatic Aquatic,group_Aromatic Fougere,group_Aromatic Fruity,group_Aromatic Green,group_Aromatic Spicy,group_Chypre,group_Chypre Floral,group_Chypre Fruity,group_Citrus,group_Citrus Aromatic,group_Citrus Gourmand,group_Floral,group_Floral Aldehyde,group_Floral Aquatic,group_Floral Fruity,group_Floral Fruity Gourmand,group_Floral Green,group_Floral Woody Musk,group_Leather,group_Oriental,group_Oriental Floral,group_Oriental Fougere,group_Oriental Spicy,group_Oriental Vanilla,group_Oriental Woody,group_Woody,group_Woody Aquatic,group_Woody Aromatic,group_Woody Chypre,group_Woody Floral Musk,group_Woody Spicy,group_not found,gender_men,gender_unknown,gender_women,gender_women and men,perfume_id,labels,predicted_ratings
307,-1.0,0,0,0,0,0,1,0,0,2,-1,-1,11,-1,1,1,1,1,100,100,100,100,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,95.0,0.0,0.0,0.0,126.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,84.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,307,7,4.0
460,-1.0,0,0,0,0,0,1,0,1,0,-1,-1,24,-1,1,1,1,1,1,1,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,95.0,0.0,0.0,0.0,127.0,0.0,130.0,117.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,460,7,4.0
730,-1.0,0,1,0,0,0,1,0,0,1,-1,-1,34,-1,1,1,1,50,100,1,100,100,0.0,0.0,0.0,0.0,104.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,111.0,0.0,0.0,0.0,108.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,730,7,4.0
805,-1.0,0,0,0,0,0,0,0,0,0,-1,-1,4,-1,1,1,1,100,1,1,100,1,0.0,0.0,0.0,0.0,0.0,38.0,0.0,0.0,50.0,0.0,0.0,0.0,0.0,130.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,63.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,805,7,4.0
1083,-1.0,0,0,0,0,0,0,0,2,1,-1,-1,7,-1,1,1,1,1,1,1,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,65.0,0.0,0.0,0.0,0.0,0.0,130.0,0.0,0.0,0.0,0.0,0.0,94.0,0.0,98.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1083,7,4.0
