In [99]:
# Dependencies
import pandas as pd

In [100]:
# Load csv file
perfume_dataset = pd.read_csv("perfume.csv")

In [101]:
# Let us look at the dataset
perfume_dataset.head(4)

Unnamed: 0,brand,title,date,accords,rating_score,votes,longevity_poor,longevity_weak,longevity_moderate,longevity_long,...,notes_12,notes_13,notes_14,notes_15,notes_16,notes_17,notes_18,notes_19,notes_20,gender
0,The-Spirit-of-Dubai,Aamal The Spirit of Dubai for women and men,2017,"woody,earthy,animalic,amber,musky,balsamic",5.0,3,0,0,0,0,...,Base3Moss,Base4Agarwood (Oud),Base5Indian Oud,,,,,,,women
1,Ajmal,Aatifa Ajmal for women and men,2014,"fresh spicy,woody,musky,rose,amber",4.2,10,1,0,0,0,...,,,,,,,,,,women
2,Al-Jazeera-Perfumes,AA Al-Jazeera Perfumes for women and men,0,"rose,woody,musky,oud,fruity",0.0,0,0,0,0,0,...,,,,,,,,,,women
3,Art-of-Scent-Swiss-Perfumes,aarewasser Art of Scent - Swiss Perfumes for w...,2010,"white floral,green,ozonic,fresh,animalic",0.0,1,0,0,0,0,...,,,,,,,,,,women


In [102]:
# We want to only keep the rows that are of interest in a smaller dataframe. In this case they are
# brand, title, date, accords, rating_score, votes, clswinter, clsspring, clssummer, clsautumn, clsday, clsnight, and gender
frag_reduced = perfume_dataset[["brand","title", "date", "accords", "rating_score", "votes", "clswinter",
                                "clsspring", "clssummer", "clsautumn", "clsday", "clsnight", "gender"]]
frag_reduced.head()

Unnamed: 0,brand,title,date,accords,rating_score,votes,clswinter,clsspring,clssummer,clsautumn,clsday,clsnight,gender
0,The-Spirit-of-Dubai,Aamal The Spirit of Dubai for women and men,2017,"woody,earthy,animalic,amber,musky,balsamic",5.0,3,50,50,50,50,50,50,women
1,Ajmal,Aatifa Ajmal for women and men,2014,"fresh spicy,woody,musky,rose,amber",4.2,10,100,60,60,60,80,80,women
2,Al-Jazeera-Perfumes,AA Al-Jazeera Perfumes for women and men,0,"rose,woody,musky,oud,fruity",0.0,0,1,1,1,1,1,1,women
3,Art-of-Scent-Swiss-Perfumes,aarewasser Art of Scent - Swiss Perfumes for w...,2010,"white floral,green,ozonic,fresh,animalic",0.0,1,1,100,100,100,100,100,women
4,Hamidi-Oud-Perfumes,Aaliyah Hamidi Oud & Perfumes for women and men,0,"woody,warm spicy,amber,balsamic,musky",0.0,2,100,100,100,100,100,100,women


In [103]:
# First we would like to see how many fragrances there are for each gender listed
fragrance_count = frag_reduced["gender"].value_counts()
fragrance_count

women     39761
man         850
unisex      122
Name: gender, dtype: int64

In [104]:
# let us create a dataframe for these three gender counts
women_count = fragrance_count["women"]
man_count = fragrance_count["man"]
unisex = fragrance_count["unisex"]
Fragrance_Gender_Counts = pd.DataFrame({"Total Women Fragrances": [women_count],
              "Total Men Fragrances": [man_count],
             "Total Unisex Fragrances": [unisex]
             })
Fragrance_Gender_Counts

Unnamed: 0,Total Women Fragrances,Total Men Fragrances,Total Unisex Fragrances
0,39761,850,122


In [105]:
### we will export this as a csv
Fragrance_Gender_Counts.to_csv("Fragrance_Gender_Counts.csv", encoding = "utf-8")

In [106]:
# Now, we will do score analysis, and see what are the averages given for each category
total_rating_avg = frag_reduced["rating_score"].mean()
total_votes_avg = frag_reduced["votes"].mean()
total_winter_avg = frag_reduced["clswinter"].mean()
total_spring_avg = frag_reduced["clsspring"].mean()
total_summer_avg = frag_reduced["clssummer"].mean()
total_fall_avg = frag_reduced["clsautumn"].mean()
total_day_avg = frag_reduced["clsday"].mean()
total_night_avg = frag_reduced["clsnight"].mean()

In [107]:
# we will put the average results in a dataframe
Ratings_Averages = pd.DataFrame({"Total Votes Average": [total_votes_avg],
              "Total Rating Average (0-5)": [total_rating_avg],
              "Total Rating Winter Average (0-100)": [total_winter_avg],
              "Total Rating Spring Average (0-100)": [total_spring_avg],
              "Total Rating Summer Average (0-100)": [total_summer_avg],\
              "Total Rating Fall Average (0-100)": [total_fall_avg],
              "Total Rating Day Average (0-100)": [total_day_avg],
              "Total Rating Night Average (0-100)": [total_night_avg]
}).round(2)
Ratings_Averages

Unnamed: 0,Total Votes Average,Total Rating Average (0-5),Total Rating Winter Average (0-100),Total Rating Spring Average (0-100),Total Rating Summer Average (0-100),Total Rating Fall Average (0-100),Total Rating Day Average (0-100),Total Rating Night Average (0-100)
0,86.44,3.19,25.36,35.09,27.63,33.19,47.21,31.7


In [108]:
### we will export this as a csv
Ratings_Averages.to_csv("Fragrance_Ratings_Averages.csv", encoding = "utf-8")

In [109]:
# First we want to see the score distribution overall for fragrances from 0 to 5
# Establish the bins 0 to 1, 1 to 2, 2 to 3, 3 to 4, 4 to 5
score_bins_5 = [0, 1, 2, 3, 4, 5]
group_names_5 = ["< 1", "1 to 2", "2 to 3", "3 to 4", "4 to 5"]

In [110]:
# Now we will apply the segmentation to the distribution
Total_Rating_Distribution = pd.cut(frag_reduced["rating_score"], score_bins_5, labels=group_names_5).value_counts()
Total_Rating_Distribution_DF = pd.DataFrame({"Overall Fragrances With Rating": Total_Rating_Distribution}).sort_index()
Total_Rating_Distribution_DF

Unnamed: 0,Overall Fragrances With Rating
< 1,784
1 to 2,250
2 to 3,2796
3 to 4,20477
4 to 5,17598


In [111]:
### we will export this as a csv
Total_Rating_Distribution_DF.to_csv("Total_Rating_Distribution.csv", encoding = "utf-8", index = True, header = True)

In [112]:
# Now we want to see the score distribution of fragrances by spring, summer, fall, day, and night
# Establish the bins 0 to 10, 10 to 20, 20 to 30, 30 to 40, 40 to 50, 50 to 60, 60 to 70, 70 to 80, 80 to 90, 90 to 100
score_bins = [0,10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
group_names = ["< 10", "10 to 20", "20 to 30", "30 to 40", "40 to 50", "50 to 60", "60 to 70", "70 to 80", "80 to 90","90 to 100"]

In [113]:
# Now we will apply the segmentation to the distribution for spring
Total__Spring_Distribution = pd.cut(frag_reduced["clsspring"], score_bins, labels=group_names).value_counts()
Total_Spring_Distribution_DF = pd.DataFrame({"Spring Fragrances With Rating":Total__Spring_Distribution}).sort_index()
Total_Spring_Distribution_DF

Unnamed: 0,Spring Fragrances With Rating
< 10,16878
10 to 20,4262
20 to 30,4590
30 to 40,5541
40 to 50,6527
50 to 60,2786
60 to 70,2634
70 to 80,1547
80 to 90,631
90 to 100,5816


In [114]:
# Now we will apply the segmentation to the distribution for summer
Total_Distribution_Summer = pd.cut(frag_reduced["clssummer"], score_bins, labels=group_names).value_counts()
Total_Summer_Distribution_DF = pd.DataFrame({"Summer Fragrances With Rating":Total_Distribution_Summer}).sort_index()
Total_Summer_Distribution_DF

Unnamed: 0,Summer Fragrances With Rating
< 10,22456
10 to 20,5255
20 to 30,4171
30 to 40,4400
40 to 50,4936
50 to 60,1939
60 to 70,2148
70 to 80,1371
80 to 90,661
90 to 100,3875


In [115]:
# Now we will apply the segmentation to the distribution for fall
Total_Distribution_Fall = pd.cut(frag_reduced["clsautumn"], score_bins, labels=group_names).value_counts()
Total_Fall_Distribution_DF = pd.DataFrame({"Fall Fragrances With Rating":Total_Distribution_Fall}).sort_index()
Total_Fall_Distribution_DF

Unnamed: 0,Fall Fragrances With Rating
< 10,17676
10 to 20,5121
20 to 30,4780
30 to 40,5689
40 to 50,6262
50 to 60,2190
60 to 70,2044
70 to 80,1177
80 to 90,432
90 to 100,5841


In [116]:
# Now we will apply the segmentation to the distribution for winter
Total_Distribution_Winter = pd.cut(frag_reduced["clswinter"], score_bins, labels=group_names).value_counts()
Total_Winter_Distribution_DF = pd.DataFrame({"Winter Fragrances With Rating":Total_Distribution_Winter}).sort_index()
Total_Winter_Distribution_DF

Unnamed: 0,Winter Fragrances With Rating
< 10,24553
10 to 20,5260
20 to 30,4161
30 to 40,4330
40 to 50,4501
50 to 60,1596
60 to 70,1445
70 to 80,814
80 to 90,303
90 to 100,4249


In [117]:
# Now we will apply the segmentation to the distribution for Day
Total_Distribution_Day = pd.cut(frag_reduced["clsday"], score_bins, labels=group_names).value_counts()
Total_Day_Distribution_DF = pd.DataFrame({"Day Fragrances With Rating":Total_Distribution_Day}).sort_index()
Total_Day_Distribution_DF

Unnamed: 0,Day Fragrances With Rating
< 10,13105
10 to 20,1682
20 to 30,2937
30 to 40,4408
40 to 50,6475
50 to 60,3907
60 to 70,4359
70 to 80,3086
80 to 90,1675
90 to 100,9578


In [118]:
# Now we will apply the segmentation to the distribution for Night
Total_Distribution_Night = pd.cut(frag_reduced["clsday"], score_bins, labels=group_names).value_counts()
Total_Night_Distribution_DF = pd.DataFrame({"Night Fragrances With Rating":Total_Distribution_Night}).sort_index()
Total_Night_Distribution_DF

Unnamed: 0,Night Fragrances With Rating
< 10,13105
10 to 20,1682
20 to 30,2937
30 to 40,4408
40 to 50,6475
50 to 60,3907
60 to 70,4359
70 to 80,3086
80 to 90,1675
90 to 100,9578


In [119]:
# Now we will merge all the dataframes for the rating distributions into one with inner join by index
Merge_Day_Night = pd.merge(Total_Night_Distribution_DF, Total_Day_Distribution_DF, left_index=True, right_index=True)
Merge_Winter_Fall = pd.merge(Total_Winter_Distribution_DF, Total_Fall_Distribution_DF, left_index=True, right_index=True)
Merge_Summer_spring = pd.merge(Total_Summer_Distribution_DF, Total_Spring_Distribution_DF, left_index=True, right_index=True)
Merge_Day_Night_Winter_Fall = pd.merge(Merge_Day_Night, Merge_Winter_Fall, left_index=True, right_index=True)
All_Ratings_Merged = pd.merge(Merge_Day_Night_Winter_Fall, Merge_Summer_spring, left_index=True, right_index=True)

In [120]:
All_Ratings_Merged

Unnamed: 0,Night Fragrances With Rating,Day Fragrances With Rating,Winter Fragrances With Rating,Fall Fragrances With Rating,Summer Fragrances With Rating,Spring Fragrances With Rating
< 10,13105,13105,24553,17676,22456,16878
10 to 20,1682,1682,5260,5121,5255,4262
20 to 30,2937,2937,4161,4780,4171,4590
30 to 40,4408,4408,4330,5689,4400,5541
40 to 50,6475,6475,4501,6262,4936,6527
50 to 60,3907,3907,1596,2190,1939,2786
60 to 70,4359,4359,1445,2044,2148,2634
70 to 80,3086,3086,814,1177,1371,1547
80 to 90,1675,1675,303,432,661,631
90 to 100,9578,9578,4249,5841,3875,5816


In [121]:
### we will export this as a csv
All_Ratings_Merged.to_csv("All_Ratings_Merged.csv", encoding = "utf-8", index = True, header = True)

In [122]:
# Now, we will take a look at the top fragrances with the best score for each segment
# Let us look at the top fragrances rated overall based on score and votes
top_all = frag_reduced.sort_values(by=['rating_score', 'votes'], ascending=False)
top_all

Unnamed: 0,brand,title,date,accords,rating_score,votes,clswinter,clsspring,clssummer,clsautumn,clsday,clsnight,gender
16187,Guerlain,Shalimar Extrait Guerlain for women,1925,"floral,citrus,aromatic,powdery,vanilla,white f...",5.0,18,20,6,6,20,13,20,women
26361,Chanel,Coromandel Parfum Chanel for women and men,2019,"balsamic,citrus,amber,warm spicy,patchouli",5.0,15,37,37,37,50,50,50,
14201,Sultan-Pasha-Attars,Equilibre Sultan Pasha Attars for women and men,2018,"citrus,white floral,amber,powdery,woody",5.0,14,25,33,33,25,25,25,women
27247,Royal-Crown,Oud Jasmine Royal Crown for women and men,2016,"white floral,animalic,amber,warm spicy,woody,t...",5.0,14,25,41,25,25,33,25,women
5893,Fragrance-Du-Bois,Oud Intense Fragrance Du Bois for women and men,2017,"oud,fresh spicy,balsamic,woody",5.0,13,20,10,10,30,10,30,women
...,...,...,...,...,...,...,...,...,...,...,...,...,...
51188,Prince-Obolenski,Prince Obolenski Prince Obolenski for women,1960,"citrus,vanilla,rose,white floral,amber",0.0,0,1,1,1,1,1,1,women
51189,Novaya-Zarya,Znakomiy Obraz Novaya Zarya for women,1970,"green,floral,woody",0.0,0,1,1,1,1,1,1,women
51191,Natural-Scents,Vanilla Obsession Natural Scents for women,2015,"vanilla,powdery,sweet,soft spicy,woody",0.0,0,1,1,1,1,1,1,women
51205,Swiss-Arabian,Wajd Swiss Arabian for women and men,0,"fruity,sweet,balsamic,rose,patchouli,amber",0.0,0,1,1,1,1,1,1,women


In [123]:
# we will keep only the brand, title, date, and gender in a simplified dataframe
top_simple = top_all[["brand", "title", "date", "gender"]]
top_simple

Unnamed: 0,brand,title,date,gender
16187,Guerlain,Shalimar Extrait Guerlain for women,1925,women
26361,Chanel,Coromandel Parfum Chanel for women and men,2019,
14201,Sultan-Pasha-Attars,Equilibre Sultan Pasha Attars for women and men,2018,women
27247,Royal-Crown,Oud Jasmine Royal Crown for women and men,2016,women
5893,Fragrance-Du-Bois,Oud Intense Fragrance Du Bois for women and men,2017,women
...,...,...,...,...
51188,Prince-Obolenski,Prince Obolenski Prince Obolenski for women,1960,women
51189,Novaya-Zarya,Znakomiy Obraz Novaya Zarya for women,1970,women
51191,Natural-Scents,Vanilla Obsession Natural Scents for women,2015,women
51205,Swiss-Arabian,Wajd Swiss Arabian for women and men,0,women


In [124]:
### we will export this as a csv
top_simple.to_csv("Top_all.csv", encoding = "utf-8", index = True, header = True)

In [125]:
# Let us look at the top 5 fragrances for winter based on score and votes
top_winter = frag_reduced.sort_values(by=['clswinter', 'votes'], ascending=False)
top_winter

Unnamed: 0,brand,title,date,accords,rating_score,votes,clswinter,clsspring,clssummer,clsautumn,clsday,clsnight,gender
15116,Loree-Rodkin,Gothic II Loree Rodkin for women and men,2013,"warm spicy,balsamic,vanilla,patchouli,woody,wh...",4.25,90,100,88,77,91,85,94,women
13105,Armaf,Derby Club House Ascot Armaf for men,0,"amber,woody,tobacco,warm spicy,musky",4.14,66,100,64,10,92,67,89,
10113,Game-of-Thrones,Fire and Blood Game of Thrones for women,2012,"balsamic,warm spicy,powdery,smoky,leather,ambe...",4.44,62,100,5,5,13,10,45,women
1428,Abercrombie-Fitch,Perfume No.1 Abercrombie & Fitch for women,2011,"woody,citrus,balsamic,aromatic,powdery,floral,...",3.72,56,100,35,25,75,65,95,women
16102,24,24 Elixir Gold 24 for women and men,2015,"woody,honey,sweet,coconut,warm spicy,vanilla,P...",3.60,54,100,10,10,70,20,90,women
...,...,...,...,...,...,...,...,...,...,...,...,...,...
51188,Prince-Obolenski,Prince Obolenski Prince Obolenski for women,1960,"citrus,vanilla,rose,white floral,amber",0.00,0,1,1,1,1,1,1,women
51189,Novaya-Zarya,Znakomiy Obraz Novaya Zarya for women,1970,"green,floral,woody",0.00,0,1,1,1,1,1,1,women
51191,Natural-Scents,Vanilla Obsession Natural Scents for women,2015,"vanilla,powdery,sweet,soft spicy,woody",0.00,0,1,1,1,1,1,1,women
51205,Swiss-Arabian,Wajd Swiss Arabian for women and men,0,"fruity,sweet,balsamic,rose,patchouli,amber",0.00,0,1,1,1,1,1,1,women


In [126]:
# we will keep only the brand, title, date, and gender in a simplified dataframe
top_winter_simple = top_winter[["brand", "title", "date", "gender"]]
top_winter_simple

Unnamed: 0,brand,title,date,gender
15116,Loree-Rodkin,Gothic II Loree Rodkin for women and men,2013,women
13105,Armaf,Derby Club House Ascot Armaf for men,0,
10113,Game-of-Thrones,Fire and Blood Game of Thrones for women,2012,women
1428,Abercrombie-Fitch,Perfume No.1 Abercrombie & Fitch for women,2011,women
16102,24,24 Elixir Gold 24 for women and men,2015,women
...,...,...,...,...
51188,Prince-Obolenski,Prince Obolenski Prince Obolenski for women,1960,women
51189,Novaya-Zarya,Znakomiy Obraz Novaya Zarya for women,1970,women
51191,Natural-Scents,Vanilla Obsession Natural Scents for women,2015,women
51205,Swiss-Arabian,Wajd Swiss Arabian for women and men,0,women


In [127]:
### we will export this as a csv
top_winter_simple.to_csv("Top_winter.csv", encoding = "utf-8", index = True, header = True)

In [128]:
# Let us look at the top 5 fragrances for spring based on score and votes
top_spring = frag_reduced.sort_values(by=['clsspring', 'votes'], ascending=False)
top_spring

Unnamed: 0,brand,title,date,accords,rating_score,votes,clswinter,clsspring,clssummer,clsautumn,clsday,clsnight,gender
37149,Ferrari,Vetiver Essence Ferrari for men,2014,"warm spicy,woody,aromatic,earthy,green,Pictures",3.81,144,50,100,35,85,100,50,
44314,Jean-Paul-Gaultier,Le Male In The Navy Jean Paul Gaultier for men,2018,"aromatic,marine,vanilla,green,amber,aquatic,Pi...",3.31,136,6,100,97,47,89,52,
36031,English-Laundry,English Laundry Signature for him English Laun...,2010,"aromatic,woody,fresh spicy,citrus,musky,floral...",4.13,131,31,100,57,85,92,61,
45148,Oriflame,Innocent White Lilac Oriflame for women,2016,"floral,green,fresh,soft spicy,woody,Pictures",4.03,107,4,100,48,24,91,4,women
42456,Al-Haramain-Perfumes,Mystique Homme Al Haramain Perfumes for men,0,"white floral,aromatic,citrus,fresh spicy,sweet...",3.76,104,61,100,28,84,84,33,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
51188,Prince-Obolenski,Prince Obolenski Prince Obolenski for women,1960,"citrus,vanilla,rose,white floral,amber",0.00,0,1,1,1,1,1,1,women
51189,Novaya-Zarya,Znakomiy Obraz Novaya Zarya for women,1970,"green,floral,woody",0.00,0,1,1,1,1,1,1,women
51191,Natural-Scents,Vanilla Obsession Natural Scents for women,2015,"vanilla,powdery,sweet,soft spicy,woody",0.00,0,1,1,1,1,1,1,women
51205,Swiss-Arabian,Wajd Swiss Arabian for women and men,0,"fruity,sweet,balsamic,rose,patchouli,amber",0.00,0,1,1,1,1,1,1,women


In [129]:
# we will keep only the brand, title, date, and gender in a simplified dataframe
top_spring_simple = top_spring[["brand", "title", "date", "gender"]]
top_spring_simple

Unnamed: 0,brand,title,date,gender
37149,Ferrari,Vetiver Essence Ferrari for men,2014,
44314,Jean-Paul-Gaultier,Le Male In The Navy Jean Paul Gaultier for men,2018,
36031,English-Laundry,English Laundry Signature for him English Laun...,2010,
45148,Oriflame,Innocent White Lilac Oriflame for women,2016,women
42456,Al-Haramain-Perfumes,Mystique Homme Al Haramain Perfumes for men,0,
...,...,...,...,...
51188,Prince-Obolenski,Prince Obolenski Prince Obolenski for women,1960,women
51189,Novaya-Zarya,Znakomiy Obraz Novaya Zarya for women,1970,women
51191,Natural-Scents,Vanilla Obsession Natural Scents for women,2015,women
51205,Swiss-Arabian,Wajd Swiss Arabian for women and men,0,women


In [130]:
### we will export this as a csv
top_spring_simple.to_csv("Top_spring.csv", encoding = "utf-8", index = True, header = True)

In [131]:
# Let us look at the top 5 fragrances for summer based on score and votes
top_summer = frag_reduced.sort_values(by=['clssummer', 'votes'], ascending=False)
top_summer

Unnamed: 0,brand,title,date,accords,rating_score,votes,clswinter,clsspring,clssummer,clsautumn,clsday,clsnight,gender
26110,Bond-No-9,Coney Island Bond No 9 for women and men,2007,"citrus,aromatic,fruity,woody,aquatic,sweet,Pic...",3.35,525,4,40,100,5,98,19,women
11698,Paco-Rabanne,Invictus Aqua (2018) Paco Rabanne for men,2018,"marine,woody,powdery,citrus,aromatic,ozonic,Vi...",3.66,448,9,75,100,20,93,39,
984,Escada,Sorbetto Rosso Escada for women,2018,"aquatic,ozonic,fruity,sweet,fresh,Pictures",3.61,291,6,40,100,11,95,13,women
25378,Calvin-Klein,CK One Summer 2011 Calvin Klein for women and men,2011,"citrus,aromatic,marine,fruity,fresh,Pictures",4.01,269,1,25,100,1,83,5,women
39138,Moschino,Moschino Forever Sailing Moschino for men,2013,"aromatic,fresh spicy,citrus,green,woody,Pictures",3.88,252,9,66,100,27,99,28,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
51188,Prince-Obolenski,Prince Obolenski Prince Obolenski for women,1960,"citrus,vanilla,rose,white floral,amber",0.00,0,1,1,1,1,1,1,women
51189,Novaya-Zarya,Znakomiy Obraz Novaya Zarya for women,1970,"green,floral,woody",0.00,0,1,1,1,1,1,1,women
51191,Natural-Scents,Vanilla Obsession Natural Scents for women,2015,"vanilla,powdery,sweet,soft spicy,woody",0.00,0,1,1,1,1,1,1,women
51205,Swiss-Arabian,Wajd Swiss Arabian for women and men,0,"fruity,sweet,balsamic,rose,patchouli,amber",0.00,0,1,1,1,1,1,1,women


In [132]:
# we will keep only the brand, title, date, and gender in a simplified dataframe
top_summer_simple = top_summer[["brand", "title", "date", "gender"]]
top_summer_simple

Unnamed: 0,brand,title,date,gender
26110,Bond-No-9,Coney Island Bond No 9 for women and men,2007,women
11698,Paco-Rabanne,Invictus Aqua (2018) Paco Rabanne for men,2018,
984,Escada,Sorbetto Rosso Escada for women,2018,women
25378,Calvin-Klein,CK One Summer 2011 Calvin Klein for women and men,2011,women
39138,Moschino,Moschino Forever Sailing Moschino for men,2013,
...,...,...,...,...
51188,Prince-Obolenski,Prince Obolenski Prince Obolenski for women,1960,women
51189,Novaya-Zarya,Znakomiy Obraz Novaya Zarya for women,1970,women
51191,Natural-Scents,Vanilla Obsession Natural Scents for women,2015,women
51205,Swiss-Arabian,Wajd Swiss Arabian for women and men,0,women


In [133]:
### we will export this as a csv
top_summer_simple.to_csv("Top_summer.csv", encoding = "utf-8", index = True, header = True)

In [134]:
# Let us look at the top 5 fragrances for autumn based on score and votes
top_autumn = frag_reduced.sort_values(by=['clsautumn', 'votes'], ascending=False)
top_autumn

Unnamed: 0,brand,title,date,accords,rating_score,votes,clswinter,clsspring,clssummer,clsautumn,clsday,clsnight,gender
41376,Al-Rehab,Golden Al-Rehab for women and men,0,"amber,floral,vanilla,caramel,sweet",3.44,114,96,18,18,100,57,93,women
36861,L-Erbolario,Indaco L'Erbolario for women and men,2017,"warm spicy,aromatic,balsamic,cacao,woody,sweet...",3.29,97,83,16,1,100,44,69,women
6977,Jaguar,Excellence Jaguar for men,2012,"powdery,citrus,sweet,vanilla,amber,floral,Pict...",3.91,96,82,60,42,100,68,88,
4799,Parfums-de-Marly,Akaster Parfums de Marly for women and men,2015,"aromatic,rose,fresh spicy,oud,woody",3.44,74,88,44,40,100,76,96,women
25482,Pinaud-Clubman,Clubman Special Reserve Pinaud Clubman for men,0,"leather,balsamic,animalic,floral,green",4.29,72,89,50,21,100,81,68,man
...,...,...,...,...,...,...,...,...,...,...,...,...,...
51188,Prince-Obolenski,Prince Obolenski Prince Obolenski for women,1960,"citrus,vanilla,rose,white floral,amber",0.00,0,1,1,1,1,1,1,women
51189,Novaya-Zarya,Znakomiy Obraz Novaya Zarya for women,1970,"green,floral,woody",0.00,0,1,1,1,1,1,1,women
51191,Natural-Scents,Vanilla Obsession Natural Scents for women,2015,"vanilla,powdery,sweet,soft spicy,woody",0.00,0,1,1,1,1,1,1,women
51205,Swiss-Arabian,Wajd Swiss Arabian for women and men,0,"fruity,sweet,balsamic,rose,patchouli,amber",0.00,0,1,1,1,1,1,1,women


In [135]:
# we will keep only the brand, title, date, and gender in a simplified dataframe
top_autumn_simple = top_autumn[["brand", "title", "date", "gender"]]
top_autumn_simple

Unnamed: 0,brand,title,date,gender
41376,Al-Rehab,Golden Al-Rehab for women and men,0,women
36861,L-Erbolario,Indaco L'Erbolario for women and men,2017,women
6977,Jaguar,Excellence Jaguar for men,2012,
4799,Parfums-de-Marly,Akaster Parfums de Marly for women and men,2015,women
25482,Pinaud-Clubman,Clubman Special Reserve Pinaud Clubman for men,0,man
...,...,...,...,...
51188,Prince-Obolenski,Prince Obolenski Prince Obolenski for women,1960,women
51189,Novaya-Zarya,Znakomiy Obraz Novaya Zarya for women,1970,women
51191,Natural-Scents,Vanilla Obsession Natural Scents for women,2015,women
51205,Swiss-Arabian,Wajd Swiss Arabian for women and men,0,women


In [136]:
### we will export this as a csv
top_autumn_simple.to_csv("Top_autumn.csv", encoding = "utf-8", index = True, header = True)

In [137]:
# Let us look at the top 5 fragrances for day based on score and votes
top_day = frag_reduced.sort_values(by=['clsday', 'votes'], ascending=False)
top_day

Unnamed: 0,brand,title,date,accords,rating_score,votes,clswinter,clsspring,clssummer,clsautumn,clsday,clsnight,gender
37530,Escada,Rockin' Rio Escada for women,2005,"sweet,tropical,fruity,citrus,coconut,Pictures",3.87,684,8,19,97,6,100,22,women
7295,Jean-Paul-Gaultier,Le Beau Male Jean Paul Gaultier for men,2013,"aromatic,fresh spicy,green,musky,herbal,bitter...",3.18,484,14,75,94,21,100,24,
18103,Est-e-Lauder,Beyond Paradise For Men Estée Lauder for men,2004,"white floral,citrus,floral,fresh spicy,woody",3.61,383,12,72,70,17,100,30,
46321,Jesus-Del-Pozo,Quasar Jesus Del Pozo for men,1994,"aromatic,fresh spicy,fruity,woody,green,Pictures",3.99,328,16,79,80,24,100,26,
2025,Salvatore-Ferragamo,Acqua Essenziale Colonia Salvatore Ferragamo f...,2013,"aromatic,citrus,fresh spicy,aquatic,amber,Pict...",4.15,325,4,90,93,31,100,23,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
51188,Prince-Obolenski,Prince Obolenski Prince Obolenski for women,1960,"citrus,vanilla,rose,white floral,amber",0.00,0,1,1,1,1,1,1,women
51189,Novaya-Zarya,Znakomiy Obraz Novaya Zarya for women,1970,"green,floral,woody",0.00,0,1,1,1,1,1,1,women
51191,Natural-Scents,Vanilla Obsession Natural Scents for women,2015,"vanilla,powdery,sweet,soft spicy,woody",0.00,0,1,1,1,1,1,1,women
51205,Swiss-Arabian,Wajd Swiss Arabian for women and men,0,"fruity,sweet,balsamic,rose,patchouli,amber",0.00,0,1,1,1,1,1,1,women


In [138]:
# we will keep only the brand, title, date, and gender in a simplified dataframe
top_day_simple = top_day[["brand", "title", "date", "gender"]]
top_day_simple

Unnamed: 0,brand,title,date,gender
37530,Escada,Rockin' Rio Escada for women,2005,women
7295,Jean-Paul-Gaultier,Le Beau Male Jean Paul Gaultier for men,2013,
18103,Est-e-Lauder,Beyond Paradise For Men Estée Lauder for men,2004,
46321,Jesus-Del-Pozo,Quasar Jesus Del Pozo for men,1994,
2025,Salvatore-Ferragamo,Acqua Essenziale Colonia Salvatore Ferragamo f...,2013,
...,...,...,...,...
51188,Prince-Obolenski,Prince Obolenski Prince Obolenski for women,1960,women
51189,Novaya-Zarya,Znakomiy Obraz Novaya Zarya for women,1970,women
51191,Natural-Scents,Vanilla Obsession Natural Scents for women,2015,women
51205,Swiss-Arabian,Wajd Swiss Arabian for women and men,0,women


In [139]:
### we will export this as a csv
top_day_simple.to_csv("Top_day.csv", encoding = "utf-8", index = True, header = True)

In [140]:
# Let us look at the top 5 fragrances for night based on score and votes
top_night = frag_reduced.sort_values(by=['clsnight', 'votes'], ascending=False)
top_night

Unnamed: 0,brand,title,date,accords,rating_score,votes,clswinter,clsspring,clssummer,clsautumn,clsday,clsnight,gender
25337,Joop-,Joop! Homme Joop! for men,1989,"warm spicy,vanilla,white floral,cinnamon,balsa...",3.31,3783,88,20,14,51,34,100,
15948,Oriflame,Mirage Oriflame for women,2010,"balsamic,woody,aromatic,citrus,amber,powdery,P...",3.47,506,82,18,14,71,40,100,women
8936,Oriflame,Amethyst Fatale Oriflame for women,2007,"balsamic,woody,powdery,amber,warm spicy,rose,P...",3.65,225,80,20,12,53,40,100,women
17409,Kimora-Lee-Simmons,Baby Phat Seductive Goddess Kimora Lee Simmons...,2008,"patchouli,vanilla,powdery,leather,balsamic",3.54,177,64,8,11,56,26,100,women
15322,Avon,Treselle Mystique Avon for women,2011,"honey,white floral,fruity,sweet,rose",3.73,128,75,17,5,92,43,100,women
...,...,...,...,...,...,...,...,...,...,...,...,...,...
51188,Prince-Obolenski,Prince Obolenski Prince Obolenski for women,1960,"citrus,vanilla,rose,white floral,amber",0.00,0,1,1,1,1,1,1,women
51189,Novaya-Zarya,Znakomiy Obraz Novaya Zarya for women,1970,"green,floral,woody",0.00,0,1,1,1,1,1,1,women
51191,Natural-Scents,Vanilla Obsession Natural Scents for women,2015,"vanilla,powdery,sweet,soft spicy,woody",0.00,0,1,1,1,1,1,1,women
51205,Swiss-Arabian,Wajd Swiss Arabian for women and men,0,"fruity,sweet,balsamic,rose,patchouli,amber",0.00,0,1,1,1,1,1,1,women


In [141]:
# we will keep only the brand, title, date, and gender in a simplified dataframe
top_night_simple = top_night[["brand", "title", "date", "gender"]]
top_night_simple

Unnamed: 0,brand,title,date,gender
25337,Joop-,Joop! Homme Joop! for men,1989,
15948,Oriflame,Mirage Oriflame for women,2010,women
8936,Oriflame,Amethyst Fatale Oriflame for women,2007,women
17409,Kimora-Lee-Simmons,Baby Phat Seductive Goddess Kimora Lee Simmons...,2008,women
15322,Avon,Treselle Mystique Avon for women,2011,women
...,...,...,...,...
51188,Prince-Obolenski,Prince Obolenski Prince Obolenski for women,1960,women
51189,Novaya-Zarya,Znakomiy Obraz Novaya Zarya for women,1970,women
51191,Natural-Scents,Vanilla Obsession Natural Scents for women,2015,women
51205,Swiss-Arabian,Wajd Swiss Arabian for women and men,0,women


In [142]:
### we will export this as a csv
top_night_simple.to_csv("Top_night.csv", encoding = "utf-8", index = True, header = True)