In [134]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import re

In [135]:
df = pd.read_csv('ground_truth_season_color.csv', delimiter=';')  

In [136]:
#Function to calculate the nearest season classification 

df['Vector'] = df.apply(lambda row: np.array([row['R'], row['G'], row['B']]), axis=1)

def classify_season(rgb_point):
    distances = []
    
    # Convert point to vector 
    rgb_vector = np.array(rgb_point)
    
    #Iterate throung each line to calculate distance 
    for index, row in df.iterrows():
        # Calculate distance between point that will be classify 
        distance = np.linalg.norm(rgb_vector - row['Vector'])
        distances.append((row['season'], distance, row['R'], row['G'], row['B']))
    
    # rank by closest distance
    distances.sort(key=lambda x: x[1])
    
    # Take 2 nearest season
    closest_seasons = distances[:2]
    total_distance = sum(dist[1] for dist in closest_seasons)
    
    result = []
    for season, distance, r, g, b in closest_seasons:
        percentage = (1 - (distance / total_distance)) * 100
        result.append((season, percentage, r, g, b))
    
    return result


def hex_to_rgb(hex_string):
    # Return None for NaN values or empty strings
    if pd.isna(hex_string) or hex_string == '':
        return None
    # Remove '#' if present and convert hex string to RGB tuple
    hex_string = hex_string.lstrip('#')
    return tuple(int(hex_string[i:i+2], 16) for i in (0, 2, 4))

def determine_type(row):
    title = row['Product Title'].lower()
    url = row['Product URL'].lower()

    lip = ["lip", "matte", "vinyl"]
    foundation_cussion = ["foundation", "cussion"]
    powder = ["powder"]
    cheek = ["blush", "cheek"]
    eye = ["eye"]

    if any(keyword in title or keyword in url for keyword in lip):
        return "lip"
    elif any(keyword in title or keyword in url for keyword in foundation_cussion):
        return "foundation & cussion"
    elif any(keyword in title or keyword in url for keyword in powder) and not any(keyword in title or keyword in url for keyword in foundation_cussion):
        return "powder"
    elif any(keyword in title or keyword in url for keyword in cheek):
        return "cheek"
    elif any(keyword in title or keyword in url for keyword in eye):
        return "eye"
    else:
        return "face"

# Wardah 

In [137]:
path_wardah = "C:\Database\Capstone\get_data\wardah_all_products_colors.csv"
df_wardah = pd.read_csv(path_wardah)

# Convert df wardah hexcode to RGB 
df_wardah['Color RGB'] = df_wardah['Color HEX'].fillna('').apply(hex_to_rgb)

#Apply function classify_season to 'Color RGB'
df_wardah_filtered = df_wardah.dropna(subset=['Color RGB'])
classified_data = df_wardah_filtered['Color RGB'].apply(classify_season)

# Convert and concat to Dataframe 
classified_df = pd.DataFrame(classified_data.tolist(), columns=['Season 1', 'Season 2'])
df_wardah = pd.concat([df_wardah, classified_df], axis=1)

# Buat DataFrame sementara dari kolom 'Season 1'
temp_df1 = df_wardah['Season 1'].apply(lambda x: pd.Series(str(x).strip('()').split(', ')))
temp_df2 = df_wardah['Season 2'].apply(lambda x: pd.Series(str(x).strip('()').split(', ')))

# Ubah tipe data kolom menjadi yang sesuai
temp_df1[[1, 2, 3]] = temp_df1[[1, 2, 3]].astype(float)
temp_df2[[1, 2, 3]] = temp_df2[[1, 2, 3]].astype(float)

# Buat kolom 'S1 Closest Color' dengan menggabungkan nilai 'S1 R', 'S1 G', dan 'S1 B'
temp_df1['S1 Closest Color'] = temp_df1.apply(lambda row: tuple(row[2:]), axis=1)
temp_df2['S2 Closest Color'] = temp_df2.apply(lambda row: tuple(row[2:]), axis=1)

# Beri nama kolom
temp_df1.columns = ['Season 1 Name', 'Season 1 Percent', 'S1 R', 'S1 G', 'S1 B', 'S1 Closest Color']
temp_df2.columns = ['Season 2 Name', 'Season 2 Percent', 'S2 R', 'S2 G', 'S2 B', 'S2 Closest Color']

# Gabungkan DataFrame sementara dengan DataFrame utama
df_wardah = pd.concat([df_wardah, temp_df1[['Season 1 Name', 'Season 1 Percent', 'S1 Closest Color']], temp_df2[['Season 2 Name', 'Season 2 Percent', 'S2 Closest Color']]], axis=1)

# Drop kolom 'Season 1' dan 'Season 2' yang sudah tidak dibutuhkan lagi
df_wardah.drop(columns=['Season 1', 'Season 2'], inplace=True)

df_wardah['Type'] = df_wardah.apply(determine_type, axis=1)

In [138]:
df_wardah["Type"].value_counts()

Type
lip                     158
foundation & cussion     63
face                     38
powder                   18
cheek                     5
Name: count, dtype: int64

In [139]:
# Rearrange columns
df_wardah['Product Title'] = df_wardah['Product Title'].str.replace(' | Wardah Indonesia', '', regex=False)
df_wardah['Brand'] = "wardah"
new_column_order = ['Product Title', 'Brand', 'Type','Variant Name', 'Color HEX', 'Color RGB', 'Season 1 Name',	'Season 1 Percent','S1 Closest Color','Season 2 Name','Season 2 Percent','S2 Closest Color','Product URL']
df_wardah = df_wardah.reindex(columns=new_column_order)


In [140]:
df_wardah.to_csv('wardah.csv', index=False)

# Somethinc 

In [141]:
# Baca DataFrame dari file CSV df_somethinc
path_somethinc = "C:/Database/Capstone/get_data/somethinc_all_products_colors.csv"
df_somethinc = pd.read_csv(path_somethinc)

df_somethinc.rename(columns={ 'Product Name':'Product Title'}, inplace=True)

# Convert df_somethinc hexcode to RGB 
df_somethinc['Color RGB'] = df_somethinc['Color HEX'].fillna('').apply(hex_to_rgb)

# Apply function classify_season to 'Color RGB'
df_somethinc_filtered = df_somethinc.dropna(subset=['Color RGB'])
classified_data = df_somethinc_filtered['Color RGB'].apply(classify_season)

# Convert and concat to DataFrame 
classified_df = pd.DataFrame(classified_data.tolist(), columns=['Season 1', 'Season 2'])
df_somethinc = pd.concat([df_somethinc, classified_df], axis=1)

# Buat DataFrame sementara dari kolom 'Season 1'
temp_df1 = df_somethinc['Season 1'].apply(lambda x: pd.Series(str(x).strip('()').split(', ')))
temp_df2 = df_somethinc['Season 2'].apply(lambda x: pd.Series(str(x).strip('()').split(', ')))

# Ubah tipe data kolom menjadi yang sesuai
temp_df1[[1, 2, 3]] = temp_df1[[1, 2, 3]].astype(float)
temp_df2[[1, 2, 3]] = temp_df2[[1, 2, 3]].astype(float)

# Buat kolom 'S1 Closest Color' dengan menggabungkan nilai 'S1 R', 'S1 G', dan 'S1 B'
temp_df1['S1 Closest Color'] = temp_df1.apply(lambda row: tuple(row[2:]), axis=1)
temp_df2['S2 Closest Color'] = temp_df2.apply(lambda row: tuple(row[2:]), axis=1)

# Beri nama kolom
temp_df1.columns = ['Season 1 Name', 'Season 1 Percent', 'S1 R', 'S1 G', 'S1 B', 'S1 Closest Color']
temp_df2.columns = ['Season 2 Name', 'Season 2 Percent', 'S2 R', 'S2 G', 'S2 B', 'S2 Closest Color']

# Gabungkan DataFrame sementara dengan DataFrame utama
df_somethinc = pd.concat([df_somethinc, temp_df1[['Season 1 Name', 'Season 1 Percent', 'S1 Closest Color']], temp_df2[['Season 2 Name', 'Season 2 Percent', 'S2 Closest Color']]], axis=1)

# Drop kolom 'Season 1' dan 'Season 2' yang sudah tidak dibutuhkan lagi
df_somethinc.drop(columns=['Season 1', 'Season 2'], inplace=True)

df_somethinc['Type'] = df_somethinc.apply(determine_type, axis=1)


In [142]:
# Rearrange columns
df_somethinc['Brand'] = "somethinc"
new_column_order = ['Product Title', 'Brand', 'Type','Variant Name', 'Color HEX', 'Color RGB', 'Season 1 Name',	'Season 1 Percent','S1 Closest Color','Season 2 Name','Season 2 Percent','S2 Closest Color','Product URL']
df_somethinc = df_somethinc.reindex(columns=new_column_order)

In [143]:
df_somethinc

Unnamed: 0,Product Title,Brand,Type,Variant Name,Color HEX,Color RGB,Season 1 Name,Season 1 Percent,S1 Closest Color,Season 2 Name,Season 2 Percent,S2 Closest Color,Product URL
0,Copy Paste Tinted Sunscreen SPF 40 PA++++ 10ml,somethinc,face,PERLE,#efd4cb,"(239, 212, 203)",'spring clear',52.102569,"(255.0, 191.0, 218)",'autumn soft',47.897431,"(222.0, 193.0, 182)",https://somethinc.com/id/product/detail/copy-p...
1,Copy Paste Tinted Sunscreen SPF 40 PA++++ 10ml,somethinc,face,TIFFANY,#c99770,"(201, 151, 112)",'autumn warm',64.318333,"(217.0, 159.0, 113)",'autumn soft',35.681667,"(224.0, 134.0, 127)",https://somethinc.com/id/product/detail/copy-p...
2,Copy Paste Tinted Sunscreen SPF 40 PA++++ 10ml,somethinc,face,BUTTER,#e8cbb9,"(232, 203, 185)",'autumn soft',72.611527,"(222.0, 193.0, 182)",'spring light',27.388473,"(253.0, 171.0, 187)",https://somethinc.com/id/product/detail/copy-p...
3,Copy Paste Tinted Sunscreen SPF 40 PA++++ 10ml,somethinc,face,LINEN,#dcb79b,"(220, 183, 155)",'autumn soft',56.051725,"(222.0, 193.0, 182)",'spring light',43.948275,"(255.0, 176.0, 146)",https://somethinc.com/id/product/detail/copy-p...
4,Copy Paste Tinted Sunscreen SPF 40 PA++++ 10ml,somethinc,face,ALTER,#ddb090,"(221, 176, 144)",'spring light',51.092855,"(255.0, 176.0, 146)",'autumn warm',48.907145,"(217.0, 159.0, 113)",https://somethinc.com/id/product/detail/copy-p...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
192,OMBRELLA Lip Totem Tint NEW SHADES,somethinc,lip,Seoul,#c65326,"(198, 83, 38)",'autumn soft',58.356073,"(167.0, 85.0, 47)",'autumn warm',41.643927,"(241.0, 96.0, 32)",https://somethinc.com/id/product/detail/ombrel...
193,Mon Cherie COTTON LIPS - (Dusty Red Ombre Lips),somethinc,lip,Whale,#b56c70,"(181, 108, 112)",'summer light',55.937586,"(164.0, 104.0, 114)",'summer soft',44.062414,"(177.0, 89.0, 123)",https://somethinc.com/id/product/detail/mon-ch...
194,Mon Cherie COTTON LIPS - (Dusty Red Ombre Lips),somethinc,lip,Chérie,#974c56,"(151, 76, 86)",'summer cool ',50.000000,"(152.0, 42.0, 85)",'winter deep',50.000000,"(152.0, 42.0, 85)",https://somethinc.com/id/product/detail/mon-ch...
195,Somethinc x Sabrina - Fabric Lasting Tint,somethinc,face,Smart,#b56052,"(181, 96, 82)",'summer light',50.445827,"(164.0, 104.0, 114)",'autumn warm',49.554173,"(218.0, 103.0, 85)",https://somethinc.com/id/product/detail/someth...


In [144]:
df_somethinc["Type"].value_counts()

Type
face                    85
lip                     69
foundation & cussion    19
powder                  16
cheek                    8
Name: count, dtype: int64

In [145]:
df_somethinc.to_csv('somethinc.csv', index=False)

# Hanasui

In [146]:
# Baca DataFrame dari file CSV df_hanasui
path_hanasui = "C:/Database/Capstone/get_data/hanasui_all_products_colors.csv"
df_hanasui = pd.read_csv(path_hanasui)

# Ubah nama kolom 'Product Name' menjadi 'Product Title' di df_hanasui
df_hanasui.rename(columns={'Product Name': 'Product Title'}, inplace=True)

# Convert hexcode dari df_hanasui ke RGB 
df_hanasui['Color RGB'] = df_hanasui['Color HEX'].fillna('').apply(hex_to_rgb)

# Terapkan fungsi classify_season ke 'Color RGB'
df_hanasui_filtered = df_hanasui.dropna(subset=['Color RGB'])
classified_data = df_hanasui_filtered['Color RGB'].apply(classify_season)

# Konversi dan gabungkan ke DataFrame 
classified_df = pd.DataFrame(classified_data.tolist(), columns=['Season 1', 'Season 2'])
df_hanasui = pd.concat([df_hanasui, classified_df], axis=1)

# Buat DataFrame sementara dari kolom 'Season 1'
temp_df1 = df_hanasui['Season 1'].apply(lambda x: pd.Series(str(x).strip('()').split(', ')))
temp_df2 = df_hanasui['Season 2'].apply(lambda x: pd.Series(str(x).strip('()').split(', ')))

# Ubah tipe data kolom ke tipe yang sesuai
temp_df1[[1, 2, 3]] = temp_df1[[1, 2, 3]].astype(float)
temp_df2[[1, 2, 3]] = temp_df2[[1, 2, 3]].astype(float)

# Buat kolom 'S1 Closest Color' dengan menggabungkan nilai 'S1 R', 'S1 G', dan 'S1 B'
temp_df1['S1 Closest Color'] = temp_df1.apply(lambda row: tuple(row[2:]), axis=1)
temp_df2['S2 Closest Color'] = temp_df2.apply(lambda row: tuple(row[2:]), axis=1)

# Beri nama kolom
temp_df1.columns = ['Season 1 Name', 'Season 1 Percent', 'S1 R', 'S1 G', 'S1 B', 'S1 Closest Color']
temp_df2.columns = ['Season 2 Name', 'Season 2 Percent', 'S2 R', 'S2 G', 'S2 B', 'S2 Closest Color']

# Gabungkan DataFrame sementara dengan DataFrame utama
df_hanasui = pd.concat([df_hanasui, temp_df1[['Season 1 Name', 'Season 1 Percent', 'S1 Closest Color']], temp_df2[['Season 2 Name', 'Season 2 Percent', 'S2 Closest Color']]], axis=1)

# Hapus kolom 'Season 1' dan 'Season 2' yang tidak lagi diperlukan
df_hanasui.drop(columns=['Season 1', 'Season 2'], inplace=True)

# Terapkan fungsi determine_type ke df_hanasui untuk menentukan tipe produk
df_hanasui['Type'] = df_hanasui.apply(determine_type, axis=1)


In [147]:
# Rearrange columns
df_hanasui['Brand'] = "hanasui"
new_column_order = ['Product Title', 'Brand', 'Type','Variant Name', 'Color HEX', 'Color RGB', 'Season 1 Name',	'Season 1 Percent','S1 Closest Color','Season 2 Name','Season 2 Percent','S2 Closest Color','Product URL']
df_hanasui = df_hanasui.reindex(columns=new_column_order)

In [148]:
df_hanasui

Unnamed: 0,Product Title,Brand,Type,Variant Name,Color HEX,Color RGB,Season 1 Name,Season 1 Percent,S1 Closest Color,Season 2 Name,Season 2 Percent,S2 Closest Color,Product URL
0,Hanasui Mattedorable Lip Cream 01 Kiss,hanasui,lip,01 Kiss,#9c3c44,"(156, 60, 68)",'summer cool ',50.000000,"(152.0, 42.0, 85)",'winter deep',50.000000,"(152.0, 42.0, 85)",https://hanasui.id/makeup/lip_cream
1,Hanasui Mattedorable Lip Cream 02 Posh,hanasui,lip,02 Posh,#c63464,"(198, 52, 100)",'summer cool ',50.158086,"(208.0, 49.0, 79)",'winter clear',49.841914,"(194.0, 31.0, 90)",https://hanasui.id/makeup/lip_cream
2,Hanasui Mattedorable Lip Cream 03 Star,hanasui,lip,03 Star,#e64468,"(230, 68, 104)",'spring warm',50.093889,"(251.0, 95.0, 87)",'summer cool ',49.906111,"(208.0, 49.0, 79)",https://hanasui.id/makeup/lip_cream
3,Hanasui Mattedorable Lip Cream 04 Chic,hanasui,lip,04 Chic,#e06874,"(224, 104, 116)",'spring light',51.487515,"(253.0, 107.0, 122)",'autumn warm',48.512485,"(218.0, 103.0, 85)",https://hanasui.id/makeup/lip_cream
4,Hanasui Mattedorable Lip Cream 05 Classy,hanasui,lip,05 Classy,#c48474,"(196, 132, 116)",'summer soft',67.908490,"(197.0, 121.0, 125)",'autumn soft',32.091510,"(224.0, 134.0, 127)",https://hanasui.id/makeup/lip_cream
...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,Hanasui Glazedorable Vinyl Stain 04 Bless Me,hanasui,lip,Bless Me,#d73b73,"(215, 59, 115)",'winter deep',61.866439,"(205.0, 52.0, 135)",'summer cool ',38.133561,"(208.0, 49.0, 79)",https://hanasui.id/makeup/vinyl_stain
101,Hanasui Glazedorable Vinyl Stain 05 Speechless,hanasui,lip,05 Speechless,#8f3f2b,"(143, 63, 43)",'autumn warm',62.358418,"(133.0, 67.0, 35)",'spring warm',37.641582,"(144.0, 76.0, 25)",https://hanasui.id/makeup/vinyl_stain
102,Hanasui Glazedorable Vinyl Stain 06 Stay Cool,hanasui,lip,Stay Cool,#a95b51,"(169, 91, 81)",'autumn soft',50.877640,"(167.0, 85.0, 47)",'summer light',49.122360,"(164.0, 104.0, 114)",https://hanasui.id/makeup/vinyl_stain
103,Hanasui Perfect Cheek Blush & Go Pink,hanasui,cheek,Go Pink,#f49ca1,"(244, 156, 161)",'spring clear',64.910678,"(253.0, 167.0, 165)",'spring light',35.089322,"(255.0, 176.0, 146)",https://hanasui.id/makeup/blush_on


In [149]:
df_hanasui['Type'].value_counts()

Type
lip                     63
foundation & cussion    33
powder                   4
face                     3
cheek                    2
Name: count, dtype: int64

In [150]:
df_hanasui.to_csv('hanasui.csv', index=False)

# Emina

In [151]:
import pandas as pd

# Baca DataFrame dari file CSV df_emina
path_emina = "C:/Database/Capstone/get_data/emina_all_products_colors.csv"
df_emina = pd.read_csv(path_emina)

# Ubah nama kolom 'Product Name' menjadi 'Product Title' di df_emina
df_emina.rename(columns={'Product Name': 'Product Title'}, inplace=True)

# Convert hexcode dari df_emina ke RGB 
df_emina['Color RGB'] = df_emina['Color HEX'].fillna('').apply(hex_to_rgb)

# Terapkan fungsi classify_season ke 'Color RGB'
df_emina_filtered = df_emina.dropna(subset=['Color RGB'])
classified_data = df_emina_filtered['Color RGB'].apply(classify_season)

# Konversi dan gabungkan ke DataFrame 
classified_df = pd.DataFrame(classified_data.tolist(), columns=['Season 1', 'Season 2'])
df_emina = pd.concat([df_emina, classified_df], axis=1)

# Buat DataFrame sementara dari kolom 'Season 1'
temp_df1 = df_emina['Season 1'].apply(lambda x: pd.Series(str(x).strip('()').split(', ')))
temp_df2 = df_emina['Season 2'].apply(lambda x: pd.Series(str(x).strip('()').split(', ')))

# Ubah tipe data kolom ke tipe yang sesuai
temp_df1[[1, 2, 3]] = temp_df1[[1, 2, 3]].astype(float)
temp_df2[[1, 2, 3]] = temp_df2[[1, 2, 3]].astype(float)

# Buat kolom 'S1 Closest Color' dengan menggabungkan nilai 'S1 R', 'S1 G', dan 'S1 B'
temp_df1['S1 Closest Color'] = temp_df1.apply(lambda row: tuple(row[2:]), axis=1)
temp_df2['S2 Closest Color'] = temp_df2.apply(lambda row: tuple(row[2:]), axis=1)

# Beri nama kolom
temp_df1.columns = ['Season 1 Name', 'Season 1 Percent', 'S1 R', 'S1 G', 'S1 B', 'S1 Closest Color']
temp_df2.columns = ['Season 2 Name', 'Season 2 Percent', 'S2 R', 'S2 G', 'S2 B', 'S2 Closest Color']

# Gabungkan DataFrame sementara dengan DataFrame utama
df_emina = pd.concat([df_emina, temp_df1[['Season 1 Name', 'Season 1 Percent', 'S1 Closest Color']], temp_df2[['Season 2 Name', 'Season 2 Percent', 'S2 Closest Color']]], axis=1)

# Hapus kolom 'Season 1' dan 'Season 2' yang tidak lagi diperlukan
df_emina.drop(columns=['Season 1', 'Season 2'], inplace=True)

# Terapkan fungsi determine_type ke df_emina untuk menentukan tipe produk
df_emina['Type'] = df_emina.apply(determine_type, axis=1)


In [152]:
# Rearrange columns
df_emina['Brand'] = "emina"
new_column_order = ['Product Title', 'Brand', 'Type','Variant Name', 'Color HEX', 'Color RGB', 'Season 1 Name',	'Season 1 Percent','S1 Closest Color','Season 2 Name','Season 2 Percent','S2 Closest Color','Product URL']
df_emina = df_emina.reindex(columns=new_column_order)
df_emina

Unnamed: 0,Product Title,Brand,Type,Variant Name,Color HEX,Color RGB,Season 1 Name,Season 1 Percent,S1 Closest Color,Season 2 Name,Season 2 Percent,S2 Closest Color,Product URL
0,Watercolor Lip Serum,emina,lip,04 Haze,#efb492,"(239, 180, 146)",'spring light',62.031230,"(255.0, 176.0, 146)",'spring clear',37.968770,"(253.0, 167.0, 165)",https://www.eminacosmetics.com/watercolor-lip-...
1,Watercolor Lip Serum,emina,lip,03 Dusk,#e8b28a,"(232, 178, 138)",'spring light',58.750055,"(255.0, 176.0, 146)",'autumn warm',41.249945,"(217.0, 159.0, 113)",https://www.eminacosmetics.com/watercolor-lip-...
2,Daily Matte Compact Powder,emina,lip,01 Light,#e8c5a0,"(232, 197, 160)",'autumn soft',58.229515,"(222.0, 193.0, 182)",'spring light',41.770485,"(255.0, 176.0, 146)",https://www.eminacosmetics.com/daily-matte-com...
3,Daily Matte Cushion,emina,lip,C01 - Light,#fbd899,"(251, 216, 153)",'spring light',58.910654,"(255.0, 241.0, 166)",'spring light',41.089346,"(255.0, 176.0, 146)",https://www.eminacosmetics.com/daily-matte-cus...
4,Daily Matte BB Cream,emina,lip,01 Light,#eadfcd,"(234, 223, 205)",'spring light',52.629912,"(239.0, 239.0, 234)",'summer light',47.370088,"(206.0, 218.0, 229)",https://www.eminacosmetics.com/daily-matte-bb-...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
58,Poppin' Matte,emina,lip,Lowkey,#f27f7f,"(242, 127, 127)",'autumn soft',54.748652,"(224.0, 134.0, 127)",'spring light',45.251348,"(253.0, 107.0, 122)",https://www.eminacosmetics.com/poppin-matte-
59,Pop Rouge Pressed Eye Shadow,emina,eye,Posh,#e7c5d1,"(231, 197, 209)",'spring clear',52.193152,"(255.0, 191.0, 218)",'autumn soft',47.806848,"(222.0, 193.0, 182)",https://www.eminacosmetics.com/-pop-rouge-pres...
60,Pop Rouge Pressed Eye Shadow,emina,eye,Nude,#8c5e5e,"(140, 94, 94)",'summer light',53.883803,"(164.0, 104.0, 114)",'summer cool ',46.116197,"(122.0, 66.0, 113)",https://www.eminacosmetics.com/-pop-rouge-pres...
61,Pop Rouge Pressed Eye Shadow,emina,eye,Colorful,#608189,"(96, 129, 137)",'winter deep',53.168468,"(95.0, 98.0, 107)",'summer soft',46.831532,"(76.0, 173.0, 129)",https://www.eminacosmetics.com/-pop-rouge-pres...


In [153]:
df_emina['Type'].value_counts()
df_emina.to_csv('emina.csv', index=False)

# Pixy

In [154]:
# Baca DataFrame dari file CSV df_emina
path_pixy = "C:/Database/Capstone/get_data/pixy_all_products_colors.csv"
df_pixy = pd.read_csv(path_pixy)

# Ubah nama kolom 'Product Name' menjadi 'Product Title' di df_emina
#df_emina.rename(columns={'Product Name': 'Product Title'}, inplace=True)

# Convert hexcode dari df_emina ke RGB 
df_pixy['Color RGB'] = df_pixy['Color HEX'].fillna('').apply(hex_to_rgb)

# Terapkan fungsi classify_season ke 'Color RGB'
df_pixy_filtered = df_pixy.dropna(subset=['Color RGB'])
classified_data = df_pixy_filtered['Color RGB'].apply(classify_season)

# Konversi dan gabungkan ke DataFrame 
classified_df = pd.DataFrame(classified_data.tolist(), columns=['Season 1', 'Season 2'])
df_pixy = pd.concat([df_pixy, classified_df], axis=1)

# Buat DataFrame sementara dari kolom 'Season 1'
temp_df1 = df_pixy['Season 1'].apply(lambda x: pd.Series(str(x).strip('()').split(', ')))
temp_df2 = df_pixy['Season 2'].apply(lambda x: pd.Series(str(x).strip('()').split(', ')))

# Ubah tipe data kolom ke tipe yang sesuai
temp_df1[[1, 2, 3]] = temp_df1[[1, 2, 3]].astype(float)
temp_df2[[1, 2, 3]] = temp_df2[[1, 2, 3]].astype(float)

# Buat kolom 'S1 Closest Color' dengan menggabungkan nilai 'S1 R', 'S1 G', dan 'S1 B'
temp_df1['S1 Closest Color'] = temp_df1.apply(lambda row: tuple(row[2:]), axis=1)
temp_df2['S2 Closest Color'] = temp_df2.apply(lambda row: tuple(row[2:]), axis=1)

# Beri nama kolom
temp_df1.columns = ['Season 1 Name', 'Season 1 Percent', 'S1 R', 'S1 G', 'S1 B', 'S1 Closest Color']
temp_df2.columns = ['Season 2 Name', 'Season 2 Percent', 'S2 R', 'S2 G', 'S2 B', 'S2 Closest Color']

# Gabungkan DataFrame sementara dengan DataFrame utama
df_pixy = pd.concat([df_pixy, temp_df1[['Season 1 Name', 'Season 1 Percent', 'S1 Closest Color']], temp_df2[['Season 2 Name', 'Season 2 Percent', 'S2 Closest Color']]], axis=1)

# Hapus kolom 'Season 1' dan 'Season 2' yang tidak lagi diperlukan
df_pixy.drop(columns=['Season 1', 'Season 2'], inplace=True)

# Terapkan fungsi determine_type ke df_emina untuk menentukan tipe produk
df_pixy['Type'] = df_pixy.apply(determine_type, axis=1)


In [155]:
# Rearrange columns
df_pixy['Product Title'] = df_pixy['Product Title'].str.replace(' | PIXY - My Beauty, My Energy', '', regex=False)
df_pixy['Brand'] = "pixy"
new_column_order = ['Product Title', 'Brand', 'Type','Variant Name', 'Color HEX', 'Color RGB', 'Season 1 Name',	'Season 1 Percent','S1 Closest Color','Season 2 Name','Season 2 Percent','S2 Closest Color','Product URL']
df_pixy = df_pixy.reindex(columns=new_column_order)
df_pixy

Unnamed: 0,Product Title,Brand,Type,Variant Name,Color HEX,Color RGB,Season 1 Name,Season 1 Percent,S1 Closest Color,Season 2 Name,Season 2 Percent,S2 Closest Color,Product URL
0,PIXY Make It Glow Dewy Cushion,pixy,face,Light Beige,#EFCBA3,"(239, 203, 163)",'autumn soft',56.584580,"(222.0, 193.0, 182)",'spring light',43.415420,"(255.0, 176.0, 146)",https://www.pixy.co.id/product/pixy-make-it-gl...
1,PIXY Make It Glow Dewy Cushion,pixy,face,Natural Beige,#D1AE7D,"(209, 174, 125)",'autumn warm',67.269365,"(217.0, 159.0, 113)",'autumn soft',32.730635,"(224.0, 134.0, 127)",https://www.pixy.co.id/product/pixy-make-it-gl...
2,PIXY Make It Glow Dewy Cushion,pixy,face,Medium Beige,#C99D64,"(201, 157, 100)",'autumn warm',67.115604,"(217.0, 159.0, 113)",'autumn soft',32.884396,"(224.0, 134.0, 127)",https://www.pixy.co.id/product/pixy-make-it-gl...
3,PIXY Make It Glow Dewy Cushion,pixy,face,Pinkish Beige,#EBBEAF,"(235, 190, 175)",'autumn soft',65.647733,"(222.0, 193.0, 182)",'spring light',34.352267,"(253.0, 171.0, 187)",https://www.pixy.co.id/product/pixy-make-it-gl...
4,PIXY Make It Glow Dewy Cushion,pixy,face,Sandy Beige,#C68B66,"(198, 139, 102)",'summer soft',50.403252,"(197.0, 121.0, 125)",'autumn warm',49.596748,"(217.0, 159.0, 113)",https://www.pixy.co.id/product/pixy-make-it-gl...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,PIXY Line & Shadow,pixy,face,Green,#153E35,"(21, 62, 53)",'autumn warm',62.659866,"(0.0, 65.0, 52)",'winter deep',37.340134,"(0.0, 72.0, 80)",https://www.pixy.co.id/product/pixy-line-shadow
116,PIXY Line & Shadow,pixy,face,White,#FFFFFF,"(255, 255, 255)",'winter cool',100.000000,"(255.0, 255.0, 255)",'winter cool',0.000000,"(255.0, 233.0, 242)",https://www.pixy.co.id/product/pixy-line-shadow
117,PIXY Line & Shadow,pixy,face,Blue,#33647E,"(51, 100, 126)",'autumn soft',66.673540,"(49.0, 100.0, 146)",'autumn deep',33.326460,"(22.0, 74.0, 116)",https://www.pixy.co.id/product/pixy-line-shadow
118,PIXY Line & Shadow,pixy,face,Black,#222222,"(34, 34, 34)",'winter clear',95.056044,"(33.0, 33.0, 33)",'autumn deep',4.943956,"(44.0, 19.0, 6)",https://www.pixy.co.id/product/pixy-line-shadow


In [156]:
df_pixy['Type'].value_counts()
df_pixy.to_csv('pixy.csv', index=False)

# Maybelline

In [157]:
# Baca DataFrame dari file CSV df_emina
path_maybelline = "C:/Database/Capstone/get_data/maybelline_all_products_colors.csv"
df_maybelline = pd.read_csv(path_maybelline)

# Ubah nama kolom 'Product Name' menjadi 'Product Title' di df_emina
#df_emina.rename(columns={'Product Name': 'Product Title'}, inplace=True)

# Convert hexcode dari df_emina ke RGB 
df_maybelline['Color RGB'] = df_maybelline['Color HEX'].fillna('').apply(hex_to_rgb)

# Terapkan fungsi classify_season ke 'Color RGB'
df_pixy_filtered = df_maybelline.dropna(subset=['Color RGB'])
classified_data = df_pixy_filtered['Color RGB'].apply(classify_season)

# Konversi dan gabungkan ke DataFrame 
classified_df = pd.DataFrame(classified_data.tolist(), columns=['Season 1', 'Season 2'])
df_maybelline = pd.concat([df_maybelline, classified_df], axis=1)

# Buat DataFrame sementara dari kolom 'Season 1'
temp_df1 = df_maybelline['Season 1'].apply(lambda x: pd.Series(str(x).strip('()').split(', ')))
temp_df2 = df_maybelline['Season 2'].apply(lambda x: pd.Series(str(x).strip('()').split(', ')))

# Ubah tipe data kolom ke tipe yang sesuai
temp_df1[[1, 2, 3]] = temp_df1[[1, 2, 3]].astype(float)
temp_df2[[1, 2, 3]] = temp_df2[[1, 2, 3]].astype(float)

# Buat kolom 'S1 Closest Color' dengan menggabungkan nilai 'S1 R', 'S1 G', dan 'S1 B'
temp_df1['S1 Closest Color'] = temp_df1.apply(lambda row: tuple(row[2:]), axis=1)
temp_df2['S2 Closest Color'] = temp_df2.apply(lambda row: tuple(row[2:]), axis=1)

# Beri nama kolom
temp_df1.columns = ['Season 1 Name', 'Season 1 Percent', 'S1 R', 'S1 G', 'S1 B', 'S1 Closest Color']
temp_df2.columns = ['Season 2 Name', 'Season 2 Percent', 'S2 R', 'S2 G', 'S2 B', 'S2 Closest Color']

# Gabungkan DataFrame sementara dengan DataFrame utama
df_maybelline = pd.concat([df_maybelline, temp_df1[['Season 1 Name', 'Season 1 Percent', 'S1 Closest Color']], temp_df2[['Season 2 Name', 'Season 2 Percent', 'S2 Closest Color']]], axis=1)

# Hapus kolom 'Season 1' dan 'Season 2' yang tidak lagi diperlukan
df_maybelline.drop(columns=['Season 1', 'Season 2'], inplace=True)

# Terapkan fungsi determine_type ke df_emina untuk menentukan tipe produk
df_maybelline['Type'] = df_maybelline.apply(determine_type, axis=1)


In [158]:
df_maybelline['Brand'] = "maybelline"
new_column_order = ['Product Title', 'Brand', 'Type','Variant Name', 'Color HEX', 'Color RGB', 'Season 1 Name',	'Season 1 Percent','S1 Closest Color','Season 2 Name','Season 2 Percent','S2 Closest Color','Product URL']
df_maybelline = df_maybelline.reindex(columns=new_column_order)
df_maybelline

Unnamed: 0,Product Title,Brand,Type,Variant Name,Color HEX,Color RGB,Season 1 Name,Season 1 Percent,S1 Closest Color,Season 2 Name,Season 2 Percent,S2 Closest Color,Product URL
0,Instant Age Rewind®,maybelline,foundation & cussion,110,#fbe0bf,"(251, 224, 191)",'summer soft',56.219254,"(255.0, 246.0, 183)",'spring light',43.780746,"(255.0, 241.0, 166)",https://www.maybelline.com/face-makeup/foundat...
1,Instant Age Rewind®,maybelline,foundation & cussion,112,#fed4b2,"(254, 212, 178)",'spring light',52.264785,"(255.0, 241.0, 166)",'summer soft',47.735215,"(255.0, 246.0, 183)",https://www.maybelline.com/face-makeup/foundat...
2,Instant Age Rewind®,maybelline,foundation & cussion,118,#f8d4af,"(248, 212, 175)",'spring light',51.398760,"(255.0, 241.0, 166)",'autumn soft',48.601240,"(222.0, 193.0, 182)",https://www.maybelline.com/face-makeup/foundat...
3,Instant Age Rewind®,maybelline,foundation & cussion,120,#f5c19b,"(245, 193, 155)",'spring light',57.207805,"(255.0, 176.0, 146)",'spring clear',42.792195,"(253.0, 167.0, 165)",https://www.maybelline.com/face-makeup/foundat...
4,Instant Age Rewind®,maybelline,foundation & cussion,130,#f6c89a,"(246, 200, 154)",'spring light',56.923164,"(255.0, 176.0, 146)",'spring clear',43.076836,"(253.0, 167.0, 165)",https://www.maybelline.com/face-makeup/foundat...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
611,Color Sensational®,maybelline,lip,Plum Passion,#902472\r\n,"(144, 36, 114)",'winter cool',54.328589,"(134.0, 42.0, 91)",'summer cool ',45.671411,"(152.0, 42.0, 85)",https://www.maybelline.com/lip-makeup/lip-line...
612,Baby Lips Dr Rescue,maybelline,lip,Coral Crave,#FF8C99,"(255, 140, 153)",'spring clear',55.325255,"(253.0, 167.0, 165)",'spring light',44.674745,"(255.0, 176.0, 146)",https://www.maybelline.com/lip-makeup/lip-balm...
613,Baby Lips®,maybelline,lip,Quenched,#fff0f0,"(255, 240, 240)",'winter cool',70.160247,"(255.0, 233.0, 242)",'spring light',29.839753,"(239.0, 239.0, 234)",https://www.maybelline.com/lip-makeup/lip-balm...
614,Baby Lips®,maybelline,lip,Cherry Me,#a12d53,"(161, 45, 83)",'summer cool ',50.000000,"(152.0, 42.0, 85)",'winter deep',50.000000,"(152.0, 42.0, 85)",https://www.maybelline.com/lip-makeup/lip-balm...


In [159]:
df_maybelline['Type'].value_counts()


Type
lip                     326
foundation & cussion    151
face                     69
eye                      32
cheek                    26
powder                   12
Name: count, dtype: int64

In [160]:
df_maybelline.to_csv('maybelline.csv', index=False)

# Combined All Dataframe 

In [162]:
import pandas as pd

# Gabungkan semua DataFrame ke dalam satu DataFrame
combined = pd.concat([df_wardah, df_hanasui, df_emina, df_somethinc, df_pixy, df_maybelline], ignore_index=True)
combined

Unnamed: 0,Product Title,Brand,Type,Variant Name,Color HEX,Color RGB,Season 1 Name,Season 1 Percent,S1 Closest Color,Season 2 Name,Season 2 Percent,S2 Closest Color,Product URL
0,Matte Lip Cream,wardah,lip,Red-dicted\t,#a92431,"(169, 36, 49)",'autumn warm',74.697169,"(156.0, 35.0, 53)",'summer cool ',25.302831,"(152.0, 42.0, 85)",https://www.wardahbeauty.com/id/product/make-u...
1,Matte Lip Cream,wardah,lip,Fuschionately,#d04483,"(208, 68, 131)",'winter deep',69.550194,"(205.0, 52.0, 135)",'summer soft',30.449806,"(177.0, 89.0, 123)",https://www.wardahbeauty.com/id/product/make-u...
2,Matte Lip Cream,wardah,lip,See You Latte,#c5807d,"(197, 128, 125)",'summer soft',79.845008,"(197.0, 121.0, 125)",'autumn soft',20.154992,"(224.0, 134.0, 127)",https://www.wardahbeauty.com/id/product/make-u...
3,Matte Lip Cream,wardah,lip,Pink Me,#d86d8c,"(216, 109, 140)",'summer soft',59.121401,"(228.0, 106.0, 154)",'summer soft',40.878599,"(197.0, 121.0, 125)",https://www.wardahbeauty.com/id/product/make-u...
4,Matte Lip Cream,wardah,lip,Speachless,#d87a71,"(216, 122, 113)",'autumn soft',52.811055,"(224.0, 134.0, 127)",'summer soft',47.188945,"(197.0, 121.0, 125)",https://www.wardahbeauty.com/id/product/make-u...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1378,Color Sensational®,maybelline,lip,Plum Passion,#902472\r\n,"(144, 36, 114)",'winter cool',54.328589,"(134.0, 42.0, 91)",'summer cool ',45.671411,"(152.0, 42.0, 85)",https://www.maybelline.com/lip-makeup/lip-line...
1379,Baby Lips Dr Rescue,maybelline,lip,Coral Crave,#FF8C99,"(255, 140, 153)",'spring clear',55.325255,"(253.0, 167.0, 165)",'spring light',44.674745,"(255.0, 176.0, 146)",https://www.maybelline.com/lip-makeup/lip-balm...
1380,Baby Lips®,maybelline,lip,Quenched,#fff0f0,"(255, 240, 240)",'winter cool',70.160247,"(255.0, 233.0, 242)",'spring light',29.839753,"(239.0, 239.0, 234)",https://www.maybelline.com/lip-makeup/lip-balm...
1381,Baby Lips®,maybelline,lip,Cherry Me,#a12d53,"(161, 45, 83)",'summer cool ',50.000000,"(152.0, 42.0, 85)",'winter deep',50.000000,"(152.0, 42.0, 85)",https://www.maybelline.com/lip-makeup/lip-balm...


In [163]:
combined.to_csv('combined.csv', index=False)