# CLEAN UP CODE for JAN 8 MEETING 

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import math 
products_static = pd.read_csv("products_static_1607946056.csv")

# Inputs: 
Input the values you would like to filter by:

In [2]:
#Filter 1: Categories
(cat_1_on, cat_2_on, cat_3_on) = False, True, False
cat_lst_1 = []
cat_lst_2 = [12999, 1301, 21762, 1297, 1295, 17095, 893, 
              17089, 12738, 2037,2035, 9197, 20768, 2033, 
              1329, 14492, 14503, 21849, 16502, 16500]
cat_lst_3 = []

#Filter 2: Times (Update-status, how long been sold)
time_on = True
unix_time_now = 1607946056 #Change based on Nick's input
min_years = 1
updated_previously_weeks = 2


#Filter 3: Brand GMV
min_gmv_on = True
min_gmv = 10000

#Filter 4: Brand Avg Price 
min_unit_price_on = True
min_unit_price = 300

#Filter 5: GMV Concentration
min_gmv_conc_on = False
top_n = 3 #concetration num of products #IF CHANGE THIS NUMBER CHANGE COLUMN NAME TOO 
min_concentration = 0.2

#Filter 6: Avg Product Rating
min_avg_star_on = False
min_avg_star = 4.5

#Filter 7: Bad Customer Ratings 
max_bad_rating_on = False 
max_bad_rating = 0.5

#Filter 8: Brand TOTAL GMV 
min_total_brand_gmv_on = True
min_total_brand_gmv = 2000000 / 12

# Import Data

In [3]:
shops_gmv = pd.read_csv("shops_gmv_data.csv")
products_static = pd.read_csv("products_static_1607946056.csv")
models_static = pd.read_csv("models_static_data.csv")

Change data types in dataframes to conserve memory:

In [4]:
def convert_to_cats(df, to_convert):
    '''
    Converts cols in to_convert_list to categories. 
    
    Inputs:
        df: a DataFrame
        to_convert: a list of column names (as strings)
                    we'd like to convert
    Returns:
        Nothing; modifies dataframe in place
    '''
    for col in df.columns:
        if col in to_convert:
            df[col] = df[col].astype("category")

In [5]:
to_convert_models = ['category_one', 'category_one_en',
                     'category_one_th', 'category_two',
                     'category_two_en', 'category_two_th',
                     'category_three', 'category_three_en',
                     'category_three_th']
convert_to_cats(models_static, to_convert_models)

to_convert_products = ['category_one', 'category_one_en',
                      'category_one_th', 'category_two', 
                      'category_two_en', 'category_two_th',
                      'category_three', 'category_three_en', 
                      'category_three_th', 'reviews_count_context', 
                      'reviews_count_image', 'shopee_verified', 
                      'show_discount']
convert_to_cats(products_static, to_convert_products)

to_convert_shops = ["shopid", "category_one", "category_two",
                    "category_three"]
convert_to_cats(shops_gmv, to_convert_shops)

In [6]:
def downcast_numbers(df):
    '''
    Downcasts floats and ints.
    
    Inputs:
        df: a DataFrame objecet. 
    Returns:
        Nothing; modifies list in place
    '''
    for col in df.columns:
        if df[col].dtype == "float":
            df[col] = pd.to_numeric(df[col], downcast="float")
        if df[col].dtype == "int":
            df[col] = pd.to_numeric(df[col], downcast="unsigned")

In [7]:
downcast_numbers(models_static)
downcast_numbers(products_static)
downcast_numbers(shops_gmv)

#Making untouched, but memory efficient copy of data for later
products_static["product_gmv"] = products_static["price"] * products_static["sold"] 
products_static_untouched = products_static.copy(deep=True)


# Filter 1: Category Filter

Filter our DataFrames to only keep rows with Categories we care for. 


In [8]:
#Helper Functions
def filter_by_category(df, category_n, ok_cat_lst):
    '''
    Keeps rows in the df that have cats in ok_cat_lst. 
    
    Inputs:
        df: a DataFrame
        category_n: (str) eg. "category_one"
        ok_cat_lst: (lst) of acceptable cat for that 
          cat as numbers eg [50, 26]
    Returns:
        Nothing; modifies list in place
    '''
    
    mask = df[category_n].isin(ok_cat_lst)
    df = df[mask]
    
def filter_by_category(df, category_n, ok_cat_lst):
    '''
    Keeps rows in the df that have cats in ok_cat_lst. 
    
    Inputs:
        df: a DataFrame
        category_n: (str) eg. "category_one"
        ok_cat_lst: (lst) of acceptable cat for that 
          cat as numbers eg [50, 26]
    Returns:
        The modified df 
    '''
    
    mask = df[category_n].isin(ok_cat_lst)
    return df[mask]

In [9]:
if cat_1_on:
    models_static = filter_by_category(models_static, "category_one", cat_lst_1)
    products_static = filter_by_category(products_static, "category_one", cat_lst_1) 

In [10]:
if cat_2_on:
    models_static = filter_by_category(models_static, "category_two", cat_lst_2)
    products_static = filter_by_category(products_static, "category_two", cat_lst_2) 

In [11]:
if cat_3_on:
    models_static = filter_by_category(models_static, "category_three", cat_lst_3)
    products_static = filter_by_category(products_static, "category_three", cat_lst_3) 

# Filter 2: Keep only Established Products and Products that Sellers are Updating

Filter out products that have been sold for less than a specified time. 

In [12]:
def filter_out_new_products(df, min_years, unix_time_now):
    '''
    Filters out rows from our products_static df that 
    have been sold less than a specified time
    '''

    min_s = min_years * 365 * 24 * 60 * 60 

    mask = (unix_time_now - products_static["ctime"] > min_s)

    return df[mask]

def filter_out_unupdated_products(df, updated_previously_weeks):
    '''
    Filters out rows from our products_static df that 
    have been sold less than a specified time
    '''

    unix_time_now = math.ceil(datetime.now().timestamp())

    max_s = updated_previously_weeks * 7 * 24 * 60 * 60

    mask = (unix_time_now - products_static["modified_at"] < max_s)

    return df[mask]

if time_on: 
    products_static = filter_out_new_products(products_static, min_years,
                                              unix_time_now)

    products_static = filter_out_unupdated_products(products_static, 
                                                    updated_previously_weeks)

# Brand Definition

Extract unique brand names from the remaining products

In [13]:
mask = products_static["brand"] != "No Brand(ไม่มียี่ห้อ)"
products_static = products_static[mask]

brands_list = products_static["brand"].unique()
brands_list = np.delete(brands_list, np.where(brands_list == ('No Brand(ไม่มียี่ห้อ)')))

In [14]:
#Clean up brands_df
brands_df = pd.DataFrame(brands_list, columns = ["Brand Name"])

brands_df.dropna(inplace=True)

s1 = brands_df["Brand Name"] != "None"
brands_df = brands_df[s1]

s2 = brands_df["Brand Name"] != "0"
brands_df = brands_df[s2] 


In [15]:
#Set up columns for our brands_df
brands_df["Filtered_Brand_GMV"] = 0
brands_df["Filtered_Average_Unit_Price"] = 0
brands_df["%_Filtered_GMV_from_Top_3_Products"] = 0
brands_df["Filtered_Weighted_Star_Rating"] = 0
brands_df["Filtered_Bad_Rating_Percent"] = 0
brands_df.set_index("Brand Name", inplace = True)

In [16]:
#Make new column in products_static for GMV of a product (price * sold)
products_static["weighted_star"] = products_static["rating_star"] * products_static["sold"]

In [17]:
for brand in brands_df.index:
    
    #make a sub-df containing only rows with the correct brand
    my_brand_df = products_static[products_static["brand"] == brand]
    my_brand_df = my_brand_df.sort_values(by=["product_gmv"], ascending=False) 
    
    #gmv calculation
    brand_gmv = my_brand_df["product_gmv"].sum()
    brands_df.loc[brand, "Filtered_Brand_GMV"] = brand_gmv
    
    #volume calculation
    brand_volume = my_brand_df["sold"].sum()
    brands_df.loc[brand, "Filtered_Average_Unit_Price"] = brand_gmv / brand_volume
    
    #GMV Concentration Calculation
    if len(my_brand_df) >= top_n: 
        top_n_df = my_brand_df.head(top_n)
        top_n_gmv = top_n_df["product_gmv"].sum()
        brands_df.loc[brand, "%_Filtered_GMV_from_Top_3_Products"] = top_n_gmv / brand_gmv
    else:
        #brand sells less than like 5 products --> highly conc.
        brands_df.loc[brand, "%_Filtered_GMV_from_Top_3_Products"] = 1.0 
        
        
    #Weighted Star Rating
    total_star = my_brand_df["weighted_star"].sum()
    brands_df.loc[brand, "Filtered_Weighted_Star_Rating"] = total_star / brand_volume
    
    #Bad Rating Count
    bad_rating_count = my_brand_df["rating_count_one"].sum() + my_brand_df["rating_count_two"].sum()
    total_rating_count = my_brand_df["rating_count_total"].sum()
    
    if total_rating_count != 0:
        val = bad_rating_count / total_rating_count 
    else: 
        val = -1
    
    brands_df.loc[brand, "Filtered_Bad_Rating_Percent"] = val

brands_df["Filtered_Bad_Rating_Percent"] = brands_df["Filtered_Bad_Rating_Percent"].round(decimals=3)

# Filter 3: Brand Total GMV

In [18]:
if min_gmv_on:
    brands_df = brands_df[brands_df["Filtered_Brand_GMV"] >= min_gmv]

# Filter 4: Brand Average Price

In [19]:
if min_unit_price_on:
    brands_df = brands_df[brands_df["Filtered_Average_Unit_Price"] >= min_unit_price]

# Filter 5: GMV Concentration of Top (3) Products

In [20]:
if min_gmv_conc_on:
    brands_df = brands_df[brands_df["Filtered_GMV_Concentration"] >= min_concentration]

# Filter 6: Average Product Rating

In [21]:
if min_avg_star_on:
    brands_df = brands_df[brands_df["Filtered_Weighted_Star_Rating"] >= min_avg_star]

# Filter 7: Bad Customer Ratings

In [22]:
if max_bad_rating_on:
    brands_df = brands_df[brands_df["Filtered_Bad_Rating_Percent"] <= max_bad_rating]

# (GMV Growth?)

In [23]:
# number of brands in our brands_df
print(len(brands_df))

1094


# Filter 8: Brand GMV (Across All Categories) 

Reimport the products data but only filter for the brands in our df. Note: We do this step at the end b/c we'll have to reimport all of products data, so makes sense to have the smallest brands list possible. 

### Calculate Category of the Brands

Do this by looping over again, since going through each cat 2 for each brand is computationally expensive, want to get the brands list down as much as possible.
Note: 
- If set min_total_brand_gmv = 2,000,000 / 12, go from 415 --> 415
- If set min_total_brand_gmv = 2,000,000 / 10, go from 415 --> 364
- If set min_total_brand_gmv = 2,000,000 / 8, go from 415 --> 314
- If set min_total_brand_gmv = 2,000,000 / 6, go from 415 --> 258
- If set min_total_brand_gmv = 2,000,000 / 4, go from 415 --> 193

This is just a sample but goes to show that doing our brand's cat_2 calculation can be expensive, so that's why I wanted to put it after we had chipped away at list. However, if we set min_total_brand_gmv = 2,000,000 / 12, there's no effect so might as well just include our brand cat 2 calculation in Filter 8's for loop. 


In [25]:
#Assume this is always kept on for simplicity
if min_total_brand_gmv_on:
        
    for brand in brands_df.index:

        #make a sub-df containing only rows with the correct brand
        mask_brand = products_static_untouched["brand"] == brand
        my_brand_df = products_static_untouched[mask_brand]

        #TOTAL Brand gmv calculation
        total_brand_gmv = my_brand_df["product_gmv"].sum()
        brands_df.loc[brand, "Brand_GMV_All_Cats"] = total_brand_gmv
        
        #Total brand view count 
        total_brand_views = my_brand_df["view_count"].sum()
        conv_rate = total_brand_gmv / total_brand_views
        brands_df.loc[brand, "Conversion_Rate_All_Cats_(total_gmv/view)"] = conv_rate
        
        #--------------------------------------------------------------
        #Take this out and put later if the min_total_brand_gmv 
        #becomes higher, otherwise inclue this in the for loop
        winning_cat_2 = None
        winning_cat_2_gmv = 0.0

        for cat_2 in my_brand_df["category_two_en"].unique():
            mask_cat_2 = my_brand_df["category_two_en"] == cat_2
            cat_2_df = my_brand_df[mask_cat_2]

            cat_2_gmv = cat_2_df["product_gmv"].sum()
            if cat_2_gmv > winning_cat_2_gmv:
                winning_cat_2 = cat_2
                winning_cat_2_gmv = cat_2_gmv
                    
        percent = (winning_cat_2_gmv / total_brand_gmv).round(decimals=3)
        brands_df.loc[brand, "Cat_2_Percent_of_Total_GMV"] = percent 
        brands_df.loc[brand, "Brand_Category_2"] = winning_cat_2

        print("for brand: ", brand, "Winning cat 2 is: ", winning_cat_2)
        print("with a cat_2_gmv value of: ", winning_cat_2_gmv)
        
        #--------------------------------------------------------------
            
        
        
    #Apply the total brand gmv filter
    brands_df = brands_df[brands_df["Brand_GMV_All_Cats"] >= min_total_brand_gmv]

for brand:  Genie Bra(จีนี่ บรา) Winning cat 2 is:  Underwear
with a cat_2_gmv value of:  200022
for brand:  PORTABLE TOILET(พอร์ทเทเบิล ทอยเลท) Winning cat 2 is:  Camping and hiking
with a cat_2_gmv value of:  130130
for brand:  Bandai(บันได)​ Winning cat 2 is:  Game collectibles
with a cat_2_gmv value of:  1892788
for brand:  BANDAI(บันได) Winning cat 2 is:  Game collectibles
with a cat_2_gmv value of:  366985
for brand:  MaxDe Winning cat 2 is:  Furniture
with a cat_2_gmv value of:  64596
for brand:  Bandai(บันได) Winning cat 2 is:  Game collectibles
with a cat_2_gmv value of:  260032
for brand:  Colorland(คัลเลอร์แลนด์) Winning cat 2 is:  Baby goods
with a cat_2_gmv value of:  20220
for brand:  WELNESS(เวลเนส) Winning cat 2 is:  Health equipment
with a cat_2_gmv value of:  517900
for brand:  Sloggi(สล็อกกี้) Winning cat 2 is:  Underwear
with a cat_2_gmv value of:  38413
for brand:  Triumph Winning cat 2 is:  Underwear
with a cat_2_gmv value of:  31616
for brand:  Triumph(ไทรอัมพ์) 

for brand:  Naturehike(เนเจอร์ไฮค์) Winning cat 2 is:  Camping and hiking
with a cat_2_gmv value of:  1894966
for brand:  Whiskas Winning cat 2 is:  Cat
with a cat_2_gmv value of:  3220809
for brand:  Pedigree Winning cat 2 is:  Dog
with a cat_2_gmv value of:  882229
for brand:  Ceasar Winning cat 2 is:  Dog
with a cat_2_gmv value of:  186257
for brand:  TEMPTATION Winning cat 2 is:  Cat
with a cat_2_gmv value of:  47376
for brand:  OMRON Winning cat 2 is:  Health equipment
with a cat_2_gmv value of:  132805
for brand:  OMRON(โอมรอน) Winning cat 2 is:  Health equipment
with a cat_2_gmv value of:  2751199
for brand:  Sleep Happy(สลีปแฮปปี้) Winning cat 2 is:  Bedroom
with a cat_2_gmv value of:  372630
for brand:  SleepHappy (สลีปแฮปปี้) Winning cat 2 is:  Bedroom
with a cat_2_gmv value of:  84020
for brand:  ECF(อีซีเอฟ) Winning cat 2 is:  Furniture
with a cat_2_gmv value of:  166256
for brand:  Philips Winning cat 2 is:  Small kitchen appliances
with a cat_2_gmv value of:  1885737
for 

for brand:  Certainty Winning cat 2 is:  Health equipment
with a cat_2_gmv value of:  4474548
for brand:  CERTAINTY(เซอร์เทนตี้) Winning cat 2 is:  Diapers and wet wipes
with a cat_2_gmv value of:  1852159
for brand:  Certainty(เซอร์เทนตี้) Winning cat 2 is:  Health equipment
with a cat_2_gmv value of:  2806862
for brand:  ﻿Medela Winning cat 2 is:  Baby goods
with a cat_2_gmv value of:  68516
for brand:  Medela(เมเดลา) Winning cat 2 is:  Baby goods
with a cat_2_gmv value of:  70533
for brand:  Medela(มีดิล่า) Winning cat 2 is:  Baby goods
with a cat_2_gmv value of:  54248
for brand:  SKG(เอสเคจี) Winning cat 2 is:  Small kitchen appliances
with a cat_2_gmv value of:  1182053
for brand:  CUSHY(คุซชี่) Winning cat 2 is:  Furniture
with a cat_2_gmv value of:  162855
for brand:  INCO (อินโค) Winning cat 2 is:  Underwear
with a cat_2_gmv value of:  20640
for brand:  PRIM Winning cat 2 is:  Bedroom
with a cat_2_gmv value of:  40290
for brand:  CHERISH Winning cat 2 is:  Bedroom
with a cat_2

for brand:  Amber (แอมเเบอร์) Winning cat 2 is:  Bedroom
with a cat_2_gmv value of:  29250
for brand:  Unicharm Pet Winning cat 2 is:  Cat
with a cat_2_gmv value of:  120072
for brand:  Cuisinart(คริสชินาร์ต) Winning cat 2 is:  Small kitchen appliances
with a cat_2_gmv value of:  25515
for brand:  Milk Plus & More(มิลค์พลัสแอนด์มอร์) Winning cat 2 is:  Baby goods
with a cat_2_gmv value of:  405657
for brand:  Ginger Blend(จินเจอร์ เบลนด์) Winning cat 2 is:  Baby goods
with a cat_2_gmv value of:  20640
for brand:  Philips(ฟิลิปส์) Winning cat 2 is:  Irons and cloths
with a cat_2_gmv value of:  2332184
for brand:  Beanie Nap(บีนนี่ แนป) Winning cat 2 is:  Furniture - Children mattress
with a cat_2_gmv value of:  8244
for brand:  DREAM CHEF KOREA เครื่องครัวเพื่อสุขภาพดรีมเชฟ Winning cat 2 is:  Kitchen and dining room
with a cat_2_gmv value of:  101720
for brand:  DREAMCHEF ( DREAM CHEF KOREA ) ดรีมเชฟ Winning cat 2 is:  Kitchen and dining room
with a cat_2_gmv value of:  34140
for brand:

for brand:  Minirin(มินิริน) Winning cat 2 is:  Baby goods
with a cat_2_gmv value of:  75855
for brand:  Colandas(คอแลนดาส) Winning cat 2 is:  Small kitchen appliances
with a cat_2_gmv value of:  288984
for brand:  Kingkong Electric Winning cat 2 is:  Equipment for children
with a cat_2_gmv value of:  17578
for brand:  Royal Canin Winning cat 2 is:  Cat
with a cat_2_gmv value of:  657486
for brand:  Taste Of The Wild(เท็ดออฟเดอะไวท์) Winning cat 2 is:  Cat
with a cat_2_gmv value of:  435941
for brand:  MARA(มาร่า) Winning cat 2 is:  Small kitchen appliances
with a cat_2_gmv value of:  478718
for brand:  Fast Pure(ฟาส์ท เพียว) Winning cat 2 is:  Small kitchen appliances
with a cat_2_gmv value of:  672041
for brand:  ECX Winning cat 2 is:  Small kitchen appliances
with a cat_2_gmv value of:  39200
for brand:  SHARP Winning cat 2 is:  Small kitchen appliances
with a cat_2_gmv value of:  140576
for brand:  Mazuma(มาซูม่า) Winning cat 2 is:  Small kitchen appliances
with a cat_2_gmv value o

for brand:  Pimberly(พิมเบอร์ลี่) Winning cat 2 is:  Baby goods
with a cat_2_gmv value of:  588258
for brand:  Brusta(บรุสต้า) Winning cat 2 is:  Baby goods
with a cat_2_gmv value of:  231033
for brand:  B Gift(บี กิ๊ฟท์) Winning cat 2 is:  Baby goods
with a cat_2_gmv value of:  16290
for brand:  MomTech(มัมเทค) Winning cat 2 is:  Baby goods
with a cat_2_gmv value of:  52434
for brand:  Sheepola(ชิปโพล่า) Winning cat 2 is:  Small kitchen appliances
with a cat_2_gmv value of:  103390
for brand:  Anda Seat(แอนด้า ซีท) Winning cat 2 is:  Furniture
with a cat_2_gmv value of:  716990
for brand:  Cocoro Tokyo(โคโคโร่ โตเกียว) Winning cat 2 is:  Bath and body care products
with a cat_2_gmv value of:  131257
for brand:  MITSUMARU(มิตซูมารุ) Winning cat 2 is:  Small kitchen appliances
with a cat_2_gmv value of:  17100
for brand:  Reboot Master 6000(รีบู๊ทมาสเตอร์6000) Winning cat 2 is:  Small kitchen appliances
with a cat_2_gmv value of:  68754
for brand:  Cleansui (คลีนซุย) Winning cat 2 is:  

for brand:  Victor(วิคเตอร์) Winning cat 2 is:  Fan
with a cat_2_gmv value of:  112173
for brand:  AquaTech(อะควาเทค) Winning cat 2 is:  Small kitchen appliances
with a cat_2_gmv value of:  44954
for brand:  Chanada(ชะนะดะ) Winning cat 2 is:  Baby goods
with a cat_2_gmv value of:  10920
for brand:  Zeal(เซียล) Winning cat 2 is:  Dog
with a cat_2_gmv value of:  34871
for brand:  Avoderm(อโวเดิร์ม) Winning cat 2 is:  Dog
with a cat_2_gmv value of:  14847
for brand:  Oonew(ออนิว) Winning cat 2 is:  Equipment for children
with a cat_2_gmv value of:  220649
for brand:  Smiggle(สมิ๊กเกิ้ล) Winning cat 2 is:  Equipment for children
with a cat_2_gmv value of:  25935
for brand:  Aidia Winning cat 2 is:  Small kitchen appliances
with a cat_2_gmv value of:  13950
for brand:  Lintbells(ลินเบล) Winning cat 2 is:  Dog
with a cat_2_gmv value of:  76661
for brand:  JTC(เจทีซี) Winning cat 2 is:  Small kitchen appliances
with a cat_2_gmv value of:  95688
for brand:  ดอกกรรณิการ์ Winning cat 2 is:  Heal

for brand:  Pentavite(เพนตาวิท) Winning cat 2 is:  Equipment for health and safety
with a cat_2_gmv value of:  10150
for brand:  Chicky Mild(ชิคกี้มายด์) Winning cat 2 is:  Equipment for health and safety
with a cat_2_gmv value of:  26613
for brand:  Dpfurniture(ดีพีเฟอร์นิเจอร์) Winning cat 2 is:  Furniture
with a cat_2_gmv value of:  153200
for brand:  Baanmainicha(บ้านไม้นิชา) Winning cat 2 is:  Furniture
with a cat_2_gmv value of:  92290
for brand:  InnHome(อินน์โฮม) Winning cat 2 is:  Furniture
with a cat_2_gmv value of:  133213
for brand:  Nordics Naturals(นอร์ดิกส์ เนเจอเรลส์) Winning cat 2 is:  Equipment for health and safety
with a cat_2_gmv value of:  20940
for brand:  Civil Modern(ซิวิลโมเดิร์น) Winning cat 2 is:  Furniture
with a cat_2_gmv value of:  108316
for brand:  Palm Store(ปาล์มสตอรี่) Winning cat 2 is:  Furniture
with a cat_2_gmv value of:  32978
for brand:  Koncept Furniture(คอนเซ็ปต์เฟอร์นิเจอร์) Winning cat 2 is:  Furniture
with a cat_2_gmv value of:  113655
for 

for brand:  Acare(เอแคร์) Winning cat 2 is:  Health equipment
with a cat_2_gmv value of:  125734
for brand:  Glucosure Autocode(กลูโคชัวร์ ออโต้โค้ด) Winning cat 2 is:  Health equipment
with a cat_2_gmv value of:  306798
for brand:  HM(เอชเอ็ม) Winning cat 2 is:  Health equipment
with a cat_2_gmv value of:  70635
for brand:  Funowa(ฟูโนว่า) Winning cat 2 is:  Health equipment
with a cat_2_gmv value of:  40827
for brand:  Bodivis(บอดี้วิส) Winning cat 2 is:  Health equipment
with a cat_2_gmv value of:  29000
for brand:  แป๊ะยิ้ม(แป๊ะยิ้ม) Winning cat 2 is:  Health equipment
with a cat_2_gmv value of:  13091
for brand:  Calvin Klein(คาลวิน ไคลน์) Winning cat 2 is:  Underwear
with a cat_2_gmv value of:  251542
for brand:  3M Littmann(3เอ็ม ลิทท์แมนน์) Winning cat 2 is:  Health equipment
with a cat_2_gmv value of:  57405
for brand:  Strataderm Thailand(สแตรททาเดิร์มไทยแลนด์) Winning cat 2 is:  Health equipment
with a cat_2_gmv value of:  19265
for brand:  KCM (เคซีเอ็ม) Winning cat 2 is:  

for brand:  Autometer(ออโต้มีเตอร์) Winning cat 2 is:  Car exterior accessories
with a cat_2_gmv value of:  10187
for brand:  TYC(ทีวายซี) Winning cat 2 is:  Spare parts and car accessories
with a cat_2_gmv value of:  34565
for brand:  Mitsubishi(มิทซูบิชิ) Winning cat 2 is:  Car accessories
with a cat_2_gmv value of:  207834
for brand:  HONDA(ฮอนด้า) Winning cat 2 is:  Spare parts and motorcycle accessories
with a cat_2_gmv value of:  678448
for brand:  Ford(ฟอร์ด) Winning cat 2 is:  Spare parts and car accessories
with a cat_2_gmv value of:  281181
for brand:  ตราเพชร Winning cat 2 is:  Car exterior accessories
with a cat_2_gmv value of:  22588
for brand:  SOLEX(โซเล็กซ์) Winning cat 2 is:  Car exterior accessories
with a cat_2_gmv value of:  71874
for brand:  NGK(เอ็น.จี.เค) Winning cat 2 is:  Spare parts and motorcycle accessories
with a cat_2_gmv value of:  144556
for brand:  Carryboy(แครี่บอย) Winning cat 2 is:  Car exterior accessories
with a cat_2_gmv value of:  69830
for brand

for brand:  Stamp (แสตมป์) Winning cat 2 is:  Bedroom
with a cat_2_gmv value of:  13453
for brand:  Yokochan(โยโกะจัง) Winning cat 2 is:  Bedroom
with a cat_2_gmv value of:  29860
for brand:  Memoe(เมโม่) Winning cat 2 is:  Bedroom
with a cat_2_gmv value of:  15530
for brand:  CABEAU(คาโบ) Winning cat 2 is:  Bedroom
with a cat_2_gmv value of:  16374
for brand:  JHC(เจเอชซี) Winning cat 2 is:  Bathroom
with a cat_2_gmv value of:  25449
for brand:  Supersorber Winning cat 2 is:  Bedroom
with a cat_2_gmv value of:  15411
for brand:  GREENLATEX by TMO(กรีนลาเท็กซ์บายทีเอ็มโอ) Winning cat 2 is:  Bedroom
with a cat_2_gmv value of:  14700
for brand:  Haenim(เฮนิม) Winning cat 2 is:  Baby goods
with a cat_2_gmv value of:  15900
for brand:  Bendix(เบ็นดิกซ์) Winning cat 2 is:  Spare parts and car accessories
with a cat_2_gmv value of:  154832
for brand:  Denso(เดนโซ) Winning cat 2 is:  Spare parts and car accessories
with a cat_2_gmv value of:  672064
for brand:  Kakaleer(คาคาเลียร์) Winning ca

for brand:  Campingmoon(แคมปิ้งมูน​) Winning cat 2 is:  Camping and hiking
with a cat_2_gmv value of:  524810
for brand:  Miniwell(มินิเวล) Winning cat 2 is:  Camping and hiking
with a cat_2_gmv value of:  10387
for brand:  Vidalido(วิดาลิโด) Winning cat 2 is:  Camping and hiking
with a cat_2_gmv value of:  3035578
for brand:  Convoy(คอนวอย) Winning cat 2 is:  Camping and hiking
with a cat_2_gmv value of:  126076
for brand:  K2(เคทู) Winning cat 2 is:  Camping and hiking
with a cat_2_gmv value of:  2078945
for brand:  DIY HOME Winning cat 2 is:  Camping and hiking
with a cat_2_gmv value of:  15561
for brand:  PETZL(เพ็ทซ์) Winning cat 2 is:  Camping and hiking
with a cat_2_gmv value of:  34940
for brand:  Fenix(ฟินิกส์) Winning cat 2 is:  Camping and hiking
with a cat_2_gmv value of:  163564
for brand:  Go Grill Winning cat 2 is:  Camping and hiking
with a cat_2_gmv value of:  25810
for brand:  Nordisk(นอดิส) Winning cat 2 is:  Camping and hiking
with a cat_2_gmv value of:  304550
for 

for brand:  NAZA(นาซ่า) Winning cat 2 is:  Car care products
with a cat_2_gmv value of:  54135
for brand:  NANONIX(นาโนนิค) Winning cat 2 is:  Car care products
with a cat_2_gmv value of:  11816
for brand:  Tidy tot & Tray Kit(ไทดี้ ทอต บิบ แอนด์ ทรีย์ คิต) Winning cat 2 is:  Equipment for children
with a cat_2_gmv value of:  31188
for brand:  Prince&Princess(ปริ๊น แอนด์ ปริ๊นเซส) Winning cat 2 is:  Equipment for children
with a cat_2_gmv value of:  79010
for brand:  Casiko(คาซิโกะ) Winning cat 2 is:  Kitchen and dining room
with a cat_2_gmv value of:  861126
for brand:  Minimex(มินิมิกซ์) Winning cat 2 is:  Small kitchen appliances
with a cat_2_gmv value of:  32552
for brand:  Up&Under(อัพแอนด์อันเดอร์) Winning cat 2 is:  Underwear
with a cat_2_gmv value of:  492680
for brand:  Fish4Dogs(ฟิชโฟร์ด็อกส์) Winning cat 2 is:  Dog
with a cat_2_gmv value of:  15849
for brand:  SmartHeart Gold Winning cat 2 is:  Dog
with a cat_2_gmv value of:  13440
for brand:  NOVOPEN(โนโวเพ็น) Winning cat 2

for brand:  Samylin(แซมมิลิน) Winning cat 2 is:  Dog
with a cat_2_gmv value of:  59780
for brand:  Vitamix Winning cat 2 is:  Small kitchen appliances
with a cat_2_gmv value of:  54400
for brand:  Keensen(คีนเซน) Winning cat 2 is:  Small kitchen appliances
with a cat_2_gmv value of:  22000
for brand:  BOSCH(บอสช์) Winning cat 2 is:  Small kitchen appliances
with a cat_2_gmv value of:  135770
for brand:  Certainty(เซอร์เตนตี้) Winning cat 2 is:  Health equipment
with a cat_2_gmv value of:  46000
for brand:  Coleman Winning cat 2 is:  Camping and hiking
with a cat_2_gmv value of:  98600
for brand:  DENPA Winning cat 2 is:  Kitchen and dining room
with a cat_2_gmv value of:  79331
for brand:  Kitco Stainless(คิทโค่ สเตนเลส) Winning cat 2 is:  Kitchen and dining room
with a cat_2_gmv value of:  187470
for brand:  Kinto(คินโตะ) Winning cat 2 is:  Kitchen and dining room
with a cat_2_gmv value of:  87660
for brand:  Soma Winning cat 2 is:  Health equipment
with a cat_2_gmv value of:  22000
f

In [26]:
brands_df

Unnamed: 0_level_0,Filtered_Brand_GMV,Filtered_Average_Unit_Price,%_Filtered_GMV_from_Top_3_Products,Filtered_Weighted_Star_Rating,Filtered_Bad_Rating_Percent,Brand_GMV_All_Cats,Conversion_Rate_All_Cats_(total_gmv/view),Cat_2_Percent_of_Total_GMV,Brand_Category_2
Brand Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Genie Bra(จีนี่ บรา),114692,831.101449,0.650351,4.830977,0.004,200022.0,12.231517,1.000,Underwear
Bandai(บันได)​,279564,714.997442,0.311764,4.929363,0.001,1892788.0,5.939500,1.000,Game collectibles
BANDAI(บันได),278401,818.826471,0.196842,4.960065,0.000,366985.0,4.645027,1.000,Game collectibles
Bandai(บันได),183393,921.572864,0.762036,4.947261,0.002,590250.0,5.753597,0.441,Game collectibles
WELNESS(เวลเนส),119600,29900.000000,1.000000,4.642857,0.071,648900.0,91.769198,0.798,Health equipment
...,...,...,...,...,...,...,...,...,...
OK meter(โอเคเมเตอร์),44460,1852.500000,1.000000,4.853022,0.024,378534.0,33.617584,1.000,Health equipment
Mountain(เม้าเทน),65054,1586.682927,1.000000,4.784923,0.022,176294.0,43.810636,1.000,Office equipment
Osprey(ออสเปย์),17997,2249.625000,1.000000,4.761099,0.014,317296.0,64.675092,1.000,Camping and hiking
Denpa,139887,891.000000,1.000000,4.588477,0.021,172717.0,8.049448,0.810,Kitchen and dining room


# Calculate Additional Info on Recency

From the final list of brands we have, add info on recency ratio eg. percentage of total gmv coming from products established at least 3, 6, 12, 24 months. 

In [27]:
three_mo = 3 * 30 * 24 * 60 * 60
six_mo = 6 * 30 * 24 * 60 * 60
twelve_mo = 12 * 30 * 24 * 60 * 60
twenty_four_mo = 24 * 30 * 24 * 60 * 60

In [28]:
mask_3 = unix_time_now - products_static_untouched["ctime"] > three_mo
timed_df_3 = products_static_untouched[mask_3]

mask_6 = unix_time_now - products_static_untouched["ctime"] > six_mo
timed_df_6 = products_static_untouched[mask_6]

mask_12 = unix_time_now - products_static_untouched["ctime"] > twelve_mo
timed_df_12 = products_static_untouched[mask_12]

mask_24 = unix_time_now - products_static_untouched["ctime"] > twenty_four_mo
timed_df_24 = products_static_untouched[mask_24]

In [29]:
for brand in brands_df.index:
    #make a sub-df containing only rows with the correct brand
    my_brand_df = timed_df_3[timed_df_3["brand"] == brand]
    
    #gmv calculation
    brand_gmv_3 = my_brand_df["product_gmv"].sum()
    brands_df.loc[brand, "%Total GMV from products 3 months+"] = brand_gmv_3
    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [30]:
for brand in brands_df.index:
    #make a sub-df containing only rows with the correct brand
    my_brand_df = timed_df_6[timed_df_6["brand"] == brand]
    
    #gmv calculation
    brand_gmv_6 = my_brand_df["product_gmv"].sum()
    brands_df.loc[brand, "%Total GMV from products 6 months+"] = brand_gmv_6

In [31]:
for brand in brands_df.index:
    #make a sub-df containing only rows with the correct brand
    my_brand_df = timed_df_12[timed_df_12["brand"] == brand]
    
    #gmv calculation
    brand_gmv_12 = my_brand_df["product_gmv"].sum()
    brands_df.loc[brand, "%Total GMV from products 12 months+"] = brand_gmv_12

In [32]:
for brand in brands_df.index:
    #make a sub-df containing only rows with the correct brand
    my_brand_df = timed_df_24[timed_df_24["brand"] == brand]
    
    #gmv calculation
    brand_gmv_24 = my_brand_df["product_gmv"].sum()
    brands_df.loc[brand, "%Total GMV from products 24 months+"] = brand_gmv_24

In [33]:
#Now we have baht gmv in the four recency columns, 
#just convert into ratio compared to total_gmv 

In [34]:
for brand in brands_df.index:
    percentage = brands_df.loc[brand, "%Total GMV from products 3 months+"] / brands_df.loc[brand, 
                                                                                   "Brand_GMV_All_Cats"]
    brands_df.loc[brand, "%Total GMV from products 3 months+"] = round(percentage, 3)

In [35]:
for brand in brands_df.index:
    percentage = brands_df.loc[brand, "%Total GMV from products 6 months+"] / brands_df.loc[brand, 
                                                                                   "Brand_GMV_All_Cats"]
    brands_df.loc[brand, "%Total GMV from products 6 months+"] = round(percentage, 3)

In [36]:
for brand in brands_df.index:
    percentage = brands_df.loc[brand, "%Total GMV from products 12 months+"] / brands_df.loc[brand, 
                                                                                   "Brand_GMV_All_Cats"]
    brands_df.loc[brand, "%Total GMV from products 12 months+"] = round(percentage, 3)

In [37]:
for brand in brands_df.index:
    percentage = brands_df.loc[brand, "%Total GMV from products 24 months+"] / brands_df.loc[brand, 
                                                                                   "Brand_GMV_All_Cats"]
    brands_df.loc[brand, "%Total GMV from products 24 months+"] = round(percentage, 3)

# Output: Brands that Meet Our Criterion

In [38]:
brands_df.sort_index(inplace=True)
brands_df

#NOTE: Anything before the Brand_GMV_All_Cats uses FILTERED product data 
#i.e. data where we've the products to count for by category, and time. 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,Filtered_Brand_GMV,Filtered_Average_Unit_Price,%_Filtered_GMV_from_Top_3_Products,Filtered_Weighted_Star_Rating,Filtered_Bad_Rating_Percent,Brand_GMV_All_Cats,Conversion_Rate_All_Cats_(total_gmv/view),Cat_2_Percent_of_Total_GMV,Brand_Category_2,%Total GMV from products 3 months+,%Total GMV from products 6 months+,%Total GMV from products 12 months+,%Total GMV from products 24 months+
Brand Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
3M,230981,302.727392,0.336253,4.768214,0.011,563616.0,6.032430,0.316,Car care products,0.999,0.977,0.932,0.724
ACCU-CHEK(แอคคิวเช็ค),672223,679.699697,0.677647,4.919764,0.002,1017173.0,38.517608,1.000,Health equipment,0.925,0.854,0.677,0.355
ACDelco(เอซีเดลโก้),16088,893.777778,0.876803,4.865786,0.003,333032.0,13.708970,0.909,Oil and liquid,0.964,0.797,0.468,0.358
AIKO(ไอโกะ),41810,459.450549,0.522865,4.782051,0.011,211626.0,6.813238,0.540,Gas stove,0.991,0.889,0.272,0.026
AJ(เอเจ),65799,715.206522,0.948571,4.570254,0.038,1573309.0,8.474780,0.302,Gas stove,0.829,0.751,0.645,0.257
...,...,...,...,...,...,...,...,...,...,...,...,...,...
sonar(โซนาร์),19993,714.035714,0.720502,4.711176,0.010,472764.0,12.167392,0.891,Refrigerator,1.000,0.996,0.970,0.020
xiaomi,52263,360.434483,1.000000,4.766024,0.017,697365.0,6.271381,0.252,Computer accessories,0.997,0.643,0.183,0.006
ดอกกรรณิการ์,357785,394.906181,0.891653,4.858209,0.005,359950.0,58.585612,1.000,Health equipment,1.000,1.000,1.000,0.000
ศรีตรังโกลฟส์(ศรีตรังโกลฟส์),47915,395.991736,1.000000,4.829681,0.006,366216.0,38.781743,0.715,Health equipment,0.969,0.920,0.237,0.237


In [39]:
#make to csv here if desired 
#brands_df.to_csv("test_3.csv")

In [45]:
brands_df.head(30)

Unnamed: 0_level_0,Filtered_Brand_GMV,Filtered_Average_Unit_Price,%_Filtered_GMV_from_Top_3_Products,Filtered_Weighted_Star_Rating,Filtered_Bad_Rating_Percent,Brand_GMV_All_Cats,Conversion_Rate_All_Cats_(total_gmv/view),Cat_2_Percent_of_Total_GMV,Brand_Category_2,%Total GMV from products 3 months+,%Total GMV from products 6 months+,%Total GMV from products 12 months+,%Total GMV from products 24 months+
Brand Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
3M,230981,302.727392,0.336253,4.768214,0.011,563616.0,6.03243,0.316,Car care products,0.999,0.977,0.932,0.724
ACCU-CHEK(แอคคิวเช็ค),672223,679.699697,0.677647,4.919764,0.002,1017173.0,38.517608,1.0,Health equipment,0.925,0.854,0.677,0.355
ACDelco(เอซีเดลโก้),16088,893.777778,0.876803,4.865786,0.003,333032.0,13.70897,0.909,Oil and liquid,0.964,0.797,0.468,0.358
AIKO(ไอโกะ),41810,459.450549,0.522865,4.782051,0.011,211626.0,6.813238,0.54,Gas stove,0.991,0.889,0.272,0.026
AJ(เอเจ),65799,715.206522,0.948571,4.570254,0.038,1573309.0,8.47478,0.302,Gas stove,0.829,0.751,0.645,0.257
ASGUARD(แอสการ์ด),144130,2151.19403,0.861791,4.641208,0.015,167455.0,5.179715,0.999,Small kitchen appliances,0.965,0.965,0.861,0.816
Aiebao(ไอเบา),20300,580.0,1.0,4.73449,0.006,367786.0,11.611239,0.945,Baby Carrier - Cart - Car Seat,0.999,0.957,0.94,0.84
Aisin(ไอซิน),32358,475.852941,0.721614,4.831555,0.019,673999.0,12.721043,0.695,Oil and liquid,0.981,0.889,0.238,0.097
Alcon(ออลคอน),172220,318.336414,0.590611,4.933102,0.002,209279.0,24.049529,1.0,Health equipment,1.0,0.861,0.823,0.716
Amway(แอมเวย์),528051,2838.983871,0.90182,4.90119,0.004,9846252.0,34.594257,0.416,Health food supplements,0.798,0.577,0.265,0.085


# MISCELLANEOUS: Additional Work for Jan 8 Meeting

In [40]:
#Begin by importing data
products_static_raw = pd.read_csv("products_static_1607946056.csv")
products_static_raw["product_gmv"] = products_static_raw["price"] * products_static_raw["sold"] 

In [41]:
#Take Sheet Nakrin wants and add my column
info_1 = pd.read_csv("info_1_for_Jan_8.csv")

In [42]:
#Step 1: Convert Nakrin's given Cat 1's into Numbers as given by mapping
#Don't have to include thai. If really want to can always look up based
#on cat number
### 
category_ones = products_static_raw.loc[:, ("category_one_en", "category_one")]
category_ones.drop_duplicates(inplace=True)
cat_1_names = category_ones.sort_values(by="category_one_en")
### 
category_twos = products_static_raw.loc[:, ("category_two_en", "category_two")]
category_twos.drop_duplicates(inplace=True)
cat_2_names = category_twos.sort_values(by="category_two_en")
### 
category_threes = products_static_raw.loc[:, ("category_three_en", "category_three")]
category_threes.drop_duplicates(inplace=True)
cat_3_names = category_threes.sort_values(by="category_three_en")
'''
Call this if want a csv containing the most recent
"proper" cat names i.e. computer readable
cat_1_names.to_csv("cat_1_names.csv")
cat_2_names.to_csv("cat_2_names.csv")
cat_3_names.to_csv("cat_3_names.csv")
'''

#Manually delete rows we don't want from the cat list
#This will likely be a rough list of cats we want since
#we've eliminated any non attractive cats
cat_1_names.drop(cat_1_names.loc[cat_1_names["category_one"]==2085].index, 
                 inplace=True)
cat_1_names.dropna(inplace=True)

In [43]:
for cat_1 in info_1["Cat_1_computer"]:
    cat_1_mask = products_static_raw["category_one_en"] == cat_1
    cat_1_df = products_static_raw[cat_1_mask]
    cat_gmv = cat_1_df["product_gmv"].sum()
    info_1.loc[cat_1, "Annual GMV (Sales)"] = cat_gmv * 12
    

KeyError: 'Cat_1_computer'

In [None]:
#info_1.to_csv("info_1_additional_info.csv")

### Repeat Process for Cat 2

In [None]:
#Challenge is it's not only the cat 2 you care about 
#but which cat 1 that cat 2 is from. 

#i.e. need a way to pull both cat_1_computer_name eg. "Accessories"
# AND cat_2_computer_name eg. "Bracelet"

In [None]:
info_2 = pd.read_csv("info_2_for_Jan_8.csv")

In [None]:
info_2

In [None]:
#can do a manual solution i.e. specify cat 1 then apply a 
#second mask for cat 2 filter eg. 
store_1 = ['Accessories', 'Bag', "Men's fashion", "Men's Shoes",
         "Women's fashion", "Women's Shoes"]
store_2 = ['Bracelet',
         'Hair accessory',
         'Hat',
         'Scarves and shawls',
         'Backpack',
         'Bag accessories',
         'Brand name',
         'Cloth bag',
         'Earrings',
         'Shoulder bag',
         'Backpack',
         'Bracelet',
         'Health equipment',
         'Jeans',
         "Men's underwear",
         'Other',
         'Outerwear',
         'Shorts',
         'T-shirt',
         'Big head shoes',
         'Boots',
         'Computer accessories',
         'Gaming equipment',
         'Heel strap shoes',
         'Lace up shoes',
         'Lace-up sneakers',
         'Slippers',
         'Snacks',
         'Socks',
         'Backpack',
         'Belt',
         'Denim',
         'Dress',
         'Health equipment',
         'Jackets and Coats',
         'Jumpsuit',
         "Men's underwear",
         'Pants',
         'Personal belongings',
         'Plus size clothing',
         'Shirt',
         'Skirt',
         'Swimwear',
         'T-shirt',
         'Underwear',
         "Women's sportswear",
         'Backpack',
         'Big head shoes',
         'Boots',
         'Canvas shoes',
         'Casual shoes',
         'Flat shoes',
         'Gaming equipment',
         'Health equipment',
         'Heel strap shoes',
         'High heels',
         'Shoulder bag']

In [None]:
#START HERE: can do this manual process if want but 
#there should be a more elegant solution

In [None]:
#more elegant attempt

cat_1 = "Accessories"
mask1 = products_static_raw["category_one_en"] == cat_1

for cat_2 in ['Bracelet',
         'Hair accessory',
         'Hat',
         'Scarves and shawls']:
    
    mask2 = products_static_raw["category_two_en"] == cat_2
    
    cat_2_df = products_static_raw[mask1 & mask2]
    cat_2_gmv = cat_2_df["product_gmv"].sum()
    annual_gmv = cat_2_gmv * 12
    info_2.loc[cat_2, "Annual GMV (Sales)"] = annual_gmv

In [None]:
#more elegant attempt

cat_1 = "Bag"
mask1 = products_static_raw["category_one_en"] == cat_1

for cat_2 in ['Backpack',
         'Bag accessories',
         'Brand name',
         'Cloth bag',
         'Earrings',
         'Shoulder bag']:
    
    mask2 = products_static_raw["category_two_en"] == cat_2
    
    cat_2_df = products_static_raw[mask1 & mask2]
    cat_2_gmv = cat_2_df["product_gmv"].sum()
    annual_gmv = cat_2_gmv * 12
    info_2.loc[cat_2, "Annual GMV (Sales)"] = annual_gmv

In [None]:
#more elegant attempt

cat_1 = "Men's fashion"
mask1 = products_static_raw["category_one_en"] == cat_1

for cat_2 in ['Backpack',
         'Bracelet',
         'Health equipment',
         'Jeans',
         "Men's underwear",
         'Other',
         'Outerwear',
         'Shorts',
         'T-shirt']:
    
    mask2 = products_static_raw["category_two_en"] == cat_2
    
    cat_2_df = products_static_raw[mask1 & mask2]
    cat_2_gmv = cat_2_df["product_gmv"].sum()
    annual_gmv = cat_2_gmv * 12
    info_2.loc[cat_2, "Annual GMV (Sales)"] = annual_gmv

In [None]:
#more elegant attempt

cat_1 = "Men's Shoes"
mask1 = products_static_raw["category_one_en"] == cat_1

for cat_2 in ['Big head shoes',
         'Boots',
         'Computer accessories',
         'Gaming equipment',
         'Heel strap shoes',
         'Lace up shoes',
         'Lace-up sneakers',
         'Slippers',
         'Snacks',
         'Socks']:
    
    mask2 = products_static_raw["category_two_en"] == cat_2
    
    cat_2_df = products_static_raw[mask1 & mask2]
    cat_2_gmv = cat_2_df["product_gmv"].sum()
    annual_gmv = cat_2_gmv * 12
    info_2.loc[cat_2, "Annual GMV (Sales)"] = annual_gmv

In [None]:
#more elegant attempt

cat_1 = "Women's fashion"
mask1 = products_static_raw["category_one_en"] == cat_1

for cat_2 in ['Backpack',
         'Belt',
         'Denim',
         'Dress',
         'Health equipment',
         'Jackets and Coats',
         'Jumpsuit',
         "Men's underwear",
         'Pants',
         'Personal belongings',
         'Plus size clothing',
         'Shirt',
         'Skirt',
         'Swimwear',
         'T-shirt',
         'Underwear',
         "Women's sportswear"]:
    
    mask2 = products_static_raw["category_two_en"] == cat_2
    
    cat_2_df = products_static_raw[mask1 & mask2]
    cat_2_gmv = cat_2_df["product_gmv"].sum()
    annual_gmv = cat_2_gmv * 12
    info_2.loc[cat_2, "Annual GMV (Sales)"] = annual_gmv

In [None]:
#more elegant attempt

cat_1 = "Women's Shoes"
mask1 = products_static_raw["category_one_en"] == cat_1

for cat_2 in ['Backpack',
         'Big head shoes',
         'Boots',
         'Canvas shoes',
         'Casual shoes',
         'Flat shoes',
         'Gaming equipment',
         'Health equipment',
         'Heel strap shoes',
         'High heels',
         'Shoulder bag']:
    
    mask2 = products_static_raw["category_two_en"] == cat_2
    
    cat_2_df = products_static_raw[mask1 & mask2]
    cat_2_gmv = cat_2_df["product_gmv"].sum()
    annual_gmv = cat_2_gmv * 12
    info_2.loc[cat_2, "Annual GMV (Sales)"] = annual_gmv

In [None]:
info_2.tail(57)

In [None]:
#info_2.to_csv("info_2_additional_info.csv")

In [None]:
category_twos = products_static_raw.loc[:, ("category_two_en", "category_two")]
category_twos.drop_duplicates(inplace=True)
cat_2_names = category_twos.sort_values(by="category_two_en")

In [None]:
cat_2_names

In [None]:
#check the cat 2's we didn't get to after
#running through the whole thing

In [None]:
#Step 4: add Data to Sheet 3, sheet 5 on excel online

#Step 5: Do the trailing products ratio based on recency etc. 

#Step 6: plus reply to email

#address concern of brand gmv filter not working (it does, 
#see details) 

#and give this additional info

In [None]:
#Don't need to create a new csv for name mappings b/c the level 2 is all good already
'''
category_twos = products_static_raw.loc[:, ("category_one_en", "category_two_en", "category_two")]
category_twos.drop_duplicates(inplace=True)
cat_2_names = category_twos.sort_values(by="category_two_en")

cat_2_names.sort_values(by=["category_one_en", "category_two_en"], inplace=True)

cat_2_names.to_csv("cat_2_computer_names.csv")
'''

In [None]:
'''
To get category name info
models_static = pd.read_csv("models_static_data.csv")

mapping_1 = models_static[["category_one", "category_one_en", "category_one_th"]].drop_duplicates()
mapping_1 = mapping_1.set_index("category_one").dropna().sort_index()

mapping_2 = models_static[["category_two", "category_two_en", "category_two_th"]].drop_duplicates()
mapping_2 = mapping_2.set_index("category_two").dropna().sort_index()

mapping_3 = models_static[["category_three", "category_three_en", "category_three_th"]].drop_duplicates()
mapping_3 = mapping_3.set_index("category_three").dropna().sort_index()

mapping_1.to_csv("mapping_1.csv")
mapping_2.to_csv("mapping_2.csv")
mapping_3.to_csv("mapping_3.csv")

*NOTE: This gives a limited list b/c the models_static csv 
       was limited to just fashion cats last time (to make
       data transfer via harddrive easier). 
'''