In [48]:
import json
import pandas as pd
import numpy as np
from time import time
from glob import glob

In [49]:
# abs path -> 22-03-07/ *.json
best = glob('22-03-07/best*.json')
item = glob('22-03-07/item*.json')
products = glob('22-03-07/products*.json')
#category1 == category2 = True
category = glob('22-03-07/category1.json')

In [142]:
import pandas as pd
import json

class Preprocess:
    
    def __init__(self, best, item, products, category, style_ths=0.1):
        """
        style_ths : top_style_predictions 에서 범위 조절 (e.g. 0.1 이상으로 예측 점수를 받은 스타일만 받아옴 - maximum 3개)
        """
        self.style_ths = style_ths
        self.style_ths_name = str(style_ths)[-1]
        
        self.best = best
        self.item = item
        self.products = products
        self.category = category
        
        self.best_json = self.read_json(best)
        self.item_json = self.read_json(item)
        self.products_json = self.read_json(products).drop_duplicates('_id') # products json duplicated deleted
        self.category_json = self.read_json2(category).reset_index(drop=True)
        self.best_item, self.category,self.products_4, self.products_b = self.preprocess(self.best_json, 
                                                                           self.item_json, 
                                                                           self.products_json, 
                                                                           self.category_json)

    def read_json(self,json_file):
        df = pd.DataFrame()
        if json_file != self.products:  
            for file in json_file:
                x = pd.read_json(file)
                df = pd.concat([df, x])
        else:
            # why -> products json duplicated 
            for file in json_file:
                x = pd.read_json(file).T.reset_index(drop=True)    
                df = pd.concat([df, x])
        return df

    def read_json2(self,category):
        for i in category:
            with open(i) as js:
                json_data = json.load(js)
        cat_4, cat_b = pd.DataFrame(json_data['421B6D0E746C4E6D']), pd.DataFrame(json_data['B57D4F97C0E44A11'])
        category = pd.concat([cat_4, cat_b])
        return category
    
    def get_productId(self, items_list):
        new_list = []
        for item in items_list:
            new_list.append(item['productId'])
        return new_list
    
    # category['children'] -> name(label) 
    def find_category_df(self, category):
        df = pd.DataFrame()
        for i in category['children']:
            df2 = pd.DataFrame(i)
            df = pd.concat([df, df2])
        return df
        
    def preprocess(self, best, item, products, category):
        
        item.rename(columns={'enterpriseId':'enterprise_id', '_id':'id'}, inplace=True)
        templates = pd.merge(best, item, on=['enterprise_id', 'projectId', 'id'])
        
        # 2 enterprise id 
        ent2, ent1 = templates['enterprise_id'].unique().tolist()
        
        # get item_id 
        templates['items'] = templates['items'].apply(self.get_productId)
        
        # find_category_id_preprocess
    
        # edit new columns style_name, style_score > 0.1 
        templates[f'top_style_{self.style_ths_name}'] = templates['style_predictions'].apply(lambda x: sorted([(name, score) for name, score in x.items() if score > self.style_ths], key=lambda x: x[1], reverse=True)[:3])
        # del list, style score
        templates[f'top_style_{self.style_ths_name}'] = templates[f'top_style_{self.style_ths_name}'].apply(lambda x: [name for name, score in x])
        # Edit best_item['items'] = list(values) -> values
        items_stack = pd.DataFrame(templates['items'].apply(lambda x: pd.Series(x)).stack()).reset_index(1, drop=True) 
        products_df = pd.merge(templates[['enterprise_id', 
                                          f'top_style_{self.style_ths_name}', 
                                          'top_style', 
                                          'projectId', 
                                          'awesome_score']].reset_index(), 
                               items_stack.reset_index(), 
                               on='index').drop(['index'], axis=1).rename(columns = {0:'product_id'})
        prod_tags_df = products[['_id', 'tags', 'name', 'images', 'categories']]
        products_df = pd.merge(products_df, 
                               prod_tags_df, 
                               left_on='product_id', 
                               right_on='_id').drop(['_id'], axis=1)
        products_df = pd.merge(products_df, 
                               products_df['product_id'].value_counts().reset_index(),
                               left_on='product_id',
                               right_on='index').rename(columns = {'product_id_x': 'product_id',
                                                                   'product_id_y':'use_count'}).drop(['index'], axis=1)
        
        # category[categories] = list(values) -> values
        products_df['category_name'] = products_df['categories'].apply(lambda x: pd.Series(x)).reset_index(drop=True).drop(columns=1)
        
        # find_category_id_preprocess
        cat_df = self.find_category_df(self.category_json) 
        cat_df2 = self.find_category_df(cat_df)
        self.products_json['category_name'] = self.products_json['categories'].apply(lambda x: pd.Series(x)).reset_index(drop=True).drop(columns=1)
        # category_df['name_x'] = category['children']
        # category_df['name_y'] = category['children'][children]
        prod_a = pd.merge(self.products_json,
                          cat_df[['name', '_id', 'enterpriseId']],
                          left_on='category_name', right_on='_id', how='left').sort_values(by='_id_x')
        
        prod_b = pd.merge(self.products_json,
                          cat_df2[['name', '_id', 'enterpriseId']],
                          left_on='category_name', right_on='_id', how='left').sort_values(by='_id_x')
        
        category_df = pd.concat([prod_a.rename(columns={'name_y':'category_a'}),
                                 prod_b[['name_y']].rename(columns={'name_y':'category_b'})],axis=1)
        
        category_df = pd.merge(self.products_json[['_id', 'name', 'tags', 'dimensions', 'images']], 
                               category_df[['_id_x', 'category_a', 'category_b']], 
                               left_on ='_id', 
                               right_on = '_id_x').drop(['_id_x'], axis=1)
        
        products_df = pd.merge(products_df[['enterprise_id', 'projectId', 'top_style_1', 
                                            'top_style', 'awesome_score', 'product_id', 'use_count']], 
                               category_df, 
                               left_on='product_id', 
                               right_on='_id').drop(['_id'], axis=1)
        
        products_df_4 = products_df[products_df['enterprise_id'] == ent1].reset_index(drop=True)
        products_df_b = products_df[products_df['enterprise_id'] == ent2].reset_index(drop=True)
        
        return templates,category_df, products_df_4, products_df_b

In [143]:
x = Preprocess(best, item, products, category)

  items_stack = pd.DataFrame(templates['items'].apply(lambda x: pd.Series(x)).stack()).reset_index(1, drop=True)
  products_df['category_name'] = products_df['categories'].apply(lambda x: pd.Series(x)).reset_index(drop=True).drop(columns=1)
  self.products_json['category_name'] = self.products_json['categories'].apply(lambda x: pd.Series(x)).reset_index(drop=True).drop(columns=1)


In [144]:
x.products_4

Unnamed: 0,enterprise_id,top_style_1,top_style,projectId,awesome_score,product_id,tags_x,name_x,images_x,categories,...,_id,name_y,enterpriseId,tags_y,dimensions,images_y,_id_x,name_x.1,category_a,category_b
0,421B6D0E746C4E6D,"[MODERN, NATURAL]",MODERN,X9IceC_16DC38C091A6439F,0.984499,6E3531E25BF343BE,[],투명 식물 액자 - 몬스테라 A3사이즈,[https://resources.archisketch.com/product/6E3...,[XuaCpKo3531C7175AC44FF3],...,6E3531E25BF343BE,투명 식물 액자 - 몬스테라 A3사이즈,B57D4F97C0E44A11,[],"{'width': 330, 'height': 449, 'depth': 100, 'u...",[https://resources.archisketch.com/product/6E3...,6E3531E25BF343BE,투명 식물 액자 - 몬스테라 A3사이즈,,벽/천장장식
1,421B6D0E746C4E6D,[MODERN],MODERN,X5WaEry9CEC543A4961492E,0.987451,XYW9sOc9CC9CD1261C74F5E,"[조명, 무드등/장식조명, 무드등, 무드, 브라운, 갈색, 무드등/수면등, 무드등,...",우리집 속 불멍 벽난로 무드등,[https://resources.archisketch.com/product/XYW...,[XrM7nN5EA9A10A8E476454A],...,XYW9sOc9CC9CD1261C74F5E,우리집 속 불멍 벽난로 무드등,B57D4F97C0E44A11,"[조명, 무드등/장식조명, 무드등, 무드, 브라운, 갈색, 무드등/수면등, 무드등,...","{'width': 320, 'height': 390, 'depth': 140, 'u...",[https://resources.archisketch.com/product/XYW...,XYW9sOc9CC9CD1261C74F5E,우리집 속 불멍 벽난로 무드등,무드등/장식조명,
2,421B6D0E746C4E6D,"[MODERN, SCANDINAVIAN]",MODERN,X3cjAHq297A348CEE5B4698,0.904893,9685F6318BD040E1,"[커튼, 데코뷰, 호텔식, 화이트, 시폰]",호텔식 화이트 시폰 커튼,[https://resources.archisketch.com/product/968...,[XrMYTHY9CB40FCFDDC14991],...,9685F6318BD040E1,호텔식 화이트 시폰 커튼,B57D4F97C0E44A11,"[커튼, 데코뷰, 호텔식, 화이트, 시폰]","{'width': 3010, 'height': 2331, 'depth': 288, ...",[https://resources.archisketch.com/product/968...,9685F6318BD040E1,호텔식 화이트 시폰 커튼,커튼,
3,421B6D0E746C4E6D,[MODERN],MODERN,X8HTtd3A494EC74B8EA45A3,0.956234,9685F6318BD040E1,"[커튼, 데코뷰, 호텔식, 화이트, 시폰]",호텔식 화이트 시폰 커튼,[https://resources.archisketch.com/product/968...,[XrMYTHY9CB40FCFDDC14991],...,9685F6318BD040E1,호텔식 화이트 시폰 커튼,B57D4F97C0E44A11,"[커튼, 데코뷰, 호텔식, 화이트, 시폰]","{'width': 3010, 'height': 2331, 'depth': 288, ...",[https://resources.archisketch.com/product/968...,9685F6318BD040E1,호텔식 화이트 시폰 커튼,커튼,
4,421B6D0E746C4E6D,[MODERN],MODERN,X4kGJoQA6F0C0CB39874510,0.902196,9685F6318BD040E1,"[커튼, 데코뷰, 호텔식, 화이트, 시폰]",호텔식 화이트 시폰 커튼,[https://resources.archisketch.com/product/968...,[XrMYTHY9CB40FCFDDC14991],...,9685F6318BD040E1,호텔식 화이트 시폰 커튼,B57D4F97C0E44A11,"[커튼, 데코뷰, 호텔식, 화이트, 시폰]","{'width': 3010, 'height': 2331, 'depth': 288, ...",[https://resources.archisketch.com/product/968...,9685F6318BD040E1,호텔식 화이트 시폰 커튼,커튼,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4769,421B6D0E746C4E6D,"[MODERN, NATURAL]",MODERN,X4RwILN6A20B3B4241C4085,0.921340,XhoyNTc39A13BED86BD4F79,[],몬스테라_우드 스탠드화분(중),[https://resources.archisketch.com/product/Xho...,[35639963DFE14DAE],...,XhoyNTc39A13BED86BD4F79,몬스테라_우드 스탠드화분(중),421B6D0E746C4E6D,[],"{'width': 451, 'height': 691, 'depth': 458, 'u...",[https://resources.archisketch.com/product/Xho...,XhoyNTc39A13BED86BD4F79,몬스테라_우드 스탠드화분(중),식탁,
4770,421B6D0E746C4E6D,"[MODERN, NATURAL]",MODERN,X4RwILN6A20B3B4241C4085,0.921340,XoeB5AN55133C37EFF84438,"[모스카토, Moscato, 스파클링, 스파클링와인]",모스카토 스위트 스파클링,[https://resources.archisketch.com/product/Xoe...,[Xoxn39o2E16F00AB2024ED3],...,XoeB5AN55133C37EFF84438,모스카토 스위트 스파클링,421B6D0E746C4E6D,"[모스카토, Moscato, 스파클링, 스파클링와인]","{'width': 85, 'height': 334, 'depth': 85, 'uni...",[https://resources.archisketch.com/product/Xoe...,XoeB5AN55133C37EFF84438,모스카토 스위트 스파클링,홈바/아일랜드식탁,
4771,421B6D0E746C4E6D,"[MODERN, NATURAL]",MODERN,X4RwILN6A20B3B4241C4085,0.921340,XoTRw3A83567968A2B74D99,[],와인잔,[https://resources.archisketch.com/product/XoT...,[Xoxn39o2E16F00AB2024ED3],...,XoTRw3A83567968A2B74D99,와인잔,421B6D0E746C4E6D,[],"{'width': 87, 'height': 232, 'depth': 87, 'uni...",[https://resources.archisketch.com/product/XoT...,XoTRw3A83567968A2B74D99,와인잔,,화병/화분
4772,421B6D0E746C4E6D,"[MODERN, NATURAL, MID_CENTURY_MODERN]",MODERN,X4RwILN6A20B3B4241C4085,0.968451,XYizLmP9FA925D538DA4079,"[침대, 베드, 일반형침대, 일반형, 일반침대, 브라운, 갈색, 원목, 내추럴, 나...",마리카 아카시아 원목침대 2colors (SS) (내추럴),[https://resources.archisketch.com/product/XYi...,[XrMAoqvC726ACFACD674C33],...,XYizLmP9FA925D538DA4079,마리카 아카시아 원목침대 2colors (SS) (내추럴),B57D4F97C0E44A11,"[침대, 베드, 일반형침대, 일반형, 일반침대, 브라운, 갈색, 원목, 내추럴, 나...","{'width': 1235, 'height': 990, 'depth': 2150, ...",[https://resources.archisketch.com/product/XYi...,XYizLmP9FA925D538DA4079,마리카 아카시아 원목침대 2colors (SS) (내추럴),Candle & Diffuser,


In [128]:
len(x.products_4)

4774

In [121]:
xy = x.category
xy = xy.set_index('_id')

In [122]:
yy = x.products_4[['enterprise_id', 'top_style_1', 'top_style', 'projectId',
       'awesome_score', 'product_id', 'tags_x', 'name_x', 'images_x',
       'categories', 'use_count', 'category_name', '_id']]

yy = yy.set_index('category_name')

In [125]:
yy

Unnamed: 0_level_0,enterprise_id,top_style_1,top_style,projectId,awesome_score,product_id,tags_x,name_x,name_x,images_x,categories,use_count,_id
category_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
XuaCpKo3531C7175AC44FF3,421B6D0E746C4E6D,"[MODERN, NATURAL]",MODERN,X9IceC_16DC38C091A6439F,0.984499,6E3531E25BF343BE,[],투명 식물 액자 - 몬스테라 A3사이즈,,[https://resources.archisketch.com/product/6E3...,[XuaCpKo3531C7175AC44FF3],33,
XrM7nN5EA9A10A8E476454A,421B6D0E746C4E6D,[MODERN],MODERN,X5WaEry9CEC543A4961492E,0.987451,XYW9sOc9CC9CD1261C74F5E,"[조명, 무드등/장식조명, 무드등, 무드, 브라운, 갈색, 무드등/수면등, 무드등,...",우리집 속 불멍 벽난로 무드등,,[https://resources.archisketch.com/product/XYW...,[XrM7nN5EA9A10A8E476454A],31,
XrMYTHY9CB40FCFDDC14991,421B6D0E746C4E6D,"[MODERN, SCANDINAVIAN]",MODERN,X3cjAHq297A348CEE5B4698,0.904893,9685F6318BD040E1,"[커튼, 데코뷰, 호텔식, 화이트, 시폰]",호텔식 화이트 시폰 커튼,,[https://resources.archisketch.com/product/968...,[XrMYTHY9CB40FCFDDC14991],460,
XrMYTHY9CB40FCFDDC14991,421B6D0E746C4E6D,[MODERN],MODERN,X8HTtd3A494EC74B8EA45A3,0.956234,9685F6318BD040E1,"[커튼, 데코뷰, 호텔식, 화이트, 시폰]",호텔식 화이트 시폰 커튼,,[https://resources.archisketch.com/product/968...,[XrMYTHY9CB40FCFDDC14991],460,
XrMYTHY9CB40FCFDDC14991,421B6D0E746C4E6D,[MODERN],MODERN,X4kGJoQA6F0C0CB39874510,0.902196,9685F6318BD040E1,"[커튼, 데코뷰, 호텔식, 화이트, 시폰]",호텔식 화이트 시폰 커튼,,[https://resources.archisketch.com/product/968...,[XrMYTHY9CB40FCFDDC14991],460,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
35639963DFE14DAE,421B6D0E746C4E6D,"[MODERN, NATURAL]",MODERN,X4RwILN6A20B3B4241C4085,0.921340,XhoyNTc39A13BED86BD4F79,[],몬스테라_우드 스탠드화분(중),,[https://resources.archisketch.com/product/Xho...,[35639963DFE14DAE],1,
Xoxn39o2E16F00AB2024ED3,421B6D0E746C4E6D,"[MODERN, NATURAL]",MODERN,X4RwILN6A20B3B4241C4085,0.921340,XoeB5AN55133C37EFF84438,"[모스카토, Moscato, 스파클링, 스파클링와인]",모스카토 스위트 스파클링,,[https://resources.archisketch.com/product/Xoe...,[Xoxn39o2E16F00AB2024ED3],1,
Xoxn39o2E16F00AB2024ED3,421B6D0E746C4E6D,"[MODERN, NATURAL]",MODERN,X4RwILN6A20B3B4241C4085,0.921340,XoTRw3A83567968A2B74D99,[],와인잔,,[https://resources.archisketch.com/product/XoT...,[Xoxn39o2E16F00AB2024ED3],1,
XrMAoqvC726ACFACD674C33,421B6D0E746C4E6D,"[MODERN, NATURAL, MID_CENTURY_MODERN]",MODERN,X4RwILN6A20B3B4241C4085,0.968451,XYizLmP9FA925D538DA4079,"[침대, 베드, 일반형침대, 일반형, 일반침대, 브라운, 갈색, 원목, 내추럴, 나...",마리카 아카시아 원목침대 2colors (SS) (내추럴),,[https://resources.archisketch.com/product/XYi...,[XrMAoqvC726ACFACD674C33],1,


In [59]:
def find_category_df(category):
    df = pd.DataFrame()
    for i in category['children']:
        df2 = pd.DataFrame(i)
        df = pd.concat([df, df2])
    return df

df = find_category_df(x.category_json)
df2 = find_category_df(df).drop('parentId', axis=1).rename(columns={'grandparentId':'parentId'})
# dd = pd.concat([df, df2], ignore_index=True)
# dd = pd.merge(x.category[['name', '_id']], dd, left_on='_id', right_on='parentId').rename(columns={'_id_y' : '_id'}).drop(columns=['_id_x'])

In [60]:
df

Unnamed: 0,_id,name,enterpriseId,children,image,parentId,createdAt,updatedAt
0,1633637E83634380,Doors,421B6D0E746C4E6D,"[{'_id': '83D507B20A56479F', 'name': 'Swing Do...",https://resources.archisketch.com/enterprises/...,D0618DD3D2AD4ACF,2020-01-28T04:57:38.038Z,2021-06-25T02:37:33.522Z
1,B8A92E293E4E4047,Windows,421B6D0E746C4E6D,"[{'_id': '7D73016B30CC43F9', 'name': 'Swing Wi...",https://resources.archisketch.com/enterprises/...,D0618DD3D2AD4ACF,2020-01-28T04:58:18.274Z,2021-03-29T05:44:06.625Z
2,D70A7F6A83794317,Other Materials,421B6D0E746C4E6D,"[{'_id': 'AF6BCC66E8014F7B', 'name': 'Columns'...",https://resources.archisketch.com/enterprises/...,D0618DD3D2AD4ACF,2020-01-28T05:13:58.091Z,2021-06-25T02:36:56.262Z
0,7938684E15E143D6,Refrigerators,421B6D0E746C4E6D,"[{'_id': 'DCAAC2FE131344DD', 'name': 'Refriger...",https://resources.archisketch.com/enterprises/...,C5056ABD01F84CB8,2020-01-23T01:06:28.326Z,2021-01-27T04:45:25.209Z
1,97E89A79A2D549D6,Air Conditioners,421B6D0E746C4E6D,"[{'_id': '72DAF61594364EC2', 'name': 'Floor A/...",https://resources.archisketch.com/enterprises/...,C5056ABD01F84CB8,2020-01-23T01:06:28.326Z,2020-07-16T05:17:33.508Z
...,...,...,...,...,...,...,...,...
0,XrLtsdV9371C959478A4D04,유아동침대,B57D4F97C0E44A11,"[{'_id': 'Xti4gwm06CEE7D9C67F4070', 'name': 'N...",https://resources.archisketch.com/enterprises/...,XrLszCv9134EC572B724DF2,2021-07-22T00:56:31.317Z,2021-09-06T06:54:58.739Z
1,XrLtuH-7E461B9CB6CB464F,유아동소파/의자,B57D4F97C0E44A11,"[{'_id': 'Xti4ivw212B745EAB014041', 'name': 'N...",https://resources.archisketch.com/enterprises/...,XrLszCv9134EC572B724DF2,2021-07-22T00:56:38.142Z,2021-09-06T06:55:22.896Z
2,XrLtvzp2501BDC3011741BC,유아동수납,B57D4F97C0E44A11,"[{'_id': 'Xti4j2E1809CA54B3914C22', 'name': 'N...",https://resources.archisketch.com/enterprises/...,XrLszCv9134EC572B724DF2,2021-07-22T00:56:45.033Z,2021-09-06T06:55:35.101Z
3,XrLtxCDAEC74F8C38D6434B,유아동책상/테이블,B57D4F97C0E44A11,"[{'_id': 'Xti4lBBCE5460A7866A46AD', 'name': 'N...",https://resources.archisketch.com/enterprises/...,XrLszCv9134EC572B724DF2,2021-07-22T00:56:50.051Z,2021-09-06T06:55:50.271Z


In [61]:
df2

Unnamed: 0,_id,name,enterpriseId,image,parentId,createdAt,updatedAt
0,83D507B20A56479F,Swing Doors,421B6D0E746C4E6D,https://resources.archisketch.com/enterprises/...,D0618DD3D2AD4ACF,2020-01-28T05:00:37.293Z,2020-02-04T08:21:41.079Z
1,AE5BFBA2DE9F439B,Sliding Doors,421B6D0E746C4E6D,https://resources.archisketch.com/enterprises/...,D0618DD3D2AD4ACF,2020-01-28T05:01:46.053Z,2020-02-04T08:22:01.978Z
2,FA0D229869E34AEF,Folding Doors,421B6D0E746C4E6D,https://resources.archisketch.com/enterprises/...,D0618DD3D2AD4ACF,2020-01-28T05:07:15.234Z,2020-02-04T08:22:25.474Z
3,4F088FF30DF04594,Garage Doors,421B6D0E746C4E6D,https://resources.archisketch.com/enterprises/...,D0618DD3D2AD4ACF,2020-01-28T05:11:12.445Z,2021-06-25T02:37:33.522Z
0,7D73016B30CC43F9,Swing Windows,421B6D0E746C4E6D,https://resources.archisketch.com/enterprises/...,D0618DD3D2AD4ACF,2020-01-28T05:11:59.987Z,2020-02-04T08:23:15.345Z
...,...,...,...,...,...,...,...
0,Xti4gwm06CEE7D9C67F4070,NEW,B57D4F97C0E44A11,https://resources.archisketch.com/enterprises/...,XrLszCv9134EC572B724DF2,2021-08-20T09:26:26.598Z,2021-09-06T06:54:58.739Z
0,Xti4ivw212B745EAB014041,NEW,B57D4F97C0E44A11,https://resources.archisketch.com/enterprises/...,XrLszCv9134EC572B724DF2,2021-08-20T09:26:34.736Z,2021-09-06T06:55:22.896Z
0,Xti4j2E1809CA54B3914C22,NEW,B57D4F97C0E44A11,https://resources.archisketch.com/enterprises/...,XrLszCv9134EC572B724DF2,2021-08-20T09:26:39.236Z,2021-09-06T06:55:35.101Z
0,Xti4lBBCE5460A7866A46AD,NEW,B57D4F97C0E44A11,https://resources.archisketch.com/enterprises/...,XrLszCv9134EC572B724DF2,2021-08-20T09:26:44.033Z,2021-09-06T06:55:50.271Z


In [62]:
x.products_json['category_name'] = x.products_json['categories'].apply(lambda x: pd.Series(x)).reset_index(drop=True).drop(columns=1)
# categories_name.apply(find_category)

  x.products_json['category_name'] = x.products_json['categories'].apply(lambda x: pd.Series(x)).reset_index(drop=True).drop(columns=1)


In [66]:
prod_a = pd.merge(x.products_json[['_id', 'name', 'category_name']], 
         df[['name', '_id', 'enterpriseId']], 
         left_on='category_name', right_on='_id', how='left')

prod_b = pd.merge(x.products_json[['_id', 'name', 'category_name']], 
         df2[['name', '_id', 'enterpriseId']], 
         left_on='category_name', right_on='_id', how='left').sort_values(by='_id_x')


In [64]:
prod_a.head()

Unnamed: 0,_id_x,name_x,category_name,name_y,_id_y,enterpriseId
0,X5rPclT9CFB7E73DEA8495A,4F_TV,X5sOfKR8EE6E3FB89134354,,,
1,Xo7kQN3DC7A5B7C9FE746D6,스마트 라인 LED (화이트)_on,Xo20nqI91CAA7647AA149EF,LED Ceiling Lightings,Xo20nqI91CAA7647AA149EF,421B6D0E746C4E6D
2,XtxfYC1EF0F9090248B4518,브로 사계절 인테리어 러그 S,10D4B42437844E42,Rugs,10D4B42437844E42,421B6D0E746C4E6D
3,XSWNaDQ8D34AB5317CA4971,맥북프로 13형,1348D5C358224276,,,
4,XT21RHb69DF906D42DF4284,원터치 와이드 화이트 휴지통 10L,XrNAUjE123731FDC5D847C0,생활,XrNAUjE123731FDC5D847C0,B57D4F97C0E44A11


In [67]:
prod_b.head()

Unnamed: 0,_id_x,name_x,category_name,name_y,_id_y,enterpriseId
490,002E454FDD8D4D01,호텔식 더뷰 암막커튼 9colors (창문형/긴창형) (마블그레이),XrMYTHY9CB40FCFDDC14991,,,
20,0033BB725EF54736,투명 식물 액자 - 야자 A3사이즈,XuaCpKo3531C7175AC44FF3,벽/천장장식,XuaCpKo3531C7175AC44FF3,B57D4F97C0E44A11
138,009215254E844EE9,밤부 티슈 케이스 106,XuaCriaC3DD13E894674BEF,인테리어소품,XuaCriaC3DD13E894674BEF,B57D4F97C0E44A11
6663,00E53B958EA24EBF,위더스 컴퓨터책상 1260 2colors (티/화),XrM7nN5EA9A10A8E476454A,,,
239,00F723402D634E1F,선데이 러그 특대형 (그레이),XrMYNApF08F061085254CB7,,,


In [16]:
category = pd.concat([prod_a.rename(columns={'name_y':'category_a'}), 
           prod_b[['name_y']].rename(columns={'name_y':'category_b'})], axis=1)

In [17]:
category

Unnamed: 0,_id_x,name_x,category_name,category_a,_id_y,enterpriseId,category_b
0,X5rPclT9CFB7E73DEA8495A,4F_TV,X5sOfKR8EE6E3FB89134354,,,,
1,Xo7kQN3DC7A5B7C9FE746D6,스마트 라인 LED (화이트)_on,Xo20nqI91CAA7647AA149EF,LED Ceiling Lightings,Xo20nqI91CAA7647AA149EF,421B6D0E746C4E6D,
2,XtxfYC1EF0F9090248B4518,브로 사계절 인테리어 러그 S,10D4B42437844E42,Rugs,10D4B42437844E42,421B6D0E746C4E6D,
3,XSWNaDQ8D34AB5317CA4971,맥북프로 13형,1348D5C358224276,,,,Laptops & Portables
4,XT21RHb69DF906D42DF4284,원터치 와이드 화이트 휴지통 10L,XrNAUjE123731FDC5D847C0,생활,XrNAUjE123731FDC5D847C0,B57D4F97C0E44A11,
...,...,...,...,...,...,...,...
9510,XdGlxwz9246FC25CD404DE4,비올렛 디자인체어 2colors (월넛브라운),XrLsP5FB1A8CBA8FF744780,식탁,XrLsP5FB1A8CBA8FF744780,B57D4F97C0E44A11,
9511,XvJtJWPA51ACB9F262E4019,모뜨 3단 서랍장 (화이트),XrLs2vMD1BC5D9B32664A60,일반의자,XrLs2vMD1BC5D9B32664A60,B57D4F97C0E44A11,
9512,XhJp_eJBCFF598127BE4AD8,사무용/컴퓨터 메쉬의자 801 화이트프레임 HEAD(포켓스프링 방석) (그레이),XrLrsOTDB80B92F9B9D440E,서랍장,XrLrsOTDB80B92F9B9D440E,B57D4F97C0E44A11,
9513,Xz-wAFDFF8B0AABAE3B444D,프리미엄 블루투스 홈오디오 모델 XL Model XL (화이트),XrLs4Lm51A52E3EE5094BE3,학생/오피스의자,XrLs4Lm51A52E3EE5094BE3,B57D4F97C0E44A11,


In [68]:
category.head()

Unnamed: 0,_id_x,name_x,category_name,category_a,_id_y,enterpriseId,category_b
0,X5rPclT9CFB7E73DEA8495A,4F_TV,X5sOfKR8EE6E3FB89134354,,,,
1,Xo7kQN3DC7A5B7C9FE746D6,스마트 라인 LED (화이트)_on,Xo20nqI91CAA7647AA149EF,LED Ceiling Lightings,Xo20nqI91CAA7647AA149EF,421B6D0E746C4E6D,
2,XtxfYC1EF0F9090248B4518,브로 사계절 인테리어 러그 S,10D4B42437844E42,Rugs,10D4B42437844E42,421B6D0E746C4E6D,
3,XSWNaDQ8D34AB5317CA4971,맥북프로 13형,1348D5C358224276,,,,Laptops & Portables
4,XT21RHb69DF906D42DF4284,원터치 와이드 화이트 휴지통 10L,XrNAUjE123731FDC5D847C0,생활,XrNAUjE123731FDC5D847C0,B57D4F97C0E44A11,


In [18]:
prod = x.products_json

In [27]:
prod.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9515 entries, 0 to 9513
Data columns (total 40 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   _id               9515 non-null   object
 1   name              9515 non-null   object
 2   modelUrl          605 non-null    object
 3   enterpriseId      9515 non-null   object
 4   userId            5886 non-null   object
 5   categories        9515 non-null   object
 6   archiCategories   4825 non-null   object
 7   description       4129 non-null   object
 8   tags              9515 non-null   object
 9   brand             9206 non-null   object
 10  sku               7827 non-null   object
 11  alias             7906 non-null   object
 12  legacyName        343 non-null    object
 13  type              9515 non-null   object
 14  productType       9426 non-null   object
 15  dimensions        9515 non-null   object
 16  website           8446 non-null   object
 17  previewImage  

In [None]:
_id, name, enterprisedId, tags, dimensions, images

In [30]:
x = pd.merge(prod[['_id', 'name', 'enterpriseId', 'tags', 'dimensions', 'images']], category[['_id_x', 'name_x', 'category_a', 'enterpriseId', 'category_b']], left_on ='_id', right_on = '_id_x')

Unnamed: 0,_id,name,enterpriseId_x,tags,dimensions,images,_id_x,name_x,category_a,enterpriseId_y,category_b
0,X5rPclT9CFB7E73DEA8495A,4F_TV,15BEED4FED5A48FD,[],"{'width': 4200, 'height': 2800, 'depth': 40, '...",[https://resources.archisketch.com/product/X5r...,X5rPclT9CFB7E73DEA8495A,4F_TV,,,
1,Xo7kQN3DC7A5B7C9FE746D6,스마트 라인 LED (화이트)_on,421B6D0E746C4E6D,"[헤이홈, hejhome, 조명, 라인등, 라인LED]","{'width': 2000, 'height': 10, 'depth': 5, 'uni...",[https://resources.archisketch.com/product/Xo7...,Xo7kQN3DC7A5B7C9FE746D6,스마트 라인 LED (화이트)_on,LED Ceiling Lightings,421B6D0E746C4E6D,
2,XtxfYC1EF0F9090248B4518,브로 사계절 인테리어 러그 S,421B6D0E746C4E6D,"[데이드리머, daydreamer, 러그, 사각러그]","{'width': 2000, 'height': 10, 'depth': 1400, '...",[https://resources.archisketch.com/product/Xtx...,XtxfYC1EF0F9090248B4518,브로 사계절 인테리어 러그 S,Rugs,421B6D0E746C4E6D,
3,XSWNaDQ8D34AB5317CA4971,맥북프로 13형,421B6D0E746C4E6D,"[애플, Apple, 맥북, 노트북, 사무용, 사뭉제품, 가전제품]","{'width': 310, 'height': 220, 'depth': 230, 'u...",[https://resources.archisketch.com/product/XSW...,XSWNaDQ8D34AB5317CA4971,맥북프로 13형,,,Laptops & Portables
4,XT21RHb69DF906D42DF4284,원터치 와이드 화이트 휴지통 10L,B57D4F97C0E44A11,"[생활, 생활용품, 휴지통/분리수거함, 휴지통, 분리수거, 분리수거함, 쓰레기통, ...","{'width': 345, 'height': 299, 'depth': 160, 'u...",[https://resources.archisketch.com/product/XT2...,XT21RHb69DF906D42DF4284,원터치 와이드 화이트 휴지통 10L,생활,B57D4F97C0E44A11,
...,...,...,...,...,...,...,...,...,...,...,...
9510,XdGlxwz9246FC25CD404DE4,비올렛 디자인체어 2colors (월넛브라운),B57D4F97C0E44A11,"[일반의자, 식탁의자, 인테리어의자, 주방의자, 블랙, 검정, 검은, 검은색, 인조...","{'width': 514, 'height': 780, 'depth': 478, 'u...",[https://resources.archisketch.com/product/XdG...,XdGlxwz9246FC25CD404DE4,비올렛 디자인체어 2colors (월넛브라운),식탁,B57D4F97C0E44A11,
9511,XvJtJWPA51ACB9F262E4019,모뜨 3단 서랍장 (화이트),B57D4F97C0E44A11,[리바트],"{'width': 792, 'height': 745, 'depth': 500, 'u...",[https://resources.archisketch.com/product/XvJ...,XvJtJWPA51ACB9F262E4019,모뜨 3단 서랍장 (화이트),일반의자,B57D4F97C0E44A11,
9512,XhJp_eJBCFF598127BE4AD8,사무용/컴퓨터 메쉬의자 801 화이트프레임 HEAD(포켓스프링 방석) (그레이),B57D4F97C0E44A11,"[학생의자, 책상의자, 컴퓨터의자, 오피스의자, 화이트, 흰색, 입식, 의자왕, 사...","{'width': 590, 'height': 1080, 'depth': 570, '...",[https://resources.archisketch.com/product/XhJ...,XhJp_eJBCFF598127BE4AD8,사무용/컴퓨터 메쉬의자 801 화이트프레임 HEAD(포켓스프링 방석) (그레이),서랍장,B57D4F97C0E44A11,
9513,Xz-wAFDFF8B0AABAE3B444D,프리미엄 블루투스 홈오디오 모델 XL Model XL (화이트),B57D4F97C0E44A11,[제네바사운드],"{'width': 550, 'height': 613, 'depth': 370, 'u...",[https://resources.archisketch.com/product/Xz-...,Xz-wAFDFF8B0AABAE3B444D,프리미엄 블루투스 홈오디오 모델 XL Model XL (화이트),학생/오피스의자,B57D4F97C0E44A11,


In [31]:
x

Unnamed: 0,_id,name,enterpriseId_x,tags,dimensions,images,_id_x,name_x,category_a,enterpriseId_y,category_b
0,X5rPclT9CFB7E73DEA8495A,4F_TV,15BEED4FED5A48FD,[],"{'width': 4200, 'height': 2800, 'depth': 40, '...",[https://resources.archisketch.com/product/X5r...,X5rPclT9CFB7E73DEA8495A,4F_TV,,,
1,Xo7kQN3DC7A5B7C9FE746D6,스마트 라인 LED (화이트)_on,421B6D0E746C4E6D,"[헤이홈, hejhome, 조명, 라인등, 라인LED]","{'width': 2000, 'height': 10, 'depth': 5, 'uni...",[https://resources.archisketch.com/product/Xo7...,Xo7kQN3DC7A5B7C9FE746D6,스마트 라인 LED (화이트)_on,LED Ceiling Lightings,421B6D0E746C4E6D,
2,XtxfYC1EF0F9090248B4518,브로 사계절 인테리어 러그 S,421B6D0E746C4E6D,"[데이드리머, daydreamer, 러그, 사각러그]","{'width': 2000, 'height': 10, 'depth': 1400, '...",[https://resources.archisketch.com/product/Xtx...,XtxfYC1EF0F9090248B4518,브로 사계절 인테리어 러그 S,Rugs,421B6D0E746C4E6D,
3,XSWNaDQ8D34AB5317CA4971,맥북프로 13형,421B6D0E746C4E6D,"[애플, Apple, 맥북, 노트북, 사무용, 사뭉제품, 가전제품]","{'width': 310, 'height': 220, 'depth': 230, 'u...",[https://resources.archisketch.com/product/XSW...,XSWNaDQ8D34AB5317CA4971,맥북프로 13형,,,Laptops & Portables
4,XT21RHb69DF906D42DF4284,원터치 와이드 화이트 휴지통 10L,B57D4F97C0E44A11,"[생활, 생활용품, 휴지통/분리수거함, 휴지통, 분리수거, 분리수거함, 쓰레기통, ...","{'width': 345, 'height': 299, 'depth': 160, 'u...",[https://resources.archisketch.com/product/XT2...,XT21RHb69DF906D42DF4284,원터치 와이드 화이트 휴지통 10L,생활,B57D4F97C0E44A11,
...,...,...,...,...,...,...,...,...,...,...,...
9510,XdGlxwz9246FC25CD404DE4,비올렛 디자인체어 2colors (월넛브라운),B57D4F97C0E44A11,"[일반의자, 식탁의자, 인테리어의자, 주방의자, 블랙, 검정, 검은, 검은색, 인조...","{'width': 514, 'height': 780, 'depth': 478, 'u...",[https://resources.archisketch.com/product/XdG...,XdGlxwz9246FC25CD404DE4,비올렛 디자인체어 2colors (월넛브라운),식탁,B57D4F97C0E44A11,
9511,XvJtJWPA51ACB9F262E4019,모뜨 3단 서랍장 (화이트),B57D4F97C0E44A11,[리바트],"{'width': 792, 'height': 745, 'depth': 500, 'u...",[https://resources.archisketch.com/product/XvJ...,XvJtJWPA51ACB9F262E4019,모뜨 3단 서랍장 (화이트),일반의자,B57D4F97C0E44A11,
9512,XhJp_eJBCFF598127BE4AD8,사무용/컴퓨터 메쉬의자 801 화이트프레임 HEAD(포켓스프링 방석) (그레이),B57D4F97C0E44A11,"[학생의자, 책상의자, 컴퓨터의자, 오피스의자, 화이트, 흰색, 입식, 의자왕, 사...","{'width': 590, 'height': 1080, 'depth': 570, '...",[https://resources.archisketch.com/product/XhJ...,XhJp_eJBCFF598127BE4AD8,사무용/컴퓨터 메쉬의자 801 화이트프레임 HEAD(포켓스프링 방석) (그레이),서랍장,B57D4F97C0E44A11,
9513,Xz-wAFDFF8B0AABAE3B444D,프리미엄 블루투스 홈오디오 모델 XL Model XL (화이트),B57D4F97C0E44A11,[제네바사운드],"{'width': 550, 'height': 613, 'depth': 370, 'u...",[https://resources.archisketch.com/product/Xz-...,Xz-wAFDFF8B0AABAE3B444D,프리미엄 블루투스 홈오디오 모델 XL Model XL (화이트),학생/오피스의자,B57D4F97C0E44A11,


In [47]:
x.products_4.head(10)

Unnamed: 0,enterprise_id,top_style_1,top_style,projectId,awesome_score,product_id,tags,name,images,categories,use_count,category_name,_id,name_x,name_y
0,421B6D0E746C4E6D,"[MODERN, NATURAL]",MODERN,X9IceC_16DC38C091A6439F,0.984499,6E3531E25BF343BE,[],투명 식물 액자 - 몬스테라 A3사이즈,[https://resources.archisketch.com/product/6E3...,[XuaCpKo3531C7175AC44FF3],33,XuaCpKo3531C7175AC44FF3,,,
1,421B6D0E746C4E6D,[MODERN],MODERN,X5WaEry9CEC543A4961492E,0.987451,XYW9sOc9CC9CD1261C74F5E,"[조명, 무드등/장식조명, 무드등, 무드, 브라운, 갈색, 무드등/수면등, 무드등,...",우리집 속 불멍 벽난로 무드등,[https://resources.archisketch.com/product/XYW...,[XrM7nN5EA9A10A8E476454A],31,XrM7nN5EA9A10A8E476454A,XrM7nN5EA9A10A8E476454A,조명,무드등/장식조명
2,421B6D0E746C4E6D,"[MODERN, SCANDINAVIAN]",MODERN,X3cjAHq297A348CEE5B4698,0.904893,9685F6318BD040E1,"[커튼, 데코뷰, 호텔식, 화이트, 시폰]",호텔식 화이트 시폰 커튼,[https://resources.archisketch.com/product/968...,[XrMYTHY9CB40FCFDDC14991],460,XrMYTHY9CB40FCFDDC14991,XrMYTHY9CB40FCFDDC14991,패브릭,커튼
3,421B6D0E746C4E6D,[MODERN],MODERN,X8HTtd3A494EC74B8EA45A3,0.956234,9685F6318BD040E1,"[커튼, 데코뷰, 호텔식, 화이트, 시폰]",호텔식 화이트 시폰 커튼,[https://resources.archisketch.com/product/968...,[XrMYTHY9CB40FCFDDC14991],460,XrMYTHY9CB40FCFDDC14991,XrMYTHY9CB40FCFDDC14991,패브릭,커튼
4,421B6D0E746C4E6D,[MODERN],MODERN,X4kGJoQA6F0C0CB39874510,0.902196,9685F6318BD040E1,"[커튼, 데코뷰, 호텔식, 화이트, 시폰]",호텔식 화이트 시폰 커튼,[https://resources.archisketch.com/product/968...,[XrMYTHY9CB40FCFDDC14991],460,XrMYTHY9CB40FCFDDC14991,XrMYTHY9CB40FCFDDC14991,패브릭,커튼
5,421B6D0E746C4E6D,[MODERN],MODERN,X4kGJoQA6F0C0CB39874510,0.877599,9685F6318BD040E1,"[커튼, 데코뷰, 호텔식, 화이트, 시폰]",호텔식 화이트 시폰 커튼,[https://resources.archisketch.com/product/968...,[XrMYTHY9CB40FCFDDC14991],460,XrMYTHY9CB40FCFDDC14991,XrMYTHY9CB40FCFDDC14991,패브릭,커튼
6,421B6D0E746C4E6D,[MODERN],MODERN,X4LFItPB5B85DFF6CFE4EFE,0.963457,9685F6318BD040E1,"[커튼, 데코뷰, 호텔식, 화이트, 시폰]",호텔식 화이트 시폰 커튼,[https://resources.archisketch.com/product/968...,[XrMYTHY9CB40FCFDDC14991],460,XrMYTHY9CB40FCFDDC14991,XrMYTHY9CB40FCFDDC14991,패브릭,커튼
7,421B6D0E746C4E6D,"[MODERN, NATURAL]",MODERN,X9IceC_16DC38C091A6439F,0.984499,2F173085E1BC4D5A,"[플라워/식물, 플라워, 식물, 플랜트, 플랜테리어, 플렌트, 식물데코, 식물인테리...",코코넛 마크라메 행잉플랜트 (아이비),[https://resources.archisketch.com/product/2F1...,[XuaE2XS5FD2BDA113084B15],85,XuaE2XS5FD2BDA113084B15,,,
8,421B6D0E746C4E6D,[NATURAL],NATURAL,X8GARgr482B5458C0D64C80,0.966516,2F173085E1BC4D5A,"[플라워/식물, 플라워, 식물, 플랜트, 플랜테리어, 플렌트, 식물데코, 식물인테리...",코코넛 마크라메 행잉플랜트 (아이비),[https://resources.archisketch.com/product/2F1...,[XuaE2XS5FD2BDA113084B15],85,XuaE2XS5FD2BDA113084B15,,,
9,421B6D0E746C4E6D,[NATURAL],NATURAL,XXQC1c784250E851FC64BF5,0.874387,2F173085E1BC4D5A,"[플라워/식물, 플라워, 식물, 플랜트, 플랜테리어, 플렌트, 식물데코, 식물인테리...",코코넛 마크라메 행잉플랜트 (아이비),[https://resources.archisketch.com/product/2F1...,[XuaE2XS5FD2BDA113084B15],85,XuaE2XS5FD2BDA113084B15,,,


In [None]:
import pandas as pd
import json

class Preprocess:
    
    def __init__(self, best, item, products, category, style_ths=0.1):
        """
        style_ths : top_style_predictions 에서 범위 조절 (e.g. 0.1 이상으로 예측 점수를 받은 스타일만 받아옴 - maximum 3개)
        """
        self.style_ths = style_ths
        self.style_ths_name = str(style_ths)[-1]
        
        self.best = best
        self.item = item
        self.products = products
        self.category = category
        
        self.best_json = self.read_json(best)
        self.item_json = self.read_json(item)
        self.products_json = self.read_json(products).drop_duplicates('_id') # products json duplicated deleted
        self.category_json = self.read_json2(category).reset_index(drop=True)
        self.best_item, self.category,self.products_4, self.products_b = self.preprocess(self.best_json, 
                                                                           self.item_json, 
                                                                           self.products_json, 
                                                                           self.category_json)

    def read_json(self,json_file):
        df = pd.DataFrame()
        if json_file != self.products:  
            for file in json_file:
                x = pd.read_json(file)
                df = pd.concat([df, x])
        else:
            # why -> products json duplicated 
            for file in json_file:
                x = pd.read_json(file).T.reset_index(drop=True)    
                df = pd.concat([df, x])
        return df

    def read_json2(self,category):
        for i in category:
            with open(i) as js:
                json_data = json.load(js)
        cat_4, cat_b = pd.DataFrame(json_data['421B6D0E746C4E6D']), pd.DataFrame(json_data['B57D4F97C0E44A11'])
        category = pd.concat([cat_4, cat_b])
        return category
    
    def get_productId(self, items_list):
        new_list = []
        for item in items_list:
            new_list.append(item['productId'])
        return new_list
    
    # category['children'] -> name(label) 
    def find_category_df(self, category):
        df = pd.DataFrame()
        for i in category['children']:
            df2 = pd.DataFrame(i)
            df = pd.concat([df, df2])
        return df
        
    def preprocess(self, best, item, products, category):
        
        item.rename(columns={'enterpriseId':'enterprise_id', '_id':'id'}, inplace=True)
        templates = pd.merge(best, item, on=['enterprise_id', 'projectId', 'id'])
        
        # 2 enterprise id 
        ent2, ent1 = templates['enterprise_id'].unique().tolist()
        
        # get item_id 
        templates['items'] = templates['items'].apply(self.get_productId)
        
        # find_category_id_preprocess
    
        # edit new columns style_name, style_score > 0.1 
        templates[f'top_style_{self.style_ths_name}'] = templates['style_predictions'].apply(lambda x: sorted([(name, score) for name, score in x.items() if score > self.style_ths], key=lambda x: x[1], reverse=True)[:3])
        # del list, style score
        templates[f'top_style_{self.style_ths_name}'] = templates[f'top_style_{self.style_ths_name}'].apply(lambda x: [name for name, score in x])
        # Edit best_item['items'] = list(values) -> values
        items_stack = pd.DataFrame(templates['items'].apply(lambda x: pd.Series(x)).stack()).reset_index(1, drop=True) 
        products_df = pd.merge(templates[['enterprise_id', 
                                          f'top_style_{self.style_ths_name}', 
                                          'top_style', 
                                          'projectId', 
                                          'awesome_score']].reset_index(), 
                               items_stack.reset_index(), 
                               on='index').drop(['index'], axis=1).rename(columns = {0:'product_id'})
        prod_tags_df = products[['_id', 'tags', 'name', 'images', 'categories']]
        products_df = pd.merge(products_df, 
                               prod_tags_df, 
                               left_on='product_id', 
                               right_on='_id').drop(['_id'], axis=1)
        products_df = pd.merge(products_df, 
                               products_df['product_id'].value_counts().reset_index(),
                               left_on='product_id',
                               right_on='index').rename(columns = {'product_id_x': 'product_id',
                                                                   'product_id_y':'use_count'}).drop(['index'], axis=1)
        
        # category[categories] = list(values) -> values
        products_df['category_name'] = products_df['categories'].apply(lambda x: pd.Series(x)).reset_index(drop=True).drop(columns=1)
        
        
        ## new code
        # find_category_id_preprocess
        cat_a = find_category_df(DATA.category_json)
        cat_a = pd.merge(cat_df[['_id', 'name']], 
                         cat_a[['name', '_id', 'enterpriseId', 'parentId', 'children']], 
                         left_on='_id', right_on='parentId').drop(['parentId'], axis=1)
        
        cat_b = find_category_df(cat_a)
        cat_b = pd.merge(cat_df[['_id', 'name']], 
                 cat_b[['name', 'enterpriseId', 'parentId', 'grandparentId', '_id']], 
                 left_on='_id', 
                 right_on='grandparentId').drop(['grandparentId'], axis=1)
        
        self.products_json['category_name'] = self.products_json['categories'].apply(lambda x: pd.Series(x)).reset_index(drop=True).drop(columns=1)
        
        # category_df['name_x'] = category['children']
        # category_df['name_y'] = category['children'][children]
        prod_a = pd.merge(self.products_json[['_id', 'name', 'category_name']], 
                         cat_a[['name_x', 'name_y', '_id_y', 'enterpriseId', 'children']], 
                         left_on='category_name', right_on='_id_y', how='left').drop(['_id_y'], axis=1)
        
        prod_b = pd.merge(self.products_json[['_id', 'name', 'category_name']], 
                         cat_b[['name_x', 'name_y', '_id_y', 'enterpriseId']], 
                         left_on='category_name', right_on='_id_y', how='left').drop(['_id_y'], axis=1)
        
        category_df = pd.merge(prod_a, 
                         prod_b[['_id', 'category_name', 'name_x', 'name_y']].rename(columns={'name_y':'name_z'}),
                         on='_id'
                        )
        
        category_df.columns = ['product_id', 'name', 
                            'name_x', 'name_y', 
                            'enterpriseId', 'name_z']
        category_df = category_df[['product_id', 'name', 
                                'name_x', 'name_y', 'name_z', 
                                'enterpriseId']]
        
        category_df = pd.merge(self.products_json[['_id', 'name', 'tags', 'dimensions', 'images']], 
                               category_df[['product_id', 'name_x', 'name_y', 'name_z']], 
                               on = 'product_id')
        
        products_df = pd.merge(products_df[['enterprise_id', 'projectId', 'top_style_1', 
                                            'top_style', 'awesome_score', 'product_id', 'use_count']], 
                               category_df, 
                               on='product_id')
        
        products_df_4 = products_df[products_df['enterprise_id'] == ent1].reset_index(drop=True)
        products_df_b = products_df[products_df['enterprise_id'] == ent2].reset_index(drop=True)
        
        return templates,category_df, products_df_4, products_df_b