# Data 전처리

In [1]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
from io import BytesIO
from PIL import Image
from IPython.display import display

## 데이터 불러오기

In [8]:
def read_jsonfile(json_file):
    with open(json_file) as json_file:
        json_data = json.load(json_file)
    return json_data

# 스타일 별 아이템 목록 뽑기
def get_productId(items_list):
    new_list = []
    for item in items_list:
        new_list.append(item['productId'])
    return new_list

def preprocessing(file_paths):
    
    json_list = []
    for path in file_paths:
        json_list.append(read_jsonfile(path))
    
    category_4, category_b = json_list[2]['421B6D0E746C4E6D'], json_list[2]['B57D4F97C0E44A11']
    
    bestshots_df = pd.read_json(file_paths[0])
    itemsets_df = pd.read_json(file_paths[1])
    category_en1_df = pd.DataFrame(category_4)
    category_en2_df = pd.DataFrame(category_b)
    
    itemsets_df.rename(columns={'enterpriseId':'enterprise_id', '_id':'id'}, inplace=True)
    templates_df = pd.merge(bestshots_df, itemsets_df, on=['enterprise_id', 'projectId', 'id'])
    
    ent2, ent1 = templates_df['enterprise_id'].unique().tolist()
    
    templates_df['items'] = templates_df['items'].apply(get_productId)
    
    # 아이템 별 Top3 상위 스타일 뽑기
    templates_df['top3_style'] = templates_df['style_predictions'].apply(lambda x: sorted([(name, score) for name, score in x.items()], key=lambda x: x[1], reverse=True)[:3])
    templates_df['top3_style'] = templates_df['top3_style'].apply(lambda x: [name for name, score in x])
    items_stack = pd.DataFrame(templates_df['items'].apply(lambda x: pd.Series(x)).stack()).reset_index(1, drop=True) 
    products_df = pd.merge(templates_df[['enterprise_id', 'top3_style', 'top_style', 'projectId', 'awesome_score']].reset_index(), items_stack.reset_index(), on='index').drop(['index'], axis=1).rename(columns = {0:'product_id'})
    
    # 아이템 별 태그 정보 병합하기
    prod_tags_df = pd.DataFrame(json_list[3]).T.reset_index(drop=True)[['_id', 'tags', 'name', 'images']]
    products_df = pd.merge(products_df, prod_tags_df, left_on='product_id', right_on='_id').drop(['_id'], axis=1)
    products_df = pd.merge(products_df, products_df['product_id'].value_counts().reset_index(), left_on='product_id', right_on='index').rename(columns = {'product_id_x': 'product_id','product_id_y':'use_count'}).drop(['index'], axis=1)
    
    # enterprise_id 별로 분류하기
    products_df_4 = products_df[products_df['enterprise_id'] == ent1].reset_index(drop=True)
    products_df_b = products_df[products_df['enterprise_id'] == ent2].reset_index(drop=True)
    
    return products_df_4, products_df_b

In [9]:
base_path = '2022-03-07/'
file_paths = [
    base_path + 'bestshots.json', 
    base_path + 'items.json', 
    base_path +'categories.json', 
    base_path +'products.json'
]

In [10]:
products_df_4, products_df_b = preprocessing(file_paths)

  items_stack = pd.DataFrame(templates_df['items'].apply(lambda x: pd.Series(x)).stack()).reset_index(1, drop=True)


In [11]:
products_df_4.shape, products_df_b.shape

((4211, 10), (46576, 10))

In [12]:
products_df_b

Unnamed: 0,enterprise_id,top3_style,top_style,projectId,awesome_score,product_id,tags,name,images,use_count
0,B57D4F97C0E44A11,"[NATURAL, MID_CENTURY_MODERN, MODERN]",NATURAL,X9djPepD57A2BDCD418437C,0.993074,XjDwfV4C2377DB2C32B4EBD,"[렌지대, 주방수납, 주방장, 주방선반, 베이지, 가공목(mdf외), 가공목, 나무...",onion 진저 주방 수납장 (내추럴),[https://resources.archisketch.com/product/XjD...,10
1,B57D4F97C0E44A11,"[NATURAL, SCANDINAVIAN, MINIMAL]",NATURAL,X9dU4bb1CF0EE26E341493B,0.983243,XjDwfV4C2377DB2C32B4EBD,"[렌지대, 주방수납, 주방장, 주방선반, 베이지, 가공목(mdf외), 가공목, 나무...",onion 진저 주방 수납장 (내추럴),[https://resources.archisketch.com/product/XjD...,10
2,B57D4F97C0E44A11,"[NATURAL, KOREAN_AND_ASIAN, CLASSIC_AND_ANTIQUE]",NATURAL,X9a1taeFAA511E93B794404,0.893659,XjDwfV4C2377DB2C32B4EBD,"[렌지대, 주방수납, 주방장, 주방선반, 베이지, 가공목(mdf외), 가공목, 나무...",onion 진저 주방 수납장 (내추럴),[https://resources.archisketch.com/product/XjD...,10
3,B57D4F97C0E44A11,"[MODERN, NATURAL, MINIMAL]",MODERN,X8Ao1yi02242EB57CD44E0B,0.839363,XjDwfV4C2377DB2C32B4EBD,"[렌지대, 주방수납, 주방장, 주방선반, 베이지, 가공목(mdf외), 가공목, 나무...",onion 진저 주방 수납장 (내추럴),[https://resources.archisketch.com/product/XjD...,10
4,B57D4F97C0E44A11,"[NATURAL, MODERN, MID_CENTURY_MODERN]",NATURAL,X8MtJveB527ACDC1FE44F66,0.981309,XjDwfV4C2377DB2C32B4EBD,"[렌지대, 주방수납, 주방장, 주방선반, 베이지, 가공목(mdf외), 가공목, 나무...",onion 진저 주방 수납장 (내추럴),[https://resources.archisketch.com/product/XjD...,10
...,...,...,...,...,...,...,...,...,...,...
46571,B57D4F97C0E44A11,"[MODERN, MINIMAL, NATURAL]",MODERN,X1_RuIOE8ACEF7EC03246FB,0.859769,XVUcpfK862F9434D3B8413E,"[수납장, 수납, 캐비넷, 캐비닛, 케비넷, 케비닛, 그레이, 회색, 철재/스틸, ...",MKB 캐비넷 6문 수납장 6colors 9131600 라이트그레이,[https://resources.archisketch.com/product/XVU...,1
46572,B57D4F97C0E44A11,"[MODERN, NATURAL, MINIMAL]",MODERN,X4Woi5l64D1CDAD81B64B66,0.848643,XTsmfRBD3B8F6C0BF274BB1,"[일반의자, 식탁의자, 인테리어의자, 주방의자, 베이지, 원목, 내추럴, 나무, 우...",미쉘 원목 카페 식탁의자 4colors (내추럴),[https://resources.archisketch.com/product/XTs...,1
46573,B57D4F97C0E44A11,"[MODERN, NATURAL, MINIMAL]",MODERN,X4Woi5l64D1CDAD81B64B66,0.848643,XkM8XJXFA603CD7FB3747A4,"[테이블, 태이블, 탁자, 식탁, 다이닝테이블, 다이닝식탁, 식탁테이블, 화이트, ...",Marcus 2인용 다용도 원형테이블 (화이트),[https://resources.archisketch.com/product/XkM...,1
46574,B57D4F97C0E44A11,"[MODERN, NATURAL, KOREAN_AND_ASIAN]",MODERN,X4WvMcUD91ACAF3D1654EDF,0.838062,XfWk4HE96C12C678865467F,"[스툴, 일반스툴, 블랙, 검정, 검은, 검은색, 원목, 내추럴, 나무, 우드, 네...",하제 원목스툴 의자 4colors (블랙),[https://resources.archisketch.com/product/XfW...,1
