In [2]:
import pandas as pd
import json
import pickle

#### 1. JSON 파일을 열어 list of dictionaries로 읽고 pickle 파일로 저장

In [23]:
def json_to_pkl(feature):
    """Convert ``../data/json/<feature>.json`` into ``../data/<feature>.pkl``.

    The JSON document is parsed and the resulting Python object is pickled
    unchanged.

    Args:
        feature: Base name of the JSON file without extension
            (e.g. ``'perfumes'``).

    Raises:
        FileNotFoundError: If the JSON file or the output directory is missing.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    json_path = '../data/json/' + feature + '.json'
    # Decode explicitly as UTF-8: the platform default encoding is not UTF-8
    # everywhere, and non-ASCII text would otherwise be mis-decoded or fail.
    with open(json_path, 'r', encoding='utf-8') as f:
        records = json.load(f)

    pkl_name = '../data/' + feature + '.pkl'
    with open(pkl_name, 'wb') as f:
        pickle.dump(records, f)

In [24]:
# Base names of the JSON files to convert: perfumes, notes, and category.
# Each name is passed to json_to_pkl in turn.
feature_list = ['perfumes', 'notes', 'category']
for feature in feature_list:
    json_to_pkl(feature)

#### 2. list of dictionaries를 DataFrame으로 변환하여 CSV 파일로 저장

In [25]:
def pkl_to_csv(feature):
    """Export the pickled records for ``feature`` as a CSV file.

    Reads ``../data/<feature>.pkl``, builds a DataFrame from the unpickled
    records, and writes it to ``../data/<feature>.csv`` without the index.

    Args:
        feature: Base name shared by the pickle and CSV files
            (e.g. ``'perfumes'``).
    """
    # Both files share the same directory and base name.
    base_path = '../data/' + feature

    # Load the pickled records.
    with open(base_path + '.pkl', 'rb') as pkl_file:
        records = pickle.load(pkl_file)

    # Turn the records into a DataFrame and write it out as CSV.
    pd.DataFrame.from_records(records).to_csv(base_path + '.csv', index=False)

In [26]:
# Convert perfumes.pkl, notes.pkl, and category.pkl to CSV by calling
# pkl_to_csv for every name in feature_list.
for feature in feature_list:
    pkl_to_csv(feature)

In [27]:
# Inspect perfumes.csv: load it and display the first rows.
df = pd.read_csv('../data/perfumes.csv')
df.head()

Unnamed: 0,pk,model,fields
0,26120000,perfumes.perfume,"{'name': 'Ambre Canelle', 'launch_date': '1949..."
1,26120001,perfumes.perfume,"{'name': 'Angélique Encens', 'launch_date': '1..."
2,26120002,perfumes.perfume,"{'name': 'Oud', 'launch_date': None, 'thumbnai..."
3,26120003,perfumes.perfume,"{'name': 'X-Centric', 'launch_date': '2001-01-..."
4,26120005,perfumes.perfume,"{'name': 'Space for Men', 'launch_date': None,..."


In [28]:
# Inspect notes.csv: load it and display the first rows.
df = pd.read_csv('../data/notes.csv')
df.head()

Unnamed: 0,pk,model,fields
0,1,perfumes.note,"{'name': 'absinthe', 'kor_name': '압생트'}"
1,2,perfumes.note,"{'name': 'acacia', 'kor_name': '아카시아'}"
2,3,perfumes.note,"{'name': 'acai berry', 'kor_name': '아사이베리'}"
3,4,perfumes.note,"{'name': 'acerola', 'kor_name': '아세로라'}"
4,5,perfumes.note,"{'name': 'acerola blossom', 'kor_name': '아세로라 ..."


In [29]:
# Inspect category.csv: load it and display the first rows.
df = pd.read_csv('../data/category.csv')
df.head()

Unnamed: 0,pk,model,fields
0,1,perfumes.category,{'name': 'citrus_smells'}
1,2,perfumes.category,{'name': 'fruits_vegetables_and_nuts'}
2,3,perfumes.category,{'name': 'flowers'}
3,4,perfumes.category,{'name': 'white_flowers'}
4,5,perfumes.category,{'name': 'greens_herbs_and_fougeres'}


#### 3. fields dictionary를 풀어서 df 열로 붙이기

In [1]:
def concat_df(feature):
    """Expand each record's nested ``fields`` dict into separate columns.

    Loads the records from ``../data/<feature>.pkl``, builds a DataFrame from
    every record's ``'fields'`` dict, joins it column-wise to
    ``../data/<feature>.csv`` (with its ``fields`` column dropped), and writes
    the result to ``../data/concat_<feature>.csv`` without the index.

    Args:
        feature: Base name shared by the pickle and CSV files
            (e.g. ``'perfumes'``).

    Raises:
        KeyError: If a record has no ``'fields'`` key, or the CSV has no
            ``fields`` column.
        ValueError: If the CSV and the pickle hold a different number of rows,
            in which case a positional join would misalign the data.
    """
    # Load the pickled records.
    # NOTE: pickle.load can execute arbitrary code; only use it on trusted files.
    with open('../data/' + feature + '.pkl', 'rb') as f:
        records = pickle.load(f)

    # Pull the nested 'fields' dict out of every record and turn the resulting
    # list of dicts into a DataFrame.
    fields_df = pd.DataFrame.from_records(
        [record['fields'] for record in records]
    )

    # Load the original frame and drop the column that held the dict as text.
    csv_name = '../data/' + feature + '.csv'
    original = pd.read_csv(csv_name).drop(['fields'], axis=1)

    # The two frames are joined by row position, so they must line up exactly;
    # otherwise pd.concat would silently pad the shorter one with NaN.
    if len(original) != len(fields_df):
        raise ValueError(
            "Row count mismatch for '{}': {} has {} rows but the pickle has {} "
            "records".format(feature, csv_name, len(original), len(fields_df))
        )

    # Join the two frames side by side.
    merged = pd.concat([original, fields_df], axis=1)

    # Save the combined frame as a new CSV file.
    merged.to_csv('../data/' + 'concat_' + feature + '.csv', index=False)

#### Perfume

fields 내의 thumbnail 데이터를 링크(URL)에서 `{pk}.jpg` 형식의 파일명으로 변환

In [31]:
# Load the pickled perfume records.
with open('../data/perfumes.pkl', 'rb') as f:
    perfumes_dict = pickle.load(f)

# Replace every record's thumbnail value with a file name built from its pk.
for perfume in perfumes_dict:
    perfume['fields']['thumbnail'] = "{}.jpg".format(perfume['pk'])

# Write the updated records back to the same pickle file.
with open('../data/perfumes.pkl', 'wb') as f:
    pickle.dump(perfumes_dict, f)

In [32]:
# Write concat_perfumes.csv via concat_df, then load it and display the first rows.
concat_df('perfumes')
df = pd.read_csv('../data/concat_perfumes.csv')
df.head()

Unnamed: 0,pk,model,name,launch_date,thumbnail,gender,top_notes,heart_notes,base_notes,seasons,availability,brand,categories,price
0,26120000,perfumes.perfume,Ambre Canelle,1949-01-01,26120000.jpg,0,"[224, 480]","[224, 259, 510, 785]","[28, 624]","[3, 4]",False,749,"[3, 6]",169.99
1,26120001,perfumes.perfume,Angélique Encens,1933-01-01,26120001.jpg,0,"[28, 42, 472, 473, 785, 920]",[],[],[],False,749,[],2000.0
2,26120002,perfumes.perfume,Oud,,26120002.jpg,0,[],[],[],[],True,102,[],99.0
3,26120003,perfumes.perfume,X-Centric,2001-01-01,26120003.jpg,1,"[178, 224, 229, 292, 388, 395, 646]","[358, 545, 785]","[26, 193, 624, 692]","[1, 2, 3, 4]",True,895,[3],21.0
4,26120005,perfumes.perfume,Space for Men,,26120005.jpg,0,[],[],[],[],True,601,[],44.95


#### Notes

In [3]:
# Write concat_notes.csv via concat_df, then load it and display the first rows.
concat_df('notes')
df = pd.read_csv('../data/concat_notes.csv')
df.head()

Unnamed: 0,pk,model,name,kor_name
0,1,perfumes.note,absinthe,압생트
1,2,perfumes.note,acacia,아카시아
2,3,perfumes.note,acai berry,아사이베리
3,4,perfumes.note,acerola,아세로라
4,5,perfumes.note,acerola blossom,아세로라 블라썸


#### Category

In [34]:
# Write concat_category.csv via concat_df, then load it and display the first rows.
concat_df('category')
df = pd.read_csv('../data/concat_category.csv')
df.head()

Unnamed: 0,pk,model,name
0,1,perfumes.category,citrus_smells
1,2,perfumes.category,fruits_vegetables_and_nuts
2,3,perfumes.category,flowers
3,4,perfumes.category,white_flowers
4,5,perfumes.category,greens_herbs_and_fougeres
