# Colab Cahtspace

In [1]:
import pandas as pd
import numpy as np
import statistics
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
import re
%matplotlib inline
plt.style.use('ggplot') # 그래프에서 격자로 숫자 범위가 눈에 잘 띄도록 ggplot 스타일을 사용
plt.rcParams['font.family'] = 'Malgun Gothic'

# 데이터 불러오기(상품군+상품명+단가)

In [2]:
df = pd.read_excel('chatspace(2019).xlsx')

In [3]:
df['brand_name_code']=pd.Categorical(df['brand_name']).codes

In [4]:
df['u'] = [str(round(i, -4)) for i in df['unit_price']]

In [5]:
df['group_p_name'] = df['p_group'] + ' ' + df['split_p_name'] +' ' + df['u']

In [6]:
test = pd.read_excel('chatspace(2020).xlsx')

In [7]:
test['u'] = [str(round(i, -4)) for i in test['unit_price']]

In [8]:
test['group_p_name'] = test['p_group'] + ' ' + test['split_p_name'] + ' ' + test['u']

# 그룹 구분하기

In [9]:
건강기능 =df[df['p_group'] == '건강기능']
생활용품 =df[df['p_group'] == '생활용품']
침구 =df[df['p_group'] == '침구']
주방 =df[df['p_group'] == '주방']
가전 =df[df['p_group'] == '가전']
가구 =df[df['p_group'] == '가구']
이미용 =df[df['p_group'] == '이미용']
농수축 =df[df['p_group'] == '농수축']
잡화 =df[df['p_group'] == '잡화']
속옷= df[df['p_group'] == '속옷']
의류=df[df['p_group'] == '의류']

In [10]:
건강기능_test = test[test['p_group'] == '건강기능']
생활용품_test = test[test['p_group'] == '생활용품']
침구_test = test[test['p_group'] == '침구']
주방_test = test[test['p_group'] == '주방']
가전_test = test[test['p_group'] == '가전']
가구_test = test[test['p_group'] == '가구']
이미용_test = test[test['p_group'] == '이미용']
농수축_test = test[test['p_group'] == '농수축']
잡화_test = test[test['p_group'] == '잡화']
속옷_test = test[test['p_group'] == '속옷']
의류_test = test[test['p_group'] == '의류']
무형_test = test[test['p_group'] == '무형']

# 빈도수 계산을 위한 텍스트 데이터 백터화

In [11]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm_notebook

In [12]:
p_group_name = ['의류', '속옷', '농수축', '이미용', '가전', '가구', '침구', '생활용품', '잡화', '건강기능', '주방']

In [13]:
names = {}
scores = {}

for group in tqdm_notebook(p_group_name) :
    
    if group == '건강기능':
        p_name = np.unique(건강기능['group_p_name'])

        vectorizer = CountVectorizer(ngram_range=(1,5))
        feature_vector = vectorizer.fit_transform(p_name)
        print(feature_vector.shape)
        vocab = vectorizer.get_feature_names()

        transformer = TfidfTransformer(smooth_idf=False)
        feature_tfidf = transformer.fit_transform(feature_vector)

        test_name = 건강기능_test['group_p_name']
        df_len = len(p_name)

        result_score = []
        result_name = []
        
        for name in test_name:
            query = [name]
            q = vectorizer.transform(query)

            sim = []
            for i in range(df_len):
                c = cosine_similarity(q, feature_tfidf[i])
                sim.append(c)

            sim_idx = np.argmax(sim)   
            sim_score = np.max(sim)

            result_score.append(sim_score)
            result_name.append(list(p_name)[sim_idx])
        names['건강기능'] = result_name
        scores['건강기능'] = result_score
        
    elif group == '의류':
        p_name = np.unique(의류['group_p_name'])

        vectorizer = CountVectorizer(ngram_range=(1,5))
        feature_vector = vectorizer.fit_transform(p_name)
        print(feature_vector.shape)
        vocab = vectorizer.get_feature_names()

        transformer = TfidfTransformer(smooth_idf=False)
        feature_tfidf = transformer.fit_transform(feature_vector)

        test_name = 의류_test['group_p_name']
        df_len = len(p_name)

        result_score = []
        result_name = []
        
        for name in test_name:
            query = [name]
            q = vectorizer.transform(query)

            sim = []
            for i in range(df_len):
                c = cosine_similarity(q, feature_tfidf[i])
                sim.append(c)

            sim_idx = np.argmax(sim)   
            sim_score = np.max(sim)

            result_score.append(sim_score)
            result_name.append(list(p_name)[sim_idx])
        names['의류'] = result_name
        scores['의류'] = result_score
        
    elif group == '속옷':
        p_name = np.unique(속옷['group_p_name'])

        vectorizer = CountVectorizer(ngram_range=(1,5))
        feature_vector = vectorizer.fit_transform(p_name)
        print(feature_vector.shape)
        vocab = vectorizer.get_feature_names()

        transformer = TfidfTransformer(smooth_idf=False)
        feature_tfidf = transformer.fit_transform(feature_vector)

        test_name = 속옷_test['group_p_name']
        df_len = len(p_name)

        result_score = []
        result_name = []
        
        for name in test_name:
            query = [name]
            q = vectorizer.transform(query)

            sim = []
            for i in range(df_len):
                c = cosine_similarity(q, feature_tfidf[i])
                sim.append(c)

            sim_idx = np.argmax(sim)   
            sim_score = np.max(sim)

            result_score.append(sim_score)
            result_name.append(list(p_name)[sim_idx])
        names['속옷'] = result_name
        scores['속옷'] = result_score
        
    elif group == '농수축':
        p_name =np.unique(농수축['group_p_name'])

        vectorizer = CountVectorizer(ngram_range=(1,5))
        feature_vector = vectorizer.fit_transform(p_name)
        print(feature_vector.shape)
        vocab = vectorizer.get_feature_names()

        transformer = TfidfTransformer(smooth_idf=False)
        feature_tfidf = transformer.fit_transform(feature_vector)

        test_name = 농수축_test['group_p_name']
        df_len = len(p_name)

        result_score = []
        result_name = []
        
        for name in test_name:
            query = [name]
            q = vectorizer.transform(query)

            sim = []
            for i in range(df_len):
                c = cosine_similarity(q, feature_tfidf[i])
                sim.append(c)

            sim_idx = np.argmax(sim)   
            sim_score = np.max(sim)

            result_score.append(sim_score)
            result_name.append(list(p_name)[sim_idx])
        names['농수축'] = result_name
        scores['농수축'] = result_score
        
    elif group == '이미용':
        p_name = np.unique(이미용['group_p_name'])

        vectorizer = CountVectorizer(ngram_range=(1,5))
        feature_vector = vectorizer.fit_transform(p_name)
        print(feature_vector.shape)
        vocab = vectorizer.get_feature_names()

        transformer = TfidfTransformer(smooth_idf=False)
        feature_tfidf = transformer.fit_transform(feature_vector)

        test_name = 이미용_test['group_p_name']
        df_len = len(p_name)

        result_score = []
        result_name = []
        
        for name in test_name:
            query = [name]
            q = vectorizer.transform(query)

            sim = []
            for i in range(df_len):
                c = cosine_similarity(q, feature_tfidf[i])
                sim.append(c)

            sim_idx = np.argmax(sim)   
            sim_score = np.max(sim)

            result_score.append(sim_score)
            result_name.append(list(p_name)[sim_idx])
        names['이미용'] = result_name
        scores['이미용'] = result_score
        
    elif group == '생활용품':
        p_name = np.unique(생활용품['group_p_name'])

        vectorizer = CountVectorizer(ngram_range=(1,5))
        feature_vector = vectorizer.fit_transform(p_name)
        print(feature_vector.shape)
        vocab = vectorizer.get_feature_names()

        transformer = TfidfTransformer(smooth_idf=False)
        feature_tfidf = transformer.fit_transform(feature_vector)

        test_name = 생활용품_test['group_p_name']
        df_len = len(p_name)

        result_score = []
        result_name = []
        
        for name in test_name:
            query = [name]
            q = vectorizer.transform(query)

            sim = []
            for i in range(df_len):
                c = cosine_similarity(q, feature_tfidf[i])
                sim.append(c)

            sim_idx = np.argmax(sim)   
            sim_score = np.max(sim)

            result_score.append(sim_score)
            result_name.append(list(p_name)[sim_idx])
        names['생활용품'] = result_name
        scores['생활용품'] = result_score    

    elif group == '가구':
        p_name = np.unique(가구['group_p_name'])

        vectorizer = CountVectorizer(ngram_range=(1,5))
        feature_vector = vectorizer.fit_transform(p_name)
        print(feature_vector.shape)
        vocab = vectorizer.get_feature_names()

        transformer = TfidfTransformer(smooth_idf=False)
        feature_tfidf = transformer.fit_transform(feature_vector)

        test_name = 가구_test['group_p_name']
        df_len = len(p_name)

        result_score = []
        result_name = []
        
        for name in test_name:
            query = [name]
            q = vectorizer.transform(query)

            sim = []
            for i in range(df_len):
                c = cosine_similarity(q, feature_tfidf[i])
                sim.append(c)

            sim_idx = np.argmax(sim)   
            sim_score = np.max(sim)

            result_score.append(sim_score)
            result_name.append(list(p_name)[sim_idx])
        names['가구'] = result_name
        scores['가구'] = result_score   

    elif group == '주방':
        p_name = np.unique(주방['group_p_name'])

        vectorizer = CountVectorizer(ngram_range=(1,5))
        feature_vector = vectorizer.fit_transform(p_name)
        print(feature_vector.shape)
        vocab = vectorizer.get_feature_names()

        transformer = TfidfTransformer(smooth_idf=False)
        feature_tfidf = transformer.fit_transform(feature_vector)

        test_name = 주방_test['group_p_name']
        df_len = len(p_name)

        result_score = []
        result_name = []
        
        for name in test_name:
            query = [name]
            q = vectorizer.transform(query)

            sim = []
            for i in range(df_len):
                c = cosine_similarity(q, feature_tfidf[i])
                sim.append(c)

            sim_idx = np.argmax(sim)   
            sim_score = np.max(sim)

            result_score.append(sim_score)
            result_name.append(list(p_name)[sim_idx])
        names['주방'] = result_name
        scores['주방'] = result_score   
        
    elif group == '가전':
        p_name = np.unique(가전['group_p_name'])

        vectorizer = CountVectorizer(ngram_range=(1,5))
        feature_vector = vectorizer.fit_transform(p_name)
        print(feature_vector.shape)
        vocab = vectorizer.get_feature_names()

        transformer = TfidfTransformer(smooth_idf=False)
        feature_tfidf = transformer.fit_transform(feature_vector)

        test_name = 가전_test['group_p_name']
        df_len = len(p_name)

        result_score = []
        result_name = []
        
        for name in test_name:
            query = [name]
            q = vectorizer.transform(query)

            sim = []
            for i in range(df_len):
                c = cosine_similarity(q, feature_tfidf[i])
                sim.append(c)

            sim_idx = np.argmax(sim)   
            sim_score = np.max(sim)

            result_score.append(sim_score)
            result_name.append(list(p_name)[sim_idx])
        names['가전'] = result_name
        scores['가전'] = result_score 

    elif group == '침구':
        p_name = np.unique(침구['group_p_name'])

        vectorizer = CountVectorizer(ngram_range=(1,5))
        feature_vector = vectorizer.fit_transform(p_name)
        print(feature_vector.shape)
        vocab = vectorizer.get_feature_names()

        transformer = TfidfTransformer(smooth_idf=False)
        feature_tfidf = transformer.fit_transform(feature_vector)

        test_name = 침구_test['group_p_name']
        df_len = len(p_name)

        result_score = []
        result_name = []
        
        for name in test_name:
            query = [name]
            q = vectorizer.transform(query)

            sim = []
            for i in range(df_len):
                c = cosine_similarity(q, feature_tfidf[i])
                sim.append(c)

            sim_idx = np.argmax(sim)   
            sim_score = np.max(sim)

            result_score.append(sim_score)
            result_name.append(list(p_name)[sim_idx])
        names['침구'] = result_name
        scores['침구'] = result_score 

    elif group == '잡화':
        p_name = np.unique(잡화['group_p_name'])

        vectorizer = CountVectorizer(ngram_range=(1,5))
        feature_vector = vectorizer.fit_transform(p_name)
        print(feature_vector.shape)
        vocab = vectorizer.get_feature_names()

        transformer = TfidfTransformer(smooth_idf=False)
        feature_tfidf = transformer.fit_transform(feature_vector)

        test_name = 잡화_test['group_p_name']
        df_len = len(p_name)

        result_score = []
        result_name = []
        
        for name in test_name:
            query = [name]
            q = vectorizer.transform(query)

            sim = []
            for i in range(df_len):
                c = cosine_similarity(q, feature_tfidf[i])
                sim.append(c)

            sim_idx = np.argmax(sim)   
            sim_score = np.max(sim)

            result_score.append(sim_score)
            result_name.append(list(p_name)[sim_idx])
        names['잡화'] = result_name
        scores['잡화'] = result_score     

    else:
        continue

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for group in tqdm_notebook(p_group_name) :


HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))

(352, 4123)
(219, 3215)
(144, 2462)
(56, 921)
(180, 2373)
(101, 946)
(60, 595)
(174, 2643)
(284, 3870)
(51, 814)
(229, 3111)



In [14]:
names['가전'] = [i.replace('가전 1등급 221l_딤채 김치 냉장고 900000', '가전 일시불 대우전자 벽걸이 에어컨 tdo z-s 10jk 810000') for i in names['가전']]

# brand_code 가져오기

In [28]:
for group in p_group_name :
    
    if group == '주방':
        주방_names = names['주방']
        temp = []
        for name in 주방_names:
            matching_code = np.unique(주방[주방['group_p_name'] == name]['brand_name_code'])[0]
            temp.append(matching_code)
        주방_test['brand_name_code'] = temp

    
    elif group == '건강기능':
        건강기능_names = names['건강기능']
        temp = []
        for name in 건강기능_names:
            matching_code = np.unique(건강기능[건강기능['group_p_name'] == name]['brand_name_code'])[0]
            temp.append(matching_code)
        건강기능_test['brand_name_code'] = temp
    
    if group == '잡화':
        잡화_names = names['잡화']
        temp = []
        for name in 잡화_names:
            matching_code = np.unique(잡화[잡화['group_p_name'] == name]['brand_name_code'])[0]
            temp.append(matching_code)
        잡화_test['brand_name_code'] = temp
    
    elif group == '생활용품':
        생활용품_names = names['생활용품']
        temp = []
        for name in 생활용품_names:
            matching_code = np.unique(생활용품[생활용품['group_p_name'] == name]['brand_name_code'])[0]
            temp.append(matching_code)
        생활용품_test['brand_name_code'] = temp
    
    elif group == '침구':
        침구_names = names['침구']
        temp = []
        for name in 침구_names:
            matching_code = np.unique(침구[침구['group_p_name'] == name]['brand_name_code'])[0]   
            temp.append(matching_code)
        침구_test['brand_name_code'] = temp
    
    elif group == '가구':
        가구_names = names['가구']
        temp = []
        for name in 가구_names:
            matching_code = np.unique(가구[가구['group_p_name'] == name]['brand_name_code'])[0]
            temp.append(matching_code)
        가구_test['brand_name_code'] = temp
    
    elif group == '이미용':
        이미용_names = names['이미용']
        temp = []
        for name in 이미용_names:
            matching_code = np.unique(이미용[이미용['group_p_name'] == name]['brand_name_code'])[0]  
            temp.append(matching_code)
        이미용_test['brand_name_code'] = temp
    
    elif group == '농수축':
        농수축_names = names['농수축']
        temp = []
        for name in 농수축_names:
            matching_code = np.unique(농수축[농수축['group_p_name'] == name]['brand_name_code'])[0]
            temp.append(matching_code)
        농수축_test['brand_name_code'] = temp
    
    elif group == '속옷':
        속옷_names = names['속옷']
        temp = []
        for name in 속옷_names:
            matching_code = np.unique(속옷[속옷['group_p_name'] == name]['brand_name_code'])[0]
            temp.append(matching_code)
        속옷_test['brand_name_code'] = temp   
        
    elif group == '의류':
        의류_names = names['의류']
        temp = []
        for name in 의류_names:
            matching_code = np.unique(의류[의류['group_p_name'] == name]['brand_name_code'])[0]
            temp.append(matching_code)
        의류_test['brand_name_code'] = temp
        
    elif group == '가전':
        가전_names = names['가전']
        temp = []
        for name in 가전_names:
            matching_code = np.unique(가전[가전['group_p_name'] == name]['brand_name_code'])[0]
            temp.append(matching_code)
        가전_test['brand_name_code'] = temp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  의류_test['brand_name_code'] = temp
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  속옷_test['brand_name_code'] = temp
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  농수축_test['brand_name_code'] = temp
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_inde

In [19]:
무형_test = test[test['p_group'] == '무형']
무형_test['brand_name_code'] = ''
무형_test['19_p_name'] = ''

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  무형_test['19_m_code'] = ''
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  무형_test['19_p_name'] = ''


# 데이터 합쳐서 엑셀 파일로 저장하기

In [35]:
result_all = pd.concat([의류_test, 속옷_test, 농수축_test, 이미용_test, 가전_test, 가구_test, 침구_test, 생활용품_test, 잡화_test, 건강기능_test, 주방_test, 무형_test])
del result_all['split_p_name']
del result_all['u']
del result_all['group_p_name']
del data['Unnamed: 0']
data = result_all.sort_index()
data

Unnamed: 0.1,Unnamed: 0,broadcast,exp_min,m_code,p_code,p_name,p_group,unit_price,total_price,YEAR,...,day_of_week_and_hour,no,52_week\t,holiday,timeslot,season,quarter,19_m_code,19_p_name,brand_name_code
0,0,2020-06-01 06:20:00,20.000000,100650,201971,잭필드 남성 반팔셔츠 4종,의류,59800,,2020,...,6,0,23,0,1,2,2,100583,의류 임페리얼 남성 니트 4종 60000,306
1,1,2020-06-01 06:40:00,20.000000,100650,201971,잭필드 남성 반팔셔츠 4종,의류,59800,,2020,...,6,1,23,0,1,2,2,100583,의류 임페리얼 남성 니트 4종 60000,306
2,2,2020-06-01 07:00:00,20.000000,100650,201971,잭필드 남성 반팔셔츠 4종,의류,59800,,2020,...,7,2,23,0,1,2,2,100583,의류 임페리얼 남성 니트 4종 60000,306
3,3,2020-06-01 07:20:00,20.000000,100445,202278,쿠미투니카 쿨 레이시 란쥬쉐이퍼&팬티,속옷,69900,,2020,...,7,3,23,0,1,2,2,100445,속옷 쿠미투니카 쿨 레이시 란쥬쉐이퍼 팬티 70000,353
4,4,2020-06-01 07:40:00,20.000000,100445,202278,쿠미투니카 쿨 레이시 란쥬쉐이퍼&팬티,속옷,69900,,2020,...,7,4,23,0,1,2,2,100445,속옷 쿠미투니카 쿨 레이시 란쥬쉐이퍼 팬티 70000,353
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2886,2886,2020-06-30 00:20:00,20.000000,100660,201989,쉴렉스 안마의자 렌탈서비스,무형,0,,2020,...,48,2886,27,0,7,2,2,,,
2887,2887,2020-06-30 00:40:00,20.000000,100660,201989,쉴렉스 안마의자 렌탈서비스,무형,0,,2020,...,48,2887,27,0,7,2,2,,,
2888,2888,2020-06-30 01:00:00,20.000000,100660,201989,쉴렉스 안마의자 렌탈서비스,무형,0,,2020,...,49,2888,27,0,7,2,2,,,
2889,2889,2020-06-30 01:20:00,20.000000,100261,200875,아놀드파마 티셔츠레깅스세트,의류,69900,,2020,...,49,2889,27,0,7,2,2,100815,의류 보코 배색 사파리재킷 70000,166


In [None]:
data.to_excel('test_brand_code_추가.xlsx')

# m_code 추가(19년을 구할 수 없음)

In [15]:
p_group_name = ['의류', '속옷', '농수축', '이미용', '가전', '가구', '침구', '생활용품', '잡화', '건강기능', '주방']

In [16]:
for group in p_group_name :
    
    if group == '주방':
        주방_names = names['주방']
        temp = []
        for name in 주방_names:
            matching_code = np.unique(주방[주방['group_p_name'] == name]['m_code'])[0]
            temp.append(matching_code)
        주방_test['19_m_code'] = temp
        주방_test['19_p_name'] = 주방_names
    
    elif group == '건강기능':
        건강기능_names = names['건강기능']
        temp = []
        for name in 건강기능_names:
            matching_code = np.unique(건강기능[건강기능['group_p_name'] == name]['m_code'])[0]
            temp.append(matching_code)
        건강기능_test['19_m_code'] = temp
        건강기능_test['19_p_name'] = 건강기능_names
    
    if group == '잡화':
        잡화_names = names['잡화']
        temp = []
        for name in 잡화_names:
            matching_code = np.unique(잡화[잡화['group_p_name'] == name]['m_code'])[0]
            temp.append(matching_code)
        잡화_test['19_m_code'] = temp
        잡화_test['19_p_name'] = 잡화_names
    
    elif group == '생활용품':
        생활용품_names = names['생활용품']
        temp = []
        for name in 생활용품_names:
            matching_code = np.unique(생활용품[생활용품['group_p_name'] == name]['m_code'])[0]
            temp.append(matching_code)
        생활용품_test['19_m_code'] = temp
        생활용품_test['19_p_name'] = 생활용품_names
    
    elif group == '침구':
        침구_names = names['침구']
        temp = []
        for name in 침구_names:
            matching_code = np.unique(침구[침구['group_p_name'] == name]['m_code'])[0]   
            temp.append(matching_code)
        침구_test['19_m_code'] = temp
        침구_test['19_p_name'] = 침구_names
    
    elif group == '가구':
        가구_names = names['가구']
        temp = []
        for name in 가구_names:
            matching_code = np.unique(가구[가구['group_p_name'] == name]['m_code'])[0]
            temp.append(matching_code)
        가구_test['19_m_code'] = temp
        가구_test['19_p_name'] = 가구_names
    
    elif group == '이미용':
        이미용_names = names['이미용']
        temp = []
        for name in 이미용_names:
            matching_code = np.unique(이미용[이미용['group_p_name'] == name]['m_code'])[0]  
            temp.append(matching_code)
        이미용_test['19_m_code'] = temp
        이미용_test['19_p_name'] = 이미용_names
    
    elif group == '농수축':
        농수축_names = names['농수축']
        temp = []
        for name in 농수축_names:
            matching_code = np.unique(농수축[농수축['group_p_name'] == name]['m_code'])[0]
            temp.append(matching_code)
        농수축_test['19_m_code'] = temp
        농수축_test['19_p_name'] = 농수축_names
    
    elif group == '속옷':
        속옷_names = names['속옷']
        temp = []
        for name in 속옷_names:
            matching_code = np.unique(속옷[속옷['group_p_name'] == name]['m_code'])[0]
            temp.append(matching_code)
        속옷_test['19_m_code'] = temp   
        속옷_test['19_p_name'] = 속옷_names
        
    elif group == '의류':
        의류_names = names['의류']
        temp = []
        for name in 의류_names:
            matching_code = np.unique(의류[의류['group_p_name'] == name]['m_code'])[0]
            temp.append(matching_code)
        의류_test['19_m_code'] = temp
        의류_test['19_p_name'] = 의류_names
        
    elif group == '가전':
        가전_names = names['가전']
        temp = []
        for name in 가전_names:
            matching_code = np.unique(가전[가전['group_p_name'] == name]['m_code'])[0]
            temp.append(matching_code)
        가전_test['19_m_code'] = temp
        가전_test['19_p_name'] = 가전_names

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  의류_test['19_m_code'] = temp
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  의류_test['19_p_name'] = 의류_names
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  속옷_test['19_m_code'] = temp
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value in

In [30]:
무형_test['19_m_code'] = ''

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  무형_test['brand_name_code'] = ''


In [20]:
result_all = pd.concat([의류_test, 속옷_test, 농수축_test, 이미용_test, 가전_test, 가구_test, 침구_test, 생활용품_test, 잡화_test, 건강기능_test, 주방_test, 무형_test])
del result_all['split_p_name']
del result_all['u']
del result_all['group_p_name']
data = result_all.sort_index()

In [22]:
data.to_excel('test_m_code_추가.xlsx')