In [1]:
# Data Analysis
import pandas as pd
import numpy as np
from numpy import asarray
from numpy import savetxt
from numpy import loadtxt
import pickle as pkl
from scipy import sparse

# Data Visualization
import seaborn as sns
import matplotlib.pyplot as plt
import wordcloud
from wordcloud import WordCloud, STOPWORDS

# Text Processing
import re
import itertools
import string
import collections
from collections import Counter
from sklearn.preprocessing import LabelEncoder
import nltk
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import stopwords
from nltk import word_tokenize
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer

# Machine Learning packages
import sklearn
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
import sklearn.cluster as cluster
from sklearn.manifold import TSNE

# Model training and evaluation
from sklearn.model_selection import train_test_split

#Models
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from xgboost import plot_importance

#Metrics
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error, accuracy_score, balanced_accuracy_score
from sklearn.metrics import precision_score, recall_score, f1_score, multilabel_confusion_matrix, confusion_matrix
from sklearn.metrics import classification_report

# Ignore noise warning
import warnings
warnings.filterwarnings("ignore")

In [9]:
# Read the source CSV (path is relative to the current working directory)
# into the working frame `df_mak`; all parser options stay at their defaults.
df_mak = pd.read_csv(filepath_or_buffer='./Makgeolli_list.csv')

In [10]:
# Show the freshly loaded frame as the cell's last expression so the notebook
# renders it; this has no effect on `df_mak` itself.
df_mak

Unnamed: 0,규격,도수,전통주명,제조사,주원료
0,750ml,7,입장탁주,(농) 입장주조㈜,국내산 쌀
1,500ml,30,톡 한잔 소주,(농)(유)대마주조,"국내산 보리, 쌀"
2,360ml,6,자연담은 복분자막걸리,(농)국순당고창명주㈜,"국내산 복분자(고창), 정제수"
3,"600ml, 800ml",45,로얄 안동소주,(농)유토피아㈜,국내산 쌀
4,500ml,17,지란지교,(유)친구들의 술 지란지교,"찹쌀,멥쌀,전통누룩(밀),정제수"
...,...,...,...,...,...
846,500ml,25,화백,화백,"멥쌀(국내산), 찹쌀(국내산), 누룩(국내산 밀함유), 효모, 정제수"
847,750ml,6,산천어막걸리,화천주가,"정제수,팽화미(쌀/외국산),입국(쌀),국(밀),설탕,정제효소제, 효모,아스파탐(감미..."
848,375ml,17,황금보리 증류주 17,황금보리(유)농업회사법인,"보리(국내산) 39.9%, 누룩(국내산)3.3%, 효소(액화효소), 효모(향미증진제..."
849,375ml,25,황금보리 증류주 25,황금보리(유)농업회사법인,"정제수, 보리(국내산) 40.05%, 누룩, 효소(액화효소), 효모, 과당, 구연산..."


In [11]:
# Keep only these five columns, in this order; any other column is dropped.
# Raises KeyError if one of the listed headers is missing from the frame.
mak_source_columns = ['전통주명', '제조사', '주원료', '규격', '도수']
df_mak = df_mak[mak_source_columns]

In [12]:
# Rename two headers in a single pass: '규격' becomes '용량' and
# '전통주명' becomes '상품명'. Headers not present are silently left alone
# (pandas' default for rename), so re-running this cell is harmless.
df_mak = df_mak.rename(columns={'규격': '용량', '전통주명': '상품명'})

# Display the renamed frame.
df_mak

Unnamed: 0,상품명,제조사,주원료,용량,도수
0,입장탁주,(농) 입장주조㈜,국내산 쌀,750ml,7
1,톡 한잔 소주,(농)(유)대마주조,"국내산 보리, 쌀",500ml,30
2,자연담은 복분자막걸리,(농)국순당고창명주㈜,"국내산 복분자(고창), 정제수",360ml,6
3,로얄 안동소주,(농)유토피아㈜,국내산 쌀,"600ml, 800ml",45
4,지란지교,(유)친구들의 술 지란지교,"찹쌀,멥쌀,전통누룩(밀),정제수",500ml,17
...,...,...,...,...,...
846,화백,화백,"멥쌀(국내산), 찹쌀(국내산), 누룩(국내산 밀함유), 효모, 정제수",500ml,25
847,산천어막걸리,화천주가,"정제수,팽화미(쌀/외국산),입국(쌀),국(밀),설탕,정제효소제, 효모,아스파탐(감미...",750ml,6
848,황금보리 증류주 17,황금보리(유)농업회사법인,"보리(국내산) 39.9%, 누룩(국내산)3.3%, 효소(액화효소), 효모(향미증진제...",375ml,17
849,황금보리 증류주 25,황금보리(유)농업회사법인,"정제수, 보리(국내산) 40.05%, 누룩, 효소(액화효소), 효모, 과당, 구연산...",375ml,25


In [13]:
# Add two columns whose value is the same for every row:
#   '생산국가' is filled with '대한민국', then '주종' with '전통주'.
# The dict preserves insertion order, so the columns are appended in that order.
for new_column, fixed_value in {'생산국가': '대한민국', '주종': '전통주'}.items():
    df_mak[new_column] = fixed_value

In [15]:
# Arrange the columns into the final layout; every listed header must already
# exist in the frame, otherwise pandas raises KeyError.
mak_final_column_order = ['주종', '상품명', '생산국가', '제조사', '주원료', '용량', '도수']
df_mak = df_mak[mak_final_column_order]

In [16]:
# Display the reordered frame as a last check before it is written to disk.
df_mak

Unnamed: 0,주종,상품명,생산국가,제조사,주원료,용량,도수
0,전통주,입장탁주,대한민국,(농) 입장주조㈜,국내산 쌀,750ml,7
1,전통주,톡 한잔 소주,대한민국,(농)(유)대마주조,"국내산 보리, 쌀",500ml,30
2,전통주,자연담은 복분자막걸리,대한민국,(농)국순당고창명주㈜,"국내산 복분자(고창), 정제수",360ml,6
3,전통주,로얄 안동소주,대한민국,(농)유토피아㈜,국내산 쌀,"600ml, 800ml",45
4,전통주,지란지교,대한민국,(유)친구들의 술 지란지교,"찹쌀,멥쌀,전통누룩(밀),정제수",500ml,17
...,...,...,...,...,...,...,...
846,전통주,화백,대한민국,화백,"멥쌀(국내산), 찹쌀(국내산), 누룩(국내산 밀함유), 효모, 정제수",500ml,25
847,전통주,산천어막걸리,대한민국,화천주가,"정제수,팽화미(쌀/외국산),입국(쌀),국(밀),설탕,정제효소제, 효모,아스파탐(감미...",750ml,6
848,전통주,황금보리 증류주 17,대한민국,황금보리(유)농업회사법인,"보리(국내산) 39.9%, 누룩(국내산)3.3%, 효소(액화효소), 효모(향미증진제...",375ml,17
849,전통주,황금보리 증류주 25,대한민국,황금보리(유)농업회사법인,"정제수, 보리(국내산) 40.05%, 누룩, 효소(액화효소), 효모, 과당, 구연산...",375ml,25


In [18]:
# Write the final frame to disk. The 'utf-8-sig' codec puts a UTF-8 BOM at the
# start of the file. `index` is left at its default (True), so the row index
# is written as a leading column with an empty header.
# NOTE(review): pass index=False if downstream readers should not see that
# extra column — confirm against whoever consumes makgeolli_fin.csv.
df_mak.to_csv(path_or_buf='./makgeolli_fin.csv', encoding='utf-8-sig')