# Setting

## Import Library

In [None]:
# Import Libraries
import os
import pandas as pd
import numpy as np
from scipy.stats import norm
from sklearn.preprocessing import StandardScaler
from scipy import stats
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

# Date
from calendar import monthrange
from datetime import date, datetime

# Visuzliation Setting
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
from matplotlib import rc
from matplotlib import colors
import seaborn as sns

## Set Date Functions
모든 데이터에 대한 주차별 데이터를 사용할 예정이므로  
날짜 정보를 주차 컬럼으로 변경하는 함수 정의

In [None]:
from datetime import date, timedelta


def set_week(df, date):
    '''
    df : datetime 형식의 컬럼을 가지고 있는 dataframe
    date : df에서 datetime 형식을 가진 컬럼명
    return : date의 연도 컬럼과 주차 컬럼을 추가한 dataframe
    '''
    df[date] = pd.to_datetime(df[date])
    df[date] = df[date].dt.date
    df['year'] = df.apply(func=lambda x: x[date].isocalendar()[0], axis=1)
    df['week'] = df.apply(func=lambda x: x[date].isocalendar()[1], axis=1)
    df.drop(date, axis=1, inplace=True)
    

def check_week(df):
    '''
    df에 date가 전부 있는지 확인
    '''
    cnt = 0
    sdate = date(2015, 12, 28)   # start date
    edate = date(2019, 12, 30)   # end date
    delta = edate - sdate       # as timedelta
    mem = set()
    
    for i in range(delta.days + 1):
        day = sdate + timedelta(days=i)
        year, week = day.isocalendar()[0], day.isocalendar()[1]
        if year * 100 + week in mem:
          continue
        mem.add(year * 100 + week)
        if df[(df['year'] == year) & (df['week'] == week)].empty:
          print((year, week), end="")
          cnt += 1
    if cnt > 0:
        print()
    print("missing", cnt, "values")    

## Set Data Root Directory

In [None]:
root = os.path.join(os.getcwd(), 'DATA')

# Training Data
빅콘테스트 제공 데이터 전처리

## Import Data

In [None]:
df_raw = pd.read_excel(os.path.join(root, 'train.xlsx'))
df_train = df_raw.copy()

### One hot encoding
P_IMPORT_TYPE 이라는 특수한 컬럼에 대한 전처리

In [None]:
import_type_list = set()
for tmp in df_train.P_IMPORT_TYPE.unique():
    for a in tmp.split(','):
        import_type_list.add(a)

In [None]:
for name in import_type_list:
    df_train[name] = 0
    df_train.loc[df_train['P_IMPORT_TYPE'].str.contains(name, regex=False), name] = 1

### Add Columns

In [None]:
set_week(df_train, 'REG_DATE')

In [None]:
check_week(df_train)

(2017, 2)
missing 1 values


In [None]:
# 어종별 수 컬럼 추가
df_train['name_cnt'] = 0;
value_dict = {}
for name, value in zip(df_train['P_NAME'].value_counts().index,df_train['P_NAME'].value_counts()):
    value_dict[name] = value

def value(col):
    return value_dict[col]

df_train['name_cnt'] = df_train['P_NAME'].apply(value)

In [None]:
# 가공 여부 컬럼 추가
df_train['is_processed'] = (df_train['CTRY_1'] != df_train['CTRY_2'])

In [None]:
df_train['import_cnt'] = 0
for i, row in df_train.iterrows():
    for name in import_type_list:
        if row[name] == 1:
            df_train.at[i, 'import_cnt'] += 1

---

# Weather

## Import Data

In [None]:
df_weather_code = pd.read_csv(os.path.join(root, 'raw_weather_code.csv'), header=0, index_col=0)

In [None]:
weather_list = [pd.read_csv(os.path.join(root, 'raw_weather_20151228_20161227.csv'), encoding='euc-kr') , 
                pd.read_csv(os.path.join(root, 'raw_weather_20161228_20171227.csv'), encoding='euc-kr') , 
                pd.read_csv(os.path.join(root, 'raw_weather_20171228_20181227.csv'), encoding='euc-kr') , 
                pd.read_csv(os.path.join(root, 'raw_weather_20181228_20191227.csv'), encoding='euc-kr') , 
                pd.read_csv(os.path.join(root, 'raw_weather_20191228_20201227.csv'), encoding='euc-kr') , 
                pd.read_csv(os.path.join(root, 'raw_weather_20201228_20210818.csv'), encoding='euc-kr')]


## Preprocess
- '지점'에 따른 나라명 컬럼(CTRY_1)과 해안가여부(is_waterfront) 추가
- 각 나라, 일자 별로 평균 강수량, 풍속, 기온 계산

In [None]:
# 지점에 따라 나라명 추가
def set_country(row):
    data = df_weather_code[df_weather_code['지점'] == row['지점']]
    if data.empty:
        return ""
    return data.iloc[0]['국가명']


def set_waterfront(row):
    data = df_weather_code[df_weather_code['지점'] == row['지점']]
    if data.empty or data.iloc[0]['해안가여부'] != 1:
        return False
    return True
    

def preprocess_weather(df):
    df = df[(df['강수량'] >= 0) & (df['풍속'] >= 0)] # 이상치/결측치 제거
    set_week(df, '일시')  # 날짜 정보 처리
    
    # 1차 평균
    columns = ['year', 'week', '지점']
    df['rain'] = df[columns + ['강수량']].groupby(columns).transform('mean')
    df['wind'] = df[columns + ['풍속']].groupby(columns).transform('mean')
    df['temperature'] = df[columns + ['기온']].groupby(columns).transform('mean')
    
    # 나라명 추가
    df['CTRY_1'] = ""
    for i, row in df.iterrows():
        df.at[i, 'CTRY_1'] = set_country(row)
    
    # 해안가 여부 추가
    df['is_waterfront'] = False
    for i, row in df.iterrows():
        df.at[i, 'is_waterfront'] = set_waterfront(row)
        
#     df = df[df['is_waterfront']]  # 해안가가 아닌 데이터 제외
    
    # 2차 평균
    columns = ['year', 'week', 'CTRY_1']
    df['rain'] = df[columns + ['강수량']].groupby(columns).transform('mean')
    df['wind'] = df[columns + ['풍속']].groupby(columns).transform('mean')
    df['temperature'] = df[columns + ['기온']].groupby(columns).transform('mean')

    # 컬럼/행 정리
    df.drop(columns=['지점명', '지점', '강수량', '풍속', '기온', 'is_waterfront'], inplace=True)
    df.drop_duplicates(inplace=True)
    df.reset_index(drop=True, inplace=True)
    
    return df

In [None]:
for i in range(len(weather_list)):
    weather_list[i] = preprocess_weather(weather_list[i])

In [None]:
df_weather = pd.concat(weather_list)

## Check

In [None]:
df_weather.describe()

Unnamed: 0,year,week,rain,wind,temperature
count,1679.0,1679.0,1679.0,1679.0,1679.0
mean,2018.232877,25.856462,5.981158,3.64264,14.906772
std,1.607481,15.124088,32.371218,3.200572,10.536941
min,2015.0,1.0,0.0,0.0,-21.6
25%,2017.0,13.0,1.489908,1.869608,8.266627
50%,2018.0,25.0,2.75,2.452555,15.382857
75%,2020.0,39.0,6.504304,5.111601,25.430698
max,2021.0,53.0,915.0,60.0,30.1


In [None]:
for country in df_weather['CTRY_1'].unique():
    if country == "":
        continue
    print()
    print(country, "총", len(df_weather[df_weather['CTRY_1'] == country]), "개")
    check_week(df_weather[df_weather['CTRY_1'] == country])


노르웨이 총 236 개
missing 0 values

태국 총 297 개
(2016, 11)
missing 1 values

베트남 총 298 개
(2016, 6)
missing 1 values

중국 총 299 개
missing 0 values

페루 총 7 개
(2015, 53)(2016, 1)(2016, 2)(2016, 3)(2016, 4)(2016, 5)(2016, 6)(2016, 7)(2016, 8)(2016, 9)(2016, 10)(2016, 11)(2016, 12)(2016, 13)(2016, 14)(2016, 15)(2016, 16)(2016, 17)(2016, 18)(2016, 19)(2016, 20)(2016, 21)(2016, 22)(2016, 23)(2016, 24)(2016, 25)(2016, 26)(2016, 27)(2016, 28)(2016, 29)(2016, 30)(2016, 31)(2016, 32)(2016, 33)(2016, 34)(2016, 35)(2016, 36)(2016, 37)(2016, 38)(2016, 39)(2016, 40)(2016, 41)(2016, 42)(2016, 43)(2016, 44)(2016, 45)(2016, 46)(2016, 47)(2016, 48)(2016, 49)(2016, 51)(2016, 52)(2017, 1)(2017, 2)(2017, 3)(2017, 4)(2017, 5)(2017, 6)(2017, 7)(2017, 8)(2017, 9)(2017, 10)(2017, 12)(2017, 13)(2017, 14)(2017, 15)(2017, 16)(2017, 17)(2017, 18)(2017, 19)(2017, 20)(2017, 21)(2017, 22)(2017, 23)(2017, 24)(2017, 25)(2017, 26)(2017, 27)(2017, 28)(2017, 29)(2017, 30)(2017, 31)(2017, 32)(2017, 33)(2017, 34)(2017, 35)(2017, 3

## Filling Missing Values

- 노르웨이, 중국은 결측치 없음
- 태국, 베트남, 칠레는 결측치 1개
> 전/차주 데이터 평균으로 채워넣기
- 페루는 결측치 299개
> 사용 불가

In [None]:
def get_avg(year, week, country, value):
  return (df_weather[(df_weather['year'] == year) & (df_weather['week'] == week + 1) & (df_weather['CTRY_1'] == country)].iloc[0][value] 
          + df_weather[(df_weather['year'] == year) & (df_weather['week'] == week - 1) & (df_weather['CTRY_1'] == country)].iloc[0][value]) / 2

In [None]:
df_weather = df_weather.append({'year': 2016,
                                'week': 11,
                                'CTRY_1': '태국',
                                'rain': get_avg(2016, 11, '태국', 'rain'),
                                'wind': get_avg(2016, 11, '태국', 'wind'),
                                'temperature': get_avg(2016, 11, '태국', 'temperature')},
                               ignore_index=True)
df_weather = df_weather.append({'year': 2016,
                                'week': 6,
                                'CTRY_1': '베트남',
                                'rain': get_avg(2016, 6, '베트남', 'rain'),
                                'wind': get_avg(2016, 6, '베트남', 'wind'),
                                'temperature': get_avg(2016, 6, '베트남', 'temperature')},
                               ignore_index=True)
df_weather = df_weather.append({'year': 2016,
                                'week': 5,
                                'CTRY_1': '칠레',
                                'rain': get_avg(2016, 5, '칠레', 'rain'),
                                'wind': get_avg(2016, 5, '칠레', 'wind'),
                                'temperature': get_avg(2016, 5, '칠레', 'temperature')},
                               ignore_index=True)

for country in ["태국", "베트남", "칠레"]:
    print()
    print(country, "총", len(df_weather[df_weather['CTRY_1'] == country]), "개")
    check_week(df_weather[df_weather['CTRY_1'] == country])


태국 총 298 개
missing 0 values

베트남 총 299 개
missing 0 values

칠레 총 298 개
missing 0 values


## Add to Training Data

In [None]:
df_train = pd.merge(df_train, df_weather, how='left', on=['year', 'week', 'CTRY_1'])

---

# Salinity

## Import Data

In [None]:
# df_salinity = pd.read_csv(os.path.join(root, 'raw_salinity.csv'))

## Drop Columns & Rows

In [None]:
# df_salinity.drop(df_salinity.columns[2], inplace=True, axis=1)
# df_salinity = df_salinity[(2015 <= df_salinity['obs_year']) & (df_salinity['obs_year'] <= 2021)]

2020, 2021 데이터의 부재로 인해 보류

---

# Oil

## Import Data

In [None]:
df_oil = pd.read_csv(os.path.join(root, 'raw_oil.csv'), usecols=["날짜", "종가"])
df_oil_dubai = pd.read_csv(os.path.join(root, 'raw_oil_dubai.csv'), usecols=["날짜", "종가"])
df_oil_brent = pd.read_csv(os.path.join(root, 'raw_oil_brent.csv'), usecols=["날짜", "종가"])

## Preprocess

In [None]:
from datetime import datetime
def preprocess_oil(df):
    df['date'] = pd.to_datetime(df['날짜'].str[:4] + df['날짜'].str[6:8] + df['날짜'].str[10:12])
    set_week(df, 'date')  # 날짜 데이터 정리
    df['oil'] = df[['year', 'week', '종가']].groupby(['year', 'week']).transform('mean')
    df.drop(columns = ['날짜', '종가'], inplace=True, axis=1)
    df.drop_duplicates(inplace=True)
    return df

In [None]:
df_oil = preprocess_oil(df_oil)
df_oil_dubai = preprocess_oil(df_oil_dubai)
df_oil_brent = preprocess_oil(df_oil_brent)

## Check

In [None]:
for i in range(1, 53):
  if i not in (list(df_oil_brent[df_oil_brent['year'] == 2019].sort_values(by=['year', 'week'])['week'])):
    print(i)

In [None]:
check_week(df_oil)
check_week(df_oil_dubai)
check_week(df_oil_brent)

missing 0 values
missing 0 values
missing 0 values


모든 데이터가 결측치는 없지만,  
df_oil 에 해당하는 wti 종가가 가장 예민하게 반응하는 값이므로  
이를 사용하도록 한다

## Add to Training Data

In [None]:
df_train = pd.merge(df_train, df_oil, how='left', on=['year', 'week'])

---

# Korea Weather

## Import Data

In [None]:
df_weather_kr = pd.read_csv(os.path.join(root, 'raw_weather_korea.csv'),
                            encoding='euc-kr',
                            usecols=["일시", "평균 풍속(m/s)", "평균 기온(°C)", "평균 수온(°C)"])

## Preprocess Date

In [None]:
df_weather_kr['date'] = pd.to_datetime(df_weather_kr['일시'].str[:4] + df_weather_kr['일시'].str[5:7] + df_weather_kr['일시'].str[8:])
set_week(df_weather_kr, 'date')

In [None]:
df_weather_kr.describe()

Unnamed: 0,평균 풍속(m/s),평균 기온(°C),평균 수온(°C),year,week
count,35181.0,35107.0,35300.0,35558.0,35558.0
mean,5.732987,14.845689,16.884377,2018.369143,25.531554
std,2.705727,7.573504,5.945493,1.627939,15.146644
min,0.0,-11.1,1.1,2015.0,1.0
25%,3.6,8.8,13.0,2017.0,13.0
50%,5.4,15.1,16.6,2018.0,24.0
75%,7.5,21.0,21.4,2020.0,39.0
max,19.6,34.6,31.5,2021.0,53.0


## Group
일자 별로 평균 구하기

In [None]:
df_weather_kr['wind_kr'] = df_weather_kr[['year', 'week', '평균 풍속(m/s)']].groupby(['year', 'week']).transform('mean')
df_weather_kr['temperature_kr'] = df_weather_kr[['year', 'week', '평균 기온(°C)']].groupby(['year', 'week']).transform('mean')
df_weather_kr['water_temp_kr'] = df_weather_kr[['year', 'week', '평균 수온(°C)']].groupby(['year', 'week']).transform('mean')

## Drop Column

In [None]:
drop = ['일시', '평균 풍속(m/s)', '평균 기온(°C)', '평균 수온(°C)']

In [None]:
df_weather_kr.drop(columns=drop, inplace=True, axis=1)
df_weather_kr.drop_duplicates(inplace=True)

## Check

In [None]:
check_week(df_weather_kr)

missing 0 values


In [None]:
df_weather_kr.describe()

Unnamed: 0,year,week,wind_kr,temperature_kr,water_temp_kr
count,288.0,288.0,288.0,288.0,288.0
mean,2018.267361,25.510417,5.723881,14.920397,16.953522
std,1.612682,15.112121,1.340964,7.177077,5.321755
min,2015.0,1.0,2.706723,-0.529915,8.389899
25%,2017.0,12.75,4.763866,8.648878,12.088782
50%,2018.0,24.5,5.668908,14.792355,16.279115
75%,2020.0,38.25,6.65641,21.312062,21.471801
max,2021.0,53.0,9.819643,28.610924,28.495798


## Add to Training Data

In [None]:
df_train = pd.merge(df_train, df_weather_kr, how='left', on=['year', 'week'])

---

# CPI

## Import Data

In [None]:
df_cpi_total = pd.read_csv(os.path.join(root, 'raw_cpi_total.csv'), encoding='cp949')  # 총 소비자물가지수
df_cpi_fish =  pd.read_csv(os.path.join(root, 'raw_cpi_fish.csv'), encoding='cp949')  # 수산물 소비자물가지수

## Drop Columns/Rows

In [None]:
df_cpi_total = df_cpi_total[df_cpi_total['지출목적별'] == "0 총지수"].drop(columns=['시도별', '지출목적별'], axis=1)
df_cpi_fish = df_cpi_fish[df_cpi_fish['지출목적별'] == "어류 및 수산"].drop(columns=['Unnamed: 0', '지출목적별'], axis=1)

## Pivoting

In [None]:
df_cpi_total = df_cpi_total.transpose().reset_index()
df_cpi_fish = df_cpi_fish.transpose().reset_index()

## Rename

In [None]:
df_cpi_total.rename(columns={0: 'cpi_total'}, inplace=True)
df_cpi_fish.rename(columns={0: 'cpi_fish'}, inplace=True)
print(df_cpi_total.columns, df_cpi_fish.columns)

Index(['index', 'cpi_total'], dtype='object') Index(['index', 'cpi_fish'], dtype='object')


## Preprocess Date

In [None]:
df_cpi_total['year'] = df_cpi_total['index'].str[:4].astype('int')
df_cpi_total['month'] = df_cpi_total['index'].str[6:8].astype('int')
df_cpi_fish['year'] = df_cpi_fish['index'].str[:4].astype('int')
df_cpi_fish['month'] = df_cpi_fish['index'].str[6:8].astype('int')

In [None]:
df_cpi_total.drop(columns=['index'], inplace=True)
df_cpi_fish.drop(columns=['index'], inplace=True)

In [None]:
# Merge df
df_cpi = pd.merge(left=df_cpi_total, right=df_cpi_fish, how='outer', on=['year', 'month'])

In [None]:
# create days by duplicating data
new_df = pd.DataFrame(columns=list(df_cpi.columns) + ['day'])
for i, row in df_cpi.iterrows():
  data = df_cpi.iloc[i]
  year, month, cpi_total, cpi_fish = data['year'].astype('int'), data['month'].astype('int'), data['cpi_total'], data['cpi_fish']
  for day in range(1, monthrange(year, month)[1]+1):
    new_df = new_df.append({'year': str(year),
                            'month': str(month),
                            'day': str(day),
                            'cpi_total': cpi_total,
                            'cpi_fish': cpi_fish},
                           ignore_index=True)
df_cpi = new_df

In [None]:
df_cpi['date'] = df_cpi['year'] + " " + df_cpi['month'] + " " + df_cpi['day']

In [None]:
set_week(df_cpi, 'date')

## 정리
- drop unused columns
- drop duplicates

In [None]:
df_cpi.drop(columns=['month', 'day'], inplace=True)
df_cpi.drop_duplicates(inplace=True)

## Check

In [None]:
check_week(df_cpi)

missing 0 values


In [None]:
df_cpi.describe()

Unnamed: 0,cpi_total,year,cpi_fish,week
count,348.0,353.0,353.0,353.0
mean,103.977759,2018.254958,112.0683,26.067989
std,1.901079,1.659215,6.449167,15.191427
min,100.22,2015.0,100.76,1.0
25%,102.72,2017.0,107.37,13.0
50%,104.35,2018.0,111.96,26.0
75%,105.46,2020.0,118.44,39.0
max,107.46,2021.0,122.01,53.0


In [None]:
df_cpi.sample(5)

Unnamed: 0,cpi_total,year,cpi_fish,week
1707,105.5,2020,118.3,32
1994,107.46,2021,122.01,20
670,103.39,2017,108.02,39
1784,105.61,2020,120.0,43
272,100.86,2016,101.69,35


## Add to Training Data

In [None]:
df_train = pd.merge(df_train, df_cpi, how='left', on=['year', 'week'])

---

# Exchange Rate

In [None]:
exchange_list = [pd.read_csv(os.path.join(root, 'raw_exchange_chile.csv'), usecols=["날짜", "종가"]),
                 pd.read_csv(os.path.join(root, 'raw_exchange_china.csv'), usecols=["날짜", "종가"]),
                 pd.read_csv(os.path.join(root, 'raw_exchange_norway.csv'), usecols=["날짜", "종가"]),
                 pd.read_csv(os.path.join(root, 'raw_exchange_peru.csv'), usecols=["날짜", "종가"]),
                 pd.read_csv(os.path.join(root, 'raw_exchange_thai.csv'), usecols=["날짜", "종가"]),
                 pd.read_csv(os.path.join(root, 'raw_exchange_vietnam.csv'), usecols=["날짜", "종가"])]

In [None]:
ctry_name = ['칠레', '중국', '노르웨이', '페루', '태국', '베트남']
for i in range(len(exchange_list)):
  set_week(exchange_list[i], '날짜')
  check_week(exchange_list[i])
  exchange_list[i]['CTRY_2'] = ctry_name[i]

missing 0 values
missing 0 values
missing 0 values
missing 0 values
missing 0 values
missing 0 values


In [None]:
df_exchange = pd.concat(exchange_list)

## Rename

In [None]:
df_exchange.rename(columns={"종가": 'exchange'}, inplace=True)

## Add to Training Data

In [None]:
df_train = pd.merge(df_train, df_exchange, how='left', on=['year', 'week', 'CTRY_2'])

In [None]:
pd.set_option("display.max_columns", None)
df_train.sample(5)

Unnamed: 0,P_TYPE,CTRY_1,CTRY_2,P_PURPOSE,CATEGORY_1,CATEGORY_2,P_NAME,P_IMPORT_TYPE,P_PRICE,살,염장,창난,알,냉동,눈살,줄기,캐비아대용,훈제,머리_외화획득용,내장,머리살,건조,곤이,난포선,횟감,한쪽껍질붙은,지느러미,자숙,꼬리_외화획득용,목살,턱살,슬라이스(S),필렛(F),외투막,머리,간,포장횟감,다리,볼살,집게다리,활,절단,동체,냉장,개아지살,껍질,year,week,name_cnt,is_processed,import_cnt,rain,wind,temperature,oil,wind_kr,temperature_kr,water_temp_kr,cpi_total,cpi_fish,exchange
36518,수산물,베트남,베트남,판매용,연체류 해물모듬,오징어,화살오징어,"냉동,슬라이스(S),포장횟감",14.473092,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2018,4,512,False,4,4.509184,1.693878,22.6,65.044,8.358621,-0.529915,10.188235,103.42,110.32,4.4e-05
26797,수산물,남아프리카 공화국,남아프리카 공화국,판매용,어류,갈치,갈치,냉동,2.73852,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2017,28,952,False,1,,,,45.51,4.886555,24.300877,22.747899,102.78,106.7,
54264,수산물,베트남,베트남,판매용,어류,팡가시우스(홍메기),PANGASIUS메기,"냉동,포장횟감,필렛(F)",4.92912,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,2019,3,102,False,4,6.563415,1.804878,20.641463,52.408333,6.634821,5.832143,11.866964,104.24,111.64,4.3e-05
40657,수산물,러시아,러시아,자사제품제조용,갑각류,게,대게,냉장,2.674419,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2018,15,603,False,1,,,,66.042,6.352174,12.049573,11.751261,104.29,112.36,
4986,수산물,중국,홍콩,판매용,어류,부세,부세,냉동,4.745993,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2016,16,279,True,1,3.358836,2.327586,15.135776,42.08,4.866387,13.245378,12.982353,100.75,102.73,


# Final

## One-hot Encoding

In [None]:
#one_hot = ['CTRY_1''CTRY_2', 'P_PURPOSE', 'CATEGORY_1', 'CATEGORY_2', 'P_NAME']
one_hot = ['CTRY_1','P_PURPOSE', 'CATEGORY_1', 'CATEGORY_2', 'P_NAME']

In [None]:
df_train = pd.get_dummies(df_train, columns=one_hot)

## Drop Columns

In [None]:
drop = ['P_TYPE', 'P_IMPORT_TYPE']

In [None]:
df_train.drop(columns = drop, inplace=True)

In [None]:
pd.set_option("display.max_columns", None)
df_train.sample(5)

Unnamed: 0,CTRY_2,P_PRICE,살,염장,창난,알,냉동,눈살,줄기,캐비아대용,훈제,머리_외화획득용,내장,머리살,건조,곤이,난포선,횟감,한쪽껍질붙은,지느러미,자숙,꼬리_외화획득용,목살,턱살,슬라이스(S),필렛(F),외투막,머리,간,포장횟감,다리,볼살,집게다리,활,절단,동체,냉장,개아지살,껍질,year,week,name_cnt,is_processed,import_cnt,rain,wind,temperature,oil,wind_kr,temperature_kr,water_temp_kr,cpi_total,cpi_fish,exchange,CTRY_1_가나,CTRY_1_감비아,CTRY_1_그리스,CTRY_1_그린란드,CTRY_1_기니,CTRY_1_기니비사우,CTRY_1_나미비아,CTRY_1_남아프리카 공화국,CTRY_1_네덜란드,CTRY_1_노르웨이,CTRY_1_뉴질랜드,CTRY_1_니카라과,CTRY_1_대만,CTRY_1_대한민국,CTRY_1_라이베리아,CTRY_1_라트비아,CTRY_1_러시아,CTRY_1_루마니아,CTRY_1_리비아,CTRY_1_마다가스카르,CTRY_1_말레이시아,CTRY_1_멕시코,CTRY_1_모로코,CTRY_1_모리타니,CTRY_1_모잠비크,CTRY_1_몰타,CTRY_1_미국,CTRY_1_미얀마,CTRY_1_미크로네시아 연방,CTRY_1_바누아투,CTRY_1_바레인,CTRY_1_방글라데시,CTRY_1_베네수엘라,CTRY_1_베트남,CTRY_1_불가리아,CTRY_1_브라질,CTRY_1_사우디아라비아,CTRY_1_사이프러스,CTRY_1_세네갈,CTRY_1_세이셸,CTRY_1_세인트빈센트 그레나딘,CTRY_1_소말리아,CTRY_1_수리남,CTRY_1_스리랑카,CTRY_1_스페인,CTRY_1_시에라리온,CTRY_1_싱가포르,CTRY_1_아랍에미리트,CTRY_1_아르헨티나,CTRY_1_아이슬란드,CTRY_1_아일랜드,CTRY_1_알제리,CTRY_1_앙골라,CTRY_1_에스토니아,CTRY_1_에콰도르,CTRY_1_영국,CTRY_1_오만,CTRY_1_우루과이,CTRY_1_우크라이나,CTRY_1_이란,CTRY_1_이집트,CTRY_1_이탈리아,CTRY_1_인도,CTRY_1_인도네시아,CTRY_1_일본,CTRY_1_중국,CTRY_1_칠레,CTRY_1_캐나다,CTRY_1_콜롬비아,CTRY_1_쿠바,CTRY_1_쿡 제도,CTRY_1_크로아티아,CTRY_1_키리바시,CTRY_1_태국,CTRY_1_터키,CTRY_1_튀니지,CTRY_1_파나마,CTRY_1_파키스탄,CTRY_1_파푸아뉴기니,CTRY_1_팔라우,CTRY_1_페루,CTRY_1_포르투갈,CTRY_1_포클랜드 제도,CTRY_1_프랑스,CTRY_1_피지,CTRY_1_필리핀,CTRY_1_호주,P_PURPOSE_반송품(기타),P_PURPOSE_외화획득용 원료,P_PURPOSE_자사제품제조용,P_PURPOSE_판매용,P_PURPOSE_합작,CATEGORY_1_갑각류,CATEGORY_1_기타 수입식품,CATEGORY_1_알 곤이류,CATEGORY_1_어류,CATEGORY_1_연체류 해물모듬,CATEGORY_1_젓갈류 해조류 해파리,CATEGORY_1_패류 멍게류,CATEGORY_2_가리비,CATEGORY_2_가물치,CATEGORY_2_가사리,CATEGORY_2_가오리,CATEGORY_2_가자미,CATEGORY_2_가재 랍스타,CATEGORY_2_갈치,CATEGORY_2_갑오징어,CATEGORY_2_개복치,CATEGORY_2_개불,CATEGORY_2_게,CATEGORY_2_고동,CATEGORY_2_고등어,CATEGORY_2_고시래기,CATEGORY_2_골뱅이,CATEGORY_2_광어 넙치,CATEGORY_2_김,CATEGORY_2_꼬막,CATEGORY_2_꼴뚜기,CATEGORY_2_꽁치 학꽁치,CATEGORY_2_꽁치 확꽁치,CATEGORY_2_낙지,CATEGORY_2_날치알,CATEGORY_2_남극빙어,CATEGORY_2_노래미,CATEGORY_2_농어,CATEGORY_2_능성어 붉바리 바리,CATEGORY_2_다시마,CATEGORY_2_달고기,CATEGORY_2_대구,CATEGORY_2_대구알,CATEGORY_2_도다리,CATEGORY_2_도미 감성돔 돔류,CATEGORY_2_망둑어,CATEGORY_2_멍게,CATEGORY_2_메기 동자개,CATEGORY_2_메로,CATEGORY_2_멸치,CATEGORY_2_명란(명태알),CATEGORY_2_명태,CATEGORY_2_문어,CATEGORY_2_물메기(곰치),CATEGORY_2_미꾸라지,CATEGORY_2_미역,CATEGORY_2_민물붕어,CATEGORY_2_민어 점성어,CATEGORY_2_밀크피시,CATEGORY_2_바지락,CATEGORY_2_방어,CATEGORY_2_밴댕이,CATEGORY_2_버터플라이 킹피쉬,CATEGORY_2_벤자리 알롱이,CATEGORY_2_병어,CATEGORY_2_보리멸,CATEGORY_2_복어,CATEGORY_2_부세,CATEGORY_2_붉평치(만다이 꽃돔),CATEGORY_2_삼치,CATEGORY_2_상어 고래,CATEGORY_2_새우,CATEGORY_2_샛돔류알,CATEGORY_2_서대 박대 페루다,CATEGORY_2_성게알,CATEGORY_2_소라,CATEGORY_2_송어,CATEGORY_2_쏘가리,CATEGORY_2_아귀,CATEGORY_2_양미리 정어리,CATEGORY_2_양태,CATEGORY_2_어류 기타,CATEGORY_2_연어,CATEGORY_2_연어알,CATEGORY_2_열빙어(시샤모),CATEGORY_2_열빙어(시샤모)알,CATEGORY_2_오징어,CATEGORY_2_옥돔,CATEGORY_2_우럭 볼락,CATEGORY_2_우렁 다슬기,CATEGORY_2_은민대구알,CATEGORY_2_임연수,CATEGORY_2_잉어,CATEGORY_2_자라,CATEGORY_2_장어,CATEGORY_2_재첩,CATEGORY_2_적어 눈볼대,CATEGORY_2_전갱기 매가리,CATEGORY_2_전갱이 매가리,CATEGORY_2_전복,CATEGORY_2_전어,CATEGORY_2_조개,CATEGORY_2_조개 백합 대합,CATEGORY_2_조기 보구치 강다리,CATEGORY_2_조기 보구치 강다리.1,CATEGORY_2_준치,CATEGORY_2_줄비늘치,CATEGORY_2_쥐치,CATEGORY_2_쭈꾸미,CATEGORY_2_참치 새치류,CATEGORY_2_참치 새치류.1,CATEGORY_2_청어,CATEGORY_2_청어알,CATEGORY_2_톳,CATEGORY_2_틸라피아(역돔),CATEGORY_2_팡가시우스(홍메기),CATEGORY_2_해물모둠,CATEGORY_2_해삼,CATEGORY_2_해초,CATEGORY_2_해파리,CATEGORY_2_호끼류,CATEGORY_2_호키(새꼬리민태)알,CATEGORY_2_호키류,CATEGORY_2_홍어,CATEGORY_2_홍합,P_NAME_PANGASIUS메기,P_NAME_가다랑어,P_NAME_가라지,P_NAME_가리비,P_NAME_가무락조개,P_NAME_가물치,P_NAME_가시배새우,P_NAME_가시투성왕게,P_NAME_가오리,P_NAME_가이석태속,P_NAME_가자미,P_NAME_각시가자미,P_NAME_갈치,P_NAME_감성돔,P_NAME_갑오징어,P_NAME_강담돔,P_NAME_강도다리,P_NAME_개량조개,P_NAME_개복치,P_NAME_개불,P_NAME_개조개,P_NAME_갯고둥,P_NAME_갯장어,P_NAME_검복,P_NAME_검정가자미,P_NAME_검정볼락,P_NAME_게,P_NAME_고등어,P_NAME_골뱅이,P_NAME_곱사연어,P_NAME_곱상어,P_NAME_구라미,P_NAME_귀상어,P_NAME_금눈돔,P_NAME_금색돔,P_NAME_기름치,P_NAME_기타민어류,P_NAME_기타병어류,P_NAME_긴가이석태,P_NAME_김,P_NAME_까지가자미,P_NAME_까치복,P_NAME_까칠복,P_NAME_깜장북방대합,P_NAME_꼬리검정민태,P_NAME_꼬막,P_NAME_꼬시래기,P_NAME_꽁치,P_NAME_꽃게,P_NAME_낙지,P_NAME_날개다랑어,P_NAME_날치알,P_NAME_남방대구,P_NAME_남방참다랑어,P_NAME_넙치,P_NAME_녹새치,P_NAME_논고둥,P_NAME_농어,P_NAME_눈다랑어,P_NAME_능성어,P_NAME_다슬기,P_NAME_다시마,P_NAME_달고기,P_NAME_닭새우,P_NAME_대게,P_NAME_대구,P_NAME_대구알,P_NAME_대두이석태,P_NAME_대서양꼬마민어,P_NAME_대서양먹장어,P_NAME_대서양붉은볼락,P_NAME_대서양연어,P_NAME_대서양조기속,P_NAME_대서양참다랑어,P_NAME_던지네스게,P_NAME_도화새우,P_NAME_돌가사리,P_NAME_돌가자미,P_NAME_돌돔,P_NAME_돔,P_NAME_동갈돗돔,P_NAME_동갈횟대,P_NAME_동등이석태,P_NAME_동자개,P_NAME_동죽,P_NAME_돛새치,P_NAME_두점박이민꽃게,P_NAME_드렁허리,P_NAME_등목어,P_NAME_마설가자미,P_NAME_마소치가자미,P_NAME_맛조개,P_NAME_망둑어,P_NAME_매듭가자미,P_NAME_매미새우,P_NAME_먹볼락,P_NAME_먹장어,P_NAME_멍게,P_NAME_메기,P_NAME_멸치,P_NAME_명태,P_NAME_명태알,P_NAME_문어,P_NAME_물메기,P_NAME_미꾸라지,P_NAME_미역,P_NAME_민꽃게,P_NAME_민대구,P_NAME_민들조개,P_NAME_민물가재,P_NAME_민물새우,P_NAME_민밀복,P_NAME_민어,P_NAME_민태,P_NAME_밀크피시,P_NAME_바다가재,P_NAME_바닷가재,P_NAME_바라문디,P_NAME_바리,"P_NAME_바리,교잡종",P_NAME_바지락,P_NAME_밤색무늬조개,P_NAME_방어,P_NAME_백합,"P_NAME_백합,MERCENARIA MERCENARIA",P_NAME_밴댕이,P_NAME_뱀장어,P_NAME_버들붕어,P_NAME_버터플라이 킹피쉬,P_NAME_벤자리,P_NAME_벵에돔,P_NAME_병어,P_NAME_병치매가리,P_NAME_보리멸,P_NAME_볼락,P_NAME_부세,P_NAME_북방대합,P_NAME_북쪽분홍새우,P_NAME_붉돔,P_NAME_붉은대게,P_NAME_붉은메기,P_NAME_붉은이석태,P_NAME_붉평치,P_NAME_붕어,P_NAME_붕장어,P_NAME_블루화이팅,P_NAME_비너스백합,P_NAME_비단조개,P_NAME_뿔가자미,P_NAME_삼치,P_NAME_상어,P_NAME_새꼬리민태알,P_NAME_새꼬막,P_NAME_새뱅이,P_NAME_새우,P_NAME_새조개,P_NAME_샛돔,P_NAME_샛돔류알,P_NAME_서대,P_NAME_성게알,P_NAME_세네갈가이석태,P_NAME_소라,P_NAME_소주목탁가자미,P_NAME_송어,P_NAME_수조기,P_NAME_스피노잠,P_NAME_식용자라,P_NAME_실꼬리돔,P_NAME_쌍지붕어,P_NAME_쏘가리,P_NAME_아귀,P_NAME_아담스백합,P_NAME_아르헨티나붉은새우,P_NAME_양볼락,P_NAME_양초선홍치,P_NAME_양태,P_NAME_어름돔,P_NAME_얼룩볼락,P_NAME_연어,P_NAME_연어알,P_NAME_열빙어,P_NAME_열빙어알,P_NAME_영상가이석태,P_NAME_오징어,P_NAME_옥덩굴,P_NAME_옥돔,P_NAME_옥두어,P_NAME_왕게,P_NAME_왕게붙이,P_NAME_왕연어,P_NAME_우뭇가사리,P_NAME_위고둥,P_NAME_유럽물레고둥,P_NAME_은대구,P_NAME_은민대구,P_NAME_은민대구알,P_NAME_은밀복,P_NAME_은연어,P_NAME_은행게,P_NAME_이스라엘잉어,P_NAME_임연수어,P_NAME_잉어,P_NAME_자바리,P_NAME_자이언트그루퍼,P_NAME_자주복,P_NAME_장문볼락,P_NAME_장성베도라치,P_NAME_장수기름가자미,P_NAME_재첩,P_NAME_잿방어,P_NAME_적돔,P_NAME_적새우,P_NAME_전갱이,"P_NAME_전갱이,POMPANO",P_NAME_전복,P_NAME_전어,P_NAME_젓새우,P_NAME_정어리,P_NAME_조피볼락,P_NAME_주꾸미,P_NAME_주름백합,P_NAME_준치,P_NAME_줄민태,P_NAME_줄비늘치,P_NAME_쥐노래미,P_NAME_쥐돔,P_NAME_쥐치,P_NAME_진홍퉁돔,P_NAME_진환도상어,P_NAME_참게,P_NAME_참다랑어,P_NAME_참돔,P_NAME_참복,P_NAME_참조기,P_NAME_첨치가자미,P_NAME_청각,P_NAME_청대구,P_NAME_청상아리,P_NAME_청새리상어,P_NAME_청새치,P_NAME_청어,P_NAME_청어알,P_NAME_청회볼락,P_NAME_체장메기,P_NAME_칠성장어,P_NAME_코끼리조개,P_NAME_코드아이스피쉬,P_NAME_코토니,P_NAME_쿠자조기,P_NAME_큰구슬우렁이,P_NAME_큰민어,P_NAME_큰실말,P_NAME_큰징거미새우,P_NAME_키조개,P_NAME_태평양먹장어,P_NAME_털게,P_NAME_톱날꽃게,P_NAME_톳,P_NAME_틸라피아,P_NAME_파타고니아이빨고기,P_NAME_프로펠러조개,P_NAME_피뿔고둥,P_NAME_피조개,P_NAME_학공치,P_NAME_해물혼합,P_NAME_해삼,P_NAME_해파리,P_NAME_홍감펭,P_NAME_홍다리얼룩새우,P_NAME_홍민어,P_NAME_홍서대,P_NAME_홍어,"P_NAME_홍연어,Red salmon",P_NAME_홍합,P_NAME_화살오징어,P_NAME_황다랑어,P_NAME_황돔,P_NAME_황새치,P_NAME_황적퉁돔,P_NAME_회초리꼬리민태,P_NAME_흑기흉상어,P_NAME_흑밀복,P_NAME_흑점샛돔알,P_NAME_흑점줄전갱이,P_NAME_흰꼴뚜기,P_NAME_흰다리새우
18696,중국,2.854577,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2017,4,181,False,1,0.695139,3.166667,1.746528,53.126,6.455556,5.008475,11.236283,102.64,107.37,0.1459,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2068,러시아,3.24999,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2016,6,393,False,2,,,,28.301667,7.273636,7.910185,10.033945,100.82,102.3,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
35765,중국,5.700007,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2018,2,209,False,1,2.229282,3.381215,-0.330387,63.44,8.222321,2.664286,10.999107,103.42,110.32,0.1538,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
55892,사우디아라비아,6.2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019,8,2601,False,1,,,,56.7,5.435294,7.351261,10.886555,104.69,111.65,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
25317,페루,1.278726,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,2017,24,2150,False,2,,,,45.294,4.172269,19.455932,19.071429,102.61,108.78,0.3049,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


## Save Files

In [None]:
# def save_file(df, file_name):
#     df.to_csv(os.path.join(root, file_name), encoding='utf-8', index=False)

In [None]:
# save_file(df_weather, 'preprocessed_weather.csv')  # 제조국 날씨
# save_file(df_oil, 'preprocessed_oil.csv')  # 원유 종가
# save_file(df_weather_kr, 'preprocessed_weather_korea.csv')  # 한국 날씨
# save_file(df_cpi, 'preprocessed_cpi.csv')  # 소비자물가지수
# save_file(df_exchange, 'preprocessed_exchange.csv')  # 환율
# save_file(df_train, 'preprocessed_train.csv')  # 최종 df

# 주요 CTRY DataFrame

In [None]:
df_train.head()

Unnamed: 0,CTRY_2,P_PRICE,살,염장,창난,알,냉동,눈살,줄기,캐비아대용,훈제,머리_외화획득용,내장,머리살,건조,곤이,난포선,횟감,한쪽껍질붙은,지느러미,자숙,꼬리_외화획득용,목살,턱살,슬라이스(S),필렛(F),외투막,머리,간,포장횟감,다리,볼살,집게다리,활,절단,동체,냉장,개아지살,껍질,year,week,name_cnt,is_processed,import_cnt,rain,wind,temperature,oil,wind_kr,temperature_kr,water_temp_kr,cpi_total,cpi_fish,exchange,CTRY_1_가나,CTRY_1_감비아,CTRY_1_그리스,CTRY_1_그린란드,CTRY_1_기니,CTRY_1_기니비사우,CTRY_1_나미비아,CTRY_1_남아프리카 공화국,CTRY_1_네덜란드,CTRY_1_노르웨이,CTRY_1_뉴질랜드,CTRY_1_니카라과,CTRY_1_대만,CTRY_1_대한민국,CTRY_1_라이베리아,CTRY_1_라트비아,CTRY_1_러시아,CTRY_1_루마니아,CTRY_1_리비아,CTRY_1_마다가스카르,CTRY_1_말레이시아,CTRY_1_멕시코,CTRY_1_모로코,CTRY_1_모리타니,CTRY_1_모잠비크,CTRY_1_몰타,CTRY_1_미국,CTRY_1_미얀마,CTRY_1_미크로네시아 연방,CTRY_1_바누아투,CTRY_1_바레인,CTRY_1_방글라데시,CTRY_1_베네수엘라,CTRY_1_베트남,CTRY_1_불가리아,CTRY_1_브라질,CTRY_1_사우디아라비아,CTRY_1_사이프러스,CTRY_1_세네갈,CTRY_1_세이셸,CTRY_1_세인트빈센트 그레나딘,CTRY_1_소말리아,CTRY_1_수리남,CTRY_1_스리랑카,CTRY_1_스페인,CTRY_1_시에라리온,CTRY_1_싱가포르,CTRY_1_아랍에미리트,CTRY_1_아르헨티나,CTRY_1_아이슬란드,CTRY_1_아일랜드,CTRY_1_알제리,CTRY_1_앙골라,CTRY_1_에스토니아,CTRY_1_에콰도르,CTRY_1_영국,CTRY_1_오만,CTRY_1_우루과이,CTRY_1_우크라이나,CTRY_1_이란,CTRY_1_이집트,CTRY_1_이탈리아,CTRY_1_인도,CTRY_1_인도네시아,CTRY_1_일본,CTRY_1_중국,CTRY_1_칠레,CTRY_1_캐나다,CTRY_1_콜롬비아,CTRY_1_쿠바,CTRY_1_쿡 제도,CTRY_1_크로아티아,CTRY_1_키리바시,CTRY_1_태국,CTRY_1_터키,CTRY_1_튀니지,CTRY_1_파나마,CTRY_1_파키스탄,CTRY_1_파푸아뉴기니,CTRY_1_팔라우,CTRY_1_페루,CTRY_1_포르투갈,CTRY_1_포클랜드 제도,CTRY_1_프랑스,CTRY_1_피지,CTRY_1_필리핀,CTRY_1_호주,P_PURPOSE_반송품(기타),P_PURPOSE_외화획득용 원료,P_PURPOSE_자사제품제조용,P_PURPOSE_판매용,P_PURPOSE_합작,CATEGORY_1_갑각류,CATEGORY_1_기타 수입식품,CATEGORY_1_알 곤이류,CATEGORY_1_어류,CATEGORY_1_연체류 해물모듬,CATEGORY_1_젓갈류 해조류 해파리,CATEGORY_1_패류 멍게류,CATEGORY_2_가리비,CATEGORY_2_가물치,CATEGORY_2_가사리,CATEGORY_2_가오리,CATEGORY_2_가자미,CATEGORY_2_가재 랍스타,CATEGORY_2_갈치,CATEGORY_2_갑오징어,CATEGORY_2_개복치,CATEGORY_2_개불,CATEGORY_2_게,CATEGORY_2_고동,CATEGORY_2_고등어,CATEGORY_2_고시래기,CATEGORY_2_골뱅이,CATEGORY_2_광어 넙치,CATEGORY_2_김,CATEGORY_2_꼬막,CATEGORY_2_꼴뚜기,CATEGORY_2_꽁치 학꽁치,CATEGORY_2_꽁치 확꽁치,CATEGORY_2_낙지,CATEGORY_2_날치알,CATEGORY_2_남극빙어,CATEGORY_2_노래미,CATEGORY_2_농어,CATEGORY_2_능성어 붉바리 바리,CATEGORY_2_다시마,CATEGORY_2_달고기,CATEGORY_2_대구,CATEGORY_2_대구알,CATEGORY_2_도다리,CATEGORY_2_도미 감성돔 돔류,CATEGORY_2_망둑어,CATEGORY_2_멍게,CATEGORY_2_메기 동자개,CATEGORY_2_메로,CATEGORY_2_멸치,CATEGORY_2_명란(명태알),CATEGORY_2_명태,CATEGORY_2_문어,CATEGORY_2_물메기(곰치),CATEGORY_2_미꾸라지,CATEGORY_2_미역,CATEGORY_2_민물붕어,CATEGORY_2_민어 점성어,CATEGORY_2_밀크피시,CATEGORY_2_바지락,CATEGORY_2_방어,CATEGORY_2_밴댕이,CATEGORY_2_버터플라이 킹피쉬,CATEGORY_2_벤자리 알롱이,CATEGORY_2_병어,CATEGORY_2_보리멸,CATEGORY_2_복어,CATEGORY_2_부세,CATEGORY_2_붉평치(만다이 꽃돔),CATEGORY_2_삼치,CATEGORY_2_상어 고래,CATEGORY_2_새우,CATEGORY_2_샛돔류알,CATEGORY_2_서대 박대 페루다,CATEGORY_2_성게알,CATEGORY_2_소라,CATEGORY_2_송어,CATEGORY_2_쏘가리,CATEGORY_2_아귀,CATEGORY_2_양미리 정어리,CATEGORY_2_양태,CATEGORY_2_어류 기타,CATEGORY_2_연어,CATEGORY_2_연어알,CATEGORY_2_열빙어(시샤모),CATEGORY_2_열빙어(시샤모)알,CATEGORY_2_오징어,CATEGORY_2_옥돔,CATEGORY_2_우럭 볼락,CATEGORY_2_우렁 다슬기,CATEGORY_2_은민대구알,CATEGORY_2_임연수,CATEGORY_2_잉어,CATEGORY_2_자라,CATEGORY_2_장어,CATEGORY_2_재첩,CATEGORY_2_적어 눈볼대,CATEGORY_2_전갱기 매가리,CATEGORY_2_전갱이 매가리,CATEGORY_2_전복,CATEGORY_2_전어,CATEGORY_2_조개,CATEGORY_2_조개 백합 대합,CATEGORY_2_조기 보구치 강다리,CATEGORY_2_조기 보구치 강다리.1,CATEGORY_2_준치,CATEGORY_2_줄비늘치,CATEGORY_2_쥐치,CATEGORY_2_쭈꾸미,CATEGORY_2_참치 새치류,CATEGORY_2_참치 새치류.1,CATEGORY_2_청어,CATEGORY_2_청어알,CATEGORY_2_톳,CATEGORY_2_틸라피아(역돔),CATEGORY_2_팡가시우스(홍메기),CATEGORY_2_해물모둠,CATEGORY_2_해삼,CATEGORY_2_해초,CATEGORY_2_해파리,CATEGORY_2_호끼류,CATEGORY_2_호키(새꼬리민태)알,CATEGORY_2_호키류,CATEGORY_2_홍어,CATEGORY_2_홍합,P_NAME_PANGASIUS메기,P_NAME_가다랑어,P_NAME_가라지,P_NAME_가리비,P_NAME_가무락조개,P_NAME_가물치,P_NAME_가시배새우,P_NAME_가시투성왕게,P_NAME_가오리,P_NAME_가이석태속,P_NAME_가자미,P_NAME_각시가자미,P_NAME_갈치,P_NAME_감성돔,P_NAME_갑오징어,P_NAME_강담돔,P_NAME_강도다리,P_NAME_개량조개,P_NAME_개복치,P_NAME_개불,P_NAME_개조개,P_NAME_갯고둥,P_NAME_갯장어,P_NAME_검복,P_NAME_검정가자미,P_NAME_검정볼락,P_NAME_게,P_NAME_고등어,P_NAME_골뱅이,P_NAME_곱사연어,P_NAME_곱상어,P_NAME_구라미,P_NAME_귀상어,P_NAME_금눈돔,P_NAME_금색돔,P_NAME_기름치,P_NAME_기타민어류,P_NAME_기타병어류,P_NAME_긴가이석태,P_NAME_김,P_NAME_까지가자미,P_NAME_까치복,P_NAME_까칠복,P_NAME_깜장북방대합,P_NAME_꼬리검정민태,P_NAME_꼬막,P_NAME_꼬시래기,P_NAME_꽁치,P_NAME_꽃게,P_NAME_낙지,P_NAME_날개다랑어,P_NAME_날치알,P_NAME_남방대구,P_NAME_남방참다랑어,P_NAME_넙치,P_NAME_녹새치,P_NAME_논고둥,P_NAME_농어,P_NAME_눈다랑어,P_NAME_능성어,P_NAME_다슬기,P_NAME_다시마,P_NAME_달고기,P_NAME_닭새우,P_NAME_대게,P_NAME_대구,P_NAME_대구알,P_NAME_대두이석태,P_NAME_대서양꼬마민어,P_NAME_대서양먹장어,P_NAME_대서양붉은볼락,P_NAME_대서양연어,P_NAME_대서양조기속,P_NAME_대서양참다랑어,P_NAME_던지네스게,P_NAME_도화새우,P_NAME_돌가사리,P_NAME_돌가자미,P_NAME_돌돔,P_NAME_돔,P_NAME_동갈돗돔,P_NAME_동갈횟대,P_NAME_동등이석태,P_NAME_동자개,P_NAME_동죽,P_NAME_돛새치,P_NAME_두점박이민꽃게,P_NAME_드렁허리,P_NAME_등목어,P_NAME_마설가자미,P_NAME_마소치가자미,P_NAME_맛조개,P_NAME_망둑어,P_NAME_매듭가자미,P_NAME_매미새우,P_NAME_먹볼락,P_NAME_먹장어,P_NAME_멍게,P_NAME_메기,P_NAME_멸치,P_NAME_명태,P_NAME_명태알,P_NAME_문어,P_NAME_물메기,P_NAME_미꾸라지,P_NAME_미역,P_NAME_민꽃게,P_NAME_민대구,P_NAME_민들조개,P_NAME_민물가재,P_NAME_민물새우,P_NAME_민밀복,P_NAME_민어,P_NAME_민태,P_NAME_밀크피시,P_NAME_바다가재,P_NAME_바닷가재,P_NAME_바라문디,P_NAME_바리,"P_NAME_바리,교잡종",P_NAME_바지락,P_NAME_밤색무늬조개,P_NAME_방어,P_NAME_백합,"P_NAME_백합,MERCENARIA MERCENARIA",P_NAME_밴댕이,P_NAME_뱀장어,P_NAME_버들붕어,P_NAME_버터플라이 킹피쉬,P_NAME_벤자리,P_NAME_벵에돔,P_NAME_병어,P_NAME_병치매가리,P_NAME_보리멸,P_NAME_볼락,P_NAME_부세,P_NAME_북방대합,P_NAME_북쪽분홍새우,P_NAME_붉돔,P_NAME_붉은대게,P_NAME_붉은메기,P_NAME_붉은이석태,P_NAME_붉평치,P_NAME_붕어,P_NAME_붕장어,P_NAME_블루화이팅,P_NAME_비너스백합,P_NAME_비단조개,P_NAME_뿔가자미,P_NAME_삼치,P_NAME_상어,P_NAME_새꼬리민태알,P_NAME_새꼬막,P_NAME_새뱅이,P_NAME_새우,P_NAME_새조개,P_NAME_샛돔,P_NAME_샛돔류알,P_NAME_서대,P_NAME_성게알,P_NAME_세네갈가이석태,P_NAME_소라,P_NAME_소주목탁가자미,P_NAME_송어,P_NAME_수조기,P_NAME_스피노잠,P_NAME_식용자라,P_NAME_실꼬리돔,P_NAME_쌍지붕어,P_NAME_쏘가리,P_NAME_아귀,P_NAME_아담스백합,P_NAME_아르헨티나붉은새우,P_NAME_양볼락,P_NAME_양초선홍치,P_NAME_양태,P_NAME_어름돔,P_NAME_얼룩볼락,P_NAME_연어,P_NAME_연어알,P_NAME_열빙어,P_NAME_열빙어알,P_NAME_영상가이석태,P_NAME_오징어,P_NAME_옥덩굴,P_NAME_옥돔,P_NAME_옥두어,P_NAME_왕게,P_NAME_왕게붙이,P_NAME_왕연어,P_NAME_우뭇가사리,P_NAME_위고둥,P_NAME_유럽물레고둥,P_NAME_은대구,P_NAME_은민대구,P_NAME_은민대구알,P_NAME_은밀복,P_NAME_은연어,P_NAME_은행게,P_NAME_이스라엘잉어,P_NAME_임연수어,P_NAME_잉어,P_NAME_자바리,P_NAME_자이언트그루퍼,P_NAME_자주복,P_NAME_장문볼락,P_NAME_장성베도라치,P_NAME_장수기름가자미,P_NAME_재첩,P_NAME_잿방어,P_NAME_적돔,P_NAME_적새우,P_NAME_전갱이,"P_NAME_전갱이,POMPANO",P_NAME_전복,P_NAME_전어,P_NAME_젓새우,P_NAME_정어리,P_NAME_조피볼락,P_NAME_주꾸미,P_NAME_주름백합,P_NAME_준치,P_NAME_줄민태,P_NAME_줄비늘치,P_NAME_쥐노래미,P_NAME_쥐돔,P_NAME_쥐치,P_NAME_진홍퉁돔,P_NAME_진환도상어,P_NAME_참게,P_NAME_참다랑어,P_NAME_참돔,P_NAME_참복,P_NAME_참조기,P_NAME_첨치가자미,P_NAME_청각,P_NAME_청대구,P_NAME_청상아리,P_NAME_청새리상어,P_NAME_청새치,P_NAME_청어,P_NAME_청어알,P_NAME_청회볼락,P_NAME_체장메기,P_NAME_칠성장어,P_NAME_코끼리조개,P_NAME_코드아이스피쉬,P_NAME_코토니,P_NAME_쿠자조기,P_NAME_큰구슬우렁이,P_NAME_큰민어,P_NAME_큰실말,P_NAME_큰징거미새우,P_NAME_키조개,P_NAME_태평양먹장어,P_NAME_털게,P_NAME_톱날꽃게,P_NAME_톳,P_NAME_틸라피아,P_NAME_파타고니아이빨고기,P_NAME_프로펠러조개,P_NAME_피뿔고둥,P_NAME_피조개,P_NAME_학공치,P_NAME_해물혼합,P_NAME_해삼,P_NAME_해파리,P_NAME_홍감펭,P_NAME_홍다리얼룩새우,P_NAME_홍민어,P_NAME_홍서대,P_NAME_홍어,"P_NAME_홍연어,Red salmon",P_NAME_홍합,P_NAME_화살오징어,P_NAME_황다랑어,P_NAME_황돔,P_NAME_황새치,P_NAME_황적퉁돔,P_NAME_회초리꼬리민태,P_NAME_흑기흉상어,P_NAME_흑밀복,P_NAME_흑점샛돔알,P_NAME_흑점줄전갱이,P_NAME_흰꼴뚜기,P_NAME_흰다리새우
0,아르헨티나,7.48,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2015,53,79,False,1,,,,37.08,5.352941,7.973109,13.732174,100.22,101.26,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,아르헨티나,7.48,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2015,53,79,False,1,,,,37.08,5.352941,7.973109,13.732174,100.39,100.76,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,바레인,2.92,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2015,53,596,False,1,,,,37.08,5.352941,7.973109,13.732174,100.22,101.26,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,바레인,2.92,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2015,53,596,False,1,,,,37.08,5.352941,7.973109,13.732174,100.39,100.76,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,바레인,3.356352,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2015,53,596,False,2,,,,37.08,5.352941,7.973109,13.732174,100.22,101.26,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
def major_ctry(data, ctry_name):
  '''
  주요 국가 DataFrame
  data : 기본 데이터 (df_train)
  ctry_name : 주요 수출국 (칠레, 중국, 노르웨이, 페루, 태국, 베트남)
  '''
  
  for num,_ in enumerate(ctry_name):

    if ctry_name[num] == '칠레':
      chile = data[data['CTRY_2'] == ctry_name[num]]

    elif ctry_name[num] == '중국':
       china = data[data['CTRY_2'] == ctry_name[num]]

    elif ctry_name[num] == '노르웨이':
      nor = data[data['CTRY_2'] == ctry_name[num]]

    elif ctry_name[num] == '페루':
      peru = data[data['CTRY_2'] == ctry_name[num]]    

    elif ctry_name[num] == '태국':
      thai = data[data['CTRY_2'] == ctry_name[num]] 

    elif ctry_name[num] == '베트남':
      vit = data[data['CTRY_2'] == ctry_name[num]]

  return chile, china, nor, peru, thai, vit

def is_na(data, ctry_name):
  chile, china, nor, peru, thai, vit = major_ctry(df_train, ctry_name)
  nation_list = [chile, china, nor, peru, thai, vit]
  for nation in nation_list:
    for col in nation.columns:
      if nation[col].isna().sum() > 0 :
        print(f'{col} >> ', nation[col].isna().sum())
    print('#####')

In [None]:
is_na(df_train, ctry_name) # chile, china,nor, peru, thai, vit 결측치 존재

rain >>  46
wind >>  46
temperature >>  46
#####
rain >>  1679
wind >>  1679
temperature >>  1679
#####
#####
rain >>  1151
wind >>  1151
temperature >>  1151
#####
rain >>  53
wind >>  53
temperature >>  53
#####
rain >>  200
wind >>  200
temperature >>  200
#####


In [None]:
has_na = [chile, china, peru, thai, vit]

In [None]:
# def fill_na(data, mode = None):
#   '''
#   data : 결측값을 가지고 있는 국가(has_na)
#   mode : 결측값을 채우는 방법 
#   - ffill
#   - mean()
#   - median()
#   '''
#   if mode == 'ffill':
#     for num, nation in enumerate(data):
#       data[num] = data[num].fillna(method='ffill')


#   elif mode == 'mean':
#     for num, nation in enumerate(data):
#       for col in nation.columns:
#         if data[num][col].isna().sum() > 0 :
#           data[num].fillna(data[num][col].mean(), inplace=True)

#   elif mode == 'median':
#     for num, nation in enumerate(data):
#       for col in nation.columns:
#         if data[num][col].isna().sum() > 0 :
#           data[num].fillna(data[num][col].median(), inplace=True)

In [None]:
for num, nation in enumerate(has_na):
  for col in nation.columns:
    if has_na[num][col].isna().sum() > 0 :
      has_na[num].fillna(has_na[num][col].mean(), inplace=True)

In [None]:
final_data = [chile, china, peru, thai, nor, vit]

## BASIC MODEL

In [None]:
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV

# Regression
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.kernel_ridge import KernelRidge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, BaggingRegressor, GradientBoostingRegressor, AdaBoostRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

from sklearn.metrics import mean_squared_log_error, mean_squared_error,  r2_score, mean_absolute_error

pd.set_option('display.max_rows', 72000)
pd.set_option('display.max_columns', 650)

In [None]:
def get_rmse(model):
    pred = model.predict(X_test)
    mse = mean_squared_error(y_test , pred)
    rmse = np.sqrt(mse)
    print('{} RMSE: {}'.format(model.__class__.__name__,np.round(rmse, 3)))
    return rmse

def get_rmses(models):
    rmses = [ ]
    for model in models:
        rmse = get_rmse(model)
        rmses.append(rmse)
    return rmses


from sklearn.model_selection import cross_val_score

def get_avg_rmse_cv(models):
    for model in models:
        rmse_list = np.sqrt(-cross_val_score(model, x_features, y_target,
                                             scoring="neg_mean_squared_error", cv = 5))
        rmse_avg = np.mean(rmse_list)
        print('\n{0} CV RMSE 값 리스트: {1}'.format( model.__class__.__name__, np.round(rmse_list, 3)))
        print('{0} CV 평균 RMSE 값: {1}'.format( model.__class__.__name__, np.round(rmse_avg, 3)))


from sklearn.model_selection import GridSearchCV

def print_best_params(model, params):
    grid_model = GridSearchCV(model, param_grid=params, 
                              scoring='neg_mean_squared_error', cv=5)
    grid_model.fit(x_features, y_target)
    rmse = np.sqrt(-1* grid_model.best_score_)
    print('{0} 5 CV 시 최적 평균 RMSE 값: {1}, 최적 alpha:{2}'.format(model.__class__.__name__,
                                        np.round(rmse, 4), grid_model.best_params_))
    return grid_model.best_estimator_

def get_rmse_pred(preds):
    for key in preds.keys():
        pred_value = preds[key]
        mse = mean_squared_error(y_test , pred_value)
        rmse = np.sqrt(mse)
        print('{0} 모델의 RMSE: {1}'.format(key, rmse))



### Chile

In [None]:
x_features = chile.drop(['P_PRICE','CTRY_2'], axis=1 , inplace=False)
y_target = chile['P_PRICE']

X_train, X_test, y_train, y_test = train_test_split(x_features, y_target, test_size=0.2, random_state=0)

lr_reg = LinearRegression()
lr_reg.fit(X_train, y_train)

ridge_reg = Ridge()
ridge_reg.fit(X_train, y_train)

lasso_reg = Lasso()
lasso_reg.fit(X_train, y_train)

models = [lr_reg, ridge_reg, lasso_reg]
get_rmses(models)


models = [lr_reg, ridge_reg, lasso_reg]
get_avg_rmse_cv(models)

ridge_params = { 'alpha':[0.05, 0.1, 1, 5, 8, 10, 12, 15, 20] }
lasso_params = { 'alpha':[0.001, 0.005, 0.008, 0.05, 0.03, 0.1, 0.5, 1,5, 10] }
best_rige = print_best_params(ridge_reg, ridge_params)
best_lasso = print_best_params(lasso_reg, lasso_params)

lr_reg = LinearRegression()
lr_reg.fit(X_train, y_train)
ridge_reg = Ridge(alpha=12)
ridge_reg.fit(X_train, y_train)
lasso_reg = Lasso(alpha=0.001)
lasso_reg.fit(X_train, y_train)

models = [lr_reg, ridge_reg, lasso_reg]
get_rmses(models)

xgb_params = {'n_estimators':[1000]}
xgb_reg = XGBRegressor(n_estimators=1000, learning_rate=0.05, 
                       colsample_bytree=0.5, subsample=0.8)
best_xgb = print_best_params(xgb_reg, xgb_params)

# 개별 모델의 학습
ridge_reg = Ridge(alpha=1)
ridge_reg.fit(X_train, y_train)
lasso_reg = Lasso(alpha=0.008)
lasso_reg.fit(X_train, y_train)
# 개별 모델 예측
ridge_pred = ridge_reg.predict(X_test)
lasso_pred = lasso_reg.predict(X_test)

# 개별 모델 예측값 혼합으로 최종 예측값 도출
pred = 0.4 * ridge_pred + 0.6 * lasso_pred
preds = {'최종 혼합': pred,
         'Ridge': ridge_pred,
         'Lasso': lasso_pred}
#최종 혼합 모델, 개별모델의 RMSE 값 출력
get_rmse_pred(preds)


xgb_reg = XGBRegressor(n_estimators=1000, learning_rate=0.05, 
                       colsample_bytree=0.5, subsample=0.8)
lgbm_reg = LGBMRegressor(n_estimators=1000, learning_rate=0.05, num_leaves=4, 
                         subsample=0.6, colsample_bytree=0.4, reg_lambda=10, n_jobs=-1)
xgb_reg.fit(X_train, y_train)
lgbm_reg.fit(X_train, y_train)
xgb_pred = xgb_reg.predict(X_test)
lgbm_pred = lgbm_reg.predict(X_test)

pred = 0.5 * xgb_pred + 0.5 * lgbm_pred
preds = {'최종 혼합': pred,
         'XGBM': xgb_pred,
         'LGBM': lgbm_pred}
        
get_rmse_pred(preds)

LinearRegression RMSE: 0.742
Ridge RMSE: 0.77
Lasso RMSE: 5.615

LinearRegression CV RMSE 값 리스트: [6.89343412e+07 3.51285920e+08 1.34994284e+07 1.10200000e+00
 4.64969551e+07]
LinearRegression CV 평균 RMSE 값: 96043329.179

Ridge CV RMSE 값 리스트: [2.53  1.506 1.585 1.03  1.326]
Ridge CV 평균 RMSE 값: 1.595

Lasso CV RMSE 값 리스트: [1.938 1.917 4.195 6.416 6.976]
Lasso CV 평균 RMSE 값: 4.288
Ridge 5 CV 시 최적 평균 RMSE 값: 1.6731, 최적 alpha:{'alpha': 1}
Lasso 5 CV 시 최적 평균 RMSE 값: 1.4522, 최적 alpha:{'alpha': 0.008}
LinearRegression RMSE: 0.742
Ridge RMSE: 1.807
Lasso RMSE: 0.725
XGBRegressor 5 CV 시 최적 평균 RMSE 값: 1.5327, 최적 alpha:{'n_estimators': 1000}
최종 혼합 모델의 RMSE: 0.7782344611272731
Ridge 모델의 RMSE: 0.7697269981624738
Lasso 모델의 RMSE: 0.7952616474414349
최종 혼합 모델의 RMSE: 1.029052401633401
XGBM 모델의 RMSE: 0.5081138108951823
LGBM 모델의 RMSE: 1.9232017119499851


### China

In [None]:
x_features = china.drop(['P_PRICE','CTRY_2'], axis=1 , inplace=False)
y_target = china['P_PRICE']

X_train, X_test, y_train, y_test = train_test_split(x_features, y_target, test_size=0.2, random_state=0)
# LinearRegression, Ridge, Lasso 학습, 예측, 평가
lr_reg = LinearRegression()
lr_reg.fit(X_train, y_train)

ridge_reg = Ridge()
ridge_reg.fit(X_train, y_train)

lasso_reg = Lasso()
lasso_reg.fit(X_train, y_train)

models = [lr_reg, ridge_reg, lasso_reg]
get_rmses(models)


models = [lr_reg, ridge_reg, lasso_reg]
get_avg_rmse_cv(models)

ridge_params = { 'alpha':[0.05, 0.1, 1, 5, 8, 10, 12, 15, 20] }
lasso_params = { 'alpha':[0.001, 0.005, 0.008, 0.05, 0.03, 0.1, 0.5, 1,5, 10] }
best_rige = print_best_params(ridge_reg, ridge_params)
best_lasso = print_best_params(lasso_reg, lasso_params)

lr_reg = LinearRegression()
lr_reg.fit(X_train, y_train)
ridge_reg = Ridge(alpha=12)
ridge_reg.fit(X_train, y_train)
lasso_reg = Lasso(alpha=0.001)
lasso_reg.fit(X_train, y_train)

models = [lr_reg, ridge_reg, lasso_reg]
get_rmses(models)

xgb_params = {'n_estimators':[1000]}
xgb_reg = XGBRegressor(n_estimators=1000, learning_rate=0.05, 
                       colsample_bytree=0.5, subsample=0.8)
best_xgb = print_best_params(xgb_reg, xgb_params)

# 개별 모델의 학습
ridge_reg = Ridge(alpha=1)
ridge_reg.fit(X_train, y_train)
lasso_reg = Lasso(alpha=0.001)
lasso_reg.fit(X_train, y_train)
# 개별 모델 예측
ridge_pred = ridge_reg.predict(X_test)
lasso_pred = lasso_reg.predict(X_test)

# 개별 모델 예측값 혼합으로 최종 예측값 도출
pred = 0.4 * ridge_pred + 0.6 * lasso_pred
preds = {'최종 혼합': pred,
         'Ridge': ridge_pred,
         'Lasso': lasso_pred}
#최종 혼합 모델, 개별모델의 RMSE 값 출력
get_rmse_pred(preds)


xgb_reg = XGBRegressor(n_estimators=1000, learning_rate=0.05, 
                       colsample_bytree=0.5, subsample=0.8)
lgbm_reg = LGBMRegressor(n_estimators=1000, learning_rate=0.05, num_leaves=4, 
                         subsample=0.6, colsample_bytree=0.4, reg_lambda=10, n_jobs=-1)
xgb_reg.fit(X_train, y_train)
lgbm_reg.fit(X_train, y_train)
xgb_pred = xgb_reg.predict(X_test)
lgbm_pred = lgbm_reg.predict(X_test)

pred = 0.5 * xgb_pred + 0.5 * lgbm_pred
preds = {'최종 혼합': pred,
         'XGBM': xgb_pred,
         'LGBM': lgbm_pred}
        
get_rmse_pred(preds)

LinearRegression RMSE: 14813.291
Ridge RMSE: 2.251
Lasso RMSE: 5.981

LinearRegression CV RMSE 값 리스트: [5.79960610e+04 4.57961200e+03 3.14479053e+07 2.98949254e+05
 4.03281550e+04]
LinearRegression CV 평균 RMSE 값: 6369951.686

Ridge CV RMSE 값 리스트: [1.912 1.724 2.589 1.883 1.847]
Ridge CV 평균 RMSE 값: 1.991

Lasso CV RMSE 값 리스트: [4.31  6.142 6.275 6.263 5.324]
Lasso CV 평균 RMSE 값: 5.663
Ridge 5 CV 시 최적 평균 RMSE 값: 2.0144, 최적 alpha:{'alpha': 1}
Lasso 5 CV 시 최적 평균 RMSE 값: 2.0474, 최적 alpha:{'alpha': 0.001}
LinearRegression RMSE: 14813.291
Ridge RMSE: 2.35
Lasso RMSE: 2.267
XGBRegressor 5 CV 시 최적 평균 RMSE 값: 1.8142, 최적 alpha:{'n_estimators': 1000}
최종 혼합 모델의 RMSE: 2.257285840911825
Ridge 모델의 RMSE: 2.2512461938067667
Lasso 모델의 RMSE: 2.2670516518320656
최종 혼합 모델의 RMSE: 1.807495676500074
XGBM 모델의 RMSE: 1.5866057907253532
LGBM 모델의 RMSE: 2.091702125366102


### Peru

In [None]:
x_features = peru.drop(['P_PRICE','CTRY_2'], axis=1 , inplace=False)
y_target = peru['P_PRICE']

X_train, X_test, y_train, y_test = train_test_split(x_features, y_target, test_size=0.2, random_state=0)

lr_reg = LinearRegression()
lr_reg.fit(X_train, y_train)

ridge_reg = Ridge()
ridge_reg.fit(X_train, y_train)

lasso_reg = Lasso()
lasso_reg.fit(X_train, y_train)

models = [lr_reg, ridge_reg, lasso_reg]
get_rmses(models)


models = [lr_reg, ridge_reg, lasso_reg]
get_avg_rmse_cv(models)

ridge_params = { 'alpha':[0.05, 0.1, 1, 5, 8, 10, 12, 15, 20] }
lasso_params = { 'alpha':[0.001, 0.005, 0.008, 0.05, 0.03, 0.1, 0.5, 1,5, 10] }
best_rige = print_best_params(ridge_reg, ridge_params)
best_lasso = print_best_params(lasso_reg, lasso_params)

models = [lr_reg, ridge_reg, lasso_reg]
get_rmses(models)

xgb_params = {'n_estimators':[1000]}
xgb_reg = XGBRegressor(n_estimators=1000, learning_rate=0.05, 
                       colsample_bytree=0.5, subsample=0.8)
best_xgb = print_best_params(xgb_reg, xgb_params)

# 개별 모델의 학습
ridge_reg = Ridge(alpha=5)
ridge_reg.fit(X_train, y_train)
lasso_reg = Lasso(alpha=0.05)
lasso_reg.fit(X_train, y_train)
# 개별 모델 예측
ridge_pred = ridge_reg.predict(X_test)
lasso_pred = lasso_reg.predict(X_test)

# 개별 모델 예측값 혼합으로 최종 예측값 도출
pred = 0.4 * ridge_pred + 0.6 * lasso_pred
preds = {'최종 혼합': pred,
         'Ridge': ridge_pred,
         'Lasso': lasso_pred}
#최종 혼합 모델, 개별모델의 RMSE 값 출력
get_rmse_pred(preds)


xgb_reg = XGBRegressor(n_estimators=1000, learning_rate=0.05, 
                       colsample_bytree=0.5, subsample=0.8)
lgbm_reg = LGBMRegressor(n_estimators=1000, learning_rate=0.05, num_leaves=4, 
                         subsample=0.6, colsample_bytree=0.4, reg_lambda=10, n_jobs=-1)
xgb_reg.fit(X_train, y_train)
lgbm_reg.fit(X_train, y_train)
xgb_pred = xgb_reg.predict(X_test)
lgbm_pred = lgbm_reg.predict(X_test)

pred = 0.5 * xgb_pred + 0.5 * lgbm_pred
preds = {'최종 혼합': pred,
         'XGBM': xgb_pred,
         'LGBM': lgbm_pred}
        
get_rmse_pred(preds)

LinearRegression RMSE: 2.785
Ridge RMSE: 2.86
Lasso RMSE: 8.707

LinearRegression CV RMSE 값 리스트: [1.741 1.24  2.261 3.261 2.955]
LinearRegression CV 평균 RMSE 값: 2.292

Ridge CV RMSE 값 리스트: [1.815 1.264 2.334 3.333 2.74 ]
Ridge CV 평균 RMSE 값: 2.297

Lasso CV RMSE 값 리스트: [ 4.6    4.852  3.582  9.435 20.136]
Lasso CV 평균 RMSE 값: 8.521
Ridge 5 CV 시 최적 평균 RMSE 값: 2.4016, 최적 alpha:{'alpha': 5}
Lasso 5 CV 시 최적 평균 RMSE 값: 2.3411, 최적 alpha:{'alpha': 0.05}
LinearRegression RMSE: 2.785
Ridge RMSE: 2.86
Lasso RMSE: 8.707
XGBRegressor 5 CV 시 최적 평균 RMSE 값: 2.5591, 최적 alpha:{'n_estimators': 1000}
최종 혼합 모델의 RMSE: 3.005228639238051
Ridge 모델의 RMSE: 3.068815737436969
Lasso 모델의 RMSE: 2.979342319809642
최종 혼합 모델의 RMSE: 2.083123793722712
XGBM 모델의 RMSE: 1.9567606845407168
LGBM 모델의 RMSE: 2.269570927027802


### Nor

In [None]:
x_features = nor.drop(['P_PRICE','CTRY_2'], axis=1 , inplace=False)
y_target = nor['P_PRICE']

X_train, X_test, y_train, y_test = train_test_split(x_features, y_target, test_size=0.2, random_state=0)

lr_reg = LinearRegression()
lr_reg.fit(X_train, y_train)

ridge_reg = Ridge()
ridge_reg.fit(X_train, y_train)

lasso_reg = Lasso()
lasso_reg.fit(X_train, y_train)

models = [lr_reg, ridge_reg, lasso_reg]
get_rmses(models)


models = [lr_reg, ridge_reg, lasso_reg]
get_avg_rmse_cv(models)

ridge_params = { 'alpha':[0.05, 0.1, 1, 5, 8, 10, 12, 15, 20] }
lasso_params = { 'alpha':[0.001, 0.005, 0.008, 0.05, 0.03, 0.1, 0.5, 1,5, 10] }
best_rige = print_best_params(ridge_reg, ridge_params)
best_lasso = print_best_params(lasso_reg, lasso_params)

models = [lr_reg, ridge_reg, lasso_reg]
get_rmses(models)

xgb_params = {'n_estimators':[1000]}
xgb_reg = XGBRegressor(n_estimators=1000, learning_rate=0.05, 
                       colsample_bytree=0.5, subsample=0.8)
best_xgb = print_best_params(xgb_reg, xgb_params)

# 개별 모델의 학습
ridge_reg = Ridge(alpha=1)
ridge_reg.fit(X_train, y_train)
lasso_reg = Lasso(alpha=0.005)
lasso_reg.fit(X_train, y_train)
# 개별 모델 예측
ridge_pred = ridge_reg.predict(X_test)
lasso_pred = lasso_reg.predict(X_test)

# 개별 모델 예측값 혼합으로 최종 예측값 도출
pred = 0.4 * ridge_pred + 0.6 * lasso_pred
preds = {'최종 혼합': pred,
         'Ridge': ridge_pred,
         'Lasso': lasso_pred}
#최종 혼합 모델, 개별모델의 RMSE 값 출력
get_rmse_pred(preds)


xgb_reg = XGBRegressor(n_estimators=1000, learning_rate=0.05, 
                       colsample_bytree=0.5, subsample=0.8)
lgbm_reg = LGBMRegressor(n_estimators=1000, learning_rate=0.05, num_leaves=4, 
                         subsample=0.6, colsample_bytree=0.4, reg_lambda=10, n_jobs=-1)
xgb_reg.fit(X_train, y_train)
lgbm_reg.fit(X_train, y_train)
xgb_pred = xgb_reg.predict(X_test)
lgbm_pred = lgbm_reg.predict(X_test)

pred = 0.5 * xgb_pred + 0.5 * lgbm_pred
preds = {'최종 혼합': pred,
         'XGBM': xgb_pred,
         'LGBM': lgbm_pred}
        
get_rmse_pred(preds)

LinearRegression RMSE: 6.606
Ridge RMSE: 6.4
Lasso RMSE: 7.81

LinearRegression CV RMSE 값 리스트: [2.46755116e+07 4.75668950e+09 1.94504747e+08 3.95800000e+00
 3.53600000e+00]
LinearRegression CV 평균 RMSE 값: 995173952.286

Ridge CV RMSE 값 리스트: [ 7.4   12.403  9.935  3.92   3.407]
Ridge CV 평균 RMSE 값: 7.413

Lasso CV RMSE 값 리스트: [ 8.729 13.521 11.653  6.398  6.845]
Lasso CV 평균 RMSE 값: 9.429
Ridge 5 CV 시 최적 평균 RMSE 값: 8.1766, 최적 alpha:{'alpha': 1}
Lasso 5 CV 시 최적 평균 RMSE 값: 8.1671, 최적 alpha:{'alpha': 0.005}
LinearRegression RMSE: 6.606
Ridge RMSE: 6.4
Lasso RMSE: 7.81
XGBRegressor 5 CV 시 최적 평균 RMSE 값: 10.7756, 최적 alpha:{'n_estimators': 1000}
최종 혼합 모델의 RMSE: 6.396167417716956
Ridge 모델의 RMSE: 6.39979673710711
Lasso 모델의 RMSE: 6.393928844211526
최종 혼합 모델의 RMSE: 3.6931076180748477
XGBM 모델의 RMSE: 3.651378315603429
LGBM 모델의 RMSE: 4.165907661308296


### Thai

In [None]:
x_features = thai.drop(['P_PRICE','CTRY_2'], axis=1 , inplace=False)
y_target = thai['P_PRICE']

X_train, X_test, y_train, y_test = train_test_split(x_features, y_target, test_size=0.2, random_state=0)

lr_reg = LinearRegression()
lr_reg.fit(X_train, y_train)

ridge_reg = Ridge()
ridge_reg.fit(X_train, y_train)

lasso_reg = Lasso()
lasso_reg.fit(X_train, y_train)

models = [lr_reg, ridge_reg, lasso_reg]
get_rmses(models)


models = [lr_reg, ridge_reg, lasso_reg]
get_avg_rmse_cv(models)

ridge_params = { 'alpha':[0.05, 0.1, 1, 5, 8, 10, 12, 15, 20] }
lasso_params = { 'alpha':[0.001, 0.005, 0.008, 0.05, 0.03, 0.1, 0.5, 1,5, 10] }
best_rige = print_best_params(ridge_reg, ridge_params)
best_lasso = print_best_params(lasso_reg, lasso_params)

models = [lr_reg, ridge_reg, lasso_reg]
get_rmses(models)

xgb_params = {'n_estimators':[1000]}
xgb_reg = XGBRegressor(n_estimators=1000, learning_rate=0.05, 
                       colsample_bytree=0.5, subsample=0.8)
best_xgb = print_best_params(xgb_reg, xgb_params)

# 개별 모델의 학습
ridge_reg = Ridge(alpha=1)
ridge_reg.fit(X_train, y_train)
lasso_reg = Lasso(alpha=0.001)
lasso_reg.fit(X_train, y_train)
# 개별 모델 예측
ridge_pred = ridge_reg.predict(X_test)
lasso_pred = lasso_reg.predict(X_test)

# 개별 모델 예측값 혼합으로 최종 예측값 도출
pred = 0.4 * ridge_pred + 0.6 * lasso_pred
preds = {'최종 혼합': pred,
         'Ridge': ridge_pred,
         'Lasso': lasso_pred}
#최종 혼합 모델, 개별모델의 RMSE 값 출력
get_rmse_pred(preds)


xgb_reg = XGBRegressor(n_estimators=1000, learning_rate=0.05, 
                       colsample_bytree=0.5, subsample=0.8)
lgbm_reg = LGBMRegressor(n_estimators=1000, learning_rate=0.05, num_leaves=4, 
                         subsample=0.6, colsample_bytree=0.4, reg_lambda=10, n_jobs=-1)
xgb_reg.fit(X_train, y_train)
lgbm_reg.fit(X_train, y_train)
xgb_pred = xgb_reg.predict(X_test)
lgbm_pred = lgbm_reg.predict(X_test)

pred = 0.5 * xgb_pred + 0.5 * lgbm_pred
preds = {'최종 혼합': pred,
         'XGBM': xgb_pred,
         'LGBM': lgbm_pred}
        
get_rmse_pred(preds)

LinearRegression RMSE: 118455828.747
Ridge RMSE: 1.899
Lasso RMSE: 3.852

LinearRegression CV RMSE 값 리스트: [8.99754143e+08 5.24905645e+06 2.77311287e+07 3.96269121e+05
 3.92476538e+06]
LinearRegression CV 평균 RMSE 값: 187411072.532

Ridge CV RMSE 값 리스트: [2.119 1.713 1.692 1.974 1.677]
Ridge CV 평균 RMSE 값: 1.835

Lasso CV RMSE 값 리스트: [4.47  4.344 4.729 3.332 2.635]
Lasso CV 평균 RMSE 값: 3.902
Ridge 5 CV 시 최적 평균 RMSE 값: 1.8439, 최적 alpha:{'alpha': 1}
Lasso 5 CV 시 최적 평균 RMSE 값: 1.8824, 최적 alpha:{'alpha': 0.001}
LinearRegression RMSE: 118455828.747
Ridge RMSE: 1.899
Lasso RMSE: 3.852
XGBRegressor 5 CV 시 최적 평균 RMSE 값: 1.6219, 최적 alpha:{'n_estimators': 1000}
최종 혼합 모델의 RMSE: 1.938329097367818
Ridge 모델의 RMSE: 1.8989802217877914
Lasso 모델의 RMSE: 1.967045726914964
최종 혼합 모델의 RMSE: 1.6252784518817733
XGBM 모델의 RMSE: 1.6129092357798454
LGBM 모델의 RMSE: 1.7028912342963844


### Vit

In [None]:
x_features = vit.drop(['P_PRICE','CTRY_2'], axis=1 , inplace=False)
y_target = vit['P_PRICE']

X_train, X_test, y_train, y_test = train_test_split(x_features, y_target, test_size=0.2, random_state=0)

lr_reg = LinearRegression()
lr_reg.fit(X_train, y_train)

ridge_reg = Ridge()
ridge_reg.fit(X_train, y_train)

lasso_reg = Lasso()
lasso_reg.fit(X_train, y_train)

models = [lr_reg, ridge_reg, lasso_reg]
get_rmses(models)


models = [lr_reg, ridge_reg, lasso_reg]
get_avg_rmse_cv(models)

ridge_params = { 'alpha':[0.05, 0.1, 1, 5, 8, 10, 12, 15, 20] }
lasso_params = { 'alpha':[0.001, 0.005, 0.008, 0.05, 0.03, 0.1, 0.5, 1,5, 10] }
best_rige = print_best_params(ridge_reg, ridge_params)
best_lasso = print_best_params(lasso_reg, lasso_params)

models = [lr_reg, ridge_reg, lasso_reg]
get_rmses(models)

xgb_params = {'n_estimators':[1000]}
xgb_reg = XGBRegressor(n_estimators=1000, learning_rate=0.05, 
                       colsample_bytree=0.5, subsample=0.8)
best_xgb = print_best_params(xgb_reg, xgb_params)

# 개별 모델의 학습
ridge_reg = Ridge(alpha=1)
ridge_reg.fit(X_train, y_train)
lasso_reg = Lasso(alpha=0.001)
lasso_reg.fit(X_train, y_train)
# 개별 모델 예측
ridge_pred = ridge_reg.predict(X_test)
lasso_pred = lasso_reg.predict(X_test)

# 개별 모델 예측값 혼합으로 최종 예측값 도출
pred = 0.4 * ridge_pred + 0.6 * lasso_pred
preds = {'최종 혼합': pred,
         'Ridge': ridge_pred,
         'Lasso': lasso_pred}
#최종 혼합 모델, 개별모델의 RMSE 값 출력
get_rmse_pred(preds)


xgb_reg = XGBRegressor(n_estimators=1000, learning_rate=0.05, 
                       colsample_bytree=0.5, subsample=0.8)
lgbm_reg = LGBMRegressor(n_estimators=1000, learning_rate=0.05, num_leaves=4, 
                         subsample=0.6, colsample_bytree=0.4, reg_lambda=10, n_jobs=-1)
xgb_reg.fit(X_train, y_train)
lgbm_reg.fit(X_train, y_train)
xgb_pred = xgb_reg.predict(X_test)
lgbm_pred = lgbm_reg.predict(X_test)

pred = 0.5 * xgb_pred + 0.5 * lgbm_pred
preds = {'최종 혼합': pred,
         'XGBM': xgb_pred,
         'LGBM': lgbm_pred}
        
get_rmse_pred(preds)

LinearRegression RMSE: 1.521
Ridge RMSE: 1.519
Lasso RMSE: 5.459

LinearRegression CV RMSE 값 리스트: [1.72700000e+00 8.55118718e+06 1.31076140e+08 2.76007738e+07
 3.12047330e+08]
LinearRegression CV 평균 RMSE 값: 95855086.521

Ridge CV RMSE 값 리스트: [1.719 1.597 1.545 1.611 1.668]
Ridge CV 평균 RMSE 값: 1.628

Lasso CV RMSE 값 리스트: [5.244 6.74  5.554 5.703 5.392]
Lasso CV 평균 RMSE 값: 5.727
Ridge 5 CV 시 최적 평균 RMSE 값: 1.6292, 최적 alpha:{'alpha': 1}
Lasso 5 CV 시 최적 평균 RMSE 값: 1.6308, 최적 alpha:{'alpha': 0.001}
LinearRegression RMSE: 1.521
Ridge RMSE: 1.519
Lasso RMSE: 5.459
XGBRegressor 5 CV 시 최적 평균 RMSE 값: 1.1933, 최적 alpha:{'n_estimators': 1000}
최종 혼합 모델의 RMSE: 1.5222522763591304
Ridge 모델의 RMSE: 1.5188152317460293
Lasso 모델의 RMSE: 1.5271524151924991
최종 혼합 모델의 RMSE: 0.9041604896561873
XGBM 모델의 RMSE: 0.8154356517950935
LGBM 모델의 RMSE: 1.0303599573186826


In [None]:
nations = pd.concat(final_data, axis=0)
nations.shape

(47276, 552)

In [None]:
x_features = nations.drop(['P_PRICE','CTRY_2'], axis=1 , inplace=False)
y_target = nations['P_PRICE']

X_train, X_test, y_train, y_test = train_test_split(x_features, y_target, test_size=0.2, random_state=0)

lr_reg = LinearRegression()

lr_reg.fit(X_train, y_train)

ridge_reg = Ridge()
ridge_reg.fit(X_train, y_train)

lasso_reg = Lasso()
lasso_reg.fit(X_train, y_train)

models = [lr_reg, ridge_reg, lasso_reg]
get_rmses(models)


models = [lr_reg, ridge_reg, lasso_reg]
get_avg_rmse_cv(models)

ridge_params = { 'alpha':[0.05, 0.1, 1, 5, 8, 10, 12, 15, 20] }
lasso_params = { 'alpha':[0.001, 0.005, 0.008, 0.05, 0.03, 0.1, 0.5, 1,5, 10] }
best_rige = print_best_params(ridge_reg, ridge_params)
best_lasso = print_best_params(lasso_reg, lasso_params)

models = [lr_reg, ridge_reg, lasso_reg]
get_rmses(models)

xgb_params = {'n_estimators':[1000]}
xgb_reg = XGBRegressor(n_estimators=1000, learning_rate=0.05, 
                       colsample_bytree=0.5, subsample=0.8)
best_xgb = print_best_params(xgb_reg, xgb_params)

# 개별 모델의 학습
ridge_reg = Ridge(alpha=20)
ridge_reg.fit(X_train, y_train)
lasso_reg = Lasso(alpha=0.005)
lasso_reg.fit(X_train, y_train)
# 개별 모델 예측
ridge_pred = ridge_reg.predict(X_test)
lasso_pred = lasso_reg.predict(X_test)

# 개별 모델 예측값 혼합으로 최종 예측값 도출
pred = 0.4 * ridge_pred + 0.6 * lasso_pred
preds = {'최종 혼합': pred,
         'Ridge': ridge_pred,
         'Lasso': lasso_pred}
#최종 혼합 모델, 개별모델의 RMSE 값 출력
get_rmse_pred(preds)


xgb_reg = XGBRegressor(n_estimators=1000, learning_rate=0.05, 
                       colsample_bytree=0.5, subsample=0.8)
lgbm_reg = LGBMRegressor(n_estimators=1000, learning_rate=0.05, num_leaves=4, 
                         subsample=0.6, colsample_bytree=0.4, reg_lambda=10, n_jobs=-1)
xgb_reg.fit(X_train, y_train)
lgbm_reg.fit(X_train, y_train)
xgb_pred = xgb_reg.predict(X_test)
lgbm_pred = lgbm_reg.predict(X_test)

pred = 0.5 * xgb_pred + 0.5 * lgbm_pred
preds = {'최종 혼합': pred,
         'XGBM': xgb_pred,
         'LGBM': lgbm_pred}
        
get_rmse_pred(preds)

LinearRegression RMSE: 1302896.773
Ridge RMSE: 2.708
Lasso RMSE: 6.599

LinearRegression CV RMSE 값 리스트: [3.95500000e+00 4.61257416e+07 5.33502238e+06 2.83141000e+03
 7.43128730e+04]
LinearRegression CV 평균 RMSE 값: 10307582.446

Ridge CV RMSE 값 리스트: [ 4.196 12.314  6.092  2.907  2.734]
Ridge CV 평균 RMSE 값: 5.649

Lasso CV RMSE 값 리스트: [6.678 7.609 9.127 6.361 5.585]
Lasso CV 평균 RMSE 값: 7.072
Ridge 5 CV 시 최적 평균 RMSE 값: 5.5296, 최적 alpha:{'alpha': 20}
Lasso 5 CV 시 최적 평균 RMSE 값: 4.3923, 최적 alpha:{'alpha': 0.005}
LinearRegression RMSE: 1302896.773
Ridge RMSE: 2.708
Lasso RMSE: 6.599
XGBRegressor 5 CV 시 최적 평균 RMSE 값: 3.3358, 최적 alpha:{'n_estimators': 1000}
최종 혼합 모델의 RMSE: 2.7107541630674032
Ridge 모델의 RMSE: 2.708198356205861
Lasso 모델의 RMSE: 2.720760515358982
최종 혼합 모델의 RMSE: 1.8029995744837188
XGBM 모델의 RMSE: 1.6136159257615716
LGBM 모델의 RMSE: 2.094207944575838


- xgbm, ridge, lasso, linear, lgbm

### 비주류 국가

['칠레', '중국', '노르웨이', '페루', '태국', '베트남'] 제외

In [367]:
df_train.head()

Unnamed: 0,CTRY_2,P_PRICE,살,염장,창난,알,냉동,눈살,줄기,캐비아대용,훈제,머리_외화획득용,내장,머리살,건조,곤이,난포선,횟감,한쪽껍질붙은,지느러미,자숙,꼬리_외화획득용,목살,턱살,슬라이스(S),필렛(F),외투막,머리,간,포장횟감,다리,볼살,집게다리,활,절단,동체,냉장,개아지살,껍질,year,week,name_cnt,is_processed,import_cnt,rain,wind,temperature,oil,wind_kr,temperature_kr,water_temp_kr,cpi_total,cpi_fish,exchange,CTRY_1_가나,CTRY_1_감비아,CTRY_1_그리스,CTRY_1_그린란드,CTRY_1_기니,CTRY_1_기니비사우,CTRY_1_나미비아,CTRY_1_남아프리카 공화국,CTRY_1_네덜란드,CTRY_1_노르웨이,CTRY_1_뉴질랜드,CTRY_1_니카라과,CTRY_1_대만,CTRY_1_대한민국,CTRY_1_라이베리아,CTRY_1_라트비아,CTRY_1_러시아,CTRY_1_루마니아,CTRY_1_리비아,CTRY_1_마다가스카르,CTRY_1_말레이시아,CTRY_1_멕시코,CTRY_1_모로코,CTRY_1_모리타니,CTRY_1_모잠비크,CTRY_1_몰타,CTRY_1_미국,CTRY_1_미얀마,CTRY_1_미크로네시아 연방,CTRY_1_바누아투,CTRY_1_바레인,CTRY_1_방글라데시,CTRY_1_베네수엘라,CTRY_1_베트남,CTRY_1_불가리아,CTRY_1_브라질,CTRY_1_사우디아라비아,CTRY_1_사이프러스,CTRY_1_세네갈,CTRY_1_세이셸,CTRY_1_세인트빈센트 그레나딘,CTRY_1_소말리아,CTRY_1_수리남,CTRY_1_스리랑카,CTRY_1_스페인,CTRY_1_시에라리온,CTRY_1_싱가포르,CTRY_1_아랍에미리트,CTRY_1_아르헨티나,CTRY_1_아이슬란드,CTRY_1_아일랜드,CTRY_1_알제리,CTRY_1_앙골라,CTRY_1_에스토니아,CTRY_1_에콰도르,CTRY_1_영국,CTRY_1_오만,CTRY_1_우루과이,CTRY_1_우크라이나,CTRY_1_이란,CTRY_1_이집트,CTRY_1_이탈리아,CTRY_1_인도,CTRY_1_인도네시아,CTRY_1_일본,CTRY_1_중국,CTRY_1_칠레,CTRY_1_캐나다,CTRY_1_콜롬비아,CTRY_1_쿠바,CTRY_1_쿡 제도,CTRY_1_크로아티아,CTRY_1_키리바시,CTRY_1_태국,CTRY_1_터키,CTRY_1_튀니지,CTRY_1_파나마,CTRY_1_파키스탄,CTRY_1_파푸아뉴기니,CTRY_1_팔라우,CTRY_1_페루,CTRY_1_포르투갈,CTRY_1_포클랜드 제도,CTRY_1_프랑스,CTRY_1_피지,CTRY_1_필리핀,CTRY_1_호주,P_PURPOSE_반송품(기타),P_PURPOSE_외화획득용 원료,P_PURPOSE_자사제품제조용,P_PURPOSE_판매용,P_PURPOSE_합작,CATEGORY_1_갑각류,CATEGORY_1_기타 수입식품,CATEGORY_1_알 곤이류,CATEGORY_1_어류,CATEGORY_1_연체류 해물모듬,CATEGORY_1_젓갈류 해조류 해파리,CATEGORY_1_패류 멍게류,CATEGORY_2_가리비,CATEGORY_2_가물치,CATEGORY_2_가사리,CATEGORY_2_가오리,CATEGORY_2_가자미,CATEGORY_2_가재 랍스타,CATEGORY_2_갈치,CATEGORY_2_갑오징어,CATEGORY_2_개복치,CATEGORY_2_개불,CATEGORY_2_게,CATEGORY_2_고동,CATEGORY_2_고등어,CATEGORY_2_고시래기,CATEGORY_2_골뱅이,CATEGORY_2_광어 넙치,CATEGORY_2_김,CATEGORY_2_꼬막,CATEGORY_2_꼴뚜기,CATEGORY_2_꽁치 학꽁치,CATEGORY_2_꽁치 확꽁치,CATEGORY_2_낙지,CATEGORY_2_날치알,CATEGORY_2_남극빙어,CATEGORY_2_노래미,CATEGORY_2_농어,CATEGORY_2_능성어 붉바리 바리,CATEGORY_2_다시마,CATEGORY_2_달고기,CATEGORY_2_대구,CATEGORY_2_대구알,CATEGORY_2_도다리,CATEGORY_2_도미 감성돔 돔류,CATEGORY_2_망둑어,CATEGORY_2_멍게,CATEGORY_2_메기 동자개,CATEGORY_2_메로,CATEGORY_2_멸치,CATEGORY_2_명란(명태알),CATEGORY_2_명태,CATEGORY_2_문어,CATEGORY_2_물메기(곰치),CATEGORY_2_미꾸라지,CATEGORY_2_미역,CATEGORY_2_민물붕어,CATEGORY_2_민어 점성어,CATEGORY_2_밀크피시,CATEGORY_2_바지락,CATEGORY_2_방어,CATEGORY_2_밴댕이,CATEGORY_2_버터플라이 킹피쉬,CATEGORY_2_벤자리 알롱이,CATEGORY_2_병어,CATEGORY_2_보리멸,CATEGORY_2_복어,CATEGORY_2_부세,CATEGORY_2_붉평치(만다이 꽃돔),CATEGORY_2_삼치,CATEGORY_2_상어 고래,CATEGORY_2_새우,CATEGORY_2_샛돔류알,CATEGORY_2_서대 박대 페루다,CATEGORY_2_성게알,CATEGORY_2_소라,CATEGORY_2_송어,CATEGORY_2_쏘가리,CATEGORY_2_아귀,CATEGORY_2_양미리 정어리,CATEGORY_2_양태,CATEGORY_2_어류 기타,CATEGORY_2_연어,CATEGORY_2_연어알,CATEGORY_2_열빙어(시샤모),CATEGORY_2_열빙어(시샤모)알,CATEGORY_2_오징어,CATEGORY_2_옥돔,CATEGORY_2_우럭 볼락,CATEGORY_2_우렁 다슬기,CATEGORY_2_은민대구알,CATEGORY_2_임연수,CATEGORY_2_잉어,CATEGORY_2_자라,CATEGORY_2_장어,CATEGORY_2_재첩,CATEGORY_2_적어 눈볼대,CATEGORY_2_전갱기 매가리,CATEGORY_2_전갱이 매가리,CATEGORY_2_전복,CATEGORY_2_전어,CATEGORY_2_조개,CATEGORY_2_조개 백합 대합,CATEGORY_2_조기 보구치 강다리,CATEGORY_2_조기 보구치 강다리.1,CATEGORY_2_준치,CATEGORY_2_줄비늘치,CATEGORY_2_쥐치,CATEGORY_2_쭈꾸미,CATEGORY_2_참치 새치류,CATEGORY_2_참치 새치류.1,CATEGORY_2_청어,CATEGORY_2_청어알,CATEGORY_2_톳,CATEGORY_2_틸라피아(역돔),CATEGORY_2_팡가시우스(홍메기),CATEGORY_2_해물모둠,CATEGORY_2_해삼,CATEGORY_2_해초,CATEGORY_2_해파리,CATEGORY_2_호끼류,CATEGORY_2_호키(새꼬리민태)알,CATEGORY_2_호키류,CATEGORY_2_홍어,CATEGORY_2_홍합,P_NAME_PANGASIUS메기,P_NAME_가다랑어,P_NAME_가라지,P_NAME_가리비,P_NAME_가무락조개,P_NAME_가물치,P_NAME_가시배새우,P_NAME_가시투성왕게,P_NAME_가오리,P_NAME_가이석태속,P_NAME_가자미,P_NAME_각시가자미,P_NAME_갈치,P_NAME_감성돔,P_NAME_갑오징어,P_NAME_강담돔,P_NAME_강도다리,P_NAME_개량조개,P_NAME_개복치,P_NAME_개불,P_NAME_개조개,P_NAME_갯고둥,P_NAME_갯장어,P_NAME_검복,P_NAME_검정가자미,P_NAME_검정볼락,P_NAME_게,P_NAME_고등어,P_NAME_골뱅이,P_NAME_곱사연어,P_NAME_곱상어,P_NAME_구라미,P_NAME_귀상어,P_NAME_금눈돔,P_NAME_금색돔,P_NAME_기름치,P_NAME_기타민어류,P_NAME_기타병어류,P_NAME_긴가이석태,P_NAME_김,P_NAME_까지가자미,P_NAME_까치복,P_NAME_까칠복,P_NAME_깜장북방대합,P_NAME_꼬리검정민태,P_NAME_꼬막,P_NAME_꼬시래기,P_NAME_꽁치,P_NAME_꽃게,P_NAME_낙지,P_NAME_날개다랑어,P_NAME_날치알,P_NAME_남방대구,P_NAME_남방참다랑어,P_NAME_넙치,P_NAME_녹새치,P_NAME_논고둥,P_NAME_농어,P_NAME_눈다랑어,P_NAME_능성어,P_NAME_다슬기,P_NAME_다시마,P_NAME_달고기,P_NAME_닭새우,P_NAME_대게,P_NAME_대구,P_NAME_대구알,P_NAME_대두이석태,P_NAME_대서양꼬마민어,P_NAME_대서양먹장어,P_NAME_대서양붉은볼락,P_NAME_대서양연어,P_NAME_대서양조기속,P_NAME_대서양참다랑어,P_NAME_던지네스게,P_NAME_도화새우,P_NAME_돌가사리,P_NAME_돌가자미,P_NAME_돌돔,P_NAME_돔,P_NAME_동갈돗돔,P_NAME_동갈횟대,P_NAME_동등이석태,P_NAME_동자개,P_NAME_동죽,P_NAME_돛새치,P_NAME_두점박이민꽃게,P_NAME_드렁허리,P_NAME_등목어,P_NAME_마설가자미,P_NAME_마소치가자미,P_NAME_맛조개,P_NAME_망둑어,P_NAME_매듭가자미,P_NAME_매미새우,P_NAME_먹볼락,P_NAME_먹장어,P_NAME_멍게,P_NAME_메기,P_NAME_멸치,P_NAME_명태,P_NAME_명태알,P_NAME_문어,P_NAME_물메기,P_NAME_미꾸라지,P_NAME_미역,P_NAME_민꽃게,P_NAME_민대구,P_NAME_민들조개,P_NAME_민물가재,P_NAME_민물새우,P_NAME_민밀복,P_NAME_민어,P_NAME_민태,P_NAME_밀크피시,P_NAME_바다가재,P_NAME_바닷가재,P_NAME_바라문디,P_NAME_바리,"P_NAME_바리,교잡종",P_NAME_바지락,P_NAME_밤색무늬조개,P_NAME_방어,P_NAME_백합,"P_NAME_백합,MERCENARIA MERCENARIA",P_NAME_밴댕이,P_NAME_뱀장어,P_NAME_버들붕어,P_NAME_버터플라이 킹피쉬,P_NAME_벤자리,P_NAME_벵에돔,P_NAME_병어,P_NAME_병치매가리,P_NAME_보리멸,P_NAME_볼락,P_NAME_부세,P_NAME_북방대합,P_NAME_북쪽분홍새우,P_NAME_붉돔,P_NAME_붉은대게,P_NAME_붉은메기,P_NAME_붉은이석태,P_NAME_붉평치,P_NAME_붕어,P_NAME_붕장어,P_NAME_블루화이팅,P_NAME_비너스백합,P_NAME_비단조개,P_NAME_뿔가자미,P_NAME_삼치,P_NAME_상어,P_NAME_새꼬리민태알,P_NAME_새꼬막,P_NAME_새뱅이,P_NAME_새우,P_NAME_새조개,P_NAME_샛돔,P_NAME_샛돔류알,P_NAME_서대,P_NAME_성게알,P_NAME_세네갈가이석태,P_NAME_소라,P_NAME_소주목탁가자미,P_NAME_송어,P_NAME_수조기,P_NAME_스피노잠,P_NAME_식용자라,P_NAME_실꼬리돔,P_NAME_쌍지붕어,P_NAME_쏘가리,P_NAME_아귀,P_NAME_아담스백합,P_NAME_아르헨티나붉은새우,P_NAME_양볼락,P_NAME_양초선홍치,P_NAME_양태,P_NAME_어름돔,P_NAME_얼룩볼락,P_NAME_연어,P_NAME_연어알,P_NAME_열빙어,P_NAME_열빙어알,P_NAME_영상가이석태,P_NAME_오징어,P_NAME_옥덩굴,P_NAME_옥돔,P_NAME_옥두어,P_NAME_왕게,P_NAME_왕게붙이,P_NAME_왕연어,P_NAME_우뭇가사리,P_NAME_위고둥,P_NAME_유럽물레고둥,P_NAME_은대구,P_NAME_은민대구,P_NAME_은민대구알,P_NAME_은밀복,P_NAME_은연어,P_NAME_은행게,P_NAME_이스라엘잉어,P_NAME_임연수어,P_NAME_잉어,P_NAME_자바리,P_NAME_자이언트그루퍼,P_NAME_자주복,P_NAME_장문볼락,P_NAME_장성베도라치,P_NAME_장수기름가자미,P_NAME_재첩,P_NAME_잿방어,P_NAME_적돔,P_NAME_적새우,P_NAME_전갱이,"P_NAME_전갱이,POMPANO",P_NAME_전복,P_NAME_전어,P_NAME_젓새우,P_NAME_정어리,P_NAME_조피볼락,P_NAME_주꾸미,P_NAME_주름백합,P_NAME_준치,P_NAME_줄민태,P_NAME_줄비늘치,P_NAME_쥐노래미,P_NAME_쥐돔,P_NAME_쥐치,P_NAME_진홍퉁돔,P_NAME_진환도상어,P_NAME_참게,P_NAME_참다랑어,P_NAME_참돔,P_NAME_참복,P_NAME_참조기,P_NAME_첨치가자미,P_NAME_청각,P_NAME_청대구,P_NAME_청상아리,P_NAME_청새리상어,P_NAME_청새치,P_NAME_청어,P_NAME_청어알,P_NAME_청회볼락,P_NAME_체장메기,P_NAME_칠성장어,P_NAME_코끼리조개,P_NAME_코드아이스피쉬,P_NAME_코토니,P_NAME_쿠자조기,P_NAME_큰구슬우렁이,P_NAME_큰민어,P_NAME_큰실말,P_NAME_큰징거미새우,P_NAME_키조개,P_NAME_태평양먹장어,P_NAME_털게,P_NAME_톱날꽃게,P_NAME_톳,P_NAME_틸라피아,P_NAME_파타고니아이빨고기,P_NAME_프로펠러조개,P_NAME_피뿔고둥,P_NAME_피조개,P_NAME_학공치,P_NAME_해물혼합,P_NAME_해삼,P_NAME_해파리,P_NAME_홍감펭,P_NAME_홍다리얼룩새우,P_NAME_홍민어,P_NAME_홍서대,P_NAME_홍어,"P_NAME_홍연어,Red salmon",P_NAME_홍합,P_NAME_화살오징어,P_NAME_황다랑어,P_NAME_황돔,P_NAME_황새치,P_NAME_황적퉁돔,P_NAME_회초리꼬리민태,P_NAME_흑기흉상어,P_NAME_흑밀복,P_NAME_흑점샛돔알,P_NAME_흑점줄전갱이,P_NAME_흰꼴뚜기,P_NAME_흰다리새우
0,아르헨티나,7.48,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2015,53,79,False,1,,,,37.08,5.352941,7.973109,13.732174,100.22,101.26,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,아르헨티나,7.48,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2015,53,79,False,1,,,,37.08,5.352941,7.973109,13.732174,100.39,100.76,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,바레인,2.92,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2015,53,596,False,1,,,,37.08,5.352941,7.973109,13.732174,100.22,101.26,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,바레인,2.92,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2015,53,596,False,1,,,,37.08,5.352941,7.973109,13.732174,100.39,100.76,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,바레인,3.356352,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2015,53,596,False,2,,,,37.08,5.352941,7.973109,13.732174,100.22,101.26,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [368]:
ctry_name

['칠레', '중국', '노르웨이', '페루', '태국', '베트남']

In [373]:
def func(data):
  '''
  data : df_train
  CTRY_2에서 필요없는 주요 국가 버려서 집합형식으로 변환.
  '''
  all_nation = list(data['CTRY_2'])
  
  for i in all_nation[:]:
    if i in ctry_name:
      all_nation.remove(i)
  set_nation = set(all_nation)

  return set_nation



def func2(data):
  '''
  data : df_train
  1. 비주류국가에서 버려야 할 컬럼(rain, wind, temperature, exchange)을 버림.
  2. 마지막으로 concat.

  '''

  set_nation = func(data)
  non_nation_list = []

  for not_major_nation in set_nation:
    non_nation_list.append(df_train[df_train['CTRY_2'] == not_major_nation])

  
  for col in range(len(non_nation_list)):
    non_nation_list[col].drop(['rain','wind','temperature','exchange'], axis=1, inplace=True)


  result = pd.concat(non_nation_list, axis=0)

  return result

In [375]:
result = func2(df_train)

In [366]:
x_features = result.drop(['P_PRICE','CTRY_2'], axis=1 , inplace=False)
y_target = result['P_PRICE']

X_train, X_test, y_train, y_test = train_test_split(x_features, y_target, test_size=0.2, random_state=0)

lr_reg = LinearRegression()

lr_reg.fit(X_train, y_train)

ridge_reg = Ridge()
ridge_reg.fit(X_train, y_train)

lasso_reg = Lasso()
lasso_reg.fit(X_train, y_train)

models = [lr_reg, ridge_reg, lasso_reg]
get_rmses(models)


models = [lr_reg, ridge_reg, lasso_reg]
get_avg_rmse_cv(models)

ridge_params = { 'alpha':[0.05, 0.1, 1, 5, 8, 10, 12, 15, 20] }
lasso_params = { 'alpha':[0.001, 0.005, 0.008, 0.05, 0.03, 0.1, 0.5, 1,5, 10] }
best_rige = print_best_params(ridge_reg, ridge_params)
best_lasso = print_best_params(lasso_reg, lasso_params)

models = [lr_reg, ridge_reg, lasso_reg]
get_rmses(models)

xgb_params = {'n_estimators':[1000]}
xgb_reg = XGBRegressor(n_estimators=1000, learning_rate=0.05, 
                       colsample_bytree=0.5, subsample=0.8)
best_xgb = print_best_params(xgb_reg, xgb_params)

# 개별 모델의 학습
ridge_reg = Ridge(alpha=20)
ridge_reg.fit(X_train, y_train)
lasso_reg = Lasso(alpha=0.005)
lasso_reg.fit(X_train, y_train)
# 개별 모델 예측
ridge_pred = ridge_reg.predict(X_test)
lasso_pred = lasso_reg.predict(X_test)

# 개별 모델 예측값 혼합으로 최종 예측값 도출
pred = 0.4 * ridge_pred + 0.6 * lasso_pred
preds = {'최종 혼합': pred,
         'Ridge': ridge_pred,
         'Lasso': lasso_pred}
#최종 혼합 모델, 개별모델의 RMSE 값 출력
get_rmse_pred(preds)


xgb_reg = XGBRegressor(n_estimators=1000, learning_rate=0.05, 
                       colsample_bytree=0.5, subsample=0.8)
lgbm_reg = LGBMRegressor(n_estimators=1000, learning_rate=0.05, num_leaves=4, 
                         subsample=0.6, colsample_bytree=0.4, reg_lambda=10, n_jobs=-1)
xgb_reg.fit(X_train, y_train)
lgbm_reg.fit(X_train, y_train)
xgb_pred = xgb_reg.predict(X_test)
lgbm_pred = lgbm_reg.predict(X_test)

pred = 0.5 * xgb_pred + 0.5 * lgbm_pred
preds = {'최종 혼합': pred,
         'XGBM': xgb_pred,
         'LGBM': lgbm_pred}
        
get_rmse_pred(preds)

LinearRegression RMSE: 16817208.448
Ridge RMSE: 14.013
Lasso RMSE: 18.072

LinearRegression CV RMSE 값 리스트: [2.10148265e+08 7.99862676e+08 1.03764895e+08 2.24084203e+07
 7.47059694e+08]
LinearRegression CV 평균 RMSE 값: 376648790.016

Ridge CV RMSE 값 리스트: [55.127 15.853 54.82  24.088 39.403]
Ridge CV 평균 RMSE 값: 37.858

Lasso CV RMSE 값 리스트: [28.43  11.458 16.881 12.774 47.195]
Lasso CV 평균 RMSE 값: 23.348
Ridge 5 CV 시 최적 평균 RMSE 값: 35.4229, 최적 alpha:{'alpha': 20}
Lasso 5 CV 시 최적 평균 RMSE 값: 26.8891, 최적 alpha:{'alpha': 1}
LinearRegression RMSE: 16817208.448
Ridge RMSE: 14.013
Lasso RMSE: 18.072
XGBRegressor 5 CV 시 최적 평균 RMSE 값: 29.597, 최적 alpha:{'n_estimators': 1000}
최종 혼합 모델의 RMSE: 14.080730604517086
Ridge 모델의 RMSE: 14.141140237673856
Lasso 모델의 RMSE: 14.070340028693192
최종 혼합 모델의 RMSE: 10.776560388844297
XGBM 모델의 RMSE: 15.2362786156171
LGBM 모델의 RMSE: 8.656913522018417
