# Import Library

In [1]:
# Import Libraries
import os
import pandas as pd
import numpy as np
from scipy.stats import norm
from sklearn.preprocessing import StandardScaler
from scipy import stats
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

# Visuzliation Setting
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
from matplotlib import rc
from matplotlib import colors
import seaborn as sns

---

# Wrangling Hypothesis
### 날짜
날짜를 year, month, day 로 처리하면 결국 int형 데이터가 되는데, 정량적으로 의미 없는 숫자가 악영향을 끼칠 수도 있지 않을까  
- datetime 형식으로 가져가기
- year, month, day int형으로 가져가기
- 날짜와 관련있는 데이터를 다 넣어준 후(ex. 2015/09/15 노르웨이 기온 추가 후) 날짜 데이터 제거하기
- 날짜 데이터도 one hot encoding (2015년이다/아니다, 05월이다/아니다..)

### 어종별 수
어종(P_NAME)에 따라서 수 차이가 있는데, 이를 수치화하여 새로운 데이터로 사용할 수 있다  
- P_NAME만 사용하기
- 어종별 수 세려서 새로운 컬럼 만들기

### 카테고리
어종 카테고리가 대분류, 중분류, 소분류 (CATEGORY_1, CATEGORY_2, P_NAME)으로 계츨 구조를 가지고 있다  
이를 어떻게 처리해야할까  
- 대분류에서 끊기
- 중분류에서 끊기
- 별개의 변수로 생각하여 소분류까지 포함하기
- 소분류만 사용하기

### 수입용도
P_IMPORT_TYPE 는 여러개의 수입 용도가 concate 되어 들어오므로 이를 분리하여 생각해야 한다  
- 여러개의 수입 용도를 가지는 품목의 경우 영향을 끼칠 수 있다고 생각

### 날씨
- 결측치 처리 방식 (fillna() 함수의 method 선택)
- 결측치 처리 시점 (raw_data 상태, 나라별 묶은 후, 최종 데이터)
- outlier 검출 방식 (정규화 후 끝부분 자르기, 나라별 최저/최고 기온 검색하여 처리)
- 해안가만 추출하기

### 소비자물가지수 / 생활물가지수
- 전체 지수 평균 사용
- 품목 중 관련성 있는 지수 사용
- 둘다 사용

### 날씨 / 원유 / 소비자물가지수 / 생활물가지수 / 우리나라 날씨
- 사용
- 미사용

### Data Filtering - Correlation
- 상관계수 계산해서 중복 변수 제거하기

### Data Filtering - Feature Selection (PCA...)
- 중요도가 높은 변수들만 선택하여 학습하기
- 중요도가 낮은 변수들은 제거하여 학습하기

### 전체
- 주별로 묶기
- 월별로 묶기
- 연별로 묶기
- 계절별로 묶기 (분기별)

df_date1 : 날짜를 int형으로 가져감
df_date2 : ddd
df_date3 : ddd

df_date2 > 

df_import1 = df_train + 컬럼 추가한 (import_type) > one-hot encoding
df_import2 = df_train > one-hot encoding
df_import3 = df_train + import_type 뺴고 + 1번컬럼 추가
lr -> df_import1, df_import2

---

In [2]:
root = os.path.join(os.getcwd(), 'DATA')

# Training Data
빅콘테스트 제공 데이터 전처리

## Import Data

In [3]:
df_raw = pd.read_excel(os.path.join(root, 'train.xlsx'))
df_train = df_raw.copy()

### Add Columns

In [4]:
# 날짜변수 추가
df_train['year'] = df_train['REG_DATE'].dt.year
df_train['month'] = df_train['REG_DATE'].dt.month
df_train['day'] = df_train['REG_DATE'].dt.day

In [5]:
df_train['VALUE_COUNT'] = 0;
value_dict = {}
for name, value in zip(df_train['P_NAME'].value_counts().index,df_train['P_NAME'].value_counts()):
    value_dict[name] = value

def value(col):
    return value_dict[col]

df_train['VALUE_COUNT'] = df_train['P_NAME'].apply(value)

### One hot encoding
P_IMPORT_TYPE 이라는 특수한 컬럼에 대한 전처리

In [6]:
import_type_list = set()
for tmp in df_train.P_IMPORT_TYPE.unique():
    for a in tmp.split(','):
        import_type_list.add(a)

In [7]:
for name in import_type_list:
    df_train[name] = 0
    df_train.loc[df_train['P_IMPORT_TYPE'].str.contains(name, regex=False), name] = 1

---

# Weather data

## Import Data

In [8]:
df_weather_code = pd.read_excel(os.path.join(root, 'weather_code.xlsx'), header=None, index_col=0)

In [9]:
weather_list = [pd.read_csv(os.path.join(root, 'weather_raw_20151228_20161227.csv'), encoding='euc-kr') , 
                pd.read_csv(os.path.join(root, 'weather_raw_20161228_20171227.csv'), encoding='euc-kr') , 
                pd.read_csv(os.path.join(root, 'weather_raw_20171228_20181227.csv'), encoding='euc-kr') , 
                pd.read_csv(os.path.join(root, 'weather_raw_20181228_20191227.csv'), encoding='euc-kr') , 
                pd.read_csv(os.path.join(root, 'weather_raw_20191228_20201227.csv'), encoding='euc-kr') , 
                pd.read_csv(os.path.join(root, 'weather_raw_20201228_20210818.csv'), encoding='euc-kr')]

## Preprocess
- '지점'에 따른 나라명 컬럼 (country) 추가
- 각 나라, 일자 별로 평균 강수량, 풍속, 기온 계산

In [10]:
# 지점에 따라 나라명 추가
def set_country(row):
    data = df_weather_code[df_weather_code[1] == row['지점']]
    if data.empty:
        return ""
    return data.iloc[0][2]


def preprocess_weather(df_weather):
    # 날짜 정보 정리
    df_weather['year'] = df_weather['일시'].astype('str').str[:4].astype('int')
    df_weather['month'] = df_weather['일시'].astype('str').str[5:7].astype('int')
    df_weather['day'] = df_weather['일시'].astype('str').str[8:10].astype('int')
    # 1차 평균
    df_weather['rain'] = df_weather[['지점', 'year', 'month', 'day', '강수량']].groupby(['지점', 'year', 'month', 'day']).transform('mean')
    df_weather['wind'] = df_weather[['지점', 'year', 'month', 'day', '풍속']].groupby(['지점', 'year', 'month', 'day']).transform('mean')
    df_weather['temperature'] = df_weather[['지점', 'year', 'month', 'day', '기온']].groupby(['지점', 'year', 'month', 'day']).transform('mean')
    # 컬럼/행 정리
    df_weather.drop(columns = ['지점명', '일시', '강수량', '풍속', '기온'], inplace=True)
    df_weather.drop_duplicates(inplace=True)
    # 나라명 추가
    df_weather['CTRY_1'] = ""
    for i, row in df_weather.iterrows():
        df_weather.at[i, 'CTRY_1'] = set_country(row)
    # 2차 평균
    df_weather['rain'] = df_weather[['CTRY_1', 'year', 'month', 'day', 'rain']].groupby(['CTRY_1', 'year', 'month', 'day']).transform('mean')
    df_weather['wind'] = df_weather[['CTRY_1', 'year', 'month', 'day', 'wind']].groupby(['CTRY_1', 'year', 'month', 'day']).transform('mean')
    df_weather['temperature'] = df_weather[['CTRY_1', 'year', 'month', 'day', 'temperature']].groupby(['CTRY_1', 'year', 'month', 'day']).transform('mean')
    # 컬럼/행 정리
    df_weather.drop(columns = ['지점'], inplace=True)
    df_weather.drop_duplicates(inplace=True)
    # 인덱스 정리
    df_weather.reset_index(drop=True, inplace=True)

In [11]:
for df in weather_list:
    preprocess_weather(df)
df_weather = pd.concat(weather_list)

NameError: name 'rwt_list' is not defined

In [12]:
df_weather = pd.concat(weather_list)

In [13]:
df_weather.describe()

Unnamed: 0,year,month,day,rain,wind,temperature
count,13947.0,13947.0,13947.0,10303.0,13947.0,13947.0
mean,2018.258407,6.276762,15.697426,-20.47845,3.312765,16.499339
std,1.613987,3.414373,8.809565,78.40176,5.434442,10.954919
min,2015.0,1.0,1.0,-999.0,-156.116667,-26.175
25%,2017.0,3.0,8.0,-6.701488,2.03125,9.912083
50%,2018.0,6.0,16.0,1.035,2.625,19.35625
75%,2020.0,9.0,23.0,3.67625,5.434846,25.244375
max,2021.0,12.0,31.0,915.0,15.125,33.275


#### 확인필요
- outlier 찾아내기 > 값이 너무 크거나 작은 경우 제외
- 날짜별로 확인 후 비어있는 값 채워넣기 (전/다음날 이용)
    - 13947개 데이터 중 rain, wind, temperature 갯수 보면 몇개 비어있는지 확인 가능
- 합치기..
- 강수량의 경우 NaN 값이 너무 많음
    - 위 전처리들을 한 후에도 많다면 사용불가
    - 위 전처리 후에는 적다면 전날/다음날 데이터 기반으로 채워넣기 진행

## Add to Training Data

In [17]:
df_train = pd.merge(df_train, df_weather, how='left', on=['year', 'month', 'day', 'CTRY_1'])

In [None]:
pd.set_option("display.max_columns", None)
df_train.sample(5)

---

# Salinity

## Import Data

In [18]:
# df_salinity = pd.read_csv(os.path.join(root, 'salinity_raw.csv'))

## Drop Columns & Rows

In [19]:
# df_salinity.drop(df_salinity.columns[2], inplace=True, axis=1)
# df_salinity = df_salinity[(2015 <= df_salinity['obs_year']) & (df_salinity['obs_year'] <= 2021)]

2020, 2021 데이터의 부재로 인해 보류

---

# Oil

## Import Data

In [30]:
df_oil = pd.read_csv(os.path.join(root, 'oil_raw.csv'))
df_oil_dubai = pd.read_csv(os.path.join(root, 'oil_raw_dubai.csv'))
df_oil_brent = pd.read_csv(os.path.join(root, 'oil_raw_brent.csv'))

## Preprocess

In [31]:
def preprocess_oil(df):
    df['year'] = df['날짜'].str[:4].astype('int')
    df['month'] = df['날짜'].str[6:8].astype('int')
    df['day'] = df['날짜'].str[10:12].astype('int')
    df['date'] = pd.to_datetime(df['날짜'].str[:4] + df['날짜'].str[6:8] + df['날짜'].str[10:12])
    df = df.query('date.dt.dayofweek == 0')
    df.drop(columns = ['날짜', '오픈', '고가', '저가', '거래량', '변동 %'], inplace=True, axis=1)
    df.sort_index(ascending=False)
    return df

In [32]:
df_oil = preprocess_oil(df_oil)
df_oil_dubai = preprocess_oil(df_oil_dubai)
df_oil_brent = preprocess_oil(df_oil_brent)

## Check

In [33]:
print("oil:", len(df_oil), "\ndubai:", len(df_oil_dubai), "\nbrent:", len(df_oil_brent))

oil: 288 
dubai: 260 
brent: 285


oil과 비교하였을 때 dubai의 경우 28개, brent의 경우 3개의 데이터가 적다  
따라서 df_oil 데이터를 사용하도록 한다

## Add to Training Data

In [34]:
df_train = pd.merge(df_train, df_oil, how='left', on=['year', 'month', 'day'])

In [None]:
pd.set_option("display.max_columns", None)
df_train.sample(5)

---

# Korea Weather

## Import Data

In [35]:
df_weather_kr = pd.read_csv(os.path.join(root, 'weather_raw_korea.csv'), encoding='euc-kr')

## Group
일자 별로 평균 구하기

In [36]:
df_weather_kr['wind_kr'] = df_weather_kr[['일시', '평균 풍속(m/s)']].groupby(['일시']).transform('mean')
df_weather_kr['temperature_kr'] = df_weather_kr[['일시', '평균 기온(°C)']].groupby(['일시']).transform('mean')
df_weather_kr['water_temp_kr'] = df_weather_kr[['일시', '평균 수온(°C)']].groupby(['일시']).transform('mean')

## Preprocess Date

In [37]:
df_weather_kr['year'] = df_weather_kr['일시'].str[:4].astype('int')
df_weather_kr['month'] = df_weather_kr['일시'].str[5:7].astype('int')
df_weather_kr['day'] = df_weather_kr['일시'].str[8:].astype('int')

## Drop Column
어차피 전부 해안가 대한민국이라 지점은 필요없다  
사용한 컬럼은 제거한다

In [38]:
drop = ['지점', '일시', '평균 풍속(m/s)', '평균 기온(°C)', '평균 수온(°C)']

In [39]:
df_weather_kr.drop(columns=drop, inplace=True, axis=1)

## Add to Training Data

In [40]:
df_train = pd.merge(df_train, df_weather_kr, how='left', on=['year', 'month', 'day'])

In [None]:
pd.set_option("display.max_columns", None)
df_train.sample(5)

---

# Final

## onehot encoding

In [41]:
one_hot = ['CTRY_1', 'CTRY_2', 'P_PURPOSE', 'CATEGORY_1', 'CATEGORY_2', 'P_NAME']

In [42]:
df_train = pd.get_dummies(df_train, columns=one_hot)

## drop columns

In [43]:
drop = ['REG_DATE', 'P_TYPE', 'P_IMPORT_TYPE']

In [44]:
df_train.drop(columns = drop, inplace=True)

In [51]:
pd.set_option("display.max_columns", None)
df_train.sample(5)

Unnamed: 0,P_PRICE,year,month,day,VALUE_COUNT,알,지느러미,내장,머리,개아지살,턱살,눈살,목살,다리,염장,슬라이스(S),한쪽껍질붙은,냉동,머리_외화획득용,필렛(F),집게다리,캐비아대용,볼살,건조,꼬리_외화획득용,머리살,살,포장횟감,곤이,줄기,창난,절단,훈제,외투막,냉장,횟감,동체,자숙,껍질,활,난포선,간,rain_x,wind_x,temperature_x,rain_y,wind_y,temperature_y,종가,date,wind_kr,temperature_kr,water_temp_kr,CTRY_1_가나,CTRY_1_감비아,CTRY_1_그리스,CTRY_1_그린란드,CTRY_1_기니,CTRY_1_기니비사우,CTRY_1_나미비아,CTRY_1_남아프리카 공화국,CTRY_1_네덜란드,CTRY_1_노르웨이,CTRY_1_뉴질랜드,CTRY_1_니카라과,CTRY_1_대만,CTRY_1_대한민국,CTRY_1_라이베리아,CTRY_1_라트비아,CTRY_1_러시아,CTRY_1_루마니아,CTRY_1_리비아,CTRY_1_마다가스카르,CTRY_1_말레이시아,CTRY_1_멕시코,CTRY_1_모로코,CTRY_1_모리타니,CTRY_1_모잠비크,CTRY_1_몰타,CTRY_1_미국,CTRY_1_미얀마,CTRY_1_미크로네시아 연방,CTRY_1_바누아투,CTRY_1_바레인,CTRY_1_방글라데시,CTRY_1_베네수엘라,CTRY_1_베트남,CTRY_1_불가리아,CTRY_1_브라질,CTRY_1_사우디아라비아,CTRY_1_사이프러스,CTRY_1_세네갈,CTRY_1_세이셸,CTRY_1_세인트빈센트 그레나딘,CTRY_1_소말리아,CTRY_1_수리남,CTRY_1_스리랑카,CTRY_1_스페인,CTRY_1_시에라리온,CTRY_1_싱가포르,CTRY_1_아랍에미리트,CTRY_1_아르헨티나,CTRY_1_아이슬란드,CTRY_1_아일랜드,CTRY_1_알제리,CTRY_1_앙골라,CTRY_1_에스토니아,CTRY_1_에콰도르,CTRY_1_영국,CTRY_1_오만,CTRY_1_우루과이,CTRY_1_우크라이나,CTRY_1_이란,CTRY_1_이집트,CTRY_1_이탈리아,CTRY_1_인도,CTRY_1_인도네시아,CTRY_1_일본,CTRY_1_중국,CTRY_1_칠레,CTRY_1_캐나다,CTRY_1_콜롬비아,CTRY_1_쿠바,CTRY_1_쿡 제도,CTRY_1_크로아티아,CTRY_1_키리바시,CTRY_1_태국,CTRY_1_터키,CTRY_1_튀니지,CTRY_1_파나마,CTRY_1_파키스탄,CTRY_1_파푸아뉴기니,CTRY_1_팔라우,CTRY_1_페루,CTRY_1_포르투갈,CTRY_1_포클랜드 제도,CTRY_1_프랑스,CTRY_1_피지,CTRY_1_필리핀,CTRY_1_호주,CTRY_2_가나,CTRY_2_감비아,CTRY_2_그리스,CTRY_2_기니,CTRY_2_기니비사우,CTRY_2_기타(ZZ),CTRY_2_나미비아,CTRY_2_남아프리카 공화국,CTRY_2_네덜란드,CTRY_2_노르웨이,CTRY_2_뉴질랜드,CTRY_2_대만,CTRY_2_덴마크,CTRY_2_독일,CTRY_2_라이베리아,CTRY_2_러시아,CTRY_2_루마니아,CTRY_2_마다가스카르,CTRY_2_말레이시아,CTRY_2_멕시코,CTRY_2_모로코,CTRY_2_모리셔스,CTRY_2_모리타니,CTRY_2_모잠비크,CTRY_2_몰타,CTRY_2_미국,CTRY_2_미얀마,CTRY_2_미크로네시아 연방,CTRY_2_바누아투,CTRY_2_바레인,CTRY_2_방글라데시,CTRY_2_베네수엘라,CTRY_2_베트남,CTRY_2_벨리즈,CTRY_2_북한,CTRY_2_불가리아,CTRY_2_브라질,CTRY_2_사모아,CTRY_2_사우디아라비아,CTRY_2_세네갈,CTRY_2_세이셸,CTRY_2_솔로몬 제도,CTRY_2_수리남,CTRY_2_스리랑카,CTRY_2_스웨덴,CTRY_2_스페인,CTRY_2_시에라리온,CTRY_2_싱가포르,CTRY_2_아랍에미리트,CTRY_2_아르헨티나,CTRY_2_아이슬란드,CTRY_2_아일랜드,CTRY_2_앙골라,CTRY_2_에콰도르,CTRY_2_영국,CTRY_2_오만,CTRY_2_우루과이,CTRY_2_우크라이나,CTRY_2_이란,CTRY_2_이집트,CTRY_2_이탈리아,CTRY_2_인도,CTRY_2_인도네시아,CTRY_2_일본,CTRY_2_중국,CTRY_2_지부티,CTRY_2_칠레,CTRY_2_캐나다,CTRY_2_콜롬비아,CTRY_2_쿠바,CTRY_2_크로아티아,CTRY_2_키리바시,CTRY_2_태국,CTRY_2_터키,CTRY_2_투발루,CTRY_2_튀니지,CTRY_2_파나마,CTRY_2_파키스탄,CTRY_2_파푸아뉴기니,CTRY_2_팔라우,CTRY_2_페루,CTRY_2_포르투갈,CTRY_2_포클랜드 제도,CTRY_2_프랑스,CTRY_2_피지,CTRY_2_필리핀,CTRY_2_호주,CTRY_2_홍콩,P_PURPOSE_반송품(기타),P_PURPOSE_외화획득용 원료,P_PURPOSE_자사제품제조용,P_PURPOSE_판매용,P_PURPOSE_합작,CATEGORY_1_갑각류,CATEGORY_1_기타 수입식품,CATEGORY_1_알 곤이류,CATEGORY_1_어류,CATEGORY_1_연체류 해물모듬,CATEGORY_1_젓갈류 해조류 해파리,CATEGORY_1_패류 멍게류,CATEGORY_2_가리비,CATEGORY_2_가물치,CATEGORY_2_가사리,CATEGORY_2_가오리,CATEGORY_2_가자미,CATEGORY_2_가재 랍스타,CATEGORY_2_갈치,CATEGORY_2_갑오징어,CATEGORY_2_개복치,CATEGORY_2_개불,CATEGORY_2_게,CATEGORY_2_고동,CATEGORY_2_고등어,CATEGORY_2_고시래기,CATEGORY_2_골뱅이,CATEGORY_2_광어 넙치,CATEGORY_2_김,CATEGORY_2_꼬막,CATEGORY_2_꼴뚜기,CATEGORY_2_꽁치 학꽁치,CATEGORY_2_꽁치 확꽁치,CATEGORY_2_낙지,CATEGORY_2_날치알,CATEGORY_2_남극빙어,CATEGORY_2_노래미,CATEGORY_2_농어,CATEGORY_2_능성어 붉바리 바리,CATEGORY_2_다시마,CATEGORY_2_달고기,CATEGORY_2_대구,CATEGORY_2_대구알,CATEGORY_2_도다리,CATEGORY_2_도미 감성돔 돔류,CATEGORY_2_망둑어,CATEGORY_2_멍게,CATEGORY_2_메기 동자개,CATEGORY_2_메로,CATEGORY_2_멸치,CATEGORY_2_명란(명태알),CATEGORY_2_명태,CATEGORY_2_문어,CATEGORY_2_물메기(곰치),CATEGORY_2_미꾸라지,CATEGORY_2_미역,CATEGORY_2_민물붕어,CATEGORY_2_민어 점성어,CATEGORY_2_밀크피시,CATEGORY_2_바지락,CATEGORY_2_방어,CATEGORY_2_밴댕이,CATEGORY_2_버터플라이 킹피쉬,CATEGORY_2_벤자리 알롱이,CATEGORY_2_병어,CATEGORY_2_보리멸,CATEGORY_2_복어,CATEGORY_2_부세,CATEGORY_2_붉평치(만다이 꽃돔),CATEGORY_2_삼치,CATEGORY_2_상어 고래,CATEGORY_2_새우,CATEGORY_2_샛돔류알,CATEGORY_2_서대 박대 페루다,CATEGORY_2_성게알,CATEGORY_2_소라,CATEGORY_2_송어,CATEGORY_2_쏘가리,CATEGORY_2_아귀,CATEGORY_2_양미리 정어리,CATEGORY_2_양태,CATEGORY_2_어류 기타,CATEGORY_2_연어,CATEGORY_2_연어알,CATEGORY_2_열빙어(시샤모),CATEGORY_2_열빙어(시샤모)알,CATEGORY_2_오징어,CATEGORY_2_옥돔,CATEGORY_2_우럭 볼락,CATEGORY_2_우렁 다슬기,CATEGORY_2_은민대구알,CATEGORY_2_임연수,CATEGORY_2_잉어,CATEGORY_2_자라,CATEGORY_2_장어,CATEGORY_2_재첩,CATEGORY_2_적어 눈볼대,CATEGORY_2_전갱기 매가리,CATEGORY_2_전갱이 매가리,CATEGORY_2_전복,CATEGORY_2_전어,CATEGORY_2_조개,CATEGORY_2_조개 백합 대합,CATEGORY_2_조기 보구치 강다리,CATEGORY_2_조기 보구치 강다리.1,CATEGORY_2_준치,CATEGORY_2_줄비늘치,CATEGORY_2_쥐치,CATEGORY_2_쭈꾸미,CATEGORY_2_참치 새치류,CATEGORY_2_참치 새치류.1,CATEGORY_2_청어,CATEGORY_2_청어알,CATEGORY_2_톳,CATEGORY_2_틸라피아(역돔),CATEGORY_2_팡가시우스(홍메기),CATEGORY_2_해물모둠,CATEGORY_2_해삼,CATEGORY_2_해초,CATEGORY_2_해파리,CATEGORY_2_호끼류,CATEGORY_2_호키(새꼬리민태)알,CATEGORY_2_호키류,CATEGORY_2_홍어,CATEGORY_2_홍합,P_NAME_PANGASIUS메기,P_NAME_가다랑어,P_NAME_가라지,P_NAME_가리비,P_NAME_가무락조개,P_NAME_가물치,P_NAME_가시배새우,P_NAME_가시투성왕게,P_NAME_가오리,P_NAME_가이석태속,P_NAME_가자미,P_NAME_각시가자미,P_NAME_갈치,P_NAME_감성돔,P_NAME_갑오징어,P_NAME_강담돔,P_NAME_강도다리,P_NAME_개량조개,P_NAME_개복치,P_NAME_개불,P_NAME_개조개,P_NAME_갯고둥,P_NAME_갯장어,P_NAME_검복,P_NAME_검정가자미,P_NAME_검정볼락,P_NAME_게,P_NAME_고등어,P_NAME_골뱅이,P_NAME_곱사연어,P_NAME_곱상어,P_NAME_구라미,P_NAME_귀상어,P_NAME_금눈돔,P_NAME_금색돔,P_NAME_기름치,P_NAME_기타민어류,P_NAME_기타병어류,P_NAME_긴가이석태,P_NAME_김,P_NAME_까지가자미,P_NAME_까치복,P_NAME_까칠복,P_NAME_깜장북방대합,P_NAME_꼬리검정민태,P_NAME_꼬막,P_NAME_꼬시래기,P_NAME_꽁치,P_NAME_꽃게,P_NAME_낙지,P_NAME_날개다랑어,P_NAME_날치알,P_NAME_남방대구,P_NAME_남방참다랑어,P_NAME_넙치,P_NAME_녹새치,P_NAME_논고둥,P_NAME_농어,P_NAME_눈다랑어,P_NAME_능성어,P_NAME_다슬기,P_NAME_다시마,P_NAME_달고기,P_NAME_닭새우,P_NAME_대게,P_NAME_대구,P_NAME_대구알,P_NAME_대두이석태,P_NAME_대서양꼬마민어,P_NAME_대서양먹장어,P_NAME_대서양붉은볼락,P_NAME_대서양연어,P_NAME_대서양조기속,P_NAME_대서양참다랑어,P_NAME_던지네스게,P_NAME_도화새우,P_NAME_돌가사리,P_NAME_돌가자미,P_NAME_돌돔,P_NAME_돔,P_NAME_동갈돗돔,P_NAME_동갈횟대,P_NAME_동등이석태,P_NAME_동자개,P_NAME_동죽,P_NAME_돛새치,P_NAME_두점박이민꽃게,P_NAME_드렁허리,P_NAME_등목어,P_NAME_마설가자미,P_NAME_마소치가자미,P_NAME_맛조개,P_NAME_망둑어,P_NAME_매듭가자미,P_NAME_매미새우,P_NAME_먹볼락,P_NAME_먹장어,P_NAME_멍게,P_NAME_메기,P_NAME_멸치,P_NAME_명태,P_NAME_명태알,P_NAME_문어,P_NAME_물메기,P_NAME_미꾸라지,P_NAME_미역,P_NAME_민꽃게,P_NAME_민대구,P_NAME_민들조개,P_NAME_민물가재,P_NAME_민물새우,P_NAME_민밀복,P_NAME_민어,P_NAME_민태,P_NAME_밀크피시,P_NAME_바다가재,P_NAME_바닷가재,P_NAME_바라문디,P_NAME_바리,"P_NAME_바리,교잡종",P_NAME_바지락,P_NAME_밤색무늬조개,P_NAME_방어,P_NAME_백합,"P_NAME_백합,MERCENARIA MERCENARIA",P_NAME_밴댕이,P_NAME_뱀장어,P_NAME_버들붕어,P_NAME_버터플라이 킹피쉬,P_NAME_벤자리,P_NAME_벵에돔,P_NAME_병어,P_NAME_병치매가리,P_NAME_보리멸,P_NAME_볼락,P_NAME_부세,P_NAME_북방대합,P_NAME_북쪽분홍새우,P_NAME_붉돔,P_NAME_붉은대게,P_NAME_붉은메기,P_NAME_붉은이석태,P_NAME_붉평치,P_NAME_붕어,P_NAME_붕장어,P_NAME_블루화이팅,P_NAME_비너스백합,P_NAME_비단조개,P_NAME_뿔가자미,P_NAME_삼치,P_NAME_상어,P_NAME_새꼬리민태알,P_NAME_새꼬막,P_NAME_새뱅이,P_NAME_새우,P_NAME_새조개,P_NAME_샛돔,P_NAME_샛돔류알,P_NAME_서대,P_NAME_성게알,P_NAME_세네갈가이석태,P_NAME_소라,P_NAME_소주목탁가자미,P_NAME_송어,P_NAME_수조기,P_NAME_스피노잠,P_NAME_식용자라,P_NAME_실꼬리돔,P_NAME_쌍지붕어,P_NAME_쏘가리,P_NAME_아귀,P_NAME_아담스백합,P_NAME_아르헨티나붉은새우,P_NAME_양볼락,P_NAME_양초선홍치,P_NAME_양태,P_NAME_어름돔,P_NAME_얼룩볼락,P_NAME_연어,P_NAME_연어알,P_NAME_열빙어,P_NAME_열빙어알,P_NAME_영상가이석태,P_NAME_오징어,P_NAME_옥덩굴,P_NAME_옥돔,P_NAME_옥두어,P_NAME_왕게,P_NAME_왕게붙이,P_NAME_왕연어,P_NAME_우뭇가사리,P_NAME_위고둥,P_NAME_유럽물레고둥,P_NAME_은대구,P_NAME_은민대구,P_NAME_은민대구알,P_NAME_은밀복,P_NAME_은연어,P_NAME_은행게,P_NAME_이스라엘잉어,P_NAME_임연수어,P_NAME_잉어,P_NAME_자바리,P_NAME_자이언트그루퍼,P_NAME_자주복,P_NAME_장문볼락,P_NAME_장성베도라치,P_NAME_장수기름가자미,P_NAME_재첩,P_NAME_잿방어,P_NAME_적돔,P_NAME_적새우,P_NAME_전갱이,"P_NAME_전갱이,POMPANO",P_NAME_전복,P_NAME_전어,P_NAME_젓새우,P_NAME_정어리,P_NAME_조피볼락,P_NAME_주꾸미,P_NAME_주름백합,P_NAME_준치,P_NAME_줄민태,P_NAME_줄비늘치,P_NAME_쥐노래미,P_NAME_쥐돔,P_NAME_쥐치,P_NAME_진홍퉁돔,P_NAME_진환도상어,P_NAME_참게,P_NAME_참다랑어,P_NAME_참돔,P_NAME_참복,P_NAME_참조기,P_NAME_첨치가자미,P_NAME_청각,P_NAME_청대구,P_NAME_청상아리,P_NAME_청새리상어,P_NAME_청새치,P_NAME_청어,P_NAME_청어알,P_NAME_청회볼락,P_NAME_체장메기,P_NAME_칠성장어,P_NAME_코끼리조개,P_NAME_코드아이스피쉬,P_NAME_코토니,P_NAME_쿠자조기,P_NAME_큰구슬우렁이,P_NAME_큰민어,P_NAME_큰실말,P_NAME_큰징거미새우,P_NAME_키조개,P_NAME_태평양먹장어,P_NAME_털게,P_NAME_톱날꽃게,P_NAME_톳,P_NAME_틸라피아,P_NAME_파타고니아이빨고기,P_NAME_프로펠러조개,P_NAME_피뿔고둥,P_NAME_피조개,P_NAME_학공치,P_NAME_해물혼합,P_NAME_해삼,P_NAME_해파리,P_NAME_홍감펭,P_NAME_홍다리얼룩새우,P_NAME_홍민어,P_NAME_홍서대,P_NAME_홍어,"P_NAME_홍연어,Red salmon",P_NAME_홍합,P_NAME_화살오징어,P_NAME_황다랑어,P_NAME_황돔,P_NAME_황새치,P_NAME_황적퉁돔,P_NAME_회초리꼬리민태,P_NAME_흑기흉상어,P_NAME_흑밀복,P_NAME_흑점샛돔알,P_NAME_흑점줄전갱이,P_NAME_흰꼴뚜기,P_NAME_흰다리새우
377531,61.766234,2018,3,5,453,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0.51,7.846919,17.220868,0.51,7.846919,17.220868,62.57,2018-03-05,9.694118,7.017647,9.288235,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
130361,11.198062,2016,10,10,63,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,,,,,,,51.35,2016-10-10,5.888235,17.841176,21.7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
432708,3.073906,2018,6,18,1640,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,65.85,2018-06-18,2.552941,20.217647,20.011765,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
315260,1.88704,2017,10,30,246,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,-57.505263,2.301989,9.130741,-57.505263,2.301989,9.130741,54.15,2017-10-30,8.541176,13.229412,19.252941,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
620691,3.052472,2019,7,15,418,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,59.58,2019-07-15,2.941176,22.488235,22.36875,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


## Save files

# Train

## Set Metric

In [None]:
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import LinearRegression

# Function to print the MAE (Mean Absolute Error) score
def print_score(m : LinearRegression):
    res = ['mae train: ', mean_absolute_error(m.predict(X_train), y_train), 
           'mae val: ', mean_absolute_error(m.predict(X_valid), y_valid)]
    if hasattr(m, 'oob_score_'): res.append(m.oob_score_)
    print(res)

## Set Data

In [None]:
target = df_train['P_PRICE']
df_train.drop(columns = 'P_PRICE', inplace=True)

In [None]:
#Standardization
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()

# Function for splitting training and validation data
def split_vals(a, n : int): 
    return a[:n].copy(), a[n:].copy()

val_perc = 0.1 # % to use for validation set
n_valid = int(val_perc * 100000) 
n_trn = len(df_train)-n_valid

# Split data
raw_train, raw_valid = split_vals(df_train, n_trn)
X_train, X_valid = split_vals(df_train, n_trn)
y_train, y_valid = split_vals(target, n_trn)

X_train=sc.fit_transform(X_train)
X_valid=sc.transform(X_valid)

# Check dimensions of samples
print('Sample train shape: ', X_train.shape, 
      '\nSample target shape: ', y_train.shape, 
      '\nSample validation shape: ', X_valid.shape)

- metric 선정하기
- base model 선정 > 전처리 최소화 + linear regression model 로 정확도 구하기
- 모델마다 최적화하기
- 모델끼리 비교하기

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score
from sklearn.ensemble import AdaBoostRegressor

In [None]:
model = RandomForestRegressor(n_estimators=500,random_state=0)
model.fit(x_train,y_train)
cross = cross_val_score(model,x_test,y_test,cv = 5)

print('cross_tree',cross.mean())

In [None]:
from sklearn.svm import SVR
model = SVR()
model.fit(x_train,y_train)
cross_svr = cross_val_score(model,x_test,y_test,cv = 5)
print('cross_svr',cross_svr.mean())

In [None]:
from xgboost import XGBRegressor
model = XGBRegressor(n_estimators=500,learning_rate=0.05,max_depth=3)
model.fit(x_train,y_train)
cross_xgboost = cross_val_score(model,x_test,y_test,cv = 5)

print('cross_xgboost : ',cross_xgboost.mean())

In [None]:
from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(x_train,y_train)

cross_linear = cross_val_score(model,x_test,y_test,cv = 5)
print(cross_linear)
print('cross_linear : ',cross_linear.mean())

In [None]:
from sklearn.neural_network import MLPRegressor

model = MLPRegressor().fit(x_train,y_train)
cross_MLP = cross_val_score(model,x_test,y_test,cv = 5)

print('croos_MLP : ',cross_MLP.mean())