In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# 필요한 라이브러리 import
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential, load_model

In [None]:
# Read the test data from CSV into `test`.
# NOTE(review): this absolute path is not under the /content/drive mount point
# used earlier in the notebook — confirm it resolves in the target runtime.
test = pd.read_csv(filepath_or_buffer='/data/round2_test.csv')

In [None]:
# Load the saved deep-learning (Keras) model that predicts P5.
model = load_model(filepath='/result/p5_predict_model_final.h5')

In [None]:
# Predict P5 for every test row from columns P1 through P4 using the loaded model.
p5_pred = model.predict(test.loc[:, ['P1', 'P2', 'P3', 'P4']])

In [None]:
# Add the P5 predictions to the test frame as column 'pred_p5'.
# NOTE(review): the shape of p5_pred is not visible here; this relies on pandas
# accepting it as a single column — confirm it is (n_rows,) or (n_rows, 1).
test['pred_p5'] = p5_pred

In [None]:
# Load the saved linear-regression model that also predicts P5.
# NOTE: this rebinds `model`, replacing the Keras model loaded in an earlier cell;
# re-running that earlier predict cell after this one would call this model instead.
from joblib import dump, load
model = load(filename='/result/model_lr_final.joblib')

In [None]:
# Predict P5 from columns P1 through P4 with the currently bound `model`
# (the linear-regression model when cells run in order), then display the result.
new_feat = model.predict(test.loc[:, ['P1', 'P2', 'P3', 'P4']])
new_feat

array([ 94.49604883, 159.64380412,  62.38459914, ...,  76.26509762,
       469.75580818,  78.44032398])

In [None]:
# --- Engineered feature 1 (new column) ---
# P5 as predicted by the linear-regression model.
test['new_feature'] = new_feat

# --- Engineered feature 2 (new column) ---
# Working assumption: P4 is the AX bid price and P2 / P3 are bid floors, so a bid
# can only win when the bid price is above both floors.
# new_feature2 is 1 when P4 is strictly greater than both P2 and P3, and 0 in every
# other case (including ties). Built in one vectorised step: the comparison result
# is cast to int64, which matches the dtype produced by initialising the column
# with 0 and then overwriting matching rows with 1.
test['new_feature2'] = ((test['P2'] < test['P4']) & (test['P3'] < test['P4'])).astype('int64')

# --- Engineered features 3 to 7 (new columns) ---
# new_feature2 is categorical, so the pairwise price differences are added as
# numeric counterparts based on the same reasoning.
test['P4 - P3'] = test['P4'] - test['P3']
test['P4 - P2'] = test['P4'] - test['P2']
test['P1 - P2'] = test['P1'] - test['P2']
test['P1 - P3'] = test['P1'] - test['P3']
test['P2 - P3'] = test['P2'] - test['P3']

In [None]:
# Remove most of the categorical columns from the test frame.
# The drop is in place, so running this cell a second time raises KeyError
# because the columns are already gone.
test.drop(
    labels=[
        'Bid Index', '시각', '매체 ID', 'ADID', 'DSP ID', '애드유닛 ID', '노출 ID',
        'SSP 입찰ID', 'DSP 입찰ID', 'AX 낙찰ID', 'WUID (웹 유저 ID)', 'OS 버전 ID',
        '국가코드 ID', '광고 응답 광고주 도메인',
    ],
    axis='columns',
    inplace=True,
)

In [None]:
# One-hot encode the remaining categorical columns, dropping the first level of each.
# NOTE(review): the dummy columns are derived only from the categories present in
# this test file; presumably they must match the columns model_dt was trained on — confirm.
test_d = pd.get_dummies(
    test,
    columns=['ADID 타입', '플랫폼', 'OS 종류', '사이즈 ID'],
    drop_first=True,
)

In [None]:
# Remove the one categorical column that was not one-hot encoded (in place).
test_d.drop(labels=['광고 응답 소재 카테고리'], axis='columns', inplace=True)

In [None]:
# Display the encoded test frame to inspect the final feature columns.
test_d

Unnamed: 0,환율,P1,P2,P3,P4,pred_p5,new_feature,new_feature2,P4 - P3,P4 - P2,...,P1 - P3,P2 - P3,ADID 타입_2,ADID 타입_5,ADID 타입_7,플랫폼_2,플랫폼_3,OS 종류_2,사이즈 ID_2,사이즈 ID_3
0,1228.0,178.305600,49.734000,36.84,106.983360,99.392052,94.496049,1,70.143360,57.249360,...,141.465600,12.894000,0,0,0,0,0,0,0,0
1,1228.0,291.772800,120.000160,122.80,175.063680,155.618088,159.643804,1,52.263680,55.063520,...,168.972800,-2.799840,0,1,0,0,1,0,0,0
2,1228.0,113.467200,49.734000,36.84,68.080320,63.752781,62.384599,1,31.240320,18.346320,...,76.627200,12.894000,0,0,0,0,0,0,0,0
3,1228.0,504.436025,250.001152,245.60,302.661615,269.483887,279.395192,1,57.061615,52.660463,...,258.836025,4.401152,0,0,0,0,1,0,0,1
4,1228.0,72.935758,49.734000,36.84,43.761455,39.539062,42.311258,0,6.921455,-5.972545,...,36.095758,12.894000,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
624137,1228.0,729.432000,240.000320,368.40,437.659200,393.687012,397.505046,1,69.259200,197.658880,...,361.032000,-128.399680,0,1,0,0,1,0,0,0
624138,1228.0,226.934400,120.000160,122.80,136.160640,130.547516,127.532354,1,13.360640,16.160480,...,104.134400,-2.799840,0,1,0,0,1,0,0,0
624139,1228.0,141.494253,49.734000,36.84,84.896552,80.423973,76.265098,1,48.056552,35.162552,...,104.654253,12.894000,0,0,0,0,0,0,0,0
624140,1228.0,875.318400,240.000320,368.40,525.191040,458.013550,469.755808,1,156.791040,285.190720,...,506.918400,-128.399680,0,1,0,0,1,0,0,0


In [None]:
# Load the model that was trained on the training data.
from joblib import dump, load
model_dt = load(filename='/result/model_dt_final_final.joblib')

In [None]:
# Predict the win / no-win label for each test row with the loaded model.
# NOTE(review): presumably test_d must have the same columns, in the same order, as
# the features used for training — confirm against the training notebook.
pred = model_dt.predict(test_d)

In [None]:
# Display the predicted labels.
pred

array([1, 1, 0, ..., 0, 1, 1])

In [None]:
# Load the submission template and display it.
# As the cell output shows, it has a 'Bid Index' column and an empty 'winning' column.
result = pd.read_csv(filepath_or_buffer='/data/(new)과제2_본선_예측결과_팀명 (1).csv')
result

Unnamed: 0,Bid Index,winning
0,bid0000000,
1,bid0000001,
2,bid0000002,
3,bid0000003,
4,bid0000004,
...,...,...
624137,bid0624137,
624138,bid0624138,
624139,bid0624139,
624140,bid0624140,


In [None]:
# Fill the template's 'winning' column with the predicted labels and display it.
# NOTE(review): values are assigned by position, so this assumes the template rows
# are in the same order as the test rows — confirm.
result = result.assign(winning=pred)
result

Unnamed: 0,Bid Index,winning
0,bid0000000,1
1,bid0000001,1
2,bid0000002,0
3,bid0000003,0
4,bid0000004,0
...,...,...
624137,bid0624137,1
624138,bid0624138,1
624139,bid0624139,0
624140,bid0624140,1


In [None]:
# Write the completed submission to CSV, omitting the index column.
result.to_csv(path_or_buf="/result/과제2_본선_예측결과_final_J'S.csv", index=False)