In [1]:
import numpy as np
import os
import pandas as pd
pd.set_option('display.max_columns',None)
import dask.dataframe as dd
from dask.diagnostics import ProgressBar
ProgressBar().register()

import matplotlib.pyplot as plt 

plt.rc('font', family='AppleGothic')
plt.rcParams['axes.unicode_minus'] = False

import seaborn as sns 
from tqdm import tqdm
tqdm.pandas()

import datetime
import ast
import math
import gc

from pandarallel import pandarallel
pandarallel.initialize(nb_workers=7, progress_bar=True)

from sklearn.preprocessing import MinMaxScaler

def say(comment):
  """Speak ``comment`` aloud by invoking the external ``say`` command.

  The message is handed to the program as a single argument without going
  through a shell, so quotes, semicolons, backticks or ``$(...)`` inside
  ``comment`` are spoken literally instead of being executed as shell code.

  Args:
    comment: Message to speak. It is converted with ``str()``, matching the
      f-string formatting the shell-based version relied on.

  Returns:
    None. The call blocks until the ``say`` process exits. This is a
    best-effort notification: a missing ``say`` executable or a non-zero
    exit status is ignored rather than raised.
  """
  # Local import keeps this cell self-contained without touching the file's
  # import block.
  import subprocess

  try:
    # Argument-list form: no shell is involved, so nothing in `comment`
    # can be interpreted as a command.
    subprocess.run(['say', str(comment)], check=False)
  except OSError:
    # `say` is not available on this machine (presumably it is the macOS
    # speech utility); stay silent, just as os.system() never raised here.
    pass

# Suppress warnings
import warnings
warnings.filterwarnings('ignore')
from branca.colormap import linear

# Render figures inline in the browser that is running the notebook
%matplotlib inline       

from matplotlib import font_manager, rc
plt.rcParams['axes.unicode_minus']= False

INFO: Pandarallel will run on 7 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


In [2]:
# Load the DataFrame stored in the pickle file (path is relative to the working directory).
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
df = pd.read_pickle('0216_0357_skill2pgm_formodel_ver1.pkl')

In [3]:
# Incoming calls ('인입콜') divided by duration and scaled by 60.
# NOTE(review): presumably a calls-per-60-minutes rate — confirm that `duration` is in minutes.
df['60_norm'] = df['인입콜'].div(df['duration']).mul(60)

In [4]:
# Summary statistics for the numeric columns, including the new 60_norm column.
df.describe()

Unnamed: 0,인입콜,year,duration,판매가평균,price_max_all,price_min_all,prod_num,한정표현_num,주말/공휴일,sh_num,month_sin,month_cos,hour_sin,hour_cos,시차변수,week,price_max_top3,price_min_top3,price1,price2,price3,score1,score2,score3,60_norm
count,34051.0,34051.0,34051.0,34051.0,34051.0,34051.0,34051.0,34051.0,34051.0,30949.0,34051.0,34051.0,34051.0,34051.0,33736.0,34051.0,34051.0,34051.0,34051.0,34051.0,34051.0,34051.0,34051.0,34051.0,34051.0
mean,540.260374,1.523421,60.60565,997864.3,1106457.0,920921.0,2.326833,0.347684,0.183196,1.740315,-0.01344158,0.004026106,0.0345938,-0.01774256,7646.900018,27.055828,1042208.0,944016.6,991097.3,989296.3,989747.5,0.048288,0.047106,0.045639,464.93689
std,866.401881,0.499459,26.819445,3674586.0,3726481.0,3663482.0,3.772354,0.533538,0.386833,0.45361,0.7044509,0.7096347,0.7260723,0.6865397,2040.914712,15.089268,3701406.0,3677125.0,3687755.0,3687387.0,3687929.0,0.052206,0.050315,0.047085,580.352054
min,0.0,1.0,10.0,16900.0,16900.0,9000.0,1.0,0.0,0.0,1.0,-1.0,-1.0,-1.0,-1.0,21.6,1.0,15000.0,9000.0,9900.0,11000.0,9000.0,0.0,0.0,0.0,0.0
25%,80.0,1.0,60.0,62425.0,69000.0,59800.0,1.0,0.0,0.0,1.0,-0.8660254,-0.5,-0.7071068,-0.7071068,6196.4,14.0,64900.0,59900.0,59900.0,59900.0,59900.0,0.006587,0.006587,0.006587,90.0
50%,189.0,2.0,60.0,128000.0,148000.0,99000.0,1.0,0.0,0.0,2.0,-2.449294e-16,6.123234000000001e-17,1.224647e-16,-1.83697e-16,7398.2,27.0,134000.0,99000.0,119000.0,119000.0,118000.0,0.033105,0.033105,0.033105,201.428571
75%,797.0,2.0,60.0,759000.0,865800.0,399000.0,2.0,1.0,0.0,2.0,0.5,0.8660254,0.7071068,0.7071068,8936.6,40.0,729000.0,489500.0,657000.0,649000.0,680000.0,0.052381,0.052381,0.047506,744.428571
max,75228.0,2.0,240.0,68975000.0,68975000.0,68975000.0,66.0,4.0,1.0,5.0,1.0,1.0,1.0,1.0,16647.6,53.0,68975000.0,68975000.0,68975000.0,68975000.0,68975000.0,0.366812,0.366812,0.366812,36109.44


In [10]:
# Exclusive upper bound on the 60_norm rate; rows at or above it are excluded.
NORM_RATE_CAP = 2000

# Keep only the rows under the cap, then drop the helper column 60_norm
# so it does not appear in new_df.
new_df = df.loc[df['60_norm'] < NORM_RATE_CAP].drop(columns='60_norm')

In [12]:
# Summary statistics of the filtered frame, to inspect the effect of the 60_norm cut-off.
new_df.describe()

Unnamed: 0,인입콜,year,duration,판매가평균,price_max_all,price_min_all,prod_num,한정표현_num,주말/공휴일,sh_num,month_sin,month_cos,hour_sin,hour_cos,시차변수,week,price_max_top3,price_min_top3,price1,price2,price3,score1,score2,score3
count,33545.0,33545.0,33545.0,33545.0,33545.0,33545.0,33545.0,33545.0,33545.0,30445.0,33545.0,33545.0,33545.0,33545.0,33236.0,33545.0,33545.0,33545.0,33545.0,33545.0,33545.0,33545.0,33545.0,33545.0
mean,498.266895,1.527441,60.278253,1011210.0,1120053.0,933773.0,2.311432,0.341511,0.183872,1.738315,-0.01469249,0.002255408,0.03380154,-0.01064658,7636.668745,27.066865,1056004.0,957091.6,1004567.0,1002755.0,1003143.0,0.047272,0.04618,0.045001
std,679.83928,0.499254,26.586659,3700530.0,3752375.0,3689488.0,3.76309,0.529638,0.387386,0.453173,0.7046257,0.7094445,0.7256974,0.6871224,2035.036911,15.063934,3727448.0,3703181.0,3713791.0,3713425.0,3713974.0,0.05081,0.049044,0.046731
min,0.0,1.0,10.0,16900.0,16900.0,9000.0,1.0,0.0,0.0,1.0,-1.0,-1.0,-1.0,-1.0,21.6,1.0,15000.0,9000.0,9900.0,11000.0,9000.0,0.0,0.0,0.0
25%,78.0,1.0,55.0,64000.0,69000.0,59800.0,1.0,0.0,0.0,1.0,-0.8660254,-0.5,-0.7071068,-0.7071068,6196.4,14.0,64900.0,59900.0,59900.0,59900.0,59900.0,0.006587,0.006587,0.006587
50%,184.0,2.0,60.0,129000.0,149000.0,99000.0,1.0,0.0,0.0,2.0,-2.449294e-16,-1.83697e-16,1.224647e-16,6.123234000000001e-17,7380.4,27.0,138000.0,108000.0,119900.0,119000.0,119000.0,0.033105,0.033105,0.033105
75%,746.0,2.0,60.0,780000.0,869000.0,459000.0,2.0,1.0,0.0,2.0,0.5,0.8660254,0.7071068,0.7071068,8895.2,40.0,759000.0,509000.0,699000.0,699000.0,699000.0,0.052381,0.047506,0.047506
max,6342.0,2.0,240.0,68975000.0,68975000.0,68975000.0,66.0,4.0,1.0,5.0,1.0,1.0,1.0,1.0,16647.6,53.0,68975000.0,68975000.0,68975000.0,68975000.0,68975000.0,0.366812,0.366812,0.366812
