In [1]:
from sklearn.preprocessing import LabelEncoder

In [2]:
# Sample category values; '선풍기' appears twice, so a repeated category shows
# up in the encoded result.
items = ['TV', '냉장고', '전자레인지', '컴퓨터', '선풍기', '선풍기', '믹서', '건조기']

# Learn the category -> integer mapping and encode the list in one call.
encoder = LabelEncoder()
labels = encoder.fit_transform(items)
labels

array([0, 2, 5, 6, 4, 4, 3, 1])

In [3]:
# Fitted categories in encoding order: the value at position i is the
# category that was encoded as integer i.
encoder.classes_

array(['TV', '건조기', '냉장고', '믹서', '선풍기', '전자레인지', '컴퓨터'], dtype='<U5')

In [4]:
# Decode integer labels back into the original category strings.
encoder.inverse_transform([0, 2, 5, 6, 4, 4, 3, 1])

array(['TV', '냉장고', '전자레인지', '컴퓨터', '선풍기', '선풍기', '믹서', '건조기'],
      dtype='<U5')

In [5]:
from sklearn.preprocessing import OneHotEncoder
import numpy as np

In [6]:
# Integer labels produced by the LabelEncoder (a 1-D array at this point).
labels

array([0, 2, 5, 6, 4, 4, 3, 1])

In [8]:
# Turn the labels into a single-column 2-D array (one row per sample) before
# they are passed to OneHotEncoder.
labels = np.reshape(labels, (-1, 1))
labels

array([[0],
       [2],
       [5],
       [6],
       [4],
       [4],
       [3],
       [1]])

In [9]:
# Fit the one-hot encoder on the integer label column and encode it in the
# same call.
oh_encoder = OneHotEncoder()
oh_labels = oh_encoder.fit_transform(labels)

In [10]:
# Convert the encoder output to a dense array for display; every row has a
# single 1 in the column that corresponds to its integer label.
oh_labels.toarray()

array([[1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0.]])

In [11]:
# (number of samples, number of distinct categories)
oh_labels.shape

(8, 7)

In [12]:
import pandas as pd

In [17]:
# Wrap the raw item names in a one-column DataFrame so pandas can encode them.
df = pd.DataFrame({'item': items})
df

Unnamed: 0,item
0,TV
1,냉장고
2,전자레인지
3,컴퓨터
4,선풍기
5,선풍기
6,믹서
7,건조기


In [18]:
# One-hot encode the string column directly with pandas; no intermediate
# integer-label step is required.
# dtype=int pins the indicator columns to 0/1. Without it, pandas >= 2.0
# returns boolean (True/False) columns, which would not match the 0/1 table
# displayed for this cell.
pd.get_dummies(df, dtype=int)

Unnamed: 0,item_TV,item_건조기,item_냉장고,item_믹서,item_선풍기,item_전자레인지,item_컴퓨터
0,1,0,0,0,0,0,0
1,0,0,1,0,0,0,0
2,0,0,0,0,0,1,0
3,0,0,0,0,0,0,1
4,0,0,0,0,1,0,0
5,0,0,0,0,1,0,0
6,0,0,0,1,0,0,0
7,0,1,0,0,0,0,0


In [19]:
from sklearn.datasets import load_iris

# Load the iris dataset and expose its feature matrix as a DataFrame whose
# columns are labelled with the dataset's feature names.
iris = load_iris()
iris_data = iris.data
df = pd.DataFrame(iris_data, columns=iris.feature_names)
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [20]:
# Per-feature mean of the raw (unscaled) data.
df.mean()

sepal length (cm)    5.843333
sepal width (cm)     3.057333
petal length (cm)    3.758000
petal width (cm)     1.199333
dtype: float64

In [21]:
# Per-feature variance of the raw (unscaled) data; the features differ
# noticeably in spread.
df.var()

sepal length (cm)    0.685694
sepal width (cm)     0.189979
petal length (cm)    3.116278
petal width (cm)     0.581006
dtype: float64

In [22]:
from sklearn.preprocessing import StandardScaler

In [23]:
# Standardize every feature: learn the per-column statistics from df and
# apply them to the same data in one call.
scaler = StandardScaler()
iris_scaled = scaler.fit_transform(df)

In [24]:
# The scaler returns a plain array, so rebuild a DataFrame with the original
# feature names to keep the columns readable.
df_scaled = pd.DataFrame(data=iris_scaled, columns=iris.feature_names)
df_scaled

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,-0.900681,1.019004,-1.340227,-1.315444
1,-1.143017,-0.131979,-1.340227,-1.315444
2,-1.385353,0.328414,-1.397064,-1.315444
3,-1.506521,0.098217,-1.283389,-1.315444
4,-1.021849,1.249201,-1.340227,-1.315444
...,...,...,...,...
145,1.038005,-0.131979,0.819596,1.448832
146,0.553333,-1.282963,0.705921,0.922303
147,0.795669,-0.131979,0.819596,1.053935
148,0.432165,0.788808,0.933271,1.448832


In [25]:
# After standardization each feature mean is zero up to floating-point
# rounding (values on the order of 1e-15).
df_scaled.mean()

sepal length (cm)   -1.690315e-15
sepal width (cm)    -1.842970e-15
petal length (cm)   -1.698641e-15
petal width (cm)    -1.409243e-15
dtype: float64

In [26]:
# The variance is slightly above 1 rather than exactly 1: pandas' var()
# defaults to the sample variance (ddof=1), while StandardScaler divides by
# the population standard deviation, so the result is n / (n - 1).
df_scaled.var()

sepal length (cm)    1.006711
sepal width (cm)     1.006711
petal length (cm)    1.006711
petal width (cm)     1.006711
dtype: float64

In [27]:
from sklearn.preprocessing import MinMaxScaler

# Rescale every iris feature with a min-max scaler, fitting and transforming
# the same DataFrame in one call.
scaler = MinMaxScaler()
scaled = scaler.fit_transform(df)

In [28]:
# Rebuild a labelled DataFrame from the scaled array and check the lower
# bound of each feature.
df_scaled = pd.DataFrame(scaled, columns=iris.feature_names)
df_scaled.min()

sepal length (cm)    0.0
sepal width (cm)     0.0
petal length (cm)    0.0
petal width (cm)     0.0
dtype: float64

In [29]:
# Upper bound of each feature after min-max scaling.
df_scaled.max()

sepal length (cm)    1.0
sepal width (cm)     1.0
petal length (cm)    1.0
petal width (cm)     1.0
dtype: float64

In [30]:
# Toy data for the scaling demo, each shaped as a single feature column:
# train holds the integers 0..10, test holds 0..5.
train = np.arange(11).reshape(-1, 1)
test = np.arange(6).reshape(-1, 1)

In [33]:
# Fit a fresh min-max scaler on the training column, then scale that same
# column with it.
scaler = MinMaxScaler().fit(train)
scaled = scaler.transform(train)
# Show the original training values flattened to 1-D.
train.reshape(-1)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [34]:
# Scaled training values: 0..10 is mapped onto 0..1 in steps of 0.1.
scaled.reshape(-1)

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])

In [35]:
# PITFALL (shown on purpose): re-fitting the scaler on the test data replaces
# the range learned from train with the test range, so train and test end up
# on different scales. Compare with the cells that follow, where the
# train-fitted scaler is reused for test.
scaler.fit(test)
test_scaled = scaler.transform(test)
# Show the original test values flattened to 1-D.
test.reshape(-1)

array([0, 1, 2, 3, 4, 5])

In [36]:
# With the scaler re-fitted on test, 0..5 is stretched over 0..1 in steps of
# 0.2, so the value 5 maps to 1.0 here although it mapped to 0.5 under the
# train-fitted scaler.
test_scaled.reshape(-1)

array([0. , 0.2, 0.4, 0.6, 0.8, 1. ])

In [37]:
## Recommended scaling: fit on the training data only

# Start from a fresh scaler whose range is learned from train alone.
scaler = MinMaxScaler().fit(train)
scaled = scaler.transform(train)
# Show the original training values flattened to 1-D.
train.reshape(-1)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [38]:
# Scaled training values: 0..10 is mapped onto 0..1 in steps of 0.1.
scaled.reshape(-1)

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])

In [39]:
# Reuse the scaler as fitted on train (no re-fit) so the test data is put on
# the same scale as the training data.
test_scaled = scaler.transform(test)
# Show the original test values flattened to 1-D.
test.ravel()

array([0, 1, 2, 3, 4, 5])

In [40]:
# With the train-fitted scaler, test values keep the training scale: 0..5 is
# mapped to 0..0.5, matching how the same values were scaled in train.
test_scaled.reshape(-1)

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5])