In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, QuantileTransformer, PowerTransformer, RobustScaler, MinMaxScaler

from wrangle import wrangle_telco
import split_scale

In [2]:
# Load the telco data through the project's wrangle helper; per the recorded
# output the result has customer_id, monthly_charges, tenure and total_charges.
df = wrangle_telco()
# Preview a single row to confirm the columns came through as expected.
df.head(1)

Unnamed: 0,customer_id,monthly_charges,tenure,total_charges
0,0013-SMEOE,109.7,71,7904.25


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1695 entries, 0 to 1694
Data columns (total 4 columns):
customer_id        1695 non-null object
monthly_charges    1695 non-null float64
tenure             1695 non-null int64
total_charges      1695 non-null float64
dtypes: float64(2), int64(1), object(1)
memory usage: 53.1+ KB


# Validate split_my_data()

In [4]:
# Split the full frame into train and test with the project helper.
# The recorded shapes below are 1186 and 509 rows out of 1695 (roughly 70/30).
# NOTE(review): split ratio / random seed are set inside split_scale — confirm there.
train, test = split_scale.split_my_data(df)

In [5]:
train.head(1)

Unnamed: 0,customer_id,monthly_charges,tenure,total_charges
1469,8661-BOYNW,84.4,72,6096.45


In [6]:
train.shape

(1186, 4)

In [7]:
test.head(1)

Unnamed: 0,customer_id,monthly_charges,tenure,total_charges
252,1494-EJZDW,20.15,10,220.8


In [8]:
test.shape

(509, 4)

# Define X (the independent variable, tenure) and y (the dependent/target variable, total_charges)

In [9]:
# Features: the double brackets keep X as a one-column DataFrame (2-D),
# which is the shape the scaler helpers below are called with.
X_train = train[['tenure']]
X_test = test[['tenure']]
# Target: select the column from the split frames. The previous version
# assigned the bare literal [['total_charges']] (a nested list holding a
# string), so y_train / y_test never contained any data.
y_train = train[['total_charges']]
y_test = test[['total_charges']]

In [10]:
X_train.head(1)

Unnamed: 0,tenure
1469,72


In [11]:
X_test.head(1)

Unnamed: 0,tenure
252,10


# Validate standard_scaler() function

In [13]:
# Call the project's standard_scaler helper; it returns the scaler object
# (a StandardScaler per the repr below) plus scaled train and test frames
# that keep the original row index.
# NOTE(review): presumably the scaler is fit on X_train only — confirm in split_scale.
scaler, X_train_scaled, X_test_scaled = split_scale.standard_scaler(X_train, X_test)


In [14]:
scaler

StandardScaler(copy=True, with_mean=True, with_std=True)

In [16]:
X_train_scaled.head()

Unnamed: 0,tenure
1469,0.838946
163,0.441745
392,0.044543
1546,0.838946
797,0.214772


In [17]:
X_test_scaled.head()

Unnamed: 0,tenure
252,-2.679126
632,0.328258
472,0.72546
1029,0.328258
910,0.782203


# Validate my scale_inverse() function

In [18]:
# Round-trip check: pass the StandardScaler from the previous section and its
# scaled frames back through scale_inverse. The recorded head() outputs show
# the original tenure values again (e.g. 72 and 10), now as floats.
X_train_unscaled, X_test_unscaled = split_scale.scale_inverse(scaler, X_train_scaled, X_test_scaled)

In [19]:
X_train_unscaled.head()

Unnamed: 0,tenure
1469,72.0
163,65.0
392,58.0
1546,72.0
797,61.0


In [20]:
X_test_unscaled.head()

Unnamed: 0,tenure
252,10.0
632,63.0
472,70.0
1029,63.0
910,71.0


# Validate my uniform_scaler() function

In [21]:
# NOTE(review): this re-loads the raw data and rebinds df, but the cells shown
# after this one keep using the X_train / X_test built earlier and never read
# df again — the reload looks redundant; confirm before removing.
df = wrangle_telco()

In [22]:
# Call the uniform_scaler helper; per the repr below it returns a
# QuantileTransformer (n_quantiles=100, output_distribution='uniform',
# random_state=123) together with the transformed train and test frames.
# This rebinds scaler / X_train_scaled / X_test_scaled from the earlier section.
scaler, X_train_scaled, X_test_scaled = split_scale.uniform_scaler(X_train, X_test)

In [23]:
scaler

QuantileTransformer(copy=True, ignore_implicit_zeros=False, n_quantiles=100,
                    output_distribution='uniform', random_state=123,
                    subsample=100000)

In [24]:
X_train_scaled.head()

Unnamed: 0,tenure
1469,1.0
163,0.510101
392,0.373737
1546,1.0
797,0.424242


In [25]:
X_test_scaled.head()

Unnamed: 0,tenure
252,0.025253
632,0.469697
472,0.691919
1029,0.469697
910,0.757576


### Optionally, peek at the distributions of X_train_scaled and X_test_scaled with describe()

In [26]:
X_train_scaled.describe()

Unnamed: 0,tenure
count,1186.0
mean,0.519964
std,0.317644
min,0.0
25%,0.247475
50%,0.489899
75%,0.757576
max,1.0


In [27]:
X_test_scaled.describe()

Unnamed: 0,tenure
count,509.0
mean,0.507496
std,0.328644
min,0.0
25%,0.222222
50%,0.489899
75%,0.757576
max,1.0


# Validate my gaussian_scaler() function

In [29]:
X_train.head(1)

Unnamed: 0,tenure
1469,72


In [30]:
X_test.head(1)

Unnamed: 0,tenure
252,10


In [31]:
# Call the gaussian_scaler helper; per the repr below it returns a
# PowerTransformer (method='yeo-johnson', standardize=False) plus the
# transformed train and test frames.
# NOTE(review): with standardize=False the recorded outputs are in the
# thousands (e.g. 9950.70 for tenure 72), i.e. not zero-mean / unit-variance —
# confirm this is the intended behavior of the helper.
scaler, X_train_scaled, X_test_scaled = split_scale.gaussian_scaler(X_train, X_test)

In [32]:
scaler

PowerTransformer(copy=True, method='yeo-johnson', standardize=False)

In [33]:
X_train_scaled.head()

Unnamed: 0,tenure
1469,9950.70254
163,7856.438822
392,6040.584127
1546,9950.70254
797,6785.375569


In [34]:
X_test_scaled.head()

Unnamed: 0,tenure
252,117.379965
632,7309.661082
472,9323.346607
1029,7309.661082
910,9634.096705


# Validate my min_max_scaler() function

In [35]:
# Call the min_max_scaler helper; per the repr below it returns a MinMaxScaler
# with feature_range=(0, 1) plus the scaled train and test frames
# (tenure 72 maps to 1.0 in the recorded train output).
scaler, X_train_scaled, X_test_scaled = split_scale.min_max_scaler(X_train, X_test)

In [36]:
scaler

MinMaxScaler(copy=True, feature_range=(0, 1))

In [37]:
X_train_scaled.head()

Unnamed: 0,tenure
1469,1.0
163,0.901408
392,0.802817
1546,1.0
797,0.84507


In [38]:
X_test_scaled.head()

Unnamed: 0,tenure
252,0.126761
632,0.873239
472,0.971831
1029,0.873239
910,0.985915


# Validate my iqr_robust_scaler() function

In [39]:
# Call the iqr_robust_scaler helper; per the repr below it returns a
# RobustScaler (quantile_range=(25.0, 75.0), centering and scaling enabled)
# plus the scaled train and test frames.
scaler, X_train_scaled, X_test_scaled = split_scale.iqr_robust_scaler(X_train, X_test)

In [40]:
scaler

RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
             with_scaling=True)

In [41]:
X_train_scaled.head()

Unnamed: 0,tenure
1469,0.363636
163,0.045455
392,-0.272727
1546,0.363636
797,-0.136364


In [42]:
X_test_scaled.head()

Unnamed: 0,tenure
252,-2.454545
632,-0.045455
472,0.272727
1029,-0.045455
910,0.318182


# Looks Good!