In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
from pydataset import data
import wrangle
import util
from sklearn.model_selection import train_test_split
from split_scale import split_my_data, standard_scaler, scale_inverse
from sklearn.preprocessing import StandardScaler, QuantileTransformer, PowerTransformer, RobustScaler, MinMaxScaler


In [2]:
# Load the telco data through the imported `wrangle` module; the summary printed
# below shows a 1685-row frame with monthly_charges, total_charges and tenure.
telco = wrangle.wrangle_telco()

In [3]:
# Print the shape / info / describe / preview summary for the full frame.
util.tell_me_about(telco)

DataFrame Shape:

(1685, 3)

Info about:

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1685 entries, 0 to 1694
Data columns (total 3 columns):
monthly_charges    1685 non-null float64
total_charges      1685 non-null float64
tenure             1685 non-null int64
dtypes: float64(2), int64(1)
memory usage: 52.7 KB
None

Describe:

       monthly_charges  total_charges      tenure
count      1685.000000    1685.000000  1685.00000
mean         60.872374    3728.933947    57.07181
std          34.712210    2571.252806    17.72913
min          18.400000      20.350000     1.00000
25%          24.050000    1278.800000    48.00000
50%          64.450000    3623.950000    64.00000
75%          90.550000    5999.850000    71.00000
max         118.750000    8672.450000    72.00000

Preview:

   monthly_charges  total_charges  tenure
0           109.70        7904.25      71
1            84.65        5377.80      63
2            90.45        5957.90      65
3            45.20        2460.55 

In [None]:
def split_my_data(df, train_ratio=.8, seed=123):
    """Split ``df`` into a training portion and a test portion.

    The work is delegated to ``train_test_split``; ``train_ratio`` is passed
    as its ``train_size`` and ``seed`` as its ``random_state``.

    Returns the pair ``(train, test)`` -- training portion first.
    """
    split_options = {"train_size": train_ratio, "random_state": seed}
    # Unpack explicitly so exactly two pieces are required from the splitter.
    train_part, test_part = train_test_split(df, **split_options)
    return train_part, test_part

In [4]:
# Hold out a test set; the summary of `train` printed further down shows
# 1348 of the 1685 rows (80%) end up in the training split.
train, test = split_my_data(telco)
# Peek at the first training rows; the original index labels are preserved.
train.head()

Unnamed: 0,monthly_charges,total_charges,tenure
119,75.5,5212.65,70
1424,20.3,1079.05,55
385,109.05,7108.2,65
1140,98.3,6859.5,70
1504,116.25,8564.75,71


In [6]:
# Summarize the training split with the same helper used for the full frame.
# Called through the imported `util` module: the import cell only does
# `import util`, so the bare name `tell_me_about` would raise NameError on a
# fresh kernel (Restart & Run All).
util.tell_me_about(train)

DataFrame Shape:

(1348, 3)

Info about:

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1348 entries, 119 to 1544
Data columns (total 3 columns):
monthly_charges    1348 non-null float64
total_charges      1348 non-null float64
tenure             1348 non-null int64
dtypes: float64(2), int64(1)
memory usage: 42.1 KB
None

Describe:

       monthly_charges  total_charges       tenure
count      1348.000000    1348.000000  1348.000000
mean         60.921180    3740.536907    57.277448
std          34.756867    2571.617986    17.448668
min          18.400000      20.350000     1.000000
25%          23.887500    1277.275000    49.000000
50%          64.400000    3734.700000    64.000000
75%          90.687500    5987.675000    71.000000
max         118.750000    8672.450000    72.000000

Preview:

      monthly_charges  total_charges  tenure
119             75.50        5212.65      70
1424            20.30        1079.05      55
385            109.05        7108.20      65
1140       

In [7]:
# Scale both splits; `standard_scaler` hands back the scaler object together
# with the scaled train and test frames.
# NOTE(review): presumably the scaler is fit on train only and then applied to
# test -- confirm in split_scale.
scaler, scaled_train, scaled_test = standard_scaler(train, test)
# Show the first rows of each scaled frame.
print(scaled_train.head())
print(scaled_test.head())

      monthly_charges  total_charges    tenure
119          0.419607       0.572659  0.729412
1424        -1.169158      -1.035331 -0.130571
385          1.385242       1.310036  0.442751
1140         1.075836       1.213291  0.729412
1504         1.592472       1.876641  0.786745
      monthly_charges  total_charges    tenure
305         -1.163401      -0.870704  0.844077
452          1.450002       1.488589  0.557416
917          1.389560       1.287299  0.328087
1421        -1.193622      -0.978302  0.385419
1557        -1.059786      -1.226585 -1.907870


In [8]:
# Undo the scaling to get values back on the original scale.
# Note: this rebinds `train` and `test`; in the printed result `tenure` comes
# back as float (e.g. 70.0) where the original column was int64.
train, test = scale_inverse(scaler, scaled_train, scaled_test)
# Show the first rows of each recovered frame.
print(train.head())
print(test.head())

      monthly_charges  total_charges  tenure
119             75.50        5212.65    70.0
1424            20.30        1079.05    55.0
385            109.05        7108.20    65.0
1140            98.30        6859.50    70.0
1504           116.25        8564.75    71.0
      monthly_charges  total_charges  tenure
305             20.50        1502.25    72.0
452            111.30        7567.20    67.0
917            109.20        7049.75    63.0
1421            19.45        1225.65    64.0
1557            24.10         587.40    24.0
