<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Acquire-and-Prep-Data-from-Codeup-Database" data-toc-modified-id="Acquire-and-Prep-Data-from-Codeup-Database-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Acquire and Prep Data from Codeup Database</a></span></li><li><span><a href="#Split-Data" data-toc-modified-id="Split-Data-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Split Data</a></span></li><li><span><a href="#Scale-Data-with-Standard-Scaler-Function" data-toc-modified-id="Scale-Data-with-Standard-Scaler-Function-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Scale Data with Standard Scaler Function</a></span></li><li><span><a href="#Create-General-Scaler-Function" data-toc-modified-id="Create-General-Scaler-Function-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Create General Scaler Function</a></span></li><li><span><a href="#Put-the-Guts-into-a-Function" data-toc-modified-id="Put-the-Guts-into-a-Function-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Put the Guts into a Function</a></span></li><li><span><a href="#Use-Function-From-Module" data-toc-modified-id="Use-Function-From-Module-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Use Function From Module</a></span></li><li><span><a href="#.inverse_transform()" data-toc-modified-id=".inverse_transform()-7"><span class="toc-item-num">7&nbsp;&nbsp;</span><code>.inverse_transform()</code></a></span></li></ul></div>

In [65]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

from math import sqrt
from scipy import stats

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import warnings
warnings.filterwarnings("ignore")

from split_scale import split_my_data, standard_scaler, gen_scaler
from wrangle import wrangle_telco
from wrangle_zillow import wrangle_zillow

## Acquire and Prep Data from Codeup Database

In [36]:
telco = wrangle_telco()
telco.head()

Unnamed: 0,customer_id,monthly_charges,tenure,total_charges
0,0013-SMEOE,109.7,71,7904.25
1,0014-BMAQU,84.65,63,5377.8
2,0016-QLJIS,90.45,65,5957.9
3,0017-DINOC,45.2,54,2460.55
4,0017-IUDMW,116.8,72,8456.75


In [37]:
telco.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1695 entries, 0 to 1694
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   customer_id      1695 non-null   object 
 1   monthly_charges  1695 non-null   float64
 2   tenure           1695 non-null   int64  
 3   total_charges    1695 non-null   float64
dtypes: float64(2), int64(1), object(1)
memory usage: 53.1+ KB


In [38]:
zillow = wrangle_zillow()
zillow.head()

Unnamed: 0,bedrooms,bathrooms,square_feet,taxes,home_value,propertylandusedesc,fips_number,zip_code
0,4.0,2.0,1604,6089.82,498347.0,Single Family Residential,6037,96415.0
1,3.0,3.0,2384,6679.55,549917.0,Single Family Residential,6037,96452.0
2,3.0,2.0,1574,3876.31,235272.0,Single Family Residential,6037,97319.0
3,2.0,2.0,1619,4206.15,340000.0,Single Family Residential,6037,97329.0
4,2.0,3.0,2408,24353.42,2017254.0,Single Family Residential,6037,96086.0


## Split Data

In [39]:
telco_train, telco_validate, telco_test = split_my_data(telco)

In [40]:
print(f'train -> {telco_train.shape}')
print(f'validate -> {telco_validate.shape}')
print(f'test -> {telco_test.shape}')

train -> (949, 4)
validate -> (407, 4)
test -> (339, 4)


___

In [41]:
zillow_train, zillow_validate, zillow_test = split_my_data(zillow)

In [42]:
print(f'train -> {zillow_train.shape}')
print(f'validate -> {zillow_validate.shape}')
print(f'test -> {zillow_test.shape}')

train -> (8929, 8)
validate -> (3828, 8)
test -> (3190, 8)


## Scale Data with Standard Scaler Function

In [43]:
scaler, telco_train_scaled, telco_validate_scaled, telco_test_scaled = standard_scaler(['monthly_charges', 'tenure', 'total_charges'], telco_train, telco_validate, telco_test)

In [44]:
scaler

StandardScaler(copy=True, with_mean=True, with_std=True)

In [45]:
telco_train_scaled.head()

Unnamed: 0,customer_id,monthly_charges,tenure,total_charges,monthly_charges_scaled,tenure_scaled,total_charges_scaled
1256,7501-IWUNG,73.8,61,4616.05,0.361757,0.245462,0.341049
225,1303-SRDOK,69.05,55,3842.6,0.22511,-0.082012,0.043393
662,3967-VQOGC,24.9,67,1680.25,-1.044987,0.572936,-0.788773
628,3777-XROBG,19.55,58,1079.65,-1.198894,0.081725,-1.019909
824,5075-JSDKI,24.45,59,1493.1,-1.057932,0.136304,-0.860796


## Create General Scaler Function

In [46]:
zillow.head(1)

Unnamed: 0,bedrooms,bathrooms,square_feet,taxes,home_value,propertylandusedesc,fips_number,zip_code
0,4.0,2.0,1604,6089.82,498347.0,Single Family Residential,6037,96415.0


In [47]:
columns_to_scale = ['bedrooms', 'bathrooms', 'square_feet']

In [48]:
new_column_names = [c + '_scaled' for c in columns_to_scale]

In [49]:
scaler = MinMaxScaler().fit(zillow_train[columns_to_scale])

In [50]:
zillow_train_scaled = pd.concat([
                        zillow_train,
                        pd.DataFrame(scaler.transform(zillow_train[columns_to_scale]), 
                        columns=new_column_names, 
                        index=zillow_train.index)],
                        axis=1)

In [51]:
zillow_train_scaled.head(1)

Unnamed: 0,bedrooms,bathrooms,square_feet,taxes,home_value,propertylandusedesc,fips_number,zip_code,bedrooms_scaled,bathrooms_scaled,square_feet_scaled
11287,3.0,2.0,1283,6387.27,559186.0,Single Family Residential,6037,96206.0,0.083333,0.052632,0.068678


## Put the Guts into a Function

In [52]:
def gen_scaler(columns_to_scale, train, validate, test, scaler):
    """
    Takes in a a list of string names for columns, train, validate, and test dfs with numeric values only,
    and a scaler and returns scaler, train_scaled, validate_scaled, test_scaled dfs
    """
    new_column_names = [c + '_scaled' for c in columns_to_scale]
    
    scaler.fit(train[columns_to_scale])
    
    train_scaled = pd.concat([
                        train,
                        pd.DataFrame(scaler.transform(train[columns_to_scale]), 
                        columns=new_column_names, 
                        index=train.index)],
                        axis=1)
    
    validate_scaled = pd.concat([
                        validate,
                        pd.DataFrame(scaler.transform(validate[columns_to_scale]), 
                        columns=new_column_names, 
                        index=validate.index)],
                        axis=1)
    
    test_scaled = pd.concat([
                        test,
                        pd.DataFrame(scaler.transform(test[columns_to_scale]), 
                        columns=new_column_names, 
                        index=test.index)],
                        axis=1)
    
    return scaler, train_scaled, validate_scaled, test_scaled

In [53]:
scaler, zillow_train_scaled, zillow_validate_scaled, zillow_test_scaled = gen_scaler(['bedrooms', 'bathrooms', 'square_feet'],
                                                                                 zillow_train, zillow_validate, zillow_test,
                                                                                 scaler=MinMaxScaler())

In [54]:
zillow_train_scaled.head(1)

Unnamed: 0,bedrooms,bathrooms,square_feet,taxes,home_value,propertylandusedesc,fips_number,zip_code,bedrooms_scaled,bathrooms_scaled,square_feet_scaled
11287,3.0,2.0,1283,6387.27,559186.0,Single Family Residential,6037,96206.0,0.083333,0.052632,0.068678


In [55]:
scaler, zillow_train_scaled, zillow_validate_scaled, zillow_test_scaled = gen_scaler(['bedrooms'],
                                                                                 zillow_train, zillow_validate, zillow_test,
                                                                                 scaler=MinMaxScaler())

In [56]:
zillow_train_scaled.head(1)

Unnamed: 0,bedrooms,bathrooms,square_feet,taxes,home_value,propertylandusedesc,fips_number,zip_code,bedrooms_scaled
11287,3.0,2.0,1283,6387.27,559186.0,Single Family Residential,6037,96206.0,0.083333


___

## Use Function From Module

In [57]:
scaler, zillow_train_scaled, zillow_validate_scaled, zillow_test_scaled = gen_scaler(['bedrooms', 'bathrooms', 'square_feet'],
                                                                                 zillow_train, zillow_validate, zillow_test,
                                                                                 scaler=MinMaxScaler())

In [58]:
zillow_train_scaled.head(1)

Unnamed: 0,bedrooms,bathrooms,square_feet,taxes,home_value,propertylandusedesc,fips_number,zip_code,bedrooms_scaled,bathrooms_scaled,square_feet_scaled
11287,3.0,2.0,1283,6387.27,559186.0,Single Family Residential,6037,96206.0,0.083333,0.052632,0.068678


## `.inverse_transform()`

Apply the `.inverse_transform()` method to your scaled data. Is the resulting dataset the exact same as the original data?

**Yes, it looks exactly the same.**

In [60]:
columns_to_unscale = ['monthly_charges_scaled', 'tenure_scaled', 'total_charges_scaled']
new_column_names = ['unscaled_monthly_charges', 'unscaled_tenure', 'unscaled_total_charges']

In [61]:
telco_train_unscaled = pd.concat([
                        telco_train,
                        pd.DataFrame(scaler.inverse_transform(telco_train_scaled[columns_to_unscale]), 
                        columns=new_column_names, 
                        index=telco_train.index)],
                        axis=1)

In [62]:
telco_train_unscaled.head()

Unnamed: 0,customer_id,monthly_charges,tenure,total_charges,unscaled_monthly_charges,unscaled_tenure,unscaled_total_charges
1256,7501-IWUNG,73.8,61,4616.05,5392.556026,3761.865477,5102.19467
225,1303-SRDOK,69.05,55,3842.6,3476.493227,-829.97661,928.450623
662,3967-VQOGC,24.9,67,1680.25,-14332.80626,8353.707565,-10740.171061
628,3777-XROBG,19.55,58,1079.65,-16490.898044,1465.944434,-13981.170059
824,5075-JSDKI,24.45,59,1493.1,-14514.327999,2231.251448,-11750.082753
