In [1]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

from math import sqrt
from scipy import stats

import warnings
# Silence every warning for the rest of the kernel session.
# NOTE(review): with no category/module filter this also hides deprecation
# warnings from pandas / scikit-learn that may be worth seeing — consider
# narrowing the filter.
warnings.filterwarnings("ignore")

from split_scale import split_my_data, standard_scaler, min_max_scaler, iqr_robust_scaler, uniform_scaler
from wrangle import wrangle_telco
from wrangle_zillow import wrangle_zillow

## Acquire and Prep Data from Codeup Database

In [2]:
# Load the telco customer data through the project's `wrangle` module helper
# (presumably acquisition and cleaning happen inside it — confirm there),
# then preview the first rows.
telco = wrangle_telco()
telco.head()

Unnamed: 0,customer_id,monthly_charges,tenure,total_charges
0,0013-SMEOE,109.7,71,7904.25
1,0014-BMAQU,84.65,63,5377.8
2,0016-QLJIS,90.45,65,5957.9
3,0017-DINOC,45.2,54,2460.55
4,0017-IUDMW,116.8,72,8456.75


In [3]:
# Inspect row count, dtypes and non-null counts before splitting and scaling.
telco.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1695 entries, 0 to 1694
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   customer_id      1695 non-null   object 
 1   monthly_charges  1695 non-null   float64
 2   tenure           1695 non-null   int64  
 3   total_charges    1695 non-null   float64
dtypes: float64(2), int64(1), object(1)
memory usage: 53.1+ KB


In [4]:
# Load the Zillow data through the project's `wrangle_zillow` helper and
# preview the first rows.
# NOTE(review): `zillow` is not referenced again in the cells shown here —
# confirm it is used further down, otherwise this load can be dropped.
zillow = wrangle_zillow()
zillow.head()

Unnamed: 0,bedrooms,bathrooms,square_feet,taxes,home_value,propertylandusedesc,fips_number,zip_code
0,4.0,2.0,1604,6089.82,498347.0,Single Family Residential,6037,96415.0
1,3.0,3.0,2384,6679.55,549917.0,Single Family Residential,6037,96452.0
2,3.0,2.0,1574,3876.31,235272.0,Single Family Residential,6037,97319.0
3,2.0,2.0,1619,4206.15,340000.0,Single Family Residential,6037,97329.0
4,2.0,3.0,2408,24353.42,2017254.0,Single Family Residential,6037,96086.0


## Split Data

In [5]:
# Partition the telco frame into train / validate / test frames with the
# project's split helper.
# NOTE(review): no seed is passed here — confirm whether split_my_data
# shuffles and, if so, fixes its own random state so the split is reproducible.
telco_train, telco_validate, telco_test = split_my_data(telco)

In [6]:
# Report the shape of each split, one split per output line.
print(
    f'train -> {telco_train.shape}',
    f'validate -> {telco_validate.shape}',
    f'test -> {telco_test.shape}',
    sep='\n',
)

train -> (949, 4)
validate -> (407, 4)
test -> (339, 4)


## Scale Data

In [7]:
# Scale the three numeric columns with the project's standard_scaler helper.
# It hands back the scaler object followed by one scaled frame per split.
# NOTE(review): presumably the scaler is fit on the train split only —
# confirm in split_scale.
scaler, telco_train_scaled, telco_validate_scaled, telco_test_scaled = standard_scaler(
    ['monthly_charges', 'tenure', 'total_charges'],
    telco_train,
    telco_validate,
    telco_test,
)

In [8]:
# Display the returned scaler object to confirm its type and settings.
scaler

StandardScaler(copy=True, with_mean=True, with_std=True)

In [9]:
# Preview the scaled training frame returned by standard_scaler.
telco_train_scaled.head()

Unnamed: 0,customer_id,monthly_charges,tenure,total_charges,monthly_charges_scaled,tenure_scaled,total_charges_scaled
1256,7501-IWUNG,73.8,61,4616.05,0.361757,0.245462,0.341049
225,1303-SRDOK,69.05,55,3842.6,0.22511,-0.082012,0.043393
662,3967-VQOGC,24.9,67,1680.25,-1.044987,0.572936,-0.788773
628,3777-XROBG,19.55,58,1079.65,-1.198894,0.081725,-1.019909
824,5075-JSDKI,24.45,59,1493.1,-1.057932,0.136304,-0.860796


## `.inverse_transform()`

Apply the `.inverse_transform()` method to your scaled data. Is the resulting dataset the exact same as the original data?

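**Yes — in the rows previewed below, the recovered values match the originals at display precision. The one visible difference is dtype: `tenure` comes back as a float (`61.0`) instead of the original integer (`61`), because `.inverse_transform()` returns a float array.**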

In [10]:
# Scaled columns to feed back through the scaler's inverse transform.
columns_to_unscale = [
    'monthly_charges_scaled',
    'tenure_scaled',
    'total_charges_scaled',
]

# Names for the recovered columns, in the same order as columns_to_unscale.
new_column_names = [
    'unscaled_monthly_charges',
    'unscaled_tenure',
    'unscaled_total_charges',
]

In [11]:
# Run the scaled training columns back through the scaler's inverse transform
# and attach the recovered values next to the original columns for a
# side-by-side comparison. The recovered frame reuses telco_train's index so
# the column-wise concat lines up row for row.
telco_train_unscaled = pd.concat(
    [
        telco_train,
        pd.DataFrame(
            data=scaler.inverse_transform(telco_train_scaled[columns_to_unscale]),
            index=telco_train.index,
            columns=new_column_names,
        ),
    ],
    axis=1,
)

In [12]:
# Compare the original columns with their inverse-transformed counterparts.
telco_train_unscaled.head()

Unnamed: 0,customer_id,monthly_charges,tenure,total_charges,unscaled_monthly_charges,unscaled_tenure,unscaled_total_charges
1256,7501-IWUNG,73.8,61,4616.05,73.8,61.0,4616.05
225,1303-SRDOK,69.05,55,3842.6,69.05,55.0,3842.6
662,3967-VQOGC,24.9,67,1680.25,24.9,67.0,1680.25
628,3777-XROBG,19.55,58,1079.65,19.55,58.0,1079.65
824,5075-JSDKI,24.45,59,1493.1,24.45,59.0,1493.1
