# Scaling Numeric Data

- Create ```split_scale.py``` that will contain the functions that follow. 
- Each scaler function should create the object, fit and transform both train and test. 
- They should return the scaler, train dataframe scaled, test dataframe scaled. 
- Be sure your indices represent the original indices from train/test, as those represent the indices from the original dataframe. 
- Be sure to set a random state where applicable for reproducibility!

In [2]:
import pandas as pd 
import numpy as np
import sklearn.preprocessing
import pydataset

In [71]:
# Load the 'tips' dataset through pydataset.
tips = pydataset.data('tips')
# Feature columns used for scaling; double brackets keep X a DataFrame.
X = tips[['total_bill', 'size']]
# Target column, also kept as a one-column DataFrame.
y = tips[['tip']]
# Preview the first rows of the full dataset.
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
1,16.99,1.01,Female,No,Sun,Dinner,2
2,10.34,1.66,Male,No,Sun,Dinner,3
3,21.01,3.5,Male,No,Sun,Dinner,3
4,23.68,3.31,Male,No,Sun,Dinner,2
5,24.59,3.61,Female,No,Sun,Dinner,4


## 1. Split function

In [72]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as the test set; the fixed random_state makes
# the split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)

In [77]:
X_test.head()

Unnamed: 0,total_bill,size
113,38.07,3
20,20.65,3
188,30.46,5
170,10.63,2
32,18.35,4


### 1.1 split_my_data

In [79]:
def split_my_data(X, y, train_pct, random_state=123):
    """Split features and target into train and test subsets.

    Args:
        X: Feature data, in any form accepted by ``train_test_split``.
        y: Target data with the same number of rows as ``X``.
        train_pct: Forwarded to ``train_test_split`` as ``train_size``
            (e.g. ``0.8`` to keep 80% of the rows for training).
        random_state: Seed forwarded to ``train_test_split``. Defaults to
            123, the value that used to be hard-coded, so existing calls
            keep producing the same split.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_pct, random_state=random_state
    )
    return X_train, X_test, y_train, y_test

In [87]:
X_train, X_test, y_train, y_test = split_my_data(X, y, 0.8)

In [95]:
X_train.head()

Unnamed: 0,total_bill,size
24,39.42,4
191,15.69,2
210,12.76,2
11,10.27,2
197,10.34,2


In [96]:
X_train.columns

Index(['total_bill', 'size'], dtype='object')

## 2. Scale Functions

- ### 2.1 Standard scale

In [99]:
from sklearn.preprocessing import StandardScaler

def standard_scaler(train, test):
    """Standardize train and test with a StandardScaler fitted on train.

    Args:
        train: DataFrame used to fit the scaler; its columns are the ones
            overwritten with scaled values in both outputs.
        test: DataFrame transformed with the scaler fitted on ``train``.

    Returns:
        The tuple ``(train_scaled, test_scaled, scaler)``. Both scaled
        frames are copies, so they keep the index of their source frame
        and the inputs are not modified.
    """
    # Fit on the training data only; the same fitted scaler is then
    # applied to both frames.
    fitted = StandardScaler().fit(train)
    feature_names = train.columns

    def _scaled_copy(frame):
        # Work on a copy so the caller's frame is left untouched.
        result = frame.copy()
        result[feature_names] = fitted.transform(frame)
        return result

    return _scaled_copy(train), _scaled_copy(test), fitted
    

In [105]:
train_scaled, test_scaled, scaler = standard_scaler(X_train, X_test)

In [108]:
train_scaled.head()

Unnamed: 0,total_bill,size
24,2.227511,1.512853
191,-0.440469,-0.57939
210,-0.769891,-0.57939
11,-1.049843,-0.57939
197,-1.041973,-0.57939


- ### 2.2 Inverse scale

In [109]:
scaler.inverse_transform(train_scaled)

array([[39.42,  4.  ],
       [15.69,  2.  ],
       [12.76,  2.  ],
       [10.27,  2.  ],
       [10.34,  2.  ],
       [19.81,  2.  ],
       [13.37,  2.  ],
       [35.26,  4.  ],
       [34.83,  4.  ],
       [10.09,  2.  ],
       [23.33,  2.  ],
       [11.61,  2.  ],
       [25.56,  4.  ],
       [21.5 ,  4.  ],
       [34.3 ,  6.  ],
       [26.86,  2.  ],
       [27.05,  6.  ],
       [10.77,  2.  ],
       [12.74,  2.  ],
       [50.81,  3.  ],
       [13.81,  2.  ],
       [48.33,  4.  ],
       [12.54,  2.  ],
       [18.15,  3.  ],
       [32.83,  2.  ],
       [ 9.78,  2.  ],
       [13.27,  2.  ],
       [ 9.6 ,  2.  ],
       [17.46,  2.  ],
       [24.08,  4.  ],
       [18.24,  2.  ],
       [16.4 ,  2.  ],
       [26.59,  3.  ],
       [29.8 ,  6.  ],
       [20.53,  4.  ],
       [ 8.58,  1.  ],
       [23.17,  4.  ],
       [43.11,  4.  ],
       [17.26,  3.  ],
       [15.98,  3.  ],
       [11.87,  2.  ],
       [27.18,  2.  ],
       [15.04,  2.  ],
       [27.

In [110]:
pd.DataFrame(scaler.inverse_transform(train_scaled), columns=train_scaled.columns, index=train_scaled.index)

Unnamed: 0,total_bill,size
24,39.42,4.0
191,15.69,2.0
210,12.76,2.0
11,10.27,2.0
197,10.34,2.0
...,...,...
99,21.01,2.0
221,12.16,2.0
67,16.45,2.0
127,8.52,2.0


In [111]:
def scale_inverse(df_scaled, scaler):
    """Undo a scaler's transformation and return the result as a DataFrame.

    Args:
        df_scaled: Scaled DataFrame; its index and column labels are
            reused for the result.
        scaler: Object exposing ``inverse_transform``.

    Returns:
        A new DataFrame holding ``scaler.inverse_transform(df_scaled)``
        with the same index and columns as ``df_scaled``.
    """
    restored_values = scaler.inverse_transform(df_scaled)
    return pd.DataFrame(
        restored_values,
        index=df_scaled.index,
        columns=df_scaled.columns,
    )

In [112]:
scale_inverse(train_scaled, scaler)

Unnamed: 0,total_bill,size
24,39.42,4.0
191,15.69,2.0
210,12.76,2.0
11,10.27,2.0
197,10.34,2.0
...,...,...
99,21.01,2.0
221,12.16,2.0
67,16.45,2.0
127,8.52,2.0
