### 1. Apply the scalers we talked about in this lesson to your data and visualize the results in a way you find helpful.

In [None]:
import matplotlib.pyplot as plt
import sklearn.preprocessing
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import QuantileTransformer

from acquire import get_telco_data
from prepare import prepare_telco_all
from prepare import prepare_telco
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Pull the raw Telco customer data and preview its first five rows.
df = get_telco_data()
df.head(5)

In [None]:
# Split the raw frame into the three modelling subsets and report each shape.
train, validate, test = prepare_telco(df)
for split_name, split_frame in (('train', train), ('validate', validate), ('test', test)):
    print(f'{split_name} -> {split_frame.shape}')
# Transposed preview so every column is visible as a row.
train.head().T

In [None]:
# Tenure on the x-axis against monthly charges on the y-axis.
df.plot.scatter(x='tenure', y='monthly_charges')

In [None]:
# Same relationship as the scatter plot, coloured by contract type.
sns.relplot(data=df, x='tenure', y='monthly_charges', hue='contract_type')

In [None]:
# Summary statistics for tenure.
print(df['tenure'].describe())

In [None]:
# Distribution of tenure in the training split, before any scaling.
train['tenure'].plot.hist()

In [None]:
# Create the scaler and fit it on the training split only, so the
# validate/test splits are scaled with parameters learned from train.
scaler = sklearn.preprocessing.MinMaxScaler().fit(train[['tenure']])
# Apply the fitted scaler to every split.
for subset in (train, test, validate):
    subset['tenure_scaled'] = scaler.transform(subset[['tenure']])

In [None]:
# Side-by-side histograms: raw tenure (left) vs. min-max scaled tenure (right).
plt.figure(figsize=(13, 6))
plt.subplot(1, 2, 1)
train['tenure'].plot.hist(title='Original')
plt.subplot(1, 2, 2)
train['tenure_scaled'].plot.hist(title='Min-Max Scaled')

### MinMaxScaler()

In [None]:
# Create the scaler and fit it on the training split only.
scaler = sklearn.preprocessing.MinMaxScaler().fit(train[['monthly_charges']])
# Apply the fitted scaler to every split.
for subset in (train, test, validate):
    subset['monthly_charges_scaled'] = scaler.transform(subset[['monthly_charges']])

In [None]:
# Side-by-side histograms: raw monthly charges (left) vs. min-max scaled (right).
plt.figure(figsize=(13, 6))
plt.subplot(1, 2, 1)
train['monthly_charges'].plot.hist(title='Original')
plt.subplot(1, 2, 2)
train['monthly_charges_scaled'].plot.hist(title='Min-Max Scaled')

In [None]:
# Preview only the first rows (transposed so every column is visible as a row)
# instead of dumping the whole training frame.
train.head().T

### StandardScaler()

In [None]:
# Create the standard scaler and fit it on the training split only.
scaler = sklearn.preprocessing.StandardScaler().fit(train[['monthly_charges']])
# Apply the fitted scaler to every split (overwrites the min-max scaled column).
for subset in (train, test, validate):
    subset['monthly_charges_scaled'] = scaler.transform(subset[['monthly_charges']])

In [None]:
# Side-by-side histograms: raw monthly charges (left) vs. standard scaled (right).
plt.figure(figsize=(13, 6))
plt.subplot(1, 2, 1)
train['monthly_charges'].plot.hist(title='Original')
plt.subplot(1, 2, 2)
train['monthly_charges_scaled'].plot.hist(title='Standard Scaled')

In [None]:
# Helper that runs scaler.fit_transform on one column and visualizes the result.
def visualize_scaler(scaler, scaler_name, data=None, column='monthly_charges'):
    """Fit ``scaler`` on a single column and plot original vs. scaled values.

    Draws a 2x2 grid: the top row is a scatter plot of original against
    scaled values; the bottom row holds histograms of the original (left)
    and scaled (right) values.

    Parameters
    ----------
    scaler : object
        Any object exposing ``fit_transform``. It is fitted in place by
        this call.
    scaler_name : str
        Title for the scatter plot.
    data : pandas.DataFrame, optional
        Frame holding ``column``. Defaults to the notebook-level ``train``
        frame, which preserves the original two-argument behaviour.
    column : str, optional
        Name of the column to scale. Defaults to ``'monthly_charges'``.
    """
    if data is None:
        # Fall back to the notebook's training split, as the original helper did.
        data = train

    original_values = data[column]
    # Double brackets keep the input 2-D, which fit_transform is given here.
    scaled_values = scaler.fit_transform(data[[column]])

    fig = plt.figure(figsize=(12, 10))
    gs = plt.GridSpec(2, 2)  # subplot with a 2x2 grid
    ax1 = fig.add_subplot(gs[0, :])  # the first row, all the columns
    ax2 = fig.add_subplot(gs[1, 0])  # second row, first column
    ax3 = fig.add_subplot(gs[1, 1])  # second row, second column

    ax1.scatter(original_values, scaled_values)
    ax1.set(xlabel=column, ylabel=f'{column}_scaled', title=scaler_name)
    ax2.hist(original_values)
    ax2.set(title='Original')
    ax3.hist(scaled_values)
    ax3.set(title='Scaled')

In [None]:
# Visualize the effect of min-max scaling on monthly charges.
visualize_scaler(scaler=sklearn.preprocessing.MinMaxScaler(), scaler_name='Min-Max Scaling')


In [None]:
# Visualize the effect of standard scaling; the title names the scaler actually used.
visualize_scaler(sklearn.preprocessing.StandardScaler(), 'Standard Scaling')



In [None]:
#train = train[["monthly_charges", 'tenure_scaled', 'monthly_charges_scaled']]
#validate = validate[["monthly_charges", 'tenure_scaled', 'monthly_charges_scaled']]
#test = test[["monthly_charges", 'tenure_scaled', 'monthly_charges_scaled']]

In [None]:
# Transposed preview of the first five training rows, including the scaled columns.
train.head(5).T

### 2. Apply the .inverse_transform method to your scaled data. Is the resulting dataset the exact same as the original data?

In [None]:
# 1. create the object
scaler = sklearn.preprocessing.MinMaxScaler()
# 2. fit the object
scaler.fit(train[['monthly_charges', 'total_charges']])
# 3. use the object: scale the data first -- inverse_transform only makes
#    sense on values that have already been scaled
train_scaled = scaler.transform(train[['monthly_charges', 'total_charges']])
# Turning np array into df (keep train's index so rows stay aligned)
train_scaled = pd.DataFrame(
    train_scaled,
    columns=['monthly_charges_scaled', 'total_charges_scaled'],
    index=train.index,
)

# 4. undo the scaling to get back to the original units
train_unscaled = scaler.inverse_transform(
    train_scaled[['monthly_charges_scaled', 'total_charges_scaled']]
)
# Compare with the original values; allclose tolerates floating-point rounding
# and equal_nan treats missing values in the same position as equal.
print(np.allclose(train_unscaled, train[['monthly_charges', 'total_charges']], equal_nan=True))
train_unscaled

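### The inverse-transformed data matches the original (up to floating-point rounding), provided `.inverse_transform` is applied to the scaled data rather than to the raw data.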

### 3. Read the documentation for sklearn's QuantileTransformer. Use normal for the output_distribution and apply this scaler to your data. Visualize the result of your data scaling.


In [None]:
# Keep only the monthly_charges column for the quantile-transform exercise.
train = train.loc[:, ['monthly_charges']]
train

In [None]:
# The exercise asks for a normal output distribution; the default is 'uniform',
# so it must be requested explicitly.
qt = QuantileTransformer(
    n_quantiles=10,
    output_distribution='normal',
    random_state=0,
).fit(train)
qt_transform = qt.transform(train)

In [None]:
# First five rows of the (untransformed) training column.
train.head(5)

In [None]:
# Show the transformed values as a single row for a compact display.
qt_transform.transpose()

In [None]:
# Build a frame of quantile-transformed values aligned to train's index.
# Use the fitted QuantileTransformer (`qt`), not `scaler`: `scaler` is a
# MinMaxScaler fitted on two columns and cannot transform this one-column frame.
df = pd.DataFrame(qt.transform(train), columns=train.columns, index=train.index)

In [None]:
# Label the transformed column so it can sit next to the original after the concat.
df = df.rename(columns=dict(monthly_charges='monthly_charges_scaled'))

In [None]:
# Display the frame holding the renamed monthly_charges_scaled column.
df

In [None]:
# Attach the scaled column to train, aligning rows on the index.
train = pd.concat([train, df], axis='columns')
train

In [None]:
# Side-by-side histograms: raw monthly charges (left) vs. quantile-transformed
# values (right). Each title now matches the column actually plotted.
plt.figure(figsize=(13, 6))
plt.subplot(121)
train.monthly_charges.plot.hist(title='Original')
plt.subplot(122)
train.monthly_charges_scaled.plot.hist(title='Quantile Transformer Scaled')

### Easier way of doing it

In [None]:
# Create the quantile transformer with a normal output distribution and fit
# it on the training split only.
scaler = sklearn.preprocessing.QuantileTransformer(output_distribution='normal')
scaler.fit(train[['monthly_charges']])
# Use the fitted scaler on every split.
for subset in (train, test, validate):
    subset['monthly_charges_scaled2'] = scaler.transform(subset[['monthly_charges']])

In [None]:
# Display train, which now includes the monthly_charges_scaled2 column.
train

In [None]:
# Shell escape: show the repository's working-tree status.
!git status

In [None]:
#!git add scaling.ipynb

In [None]:
#!git commit -m "adding first 3 problems"

In [None]:
#!git push