In [None]:
# 0
# modules we'll use
import pandas as pd
import numpy as np

# for Box-Cox Transformation
from scipy import stats

# for min_max scaling
from mlxtend.preprocessing import minmax_scaling

# plotting modules
import seaborn as sns
import matplotlib.pyplot as plt

# fix NumPy's global random seed so any later random draws are repeatable
np.random.seed(0)

# load the Kickstarter projects CSV into a DataFrame
kickstarters_2017 = pd.read_csv(
    "../input/kickstarter-projects/ks-projects-201801.csv"
)

In [None]:
# 0.1
# isolate the usd_goal_real column as a one-column DataFrame
original_data = kickstarters_2017['usd_goal_real'].to_frame()

# min-max scale the goals so they span the range 0 to 1
scaled_data = minmax_scaling(original_data, columns=['usd_goal_real'])

# draw the raw and the scaled distributions side by side for comparison
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 3))

sns.distplot(kickstarters_2017['usd_goal_real'], ax=ax[0])
sns.distplot(scaled_data, ax=ax[1])

ax[0].set_title("Original Data")
ax[1].set_title("Scaled data")

In [None]:
# 0.2
# Print a preview plus the min/max of the data before and after scaling.
# DataFrame.min()/.max() return a one-element Series here (one entry per
# column), so the scalar is extracted with .iloc[0] before converting to
# float; calling float() directly on a Series is deprecated in recent pandas.
print('Original data\nPreview:\n', original_data.head())
print('Minimum value:', float(original_data.min().iloc[0]),
      '\nMaximum value:', float(original_data.max().iloc[0]))
print('_'*30)

print('\nScaled data\nPreview:\n', scaled_data.head())
print('Minimum value:', float(scaled_data.min().iloc[0]),
      '\nMaximum value:', float(scaled_data.max().iloc[0]))

In [None]:
# 1

# select the goal column as a one-column DataFrame; it has to be defined in
# this cell because no earlier cell creates it (fresh-kernel runs would
# otherwise fail with a NameError)
original_goal_data = pd.DataFrame(kickstarters_2017.goal)

# scale the goals from 0 to 1
scaled_goal_data = minmax_scaling(original_goal_data, columns=['goal'])

In [None]:
# 2

# Box-Cox only takes positive values, so build a boolean mask that selects
# the rows whose pledged amount is greater than zero
index_of_all_positive_pledges = kickstarters_2017.pledged > 0

# keep only the positive pledges (their original row labels are preserved)
all_positive_pledges = kickstarters_2017.pledged.loc[index_of_all_positive_pledges]

# normalize the pledges (w/ Box-Cox); with no lambda given, stats.boxcox
# returns a tuple whose first element is the transformed values, and that
# array is re-wrapped as a Series aligned to the original row labels
all_normalized_pledges = pd.Series(stats.boxcox(all_positive_pledges)[0],
                                   name='pledged', index=all_positive_pledges.index)

# plot both together to compare
fig, ax=plt.subplots(1,2,figsize=(15,3))
sns.distplot(all_positive_pledges, ax=ax[0])
ax[0].set_title("Original Data")
sns.distplot(all_normalized_pledges, ax=ax[1])
ax[1].set_title("Normalized data")