### Scaling input features

In [22]:
#import python libraries/packages

# Pandas to handle dataframes
import pandas as pd    

# Import numpy 
import numpy as np

# HERE: Import sklearn for normalization function
import sklearn
from sklearn import preprocessing

#### Feature-scaling function: min-max normalization rescales each input feature to the range 0-1

In [16]:
def scale_input_feat(X):
    '''
    Rescale every column of X to the range 0-1 using min-max normalization.

    The scaler is fitted on X itself, so each column is rescaled relative to
    its own minimum and maximum.

    Parameters
    ----------
    X : pandas.DataFrame
        Input features (expected to be numeric).

    Returns
    -------
    pandas.DataFrame
        A new dataframe holding the normalized values, with the same column
        labels and the same row index as X.
    '''
    # MinMaxScaler works on a numpy array, so hand it the raw values
    min_max_scaler = preprocessing.MinMaxScaler()
    X_scaled = min_max_scaler.fit_transform(X.values)

    # Rebuild the dataframe with the original column labels AND the original
    # row index. Without the index, a frame that was filtered or split before
    # scaling would come back renumbered 0..n-1 and would no longer line up
    # with its target values or with the frame it was taken from.
    X_norm = pd.DataFrame(X_scaled, columns=X.columns, index=X.index)

    return X_norm

#### EXAMPLE OF HOW TO USE THE FUNCTION

In [17]:
#CREATE SYNTHETIC DATA that represents amino acid properties/counts

# Fixed seed: the synthetic values (and everything computed from them below)
# are the same on every Restart & Run All.
SEED = 42
N_SAMPLES = 436  # number of synthetic rows
rng = np.random.default_rng(SEED)

#param1: int from 0 to 99 (the upper bound of 100 is exclusive)
#param2: float in [0, 1)
#param3: int from 100 to 9,999 (the upper bound of 10,000 is exclusive)
param1 = list(rng.integers(low=0, high=100, size=N_SAMPLES))
param2 = list(rng.random(N_SAMPLES))
param3 = list(rng.integers(low=100, high=10000, size=N_SAMPLES))

In [18]:
#BEFORE normalization
# Assemble the three synthetic parameters into one dataframe, one column per
# feature; each column covers a very different range of values.
df = pd.DataFrame({
    'Feature1': param1,
    'Feature2': param2,
    'Feature3': param3,
})
df

Unnamed: 0,Feature1,Feature2,Feature3
0,26,0.544135,3110
1,97,0.691809,3962
2,82,0.171922,1817
3,71,0.932274,3214
4,73,0.386189,3770
...,...,...,...
431,45,0.992094,3828
432,55,0.491925,1310
433,83,0.647584,6200
434,78,0.349900,8052


In [20]:
# Select the input features (X); no output (y) column is assigned in this example
feature_cols = ['Feature1', 'Feature2', 'Feature3']
X = df.loc[:, feature_cols]

In [21]:
#AFTER normalization
# Pass the input features through the min-max scaling function and display
# the resulting dataframe of normalized features.
X = scale_input_feat(X=X)
X

Unnamed: 0,Feature1,Feature2,Feature3
0,0.262626,0.548218,0.302858
1,0.979798,0.697099,0.389215
2,0.828283,0.172962,0.171802
3,0.717172,0.939529,0.313400
4,0.737374,0.388981,0.369755
...,...,...,...
431,0.454545,0.999838,0.375633
432,0.555556,0.495581,0.120414
433,0.838384,0.652512,0.616055
434,0.787879,0.352395,0.803771
