## Function to combine values in rows

In [None]:
def combo(row):
    """Combine a row's 'Type 1' and 'Type 2' values into a single label.

    Returns 'Type 1' alone when 'Type 2' is missing (NaN/None), otherwise
    the two values joined with a hyphen.
    """
    secondary = row['Type 2']
    if pd.isna(secondary):
        return row['Type 1']
    return row['Type 1'] + '-' + secondary

## Code to convert column names to snake_case

In [None]:
# Normalise every column label: lower-case it and replace spaces with
# underscores. Iterating the labels directly avoids the index-based loop.
cols = [label.lower().replace(' ', '_') for label in data.columns]
data.columns = cols

print(data.columns)

## Loop to plot multiple columns

In [None]:
# Draw one distribution plot per column of `numerical`, each on its own figure.
for column in numerical.columns:
    # sns.distplot is deprecated; histplot with a KDE overlay on a density
    # scale is its documented replacement for this kind of plot.
    sns.histplot(numerical[column], kde=True, stat="density")
    plt.show()

## Correlations and Heat map

In [None]:
# Correlation matrix returned by model_data.corr(), kept for the heat map below.
correlations = model_data.corr()
# Bare expression: as the last statement of a notebook cell it displays the matrix.
correlations

In [None]:
# Render the correlation matrix as an annotated heat map on a 10x8 inch figure.
fig, ax = plt.subplots(figsize=(10, 8))
ax = sns.heatmap(correlations, annot=True, ax=ax)
plt.show()

## Train/test split with shapes

In [None]:
# Hold out 20% of the rows for testing; random_state is fixed so the split is
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13)
print('X_train shape is:', X_train.shape)
print('y_train shape is:', y_train.shape)
print('X_test shape is:', X_test.shape)
# This line reports the test target, so it is labelled y_test.
print('y_test shape is:', y_test.shape)

## Encoding columns whose values are lists

In [None]:
# Every value in 'special_features' needs to be a list of strings: each list is
# joined with '|' so that str.get_dummies() can split it back into one
# indicator column per distinct feature.
# `axis` is passed by keyword because pandas 2.x no longer accepts it positionally.
X_train_cat = X_train_cat.drop('special_features', axis=1).join(
    X_train_cat.special_features.str.join('|').str.get_dummies()
)
X_train_cat.isna().sum()

## Code for Ordinal Encoding

In [None]:
# Ordinal encoding of the coverage tiers onto a 0-1 scale.
# NOTE(review): assuming `categorical["coverage"]` is a pandas Series, any label
# missing from the mapping becomes NaN.
categorical["coverage"] = categorical["coverage"].map(
    {
        "Basic": 0,
        "Extended": 0.5,
        "Premium": 1,
    }
)

## MinMaxScaler code

In [None]:
# Fit the scaler on the training numericals and keep it in `transformer` so the
# same scaling can be re-applied to other data.
transformer = MinMaxScaler()
transformer.fit(X_train_num)
X_train_norm = transformer.transform(X_train_num)
print(X_train_norm.shape)
# Rebuild a DataFrame around the scaled array, restoring the original row
# index and column labels.
X_train_num_scale = pd.DataFrame(
    data=X_train_norm,
    columns=X_train_num.columns,
    index=X_train_num.index,
)
X_train_num_scale.head()

## SMOTE for imbalanced data code

In [None]:
from imblearn.over_sampling import SMOTE

In [None]:
def over_sampling(training_x, training_y, random_state=100, k_neighbors=3):
    """Oversample the given data with SMOTE.

    Args:
        training_x: Feature matrix to resample.
        training_y: Target labels matching ``training_x``.
        random_state: Seed handed to SMOTE. Defaults to 100, the value that
            was previously hard-coded.
        k_neighbors: Number of neighbours handed to SMOTE. Defaults to 3,
            the value that was previously hard-coded.

    Returns:
        The ``(features, target)`` pair returned by ``SMOTE.fit_resample``.
    """
    smote = SMOTE(random_state=random_state, k_neighbors=k_neighbors)
    return smote.fit_resample(training_x, training_y)

# Oversample the training split only.
X_train_SMOTE, y_train_SMOTE = over_sampling(X_train, y_train)
# The test split keeps its real class distribution: scoring on synthetic rows
# would not reflect performance on unseen data. The *_SMOTE names are kept so
# later cells that reference them still run.
X_test_SMOTE, y_test_SMOTE = X_test, y_test

## One-Hot-Encoding 

In [None]:
# Fit the encoder once and keep it so the same encoding can be applied to new
# data later.
encoder = OneHotEncoder(drop='first').fit(X_cat)

# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# when available and fall back to the old name on older releases.
if hasattr(encoder, 'get_feature_names_out'):
    cols = encoder.get_feature_names_out(input_features=X_cat.columns)
else:
    cols = encoder.get_feature_names(input_features=X_cat.columns)

# NOTE(review): no index is passed, so the frame gets a fresh RangeIndex rather
# than X_cat's index - confirm before joining it back to other frames by index.
X_cat_encode = pd.DataFrame(encoder.transform(X_cat).toarray(), columns=cols)

X_cat_encode.head()


## Validation metrics: R2, MSE, RMSE, MAE

In [None]:
# Report the score() of linreg on the train and test splits.
print(
    f"train R2: {linreg.score(X_train, y_train)}"
    f" -- test R2: {linreg.score(X_test, y_test)}"
)

In [None]:
from sklearn.metrics import mean_squared_error as mse


# Metric functions take (y_true, y_pred). MSE is symmetric, so the values are
# unchanged, but the conventional order avoids wrong results if this snippet
# is reused with an asymmetric metric.
train_mse = mse(y_train, linreg.predict(X_train))
test_mse = mse(y_test, linreg.predict(X_test))

print('train MSE: {} -- test MSE: {}'.format(train_mse, test_mse))

In [None]:
# RMSE is the square root of the MSE values computed in the previous cell.
print(f"train RMSE: {train_mse ** 0.5} -- test RMSE: {test_mse ** 0.5}")

In [None]:
from sklearn.metrics import mean_absolute_error as mae

# Metric functions take (y_true, y_pred); keep the conventional order.
train_mae = mae(y_train, linreg.predict(X_train))
test_mae = mae(y_test, linreg.predict(X_test))

# Report the MAE values computed above (previously the MSE variables were
# printed under the MAE label).
print('train MAE: {} -- test MAE: {}'.format(train_mae, test_mae))

## Function for metrics

In [None]:
def metrics(X, y, model=None):
    """Print R2, MSE, RMSE and MAE for a fitted model's predictions on (X, y).

    Args:
        X: Features handed to the model's ``predict``.
        y: True target values the predictions are compared against.
        model: Fitted estimator exposing ``predict``. Defaults to the
            notebook-level ``lm``, so existing ``metrics(X, y)`` calls keep
            working.

    Returns:
        None. The four metrics are printed.
    """
    estimator = lm if model is None else model
    predictions = estimator.predict(X)

    print('R2:', r2_score(y, predictions))

    # MSE is the raw mean squared error; RMSE is its square root. Taking the
    # root only once keeps both labels truthful.
    mse = mean_squared_error(y, predictions)
    print('MSE:', mse)
    print('RMSE:', math.sqrt(mse))

    print('MAE:', mean_absolute_error(y, predictions))

## Function to run multiple models to compare outcomes

In [None]:
# Define function to run all models
def models_automation(models, X_train, y_train, test_features=None, test_target=None):
    """Fit each model and print its train and test score.

    Args:
        models: Iterable of estimators exposing ``fit`` and ``score``.
        X_train: Training features.
        y_train: Training target.
        test_features: Features to score on. Defaults to the notebook-level
            ``X_test``.
        test_target: Target to score on. Defaults to the notebook-level
            ``y_test``.

    Returns:
        None. One line per model is printed.
    """
    # Fall back to the notebook-level hold-out split when none is passed, so
    # existing three-argument calls behave as before.
    eval_features = X_test if test_features is None else test_features
    eval_target = y_test if test_target is None else test_target

    for model in models:
        model.fit(X_train, y_train)
        train_score = model.score(X_train, y_train)
        test_score = model.score(eval_features, eval_target)
        print(f"{model.__class__.__name__}: Train -> {train_score}, Test -> {test_score}")


In [None]:
# Candidate regressors to compare, all with their default settings.
model_list = [
    LinearRegression(),
    SGDRegressor(),
    KNeighborsRegressor(),
    MLPRegressor(),
    DecisionTreeRegressor(),
    RandomForestRegressor(),
]
models_automation(models=model_list, X_train=X_train, y_train=y_train)

## Function for time/date transformation

In [None]:
def date_encoding(table):
    """Derive 'month' and 'weekday' features from 'effective_to_date'.

    Args:
        table: DataFrame with a datetime-typed 'effective_to_date' column
            (the ``.dt`` accessor is used on it).

    Returns:
        A new DataFrame without 'effective_to_date' and with the added
        'month' (1-12) and 'weekday' (0 = Monday ... 6 = Sunday) columns.

    Note:
        'month' and 'weekday' are also written onto the passed-in ``table``,
        as before; only the returned frame has the date column removed.
    """
    dates = table['effective_to_date']
    table['month'] = dates.dt.month
    # .dt.day is the day of the month; the column is called 'weekday', so take
    # the day of the week instead.
    table['weekday'] = dates.dt.weekday
    # drop() returns a new frame, so it must be returned to have any effect.
    return table.drop('effective_to_date', axis=1)
