In [4]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import StratifiedShuffleSplit

# Load the modelling dataset from a CSV file; the path is relative, so the file
# must be in the notebook's current working directory.
df = pd.read_csv("main_database_for_models.csv")

In [13]:
def prep_data(data):
    """Split a labelled frame into stratified train/test sets and normalise the features.

    Args:
        data: pandas DataFrame holding the feature columns plus a ``'target'``
            column with the class label used for stratification.

    Returns:
        Tuple ``(X_train_norm, y_train, X_test_norm, y_test)`` where the ``X``
        items are the arrays returned by ``tf.keras.utils.normalize`` and the
        ``y`` items are the ``'target'`` Series of the respective subset
        (original index labels preserved).
    """
    # Stratified train-dev split 80:20 (fixed seed for reproducibility)
    split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
    train_index, test_index = next(split.split(data, data["target"]))

    # split() yields *positional* row indices, so they must be applied with
    # .iloc. Using .loc would look them up as index labels, which selects the
    # wrong rows (or raises KeyError) whenever `data` does not carry a default
    # 0..n-1 index, e.g. after filtering or dropping rows.
    data_train = data.iloc[train_index]
    data_test = data.iloc[test_index]

    X_train = data_train.drop('target', axis=1)
    y_train = data_train['target']
    X_test = data_test.drop('target', axis=1)
    y_test = data_test['target']

    # Normalise data.
    # NOTE(review): with axis=1 on a (rows, columns) array each *row* (sample)
    # is scaled independently, not each feature column - confirm this is the
    # intended scaling.
    X_train_norm = tf.keras.utils.normalize(X_train.values, axis=1)
    X_test_norm = tf.keras.utils.normalize(X_test.values, axis=1)

    return X_train_norm, y_train, X_test_norm, y_test

def build_model(layer_tuple_list, opt, loss, metrics):
    """Assemble and compile a Keras ``Sequential`` model from a list of layer specs.

    Args:
        layer_tuple_list: iterable of 2-tuples. A tuple whose first item equals
            the string ``"dropout"`` adds a ``Dropout`` layer with the second
            item as its argument; any other tuple adds a ``Dense`` layer with
            the first item as its size argument and the second as ``activation``.
        opt: passed to ``compile`` as ``optimizer``.
        loss: passed to ``compile`` as ``loss``.
        metrics: passed to ``compile`` as ``metrics``.

    Returns:
        The compiled model.
    """
    network = tf.keras.models.Sequential()
    layer_factory = tf.keras.layers

    for spec, setting in layer_tuple_list:
        # "dropout" is the only special marker; everything else is a Dense layer.
        layer = (
            layer_factory.Dropout(setting)
            if spec == "dropout"
            else layer_factory.Dense(spec, activation=setting)
        )
        network.add(layer)

    network.compile(optimizer=opt, loss=loss, metrics=metrics)
    return network

def calculate_class_weights(y_train):
    """Compute class weights inversely proportional to class frequency.

    The most frequent class receives weight ``1.0``; every other class
    receives ``max_count / count`` where ``max_count`` is the size of the
    largest class and ``count`` is the size of that class.

    Args:
        y_train: pandas Series of class labels.

    Returns:
        dict mapping each distinct non-missing label to its float weight.
        Returns an empty dict when there are no (non-missing) labels.
    """
    # value_counts() tallies every class in a single pass and skips NaN labels
    # (a NaN "class" would otherwise end up with a count of zero and trigger a
    # division by zero below).
    counts = y_train.value_counts()
    if counts.empty:
        return {}

    # Plain Python ints so the division yields plain Python floats.
    max_count = int(counts.max())
    return {label: max_count / int(count) for label, count in counts.items()}

def fit_model(model, X_train, y_train, X_test, y_test, epochs, use_class_weights):
    """Fit ``model`` on the training data, validating on the test data.

    Args:
        model: object exposing a Keras-style ``fit`` method.
        X_train: training features, passed to ``fit`` unchanged.
        y_train: training labels; its ``.values`` attribute is what gets
            passed to ``fit`` and, when weighting is enabled, the object
            itself is handed to ``calculate_class_weights``.
        X_test: validation features.
        y_test: validation labels.
        epochs: forwarded to ``fit`` as ``epochs``.
        use_class_weights: when truthy, ``class_weight`` computed from
            ``y_train`` is also forwarded to ``fit``.

    Returns:
        The same ``model`` object after ``fit`` has been called on it.
    """
    fit_options = {"epochs": epochs, "validation_data": (X_test, y_test)}

    # class_weight is only supplied when requested, so the unweighted call
    # relies on fit()'s own default for that argument.
    if use_class_weights:
        fit_options["class_weight"] = calculate_class_weights(y_train)

    model.fit(X_train, y_train.values, **fit_options)
    return model
    
def generic_build_fit(df, layer_tuple_list, epochs=2, opt='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'], use_class_weights=False):
    """Run the full pipeline: prepare the data, build the model, fit it.

    Prints a progress message before each of the three stages.

    Args:
        df: frame handed to ``prep_data``.
        layer_tuple_list: layer specification handed to ``build_model``.
        epochs: forwarded to ``fit_model``.
        opt: optimizer forwarded to ``build_model``.
        loss: loss forwarded to ``build_model``.
        metrics: metrics forwarded to ``build_model``.
        use_class_weights: forwarded to ``fit_model``.

    Returns:
        Whatever ``fit_model`` returns for the built model.
    """
    print("Preparing Data...")
    features_train, labels_train, features_test, labels_test = prep_data(df)

    print("Building Model...")
    network = build_model(layer_tuple_list, opt, loss, metrics)

    print("Fitting Model...")
    return fit_model(
        network,
        features_train,
        labels_train,
        features_test,
        labels_test,
        epochs,
        use_class_weights,
    )

In [17]:
# layers = [(64,tf.nn.relu),(64,tf.nn.relu),("dropout",0.5),(3,tf.nn.softmax)]
# model = generic_build_fit(df, layers, epochs=50, use_class_weights=True)