# In [27]:
import numpy as np
import pandas as pd
import csv
from sklearn.model_selection import train_test_split

# In [28]:
def load_data(file_path):
    """
    Read a tab-separated data file into a dataframe.

    The file is read without a header row; its two columns are named
    ``label`` and ``message``. Quoting is disabled (``csv.QUOTE_NONE``), so
    quote characters inside a field are kept as ordinary text.

    :param file_path: Path of the data file.

    :returns: A dataframe with the columns ``label`` and ``message``.
    """
    column_names = ["label", "message"]
    return pd.read_csv(
        file_path,
        sep='\t',
        names=column_names,
        quoting=csv.QUOTE_NONE,
    )

def save_data(file_path, df):
    """
    Write ``df`` to ``file_path`` in CSV format.

    The dataframe's index is not written; only the column header and the
    data rows are saved.

    :param file_path: Path of the file to be saved.
    :param df: Dataframe containing the data to be saved.
    """
    # index=False keeps the row index out of the output file.
    df.to_csv(path_or_buf=file_path, index=False)

def preprocess(df):
    """
    Preprocess the data in ``df`` to convert string labels to numeric values.
    The label "ham" will be converted to 0, while the label "spam" will be
    converted to 1. Any other value in the ``label`` column is left as is.

    The dataframe is modified in place and nothing is returned.

    :param df: Dataframe containing the data to be preprocessed. It must have
        a ``label`` column.
    """
    label_codes = {"ham": 0, "spam": 1}
    # Assign the result back to the column instead of calling
    # ``replace(..., inplace=True)`` on ``df["label"]``: with pandas
    # Copy-on-Write the chained in-place call only changes a temporary
    # object and leaves ``df`` untouched.
    # ``infer_objects`` gives the column an integer dtype when every label
    # was converted, independent of how ``replace`` handles downcasting.
    df["label"] = df["label"].replace(label_codes).infer_objects()
    
def prepare_train_validation_test_split(df, train_percent=0.7, test_percent=0.15, seed=None):
    """
    Create a randomized train/validation/test split of ``df``.

    The test set is split off first; the remaining rows are then divided
    into training and validation sets. The validation set receives whatever
    is left after the training and test shares are taken, so the default
    arguments give a 70-15-15 split.

    :param df: Dataframe containing the data to be split.
    :param train_percent: Fraction of ``df`` allotted to training data.
    :param test_percent: Fraction of ``df`` allotted to test data.
    :param seed: Seed for random number generator; the same seed is used
        for both splitting steps.

    :returns: A tuple (data_train, data_validate, data_test)
    """
    # Step 1: hold out the test portion of the full dataframe.
    remainder, test_part = train_test_split(
        df, test_size=test_percent, random_state=seed
    )

    # Step 2: ``train_percent`` refers to the full dataframe, so it has to be
    # rescaled to a fraction of the rows that remain after step 1.
    train_share_of_remainder = (train_percent) / (1 - test_percent)
    train_part, validate_part = train_test_split(
        remainder, train_size=train_share_of_remainder, random_state=seed
    )

    return train_part, validate_part, test_part