# In [None]:
import env
import acquire as acq
import pandas as pd
import matplotlib as plt
import os

def prep_iris():
    '''
    Acquire the iris data via acq.get_iris_data() and prepare it.

    Steps, in order:
      1. drop the 'species_id' column (in place),
      2. rename 'species_name' to 'species' (in place),
      3. build dummy columns from 'species' (first level dropped,
         no NaN indicator column),
      4. append the dummy columns to the frame.

    Returns the prepared DataFrame. No train/validate/test split is
    performed here.
    '''
    iris = acq.get_iris_data()

    # In-place edits mutate the very object handed back by acquire.
    iris.drop(columns=['species_id'], inplace=True)
    iris.rename(columns={'species_name': 'species'}, inplace=True)

    species_dummies = pd.get_dummies(
        iris['species'],
        drop_first=True,
        dummy_na=False,
    )
    return pd.concat([iris, species_dummies], axis=1)


def prep_titanic():
    '''
    Acquire the titanic data via acq.get_titanic_data() and prepare it.

    Steps, in order:
      1. drop the 'passenger_id', 'class', 'embarked' and 'deck'
         columns (in place),
      2. build dummy columns from 'embark_town' and 'sex' (first level
         of each dropped, no NaN indicator column),
      3. append the dummy columns to the frame.

    Returns the prepared DataFrame, which keeps the original
    'embark_town' and 'sex' columns alongside their dummies. No
    train/validate/test split is performed here.
    '''
    titanic_df = acq.get_titanic_data()
    titanic_df.drop(columns=['passenger_id', 'class', 'embarked', 'deck'], inplace=True)
    dummy_df = pd.get_dummies(titanic_df[['embark_town', 'sex']],
                              dummy_na=False, drop_first=True)
    # Previously the dummies were computed and then thrown away, so the
    # returned frame never contained them; attach them as prep_iris does.
    titanic_df = pd.concat([titanic_df, dummy_df], axis=1)
    return titanic_df



def split_data(df, stratify_target='target_col_name'):
    '''
    Split a DataFrame into train, validate, and test DataFrames,
    stratifying both splits on the column named by stratify_target.

    Parameters:
        df: the DataFrame to split.
        stratify_target: name of the column in df to stratify on. The
            default is a placeholder; pass the real target column name.

    Returns:
        (train, validate, test) DataFrames. The test set is 20% of df;
        the remaining 80% is split 70/30, so train is about 56% and
        validate about 24% of the original rows.

    Both splits use random_state=9751, so results are reproducible.
    '''
    # Imported here because the module never imports train_test_split at
    # file level; without this every call fails with NameError.
    from sklearn.model_selection import train_test_split

    # First carve off the held-out test set.
    train_validate, test = train_test_split(df, test_size=.2,
                                            random_state=9751, stratify=df[stratify_target])
    # Then split what is left into train and validate.
    train, validate = train_test_split(train_validate,
                                       test_size=.3,
                                       random_state=9751,
                                       stratify=train_validate[stratify_target])
    return train, validate, test

