In [1]:
# Helper module that supplies get_iris_data() (presumably project-local -- confirm).
import acquire

# Silence every warning for the rest of the session. The filter is installed
# before the imports below, so warnings emitted while importing them are hidden too.
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.model_selection import train_test_split


# Seed NumPy's global random state so NumPy-based randomness is repeatable.
# split_data passes its own random_state, so the split does not rely on this seed.
np.random.seed(123)

### Acquiring Data

In [16]:
# Acquire the raw iris data via the acquire helper and preview the first rows.
iris_df = acquire.get_iris_data()
iris_df.head()

Unnamed: 0,species_id,species_name,sepal_length,sepal_width,petal_length,petal_width
0,1,setosa,5.1,3.5,1.4,0.2
1,1,setosa,4.9,3.0,1.4,0.2
2,1,setosa,4.7,3.2,1.3,0.2
3,1,setosa,4.6,3.1,1.5,0.2
4,1,setosa,5.0,3.6,1.4,0.2


In [17]:
# split_data stratifies on a column named `species`, so rename `species_name`
# before splitting. prep_iris performs this same rename itself.
iris_df = iris_df.rename(columns={'species_name' : 'species'})

### Splitting Data

In [22]:
#split data function
def split_data(iris_df):
    '''
    Split a dataframe into train, validate and test subsets.

    The split happens in two stratified passes on the `species` column:
    20% of all rows are held out as `test`, then 30% of the remaining rows
    become `validate` and the rest become `train`. Both passes use
    random_state=123, so the result is repeatable for the same input.

    Parameters
    ----------
    iris_df : pandas.DataFrame
        Dataframe with a `species` column to stratify on.

    Returns
    -------
    tuple
        (train, validate, test) dataframes, in that order.
    '''
    seed = 123

    # Pass 1: carve off the held-out test set.
    remainder, test = train_test_split(
        iris_df,
        test_size=.2,
        random_state=seed,
        stratify=iris_df.species,
    )

    # Pass 2: divide what is left between train and validate.
    train, validate = train_test_split(
        remainder,
        test_size=.3,
        random_state=seed,
        stratify=remainder.species,
    )

    return train, validate, test


### Prepare Iris Data

In [19]:
#prepare function to prep iris data
def prep_iris(iris_df):
    '''
    Prepare the raw iris dataframe for exploration and modeling.

    Drops the `species_id` column, renames `species_name` to `species`, and
    appends one indicator (dummy) column per distinct species value. The
    input dataframe is left untouched; a new dataframe is returned.

    No train/validate/test split is performed here: this function returns a
    single dataframe, so call split_data on the result when the subsets
    are needed.

    Parameters
    ----------
    iris_df : pandas.DataFrame
        Raw iris data containing `species_id` and `species_name` columns.

    Returns
    -------
    pandas.DataFrame
        The prepared dataframe: the original columns minus `species_id`,
        with `species_name` renamed to `species`, followed by the
        `species_<value>` dummy columns.

    Raises
    ------
    KeyError
        If `species_id` is missing, or if the frame has neither a
        `species_name` nor a `species` column.
    '''
    prepped = (
        iris_df
        .drop(columns=['species_id'])
        .rename(columns={'species_name': 'species'})
    )

    # Double brackets keep a DataFrame, so the dummy columns get the
    # `species_` prefix; dummy_na=False adds no extra column for missing values.
    species_dummies = pd.get_dummies(prepped[['species']], dummy_na=False)

    return pd.concat([prepped, species_dummies], axis=1)

In [20]:
# Re-acquire the raw iris data so prep_iris is exercised on the original
# columns (species_id, species_name) rather than the frame renamed above.
iris_df = acquire.get_iris_data()
iris_df.head()

Unnamed: 0,species_id,species_name,sepal_length,sepal_width,petal_length,petal_width
0,1,setosa,5.1,3.5,1.4,0.2
1,1,setosa,4.9,3.0,1.4,0.2
2,1,setosa,4.7,3.2,1.3,0.2
3,1,setosa,4.6,3.1,1.5,0.2
4,1,setosa,5.0,3.6,1.4,0.2


In [21]:
# Run prep_iris on the freshly acquired data and preview the result.
# NOTE(review): this overwrites iris_df with the prepared frame, which no longer
# has `species_id`; re-running this cell without first re-running the acquire
# cell above will make prep_iris fail when it tries to drop that column.
iris_df = prep_iris(iris_df)
iris_df.head()

Unnamed: 0,species,sepal_length,sepal_width,petal_length,petal_width,species_setosa,species_versicolor,species_virginica
0,setosa,5.1,3.5,1.4,0.2,1,0,0
1,setosa,4.9,3.0,1.4,0.2,1,0,0
2,setosa,4.7,3.2,1.3,0.2,1,0,0
3,setosa,4.6,3.1,1.5,0.2,1,0,0
4,setosa,5.0,3.6,1.4,0.2,1,0,0
