# DOGS: Data Transformation #

### Imports ###

In [10]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

### Data Preview ###
The dog data is loaded from the CSV file exported by the Champ_DOG_Preprocessing notebook.

In [11]:
# Read the dog records from disk; the CSV's first column becomes the row index.
all_dog_data = pd.read_csv(filepath_or_buffer='dog_data.csv', index_col=0)

In [12]:
# Print the number of rows, then display the first rows as the cell output.
print("Total entries: ", all_dog_data.shape[0])
all_dog_data.head()

Total entries:  52240


Unnamed: 0,AnimalID,PrimaryBreed,Gender,IntakeInternalStatus,ReproductiveStatusAtIntake,OutcomeType,OutcomeSubtype,IntakeMonth,IntakeYear,IntakeDotW,OutcomeMonth,OutcomeYear,OutcomeDotW,IntakeAge,TimeUntilOutcome,LiveOutcome,IntakeTypes,Color,Breed
0,A281756,PIT BULL TERRIER,MALE,NORMAL,FERTILE,EUTH,TIME/SPACE,9,2006,0,9,2006,1,365.0,1,0,"OWNERSUR,OTC",WHITE,PIT BULL
1,A256128,AMERICAN PIT BULL TERRIER,MALE,NORMAL,FERTILE,EUTH,MEDICAL,11,2005,5,12,2005,3,,12,0,"STRAY,FIELD",BROWN,PIT BULL
2,A316619,LABRADOR RETRIEVER,MALE,FEARFUL,FERTILE,EUTH,TIME/SPACE,6,2007,4,7,2007,2,,5,0,"STRAY,FIELD",WHITE,LABRADOR RETRIEVER
3,A319056,BEAGLE,MALE,NORMAL,ALTERED,EUTH,TIME/SPACE,7,2007,3,8,2007,1,,19,0,"STRAY,OTC",TRICOLOR,BEAGLE
4,A258842,PIT BULL TERRIER,MALE,NORMAL,FERTILE,EUTH,BREED,12,2005,2,12,2005,3,,8,0,"CONFISCATE,CRUELTY",WHITE,PIT BULL


### Dropping Exploratory Features ###

In [13]:
# Build the working frame without the identifier, raw-breed and outcome-detail
# columns listed below; LiveOutcome is not in the list and therefore stays.
dog_data = all_dog_data.drop(
    columns=[
        'AnimalID',
        'PrimaryBreed',
        'OutcomeType',
        'OutcomeSubtype',
        'OutcomeMonth',
        'OutcomeYear',
        'OutcomeDotW',
        'TimeUntilOutcome',
    ]
)

### Preparing Dog Data for Transformation ###

#### Changing Integer Types to Object Types ####

In [14]:
# Cast these integer-coded columns to object dtype so that pd.get_dummies
# one-hot encodes them instead of passing them through as numbers.
dog_data = dog_data.astype(
    {
        column: 'object'
        for column in ('IntakeMonth', 'IntakeYear', 'IntakeDotW', 'LiveOutcome')
    }
)

#### Bucketing IntakeAge ####

In [15]:
# Bucket IntakeAge (presumably measured in days - confirm against the preprocessing
# notebook) into ordered intervals.
#
# pd.cut closes intervals on the right and, by default, excludes the lowest edge, and
# any value outside the edges becomes NaN. With a last edge of 3240 that turned every
# age above 3240 - and an age of exactly 0 - into NaN, making those dogs look the same
# as dogs whose age is genuinely missing. The open-ended top bin and
# include_lowest=True keep them as real categories.
#
# NOTE: the interval labels are now float-based (e.g. "(90.0, 180.0]") and include a
# "(3240.0, inf]" bucket, so the dummy column names derived from them change too.
dog_data['IntakeAge'] = pd.cut(
    dog_data['IntakeAge'],
    [0, 90, 180, 270, 360, 720, 1080, 2160, 3240, np.inf],
    include_lowest=True,
)

### Transforming Dog Data with Dummies ###

In [16]:
# One-hot encode the categorical columns, dropping the first level of each feature.
# NOTE: missing values get 0 in every dummy column of their feature (dummy_na is left
# at its default), which is the same encoding as the dropped first level.
dog_transf = pd.get_dummies(data=dog_data, drop_first=True)
dog_transf.head(5)

Unnamed: 0,Gender_LITTER,Gender_MALE,Gender_UNKNOWN,IntakeInternalStatus_AGG ANIMAL,IntakeInternalStatus_AGG BARRIE,IntakeInternalStatus_AGG FEAR,IntakeInternalStatus_AGG FOOD,IntakeInternalStatus_AGG PEOPLE,IntakeInternalStatus_AGGRESSIVE,IntakeInternalStatus_DEHYDRA,...,Breed_OTHER-LARGE,Breed_OTHER-MEDIUM,Breed_OTHER-SMALL,Breed_PIT BULL,Breed_POMERANIAN,Breed_POODLE - MINIATURE,Breed_PUG,Breed_ROTTWEILER,Breed_SHIH TZU,Breed_TERRIER-SMALL
0,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0


### Dropping Highly Correlated Features ###

In [17]:
# Pairwise Pearson correlation between all transformed columns.
corr = dog_transf.corr(method='pearson')

In [18]:
# Threshold on the absolute correlation above which two features count as redundant.
high_val = 0.5

# Work on absolute values so strong negative correlations are caught as well, and blank
# out the diagonal explicitly. Filtering with `corr < 1` also removed off-diagonal pairs
# whose correlation is exactly 1 - the most redundant features of all.
off_diag = corr.abs().mask(np.eye(len(corr), dtype=bool))

# Per column: sum of the absolute correlations that exceed the threshold (NaN cells are
# skipped by sum); a positive total means the column has at least one such partner.
high_corrs = off_diag[off_diag > high_val].sum()
high_corrs[high_corrs > 0]

Gender_UNKNOWN                        0.968538
ReproductiveStatusAtIntake_UNKNOWN    0.968538
dtype: float64

In [19]:
# Remove one column of the highly correlated pair; Gender_UNKNOWN is kept.
dog_transf = dog_transf.drop(columns=['ReproductiveStatusAtIntake_UNKNOWN'])

### Transformed Dog Data Preview ###

In [20]:
# Display the first 20 rows of the transformed frame.
dog_transf.head(n=20)

Unnamed: 0,Gender_LITTER,Gender_MALE,Gender_UNKNOWN,IntakeInternalStatus_AGG ANIMAL,IntakeInternalStatus_AGG BARRIE,IntakeInternalStatus_AGG FEAR,IntakeInternalStatus_AGG FOOD,IntakeInternalStatus_AGG PEOPLE,IntakeInternalStatus_AGGRESSIVE,IntakeInternalStatus_DEHYDRA,...,Breed_OTHER-LARGE,Breed_OTHER-MEDIUM,Breed_OTHER-SMALL,Breed_PIT BULL,Breed_POMERANIAN,Breed_POODLE - MINIATURE,Breed_PUG,Breed_ROTTWEILER,Breed_SHIH TZU,Breed_TERRIER-SMALL
0,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
5,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
7,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,1,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0


### Saving Transformed Dog Data ###

In [21]:
# Write the transformed frame to disk; the row index is written as the first column.
dog_transf.to_csv(path_or_buf='dog_transformed_data.csv')