# CATS: Data Transformation #

### Imports ###

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

### Data Preview ###
Cat data was extracted from the data exported from the Champ_CAT_Preprocessing notebook.

In [2]:
all_cat_data = pd.read_csv('cat_data.csv', index_col=0)

In [3]:
print("Total entries: ", len(all_cat_data))
all_cat_data.head()

Total entries:  57350


Unnamed: 0,AnimalID,Gender,IntakeInternalStatus,ReproductiveStatusAtIntake,OutcomeType,OutcomeSubtype,IntakeMonth,IntakeYear,IntakeDotW,OutcomeMonth,OutcomeYear,OutcomeDotW,IntakeAge,TimeUntilOutcome,LiveOutcome,IntakeTypes,Color,Breed
0,A366370,MALE,FEARFUL,ALTERED,EUTH,FERAL,11,2008,4,11,2008,2,,5,0,"STRAY,OTC",WHITE,SHORT
1,A366531,UNKNOWN,NORMAL,UNKNOWN,EUTH,CONTAG DIS,11,2008,0,11,2008,2,,9,0,"STRAY,OTC",BLACK,SHORT
2,A314432,UNKNOWN,NORMAL,UNKNOWN,EUTH,TIME/SPACE,6,2007,4,6,2007,4,,0,0,"OWNERSUR,OTC",BROWN,SHORT
3,A317335,UNKNOWN,NORMAL,UNKNOWN,EUTH,TIME/SPACE,7,2007,3,7,2007,2,,6,0,"STRAY,OTC",GRAY,SHORT
4,A317486,FEMALE,NURSING,FERTILE,EUTH,TIME/SPACE,7,2007,4,7,2007,3,,6,0,"STRAY,OTC",GRAY,SHORT


### Dropping Exploratory Features ###

In [4]:
cat_data = all_cat_data.drop(['AnimalID', 'OutcomeType', 'OutcomeSubtype', 'OutcomeYear', 'OutcomeDotW', 'OutcomeMonth',
                              'TimeUntilOutcome'], axis = 1)

### Preparing Cat Data for Transformation ###

#### Changing Integer Types to Object Types ####

In [5]:
cat_data = cat_data.astype({
        'IntakeMonth': 'object',
        'IntakeYear': 'object',
        'IntakeDotW': 'object',
        'LiveOutcome': 'object'})

#### Bucketing IntakeAge ####

In [6]:
cat_data['IntakeAge'] = pd.cut(cat_data['IntakeAge'], [0,90,180,270,360,720,1080,2160,3240])

### Transforming Cat Data with Dummies ###

In [7]:
cat_transf = pd.get_dummies(cat_data, drop_first = True)
cat_transf.head()

Unnamed: 0,Gender_LITTER,Gender_MALE,Gender_UNKNOWN,IntakeInternalStatus_AGG ANIMAL,IntakeInternalStatus_AGG FEAR,IntakeInternalStatus_AGG PEOPLE,IntakeInternalStatus_AGGRESSIVE,IntakeInternalStatus_DEHYDRA,IntakeInternalStatus_DIARRHEA,IntakeInternalStatus_EMACIATED,...,Color_GRAY,Color_ORANGE,Color_OTHER,Color_TORTIE,Color_WHITE,Breed_LONG,Breed_MED,Breed_OTHER,Breed_SHORT,Breed_SIAMESE
0,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,1,0
1,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,0,0,1,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0
4,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0


### Dropping Highly Correlated Features ###

In [8]:
corr = cat_transf.corr()

In [9]:
high_val = 0.5

high_corrs = corr[(corr > high_val) & (corr < 1)].sum()
high_corrs[high_corrs > 0]

Gender_UNKNOWN                        0.983583
ReproductiveStatusAtIntake_UNKNOWN    0.983583
dtype: float64

In [10]:
cat_transf = cat_transf.drop('ReproductiveStatusAtIntake_UNKNOWN', axis = 1)

### Transformed Cat Data Preview ###

In [11]:
cat_transf.head(20)

Unnamed: 0,Gender_LITTER,Gender_MALE,Gender_UNKNOWN,IntakeInternalStatus_AGG ANIMAL,IntakeInternalStatus_AGG FEAR,IntakeInternalStatus_AGG PEOPLE,IntakeInternalStatus_AGGRESSIVE,IntakeInternalStatus_DEHYDRA,IntakeInternalStatus_DIARRHEA,IntakeInternalStatus_EMACIATED,...,Color_GRAY,Color_ORANGE,Color_OTHER,Color_TORTIE,Color_WHITE,Breed_LONG,Breed_MED,Breed_OTHER,Breed_SHORT,Breed_SIAMESE
0,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,1,0
1,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,0,0,1,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0
4,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
6,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
7,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0
9,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


### Saving Transformed Cat Data ###

In [12]:
cat_transf.to_csv('cat_transformed_data.csv')