# Cleaning/Exploring Animal Outcome Data

### Imports

In [3]:
import pandas as pd
import re
import numpy as np
from collections import Counter
import data_cleaning as dc
from sqlalchemy import create_engine

#### Load /explore file 

In [4]:
outcomes_path = '/Users/murdock/Downloads/Austin_Animal_Center_Outcomes.csv'

animal_outcomes_df = pd.read_csv(outcomes_path)

In [49]:
animal_outcomes_df.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
0,A741715,*Pebbles,01/11/2017 06:17:00 PM,01/11/2017 06:17:00 PM,03/07/2016,Adoption,,Cat,Spayed Female,10 months,Domestic Shorthair Mix,Calico
1,A658751,Benji,11/13/2016 01:38:00 PM,11/13/2016 01:38:00 PM,07/14/2011,Return to Owner,,Dog,Neutered Male,5 years,Border Terrier Mix,Tan
2,A721285,,02/24/2016 02:42:00 PM,02/24/2016 02:42:00 PM,02/24/2014,Euthanasia,Suffering,Other,Unknown,2 years,Raccoon Mix,Black/Gray
3,A746650,Rose,04/07/2017 11:58:00 AM,04/07/2017 11:58:00 AM,04/06/2016,Return to Owner,,Dog,Intact Female,1 year,Labrador Retriever/Jack Russell Terrier,Yellow
4,A750122,Happy Camper,05/24/2017 06:36:00 PM,05/24/2017 06:36:00 PM,04/08/2017,Transfer,Partner,Dog,Intact Male,1 month,Labrador Retriever Mix,Black


In [51]:
animal_outcomes_df[animal_outcomes_df['Animal ID'] == 'A677918']

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
688,A677918,Buddy,02/04/2015 05:20:00 PM,02/04/2015 05:20:00 PM,03/26/2008,Return to Owner,,Dog,Neutered Male,6 years,Pit Bull Mix,White/Chocolate
1346,A677918,Buddy,03/29/2015 04:56:00 PM,03/29/2015 04:56:00 PM,03/26/2008,Euthanasia,Aggressive,Dog,Neutered Male,7 years,Pit Bull Mix,White/Chocolate
55338,A677918,Buddy,05/01/2014 04:56:00 PM,05/01/2014 04:56:00 PM,03/26/2008,Return to Owner,,Dog,Neutered Male,6 years,Pit Bull Mix,White/Chocolate


In [3]:
animal_outcomes_df = animal_outcomes_df[animal_outcomes_df['Animal Type'] == 'Dog']

In [4]:
animal_outcomes_df['DateTime'] = pd.to_datetime(animal_outcomes_df['DateTime'], format='%m/%d/%Y %H:%M:%S %p')

In [5]:
animal_outcomes_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 41387 entries, 1 to 73537
Data columns (total 12 columns):
Animal ID           41387 non-null object
Name                34842 non-null object
DateTime            41387 non-null datetime64[ns]
MonthYear           41387 non-null object
Date of Birth       41387 non-null object
Outcome Type        41386 non-null object
Outcome Subtype     12842 non-null object
Animal Type         41387 non-null object
Sex upon Outcome    41385 non-null object
Age upon Outcome    41386 non-null object
Breed               41387 non-null object
Color               41387 non-null object
dtypes: datetime64[ns](1), object(11)
memory usage: 4.1+ MB


In [6]:
animal_outcomes_df.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
1,A658751,Benji,2016-11-13 01:38:00,11/13/2016 01:38:00 PM,07/14/2011,Return to Owner,,Dog,Neutered Male,5 years,Border Terrier Mix,Tan
3,A746650,Rose,2017-04-07 11:58:00,04/07/2017 11:58:00 AM,04/06/2016,Return to Owner,,Dog,Intact Female,1 year,Labrador Retriever/Jack Russell Terrier,Yellow
4,A750122,Happy Camper,2017-05-24 06:36:00,05/24/2017 06:36:00 PM,04/08/2017,Transfer,Partner,Dog,Intact Male,1 month,Labrador Retriever Mix,Black
7,A666430,Lucy,2013-11-07 11:47:00,11/07/2013 11:47:00 AM,11/06/2012,Transfer,Partner,Dog,Spayed Female,1 year,Beagle Mix,White/Brown
8,A675708,*Johnny,2014-06-03 02:20:00,06/03/2014 02:20:00 PM,03/31/2013,Adoption,,Dog,Neutered Male,1 year,Pit Bull,Blue/White


### Convert column names

In [7]:
animal_outcomes_df.columns = ['animalid', 'name', 'datetime', 'monthyear', 'dateofbirth', 'outcometype', 'outcomesubtype', 'animaltype', 'sexuponoutcome', 'ageuponoutcome', 'breed', 'color']

In [8]:
animal_outcomes_df.head()

Unnamed: 0,animalid,name,datetime,monthyear,dateofbirth,outcometype,outcomesubtype,animaltype,sexuponoutcome,ageuponoutcome,breed,color
1,A658751,Benji,2016-11-13 01:38:00,11/13/2016 01:38:00 PM,07/14/2011,Return to Owner,,Dog,Neutered Male,5 years,Border Terrier Mix,Tan
3,A746650,Rose,2017-04-07 11:58:00,04/07/2017 11:58:00 AM,04/06/2016,Return to Owner,,Dog,Intact Female,1 year,Labrador Retriever/Jack Russell Terrier,Yellow
4,A750122,Happy Camper,2017-05-24 06:36:00,05/24/2017 06:36:00 PM,04/08/2017,Transfer,Partner,Dog,Intact Male,1 month,Labrador Retriever Mix,Black
7,A666430,Lucy,2013-11-07 11:47:00,11/07/2013 11:47:00 AM,11/06/2012,Transfer,Partner,Dog,Spayed Female,1 year,Beagle Mix,White/Brown
8,A675708,*Johnny,2014-06-03 02:20:00,06/03/2014 02:20:00 PM,03/31/2013,Adoption,,Dog,Neutered Male,1 year,Pit Bull,Blue/White


### Remove any rows that have NaN values in important columns

In [9]:
animal_outcomes_df = animal_outcomes_df.dropna(subset=['outcometype'])
animal_outcomes_df = animal_outcomes_df.dropna(subset=['sexuponoutcome'])
animal_outcomes_df = animal_outcomes_df.dropna(subset=['ageuponoutcome'])
animal_outcomes_df = animal_outcomes_df.dropna(subset=['breed'])
animal_outcomes_df = animal_outcomes_df.dropna(subset=['color'])
animal_outcomes_df = animal_outcomes_df.dropna(subset=['datetime'])
animal_outcomes_df = animal_outcomes_df.reset_index(drop=True)

In [10]:
animal_outcomes_df.tail() # 41386, 9336 if drop all na, 41385 w/ outcome type

Unnamed: 0,animalid,name,datetime,monthyear,dateofbirth,outcometype,outcomesubtype,animaltype,sexuponoutcome,ageuponoutcome,breed,color
41380,A759829,Drew,2017-10-08 09:36:00,10/08/2017 09:36:00 AM,10/08/2010,Return to Owner,,Dog,Neutered Male,7 years,Rat Terrier Mix,White/Black
41381,A757837,402,2017-10-09 07:37:00,10/09/2017 07:37:00 PM,07/06/2017,Transfer,Partner,Dog,Unknown,3 months,Labrador Retriever Mix,Blue Merle
41382,A757645,141,2017-10-09 07:35:00,10/09/2017 07:35:00 PM,09/04/2016,Transfer,Partner,Dog,Intact Male,1 year,Labrador Retriever Mix,Black/Brown
41383,A757835,400,2017-10-09 07:37:00,10/09/2017 07:37:00 PM,07/06/2017,Transfer,Partner,Dog,Unknown,3 months,Labrador Retriever Mix,Black
41384,A665334,Mcruber,2017-10-08 01:42:00,10/08/2017 01:42:00 PM,10/16/2011,Return to Owner,,Dog,Neutered Male,5 years,Pit Bull Mix,Blue/White


### Convert ages to # months

In [11]:
animal_outcomes_df['ageuponoutcome'] = animal_outcomes_df['ageuponoutcome'].apply(dc.convert_ages)

### Group colors to get less unique options

In [12]:
animal_outcomes_df['color'] = animal_outcomes_df['color'].apply(dc.group_colors)

In [13]:
count = Counter(animal_outcomes_df['color'].values)

In [14]:
count

Counter({'Agouti': 4,
         'Apricot': 50,
         'Black': 2365,
         'Blue': 241,
         'Blue Merle': 3,
         'Brindle': 2797,
         'Brown': 1701,
         'Brown Brindle': 1,
         'Buff': 354,
         'Chocolate': 376,
         'Cream': 369,
         'Fawn': 172,
         'Gold': 119,
         'Gray': 144,
         'Liver': 2,
         'Merle': 715,
         'Orange': 1,
         'Red': 855,
         'Sable': 432,
         'Silver': 29,
         'Tan': 1839,
         'Tick': 284,
         'Tricolor': 1861,
         'Twocolor': 23981,
         'White': 2203,
         'Yellow': 487})

In [15]:
animal_outcomes_df.head()

Unnamed: 0,animalid,name,datetime,monthyear,dateofbirth,outcometype,outcomesubtype,animaltype,sexuponoutcome,ageuponoutcome,breed,color
0,A658751,Benji,2016-11-13 01:38:00,11/13/2016 01:38:00 PM,07/14/2011,Return to Owner,,Dog,Neutered Male,60.0,Border Terrier Mix,Tan
1,A746650,Rose,2017-04-07 11:58:00,04/07/2017 11:58:00 AM,04/06/2016,Return to Owner,,Dog,Intact Female,12.0,Labrador Retriever/Jack Russell Terrier,Yellow
2,A750122,Happy Camper,2017-05-24 06:36:00,05/24/2017 06:36:00 PM,04/08/2017,Transfer,Partner,Dog,Intact Male,1.0,Labrador Retriever Mix,Black
3,A666430,Lucy,2013-11-07 11:47:00,11/07/2013 11:47:00 AM,11/06/2012,Transfer,Partner,Dog,Spayed Female,12.0,Beagle Mix,Twocolor
4,A675708,*Johnny,2014-06-03 02:20:00,06/03/2014 02:20:00 PM,03/31/2013,Adoption,,Dog,Neutered Male,12.0,Pit Bull,Twocolor


### Start looking at breeds

In [16]:
len(animal_outcomes_df['breed'].unique())

# A lot of unique breeds!!

1848

#### Make sure mixed breeds are not just repetitions of other entries but backwards (hound/shepherd vs shepherd/hound)

In [17]:
dog_mixes = animal_outcomes_df[animal_outcomes_df['breed'].str.contains('/')]
mixes = [sorted(x.split('/')) for x in dog_mixes['breed'].values]
def fix_breeds(breed):
    """
    Makes sure breed names are unique and not in a reversed order.
    
    INPUT:
    breed - an entry from the dataframe's breed column
    OUTPUT:
    a breed entry
    """
    breed_split = sorted(breed.split('/'))
    if breed_split in mixes:
        return '/'.join(breed_split)
    else:
        return breed


In [18]:
animal_outcomes_df['breed'] = animal_outcomes_df['breed'].apply(fix_breeds)

### Now lets pickle and make dataframes with different breed combinations for modeling

In [19]:
path = '/Users/murdock/Documents/metis/project3/outcome_data.pkl'
animal_outcomes_df.to_pickle(path)

### What about doing pit bulls vs not pit bull?

In [20]:
pits = []
for breed in animal_outcomes_df['breed']:
    if 'bull' in breed.lower():
        pits.append(breed)
len(pits), len(animal_outcomes_df['breed'])

# lot of pit bulls!!

(7635, 41385)

In [21]:
pitbull_df = animal_outcomes_df.copy()
pitbull_df.head()

Unnamed: 0,animalid,name,datetime,monthyear,dateofbirth,outcometype,outcomesubtype,animaltype,sexuponoutcome,ageuponoutcome,breed,color
0,A658751,Benji,2016-11-13 01:38:00,11/13/2016 01:38:00 PM,07/14/2011,Return to Owner,,Dog,Neutered Male,60.0,Border Terrier Mix,Tan
1,A746650,Rose,2017-04-07 11:58:00,04/07/2017 11:58:00 AM,04/06/2016,Return to Owner,,Dog,Intact Female,12.0,Jack Russell Terrier/Labrador Retriever,Yellow
2,A750122,Happy Camper,2017-05-24 06:36:00,05/24/2017 06:36:00 PM,04/08/2017,Transfer,Partner,Dog,Intact Male,1.0,Labrador Retriever Mix,Black
3,A666430,Lucy,2013-11-07 11:47:00,11/07/2013 11:47:00 AM,11/06/2012,Transfer,Partner,Dog,Spayed Female,12.0,Beagle Mix,Twocolor
4,A675708,*Johnny,2014-06-03 02:20:00,06/03/2014 02:20:00 PM,03/31/2013,Adoption,,Dog,Neutered Male,12.0,Pit Bull,Twocolor


#### Apply function to rename breeds as pitbull or not

In [22]:
pitbull_df['breed'] = pitbull_df['breed'].apply(dc.pit_bull_separation)

In [23]:
pitbull_df.head()

Unnamed: 0,animalid,name,datetime,monthyear,dateofbirth,outcometype,outcomesubtype,animaltype,sexuponoutcome,ageuponoutcome,breed,color
0,A658751,Benji,2016-11-13 01:38:00,11/13/2016 01:38:00 PM,07/14/2011,Return to Owner,,Dog,Neutered Male,60.0,Not Pit Bull,Tan
1,A746650,Rose,2017-04-07 11:58:00,04/07/2017 11:58:00 AM,04/06/2016,Return to Owner,,Dog,Intact Female,12.0,Not Pit Bull,Yellow
2,A750122,Happy Camper,2017-05-24 06:36:00,05/24/2017 06:36:00 PM,04/08/2017,Transfer,Partner,Dog,Intact Male,1.0,Not Pit Bull,Black
3,A666430,Lucy,2013-11-07 11:47:00,11/07/2013 11:47:00 AM,11/06/2012,Transfer,Partner,Dog,Spayed Female,12.0,Not Pit Bull,Twocolor
4,A675708,*Johnny,2014-06-03 02:20:00,06/03/2014 02:20:00 PM,03/31/2013,Adoption,,Dog,Neutered Male,12.0,Pit Bull,Twocolor


In [24]:
pitbull_path = '/Users/murdock/Documents/metis/project3/outcome_pitbull.pkl'
pitbull_df.to_pickle(pitbull_path)

### What if we do purebreds vs mixed breeds?

In [25]:
pure_vs_mix_df = animal_outcomes_df.copy()

#### Apply function to relabel breeds

In [26]:
pure_vs_mix_df['breed'] = pure_vs_mix_df['breed'].apply(dc.pure_vs_mix)

In [27]:
pure_vs_mix_df.head()

Unnamed: 0,animalid,name,datetime,monthyear,dateofbirth,outcometype,outcomesubtype,animaltype,sexuponoutcome,ageuponoutcome,breed,color
0,A658751,Benji,2016-11-13 01:38:00,11/13/2016 01:38:00 PM,07/14/2011,Return to Owner,,Dog,Neutered Male,60.0,Mixed Breed,Tan
1,A746650,Rose,2017-04-07 11:58:00,04/07/2017 11:58:00 AM,04/06/2016,Return to Owner,,Dog,Intact Female,12.0,Mixed Breed,Yellow
2,A750122,Happy Camper,2017-05-24 06:36:00,05/24/2017 06:36:00 PM,04/08/2017,Transfer,Partner,Dog,Intact Male,1.0,Mixed Breed,Black
3,A666430,Lucy,2013-11-07 11:47:00,11/07/2013 11:47:00 AM,11/06/2012,Transfer,Partner,Dog,Spayed Female,12.0,Mixed Breed,Twocolor
4,A675708,*Johnny,2014-06-03 02:20:00,06/03/2014 02:20:00 PM,03/31/2013,Adoption,,Dog,Neutered Male,12.0,Purebred,Twocolor


In [28]:
pure_vs_mix_path = '/Users/murdock/Documents/metis/project3/outcome_pure_vs_mix.pkl'
pure_vs_mix_df.to_pickle(pure_vs_mix_path)

### What if we separate out the mixed and purebreds that occur less than 50 times?

In [29]:
low_occurrences_df = animal_outcomes_df.copy()

In [30]:
breed_mixtures = animal_outcomes_df[animal_outcomes_df['breed'].str.contains('/')].groupby('breed').count().sort_values('animalid', ascending=False)
breed_mixtures.head(10)

Unnamed: 0_level_0,animalid,name,datetime,monthyear,dateofbirth,outcometype,outcomesubtype,animaltype,sexuponoutcome,ageuponoutcome,color
breed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Chihuahua Shorthair/Dachshund,408,319,408,408,408,408,124,408,408,408,408
Labrador Retriever/Pit Bull,295,258,295,295,295,295,99,295,295,295,295
German Shepherd/Labrador Retriever,285,233,285,285,285,285,80,285,285,285,285
Border Collie/Labrador Retriever,148,123,148,148,148,148,49,148,148,148,148
Australian Cattle Dog/Labrador Retriever,121,104,121,121,121,121,33,121,121,121,121
Chihuahua Shorthair/Rat Terrier,106,90,106,106,106,106,32,106,106,106,106
Miniature Poodle/Miniature Schnauzer,102,82,102,102,102,102,33,102,102,102,102
Chihuahua Shorthair/Jack Russell Terrier,96,78,96,96,96,96,35,96,96,96,96
Boxer/Pit Bull,91,84,91,91,91,91,27,91,91,91,91
Boxer/Labrador Retriever,85,75,85,85,85,85,20,85,85,85,85


In [31]:
breed_counts = animal_outcomes_df.groupby('breed').count().sort_values('animalid', ascending=False)
breed_counts.tail()

Unnamed: 0_level_0,animalid,name,datetime,monthyear,dateofbirth,outcometype,outcomesubtype,animaltype,sexuponoutcome,ageuponoutcome,color
breed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Golden Retriever/Miniature Poodle,1,1,1,1,1,1,0,1,1,1,1
Golden Retriever/Nova Scotia Duck Tolling Retriever,1,1,1,1,1,1,1,1,1,1,1
Bouv Flandres/Otterhound,1,1,1,1,1,1,0,1,1,1,1
Golden Retriever/Pit Bull,1,1,1,1,1,1,0,1,1,1,1
Yorkshire Terrier/Yorkshire Terrier,1,1,1,1,1,1,0,1,1,1,1


In [32]:
low_quantity_counts = breed_counts[breed_counts['animalid'] < 50]
low_quantity = list(low_quantity_counts.index)
def merge_low_quantities(breed):
    """
    Relabels breeds identified as occurring in low quantities
    
    INPUT:
    breed - an entry from the dataframe's breed column
    OUTPUT:
    a breed entry depending on the occurrence of the breed
    """
    if breed in low_quantity:
        if '/' in breed:
            return 'Other Mixed Breed'
        elif 'Mix' in breed:
            return 'Other Mixed Breed'
        else:
            return 'Other Purebred'
    else:
        return breed

In [33]:
low_occurrences_df['breed'] = low_occurrences_df['breed'].apply(merge_low_quantities)

In [34]:
low_occurrences_df.head()

Unnamed: 0,animalid,name,datetime,monthyear,dateofbirth,outcometype,outcomesubtype,animaltype,sexuponoutcome,ageuponoutcome,breed,color
0,A658751,Benji,2016-11-13 01:38:00,11/13/2016 01:38:00 PM,07/14/2011,Return to Owner,,Dog,Neutered Male,60.0,Border Terrier Mix,Tan
1,A746650,Rose,2017-04-07 11:58:00,04/07/2017 11:58:00 AM,04/06/2016,Return to Owner,,Dog,Intact Female,12.0,Other Mixed Breed,Yellow
2,A750122,Happy Camper,2017-05-24 06:36:00,05/24/2017 06:36:00 PM,04/08/2017,Transfer,Partner,Dog,Intact Male,1.0,Labrador Retriever Mix,Black
3,A666430,Lucy,2013-11-07 11:47:00,11/07/2013 11:47:00 AM,11/06/2012,Transfer,Partner,Dog,Spayed Female,12.0,Beagle Mix,Twocolor
4,A675708,*Johnny,2014-06-03 02:20:00,06/03/2014 02:20:00 PM,03/31/2013,Adoption,,Dog,Neutered Male,12.0,Pit Bull,Twocolor


In [35]:
low_occurrences_df.groupby('breed').count().sort_values('animalid', ascending=False)

Unnamed: 0_level_0,animalid,name,datetime,monthyear,dateofbirth,outcometype,outcomesubtype,animaltype,sexuponoutcome,ageuponoutcome,color
breed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Other Mixed Breed,6454,5493,6454,6454,6454,6454,1854,6454,6454,6454,6454
Pit Bull Mix,5704,5141,5704,5704,5704,5704,1850,5704,5704,5704,5704
Chihuahua Shorthair Mix,4449,3555,4449,4449,4449,4449,1571,4449,4449,4449,4449
Labrador Retriever Mix,4252,3437,4252,4252,4252,4252,1349,4252,4252,4252,4252
German Shepherd Mix,1761,1502,1761,1761,1761,1761,483,1761,1761,1761,1761
Other Purebred,1322,1168,1322,1322,1322,1322,427,1322,1322,1322,1322
Australian Cattle Dog Mix,988,818,988,988,988,988,311,988,988,988,988
Dachshund Mix,765,582,765,765,765,765,246,765,765,765,765
Boxer Mix,641,557,641,641,641,641,197,641,641,641,641
Miniature Poodle Mix,611,480,611,611,611,611,157,611,611,611,611


### PICKLE DATAFRAME

In [36]:
low_occurrences_path = '/Users/murdock/Documents/metis/project3/outcome_low_occurrences.pkl'
low_occurrences_df.to_pickle(low_occurrences_path)

In [44]:
animal_outcomes_df[animal_outcomes_df['animalid'] == 'A701901']

Unnamed: 0,animalid,name,datetime,monthyear,dateofbirth,outcometype,outcomesubtype,animaltype,sexuponoutcome,ageuponoutcome,breed,color
444,A701901,Sasha,2015-12-12 04:14:00,12/12/2015 04:14:00 PM,05/06/2014,Return to Owner,,Dog,Spayed Female,12.0,Pit Bull Mix,Blue
744,A701901,Sasha,2016-04-02 04:45:00,04/02/2016 04:45:00 PM,05/06/2014,Transfer,Partner,Dog,Spayed Female,12.0,Pit Bull Mix,Blue
1128,A701901,Sasha,2015-05-20 06:58:00,05/20/2015 06:58:00 PM,05/06/2014,Return to Owner,,Dog,Spayed Female,12.0,Pit Bull Mix,Blue
1677,A701901,Sasha,2015-09-16 12:20:00,09/16/2015 12:20:00 PM,05/06/2014,Missing,Possible Theft,Dog,Spayed Female,12.0,Pit Bull Mix,Blue
12087,A701901,Sasha,2017-05-14 12:25:00,05/14/2017 12:25:00 PM,05/06/2014,Return to Owner,,Dog,Spayed Female,36.0,Pit Bull Mix,Blue
29462,A701901,Sasha,2015-05-15 07:09:00,05/15/2015 07:09:00 PM,05/06/2014,Return to Owner,,Dog,Spayed Female,12.0,Pit Bull Mix,Blue
31402,A701901,Sasha,2015-11-30 05:12:00,11/30/2015 05:12:00 PM,05/06/2014,Return to Owner,,Dog,Spayed Female,12.0,Pit Bull Mix,Blue


### Upload dataframes to psql

In [38]:
engine_name = 'postgresql:/credentials/project3'
cnx = create_engine(engine_name)

In [39]:
animal_outcomes_df.to_sql("animal_outcomes", cnx)
low_occurrences_df.to_sql("lowoccur_outcomes", cnx)
pure_vs_mix_df.to_sql("purevsmix_outcomes", cnx)
pitbull_df.to_sql("pitbull_outcomes", cnx)

In [40]:
animal_outcomes_df['outcometype'].unique()

array(['Return to Owner', 'Transfer', 'Adoption', 'Euthanasia', 'Disposal',
       'Rto-Adopt', 'Died', 'Missing'], dtype=object)

In [47]:
animal_outcomes_df[animal_outcomes_df['animalid'].duplicated()]

Unnamed: 0,animalid,name,datetime,monthyear,dateofbirth,outcometype,outcomesubtype,animaltype,sexuponoutcome,ageuponoutcome,breed,color
744,A701901,Sasha,2016-04-02 04:45:00,04/02/2016 04:45:00 PM,05/06/2014,Transfer,Partner,Dog,Spayed Female,12.0,Pit Bull Mix,Blue
780,A677918,Buddy,2015-03-29 04:56:00,03/29/2015 04:56:00 PM,03/26/2008,Euthanasia,Aggressive,Dog,Neutered Male,84.0,Pit Bull Mix,Twocolor
986,A622128,Ginger,2014-01-19 02:43:00,01/19/2014 02:43:00 PM,02/09/2012,Adoption,,Dog,Spayed Female,12.0,Boxer Mix,Brindle
1016,A686467,Duke,2014-11-22 07:37:00,11/22/2014 07:37:00 PM,08/22/2010,Adoption,,Dog,Neutered Male,48.0,Catahoula Mix,Twocolor
1128,A701901,Sasha,2015-05-20 06:58:00,05/20/2015 06:58:00 PM,05/06/2014,Return to Owner,,Dog,Spayed Female,12.0,Pit Bull Mix,Blue
1158,A710383,Pepper,2016-07-03 03:40:00,07/03/2016 03:40:00 PM,08/22/2013,Adoption,,Dog,Spayed Female,24.0,Rat Terrier Mix,Twocolor
1173,A676817,Flecha,2014-11-18 05:51:00,11/18/2014 05:51:00 PM,04/15/2012,Return to Owner,,Dog,Neutered Male,24.0,Pit Bull Mix,Twocolor
1216,A720298,Bully,2017-01-09 01:16:00,01/09/2017 01:16:00 PM,11/06/2015,Transfer,Partner,Dog,Neutered Male,12.0,Pit Bull Mix,Twocolor
1243,A696932,Coach,2015-03-08 05:33:00,03/08/2015 05:33:00 PM,07/14/2014,Adoption,,Dog,Spayed Female,7.0,Border Collie Mix,Twocolor
1248,A707116,Heidi,2015-08-08 06:54:00,08/08/2015 06:54:00 PM,07/08/2014,Adoption,,Dog,Spayed Female,12.0,Chinese Sharpei/Labrador Retriever,Twocolor


In [55]:
animal_outcomes_df[animal_outcomes_df['Animal ID'] == 'A552088']

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
5565,A552088,Sasha,04/03/2015 06:28:00 PM,04/03/2015 06:28:00 PM,11/04/2008,Adoption,,Dog,Spayed Female,6 years,Labrador Retriever Mix,Brown
24585,A552088,Sasha,05/12/2016 08:06:00 AM,05/12/2016 08:06:00 AM,11/04/2008,Adoption,Foster,Dog,Spayed Female,7 years,Labrador Retriever Mix,Brown
