# Cleaning/Exploring Intake Data


### Imports

In [1]:
import pandas as pd
import re
import numpy as np
from collections import Counter
import data_cleaning as dc
from sqlalchemy import create_engine

### Load dataframe

In [17]:
intakes_path = '/Users/murdock/Downloads/Austin_Animal_Center_Intakes.csv'

animal_intakes_df = pd.read_csv(intakes_path)

In [18]:
animal_intakes_df.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color
0,A748291,*Madison,05/01/2017 02:26:00 PM,05/01/2017 02:26:00 PM,S Pleasant Valley Rd And E Riverside Dr in Aus...,Stray,Normal,Dog,Intact Female,10 months,Pit Bull Mix,Black
1,A730601,,07/07/2016 12:11:00 PM,07/07/2016 12:11:00 PM,1109 Shady Ln in Austin (TX),Stray,Normal,Cat,Intact Male,7 months,Domestic Shorthair Mix,Blue Tabby
2,A748238,,05/01/2017 10:53:00 AM,05/01/2017 10:53:00 AM,Airport Blvd And Oak Springs Dr in Austin (TX),Stray,Normal,Dog,Intact Male,3 years,Bichon Frise Mix,White
3,A683644,*Zoey,07/13/2014 11:02:00 AM,07/13/2014 11:02:00 AM,Austin (TX),Owner Surrender,Nursing,Dog,Intact Female,4 weeks,Border Collie Mix,Brown/White
4,A748635,,05/04/2017 05:56:00 PM,05/04/2017 05:56:00 PM,11003 Harris Branch Pkwy in Austin (TX),Stray,Normal,Cat,Unknown,9 months,Domestic Shorthair Mix,Blue


In [19]:
animal_intakes_df[animal_intakes_df['Animal ID'] == 'A677918']

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color
10583,A677918,Buddy,02/04/2015 10:45:00 AM,02/04/2015 10:45:00 AM,Ih35 And Cesar Chavez in Austin (TX),Stray,Normal,Dog,Neutered Male,6 years,Pit Bull Mix,White/Chocolate
19845,A677918,Buddy,03/25/2015 06:41:00 PM,03/25/2015 06:41:00 PM,Austin (TX),Owner Surrender,Normal,Dog,Neutered Male,7 years,Pit Bull Mix,White/Chocolate
54043,A677918,Buddy,05/01/2014 10:58:00 AM,05/01/2014 10:58:00 AM,1300 Southport Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,6 years,Pit Bull Mix,White/Chocolate


In [3]:
animal_intakes_df = animal_intakes_df[animal_intakes_df['Animal Type'] == 'Dog']

In [4]:
animal_intakes_df = animal_intakes_df.reset_index(drop=True)

In [5]:
animal_intakes_df['DateTime'] = pd.to_datetime(animal_intakes_df['DateTime'], format='%m/%d/%Y %H:%M:%S %p')

In [6]:
animal_intakes_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41357 entries, 0 to 41356
Data columns (total 12 columns):
Animal ID           41357 non-null object
Name                34771 non-null object
DateTime            41357 non-null datetime64[ns]
MonthYear           41357 non-null object
Found Location      41357 non-null object
Intake Type         41357 non-null object
Intake Condition    41357 non-null object
Animal Type         41357 non-null object
Sex upon Intake     41356 non-null object
Age upon Intake     41357 non-null object
Breed               41357 non-null object
Color               41357 non-null object
dtypes: datetime64[ns](1), object(11)
memory usage: 3.8+ MB


In [7]:
animal_intakes_df.head(10)

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color
0,A748291,*Madison,2017-05-01 02:26:00,05/01/2017 02:26:00 PM,S Pleasant Valley Rd And E Riverside Dr in Aus...,Stray,Normal,Dog,Intact Female,10 months,Pit Bull Mix,Black
1,A748238,,2017-05-01 10:53:00,05/01/2017 10:53:00 AM,Airport Blvd And Oak Springs Dr in Austin (TX),Stray,Normal,Dog,Intact Male,3 years,Bichon Frise Mix,White
2,A683644,*Zoey,2014-07-13 11:02:00,07/13/2014 11:02:00 AM,Austin (TX),Owner Surrender,Nursing,Dog,Intact Female,4 weeks,Border Collie Mix,Brown/White
3,A676515,Rico,2014-04-11 08:45:00,04/11/2014 08:45:00 AM,615 E. Wonsley in Austin (TX),Stray,Normal,Dog,Intact Male,2 months,Pit Bull Mix,White/Brown
4,A692161,George,2014-11-15 03:18:00,11/15/2014 03:18:00 PM,Avenue G/42Nd in Austin (TX),Owner Surrender,Normal,Dog,Intact Male,5 months,Pit Bull Mix,Brown/White
5,A720597,*Franklin,2016-02-12 02:35:00,02/12/2016 02:35:00 PM,8413 Danville Dr in Austin (TX),Stray,Injured,Dog,Intact Male,2 years,Pit Bull Mix,Blue
6,A749535,*Bongo,2017-05-16 11:29:00,05/16/2017 11:29:00 AM,6607 S Ih 35 Frontage Rd in Austin (TX),Stray,Normal,Dog,Intact Male,2 years,Border Collie Mix,Black/White
7,A711377,Ruso,2015-09-06 09:26:00,09/06/2015 09:26:00 AM,Los Arbolos Neighborhood in Austin (TX),Stray,Normal,Dog,Intact Male,3 months,Anatol Shepherd Mix,Brown/Tan
8,A715806,*Samantha,2015-11-10 12:49:00,11/10/2015 12:49:00 PM,1506 Thorneridge Road in Austin (TX),Stray,Normal,Dog,Intact Female,2 years,Boxer/Labrador Retriever,Black
9,A671448,Xander,2014-01-24 02:54:00,01/24/2014 02:54:00 PM,Austin (TX),Public Assist,Normal,Dog,Neutered Male,3 years,Pekingese Mix,Brown/White


### Convert all ages to months

In [8]:
animal_intakes_df['Age upon Intake'] = animal_intakes_df['Age upon Intake'].apply(dc.convert_ages)

### Select columns to keep

In [9]:
animal_intakes_df = animal_intakes_df[['Animal ID', 'DateTime', 'Intake Type', 'Intake Condition', 'Sex upon Intake', 'Age upon Intake']]

In [10]:
animal_intakes_df.columns = ['animalid', 'datetime', 'intaketype', 'intakecondition', 'sexuponintake', 'ageuponintake']
animal_intakes_df.head()

Unnamed: 0,animalid,datetime,intaketype,intakecondition,sexuponintake,ageuponintake
0,A748291,2017-05-01 02:26:00,Stray,Normal,Intact Female,10.0
1,A748238,2017-05-01 10:53:00,Stray,Normal,Intact Male,36.0
2,A683644,2014-07-13 11:02:00,Owner Surrender,Nursing,Intact Female,1.0
3,A676515,2014-04-11 08:45:00,Stray,Normal,Intact Male,2.0
4,A692161,2014-11-15 03:18:00,Owner Surrender,Normal,Intact Male,5.0


### Pickle Dataframe

In [11]:
path = '/Users/murdock/Documents/metis/project3/intake_data.pkl'
animal_intakes_df.to_pickle(path)

### Upload to psql

In [12]:
engine_name = 'postgresql://credentials/project3'
cnx = create_engine(engine_name)

In [13]:
animal_intakes_df.to_sql("animal_intakes", cnx)

In [23]:
animal_intakes_df[animal_intakes_df['Animal ID'] == 'A552088']

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color
20562,A552088,Sasha,09/12/2014 06:51:00 PM,09/12/2014 06:51:00 PM,Delano St And Hudson in Austin (TX),Stray,Normal,Dog,Spayed Female,5 years,Labrador Retriever Mix,Brown
41650,A552088,Sasha,09/13/2015 01:20:00 PM,09/13/2015 01:20:00 PM,7825 Elkhorn Mountain Trail in Austin (TX),Stray,Normal,Dog,Spayed Female,6 years,Labrador Retriever Mix,Brown
