# Project: Be A Hero & Save A Pet Today
## Team Name: In the Dawg Houz
### Team:  Emily Cogsgill, Marya Crigler,  Carlos Pisani, Stephen Schadt

## Dependencies 

In [1]:
# Dependencies
import pandas as pd
import numpy as np
import datetime
import re
import requests
import math
import operator

from datetime import datetime
# Google API Key
from config import gkey


## Clean up raw City of Austin Animal Shelter Data
### Intake and Outcome data files downloaded from City of Austin on November 29, 2017

In [2]:
# Set filepaths
csv_filepath1 = "raw data/Austin_Animal_Center_Intakes.csv"
csv_filepath2 = "raw data/Austin_Animal_Center_Outcomes.csv"

In [3]:
#Define reusable functions for cleanse

#Determine purebred status
def GetPurebred(x):
  if "Mix" in x["Breed"] : return 0
  elif "/" in x["Breed"] : return 0
  else: return 1


#Convert the Age to a consistent unit (days)
def GetAgeDays(x):
    if x["AgeUnits"] == "day":
        numDays = x["NumAge"]
    elif x["AgeUnits"] == "week":
        numDays = x["NumAge"] * 7
    elif x["AgeUnits"] == "month":
        numDays = x["NumAge"] * 30
    elif x["AgeUnits"] == "year":
        numDays = x["NumAge"] * 365
    else:
        numDays = 0
    return numDays

#Get intake outcome day length
def get_days_length(val):
    val = str(val)
    days = re.findall('\d*',val)[0]
    return days


### Cleanse Intake file

In [4]:
# Read the csv files into new dataframe
intakes_df = pd.read_csv(csv_filepath1, encoding="iso-8859-1", low_memory=False)
intakes_df.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color
0,A748291,*Madison,05/01/2017 02:26:00 PM,05/01/2017 02:26:00 PM,S Pleasant Valley Rd And E Riverside Dr in Aus...,Stray,Normal,Dog,Intact Female,10 months,Pit Bull Mix,Black
1,A750529,,05/28/2017 01:22:00 PM,05/28/2017 01:22:00 PM,8312 North Ih 35 in Austin (TX),Stray,Normal,Dog,Intact Female,5 months,Miniature Schnauzer Mix,White/Cream
2,A730601,,07/07/2016 12:11:00 PM,07/07/2016 12:11:00 PM,1109 Shady Ln in Austin (TX),Stray,Normal,Cat,Intact Male,7 months,Domestic Shorthair Mix,Blue Tabby
3,A748238,,05/01/2017 10:53:00 AM,05/01/2017 10:53:00 AM,Airport Blvd And Oak Springs Dr in Austin (TX),Stray,Normal,Dog,Intact Male,3 years,Bichon Frise Mix,White
4,A683644,*Zoey,07/13/2014 11:02:00 AM,07/13/2014 11:02:00 AM,Austin (TX),Owner Surrender,Nursing,Dog,Intact Female,4 weeks,Border Collie Mix,Brown/White


In [5]:
# Drop redundant columns
intakes_df.drop(['MonthYear'], axis = 1, inplace = True)
    #intakes_df.head()

In [6]:
# Transform date field and create new Month and Year columns
intakes_df["DateTime"] = pd.to_datetime(intakes_df["DateTime"])
intakes_df["Month"]= intakes_df["DateTime"].dt.month
intakes_df["Year"]= intakes_df["DateTime"].dt.year

In [7]:
# Identify unique values in the Sex upon Intake column
intakes_df["Sex upon Intake"].unique()

array(['Intact Female', 'Intact Male', 'Spayed Female', 'Unknown',
       'Neutered Male', nan], dtype=object)

In [8]:
# Split Sex upon Intake to an IntakeSex and IntakeSpayNeuter columns
intakes_df["Sex"] = intakes_df["Sex upon Intake"].map({"Intact Female":"Female", "Intact Male":"Male", "Spayed Female":"Female", "Neutered Male":"Male","Unknown":"Unknown Sex"})
intakes_df["Female"] = intakes_df["Sex upon Intake"].map({"Intact Female":1, "Intact Male":0, "Spayed Female":1, "Neutered Male":0,"Unknown":0})
intakes_df["Male"] = intakes_df["Sex upon Intake"].map({"Intact Female":0, "Intact Male":1, "Spayed Female":0, "Neutered Male":1,"Unknown":0})
intakes_df["SpayNeuter"] = intakes_df["Sex upon Intake"].map({"Intact Female":"No", "Intact Male":"No", "Spayed Female":"Yes", "Neutered Male":"Yes","Unknown":"No"})
    #intakes_df.head()

In [9]:
# Set Purebred Status
intakes_df['Purebred'] = intakes_df.apply(GetPurebred, axis=1)
    #intakes_df.head()

In [10]:
#Split Age Upon Intake
intakes_df['NumAge'], intakes_df['AgeUnits'] = intakes_df['Age upon Intake'].str.split(' ', 1).str
    #intakes_df.head()

In [11]:
#Make NumAge a numeric colunm for later calclations
intakes_df["NumAge"] = pd.to_numeric(intakes_df["NumAge"])
    #intakes_df.head()

In [12]:
# Identify unique values in the intake age units
intakes_df["AgeUnits"].unique()

array(['months', 'years', 'weeks', 'month', 'year', 'week', 'days', 'day',
       nan], dtype=object)

In [13]:
#Make the intake age unit values consistent
intakes_df["AgeUnits"].replace(["days","weeks","months","years"],["day","week","month","year"], inplace=True)
    #intakes_df["IntakeAgeUnits"].unique()

In [14]:
#Set the Age Days column
intakes_df['AgeDays'] = intakes_df.apply(GetAgeDays, axis=1)
    #intakes_df.head()

In [15]:
# Get metrics on the raw intake file
intakes_df.describe()

Unnamed: 0,Month,Year,Female,Male,Purebred,NumAge,AgeDays
count,75577.0,75577.0,75576.0,75576.0,75577.0,75576.0,75577.0
mean,6.689601,2015.309009,0.437388,0.47654,0.067322,3.4453,761.731559
std,3.257848,1.213225,0.496067,0.499453,0.250581,2.908907,1052.134727
min,1.0,2013.0,0.0,0.0,0.0,0.0,0.0
25%,4.0,2014.0,0.0,0.0,0.0,1.0,60.0
50%,7.0,2015.0,0.0,0.0,0.0,2.0,365.0
75%,10.0,2016.0,1.0,1.0,0.0,5.0,1095.0
max,12.0,2017.0,1.0,1.0,1.0,25.0,9125.0


In [16]:
#Metrics cont'd
intake_recs = len(intakes_df)
intake_dups = intakes_df.duplicated("Animal ID").sum()
intake_types = intakes_df.groupby(["Animal Type"]).size()
intakeOther = intakes_df[intakes_df["Animal Type"]=="Other"]
intakeOtherGrp = intakeOther.groupby(["Breed"], as_index=False)
intakeOtherCnts = intakeOtherGrp["Animal ID"].count()
sort_intakeOtherCnts = intakeOtherCnts.sort_values("Animal ID", ascending=False)

print(f"Number of records in Intakes file: {intake_recs}")
print(f"Duplicate Intake Animal IDs: {intake_dups}")
print(f"Intakes by Animal Type: {intake_types}")
#print(f"Intake of Other group by Animal Type: {sort_intakeOtherCnts}")

Number of records in Intakes file: 75577
Duplicate Intake Animal IDs: 7031
Intakes by Animal Type: Animal Type
Bird           328
Cat          28489
Dog          42590
Livestock        8
Other         4162
dtype: int64


### Clease Outcome file

In [17]:
#  *** Cleanse the Outcomes file  ***
# Read the csv files into new dataframes
outcomes_df = pd.read_csv(csv_filepath2, encoding="iso-8859-1", low_memory=False)
outcomes_df.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
0,A741715,*Pebbles,01/11/2017 06:17:00 PM,01/11/2017 06:17:00 PM,03/07/2016,Adoption,,Cat,Spayed Female,10 months,Domestic Shorthair Mix,Calico
1,A658751,Benji,11/13/2016 01:38:00 PM,11/13/2016 01:38:00 PM,07/14/2011,Return to Owner,,Dog,Neutered Male,5 years,Border Terrier Mix,Tan
2,A721285,,02/24/2016 02:42:00 PM,02/24/2016 02:42:00 PM,02/24/2014,Euthanasia,Suffering,Other,Unknown,2 years,Raccoon Mix,Black/Gray
3,A746650,Rose,04/07/2017 11:58:00 AM,04/07/2017 11:58:00 AM,04/06/2016,Return to Owner,,Dog,Intact Female,1 year,Labrador Retriever/Jack Russell Terrier,Yellow
4,A750122,Happy Camper,05/24/2017 06:36:00 PM,05/24/2017 06:36:00 PM,04/08/2017,Transfer,Partner,Dog,Intact Male,1 month,Labrador Retriever Mix,Black


In [18]:
# Drop redundant columns
outcomes_df.drop(['MonthYear'], axis = 1, inplace = True)
    #outcomes_df.head()

In [19]:
# Transform date field and create new Month and Year columns
outcomes_df["DateTime"] = pd.to_datetime(outcomes_df["DateTime"])
outcomes_df["Month"]= outcomes_df["DateTime"].dt.month
outcomes_df["Year"]= outcomes_df["DateTime"].dt.year

In [20]:
# Identify unique values in the Sex upon Outcome column
outcomes_df["Sex upon Outcome"].unique()

array(['Spayed Female', 'Neutered Male', 'Unknown', 'Intact Female',
       'Intact Male', nan], dtype=object)

In [21]:
# Split Sex upon Outcome to an OutcomeSex and OutcomeSpayNeuter columns
outcomes_df["Sex"] = outcomes_df["Sex upon Outcome"].map({"Intact Female":"Female", "Intact Male":"Male", "Spayed Female":"Female", "Neutered Male":"Male","Unknown":"Unknown Sex"})
outcomes_df["Female"] = outcomes_df["Sex upon Outcome"].map({"Intact Female":1, "Intact Male":0, "Spayed Female":1, "Neutered Male":0,"Unknown":0})
outcomes_df["Male"] = outcomes_df["Sex upon Outcome"].map({"Intact Female":0, "Intact Male":1, "Spayed Female":0, "Neutered Male":1,"Unknown":0})
outcomes_df["SpayNeuter"] = outcomes_df["Sex upon Outcome"].map({"Intact Female":"No", "Intact Male":"No", "Spayed Female":"Yes", "Neutered Male":"Yes","Unknown":"No"})
    #outcomes_df.head()

In [22]:
# Get purebred 
outcomes_df['Purebred'] = outcomes_df.apply(GetPurebred, axis=1)

In [23]:
# Split out age
outcomes_df['NumAge'], outcomes_df['AgeUnits'] = outcomes_df['Age upon Outcome'].str.split(' ', 1).str

#Make NumAge a numeric colunm for later calclations
outcomes_df["NumAge"] = pd.to_numeric(outcomes_df["NumAge"])

#Make the intake age unit values consistent
outcomes_df["AgeUnits"].replace(["days","weeks","months","years"],["day","week","month","year"], inplace=True)

#Set the Age Days column
outcomes_df['AgeDays'] = outcomes_df.apply(GetAgeDays, axis=1)

In [24]:
# Get metrics on the raw outcomes file
outcomes_df.describe()

Unnamed: 0,Month,Year,Female,Male,Purebred,NumAge,AgeDays
count,75508.0,75508.0,75506.0,75506.0,75508.0,75502.0,75508.0
mean,6.799902,2015.29377,0.437767,0.476492,0.067198,3.528211,772.159175
std,3.301991,1.219788,0.496115,0.49945,0.250367,2.901128,1052.357298
min,1.0,2013.0,0.0,0.0,0.0,0.0,0.0
25%,4.0,2014.0,0.0,0.0,0.0,2.0,90.0
50%,7.0,2015.0,0.0,0.0,0.0,2.0,365.0
75%,10.0,2016.0,1.0,1.0,0.0,5.0,1095.0
max,12.0,2017.0,1.0,1.0,1.0,25.0,9125.0


In [25]:
#Metrics cont'd
outcome_recs = len(outcomes_df)
outcome_dups = outcomes_df.duplicated("Animal ID").sum()
outcome_types = outcomes_df.groupby(["Animal Type"]).size()

print(f"Number of records in Outcomes file: {outcome_recs}")
print(f"Duplicate Outcome Animal IDs: {outcome_dups}")
print(f"Outcomes by Animal Type: {outcome_types}")

Number of records in Outcomes file: 75508
Duplicate Outcome Animal IDs: 6975
Outcomes by Animal Type: Animal Type
Bird           327
Cat          28519
Dog          42498
Livestock        9
Other         4155
dtype: int64


### Merge Intake and Outcome files on animal id

In [26]:
# **** Merge intakes and outcomes files
animalData_df = pd.merge(intakes_df, outcomes_df, on="Animal ID", how="inner", suffixes=('_intake', '_outcome'))
animalData_df.head()

Unnamed: 0,Animal ID,Name_intake,DateTime_intake,Found Location,Intake Type,Intake Condition,Animal Type_intake,Sex upon Intake,Age upon Intake,Breed_intake,...,Month_outcome,Year_outcome,Sex_outcome,Female_outcome,Male_outcome,SpayNeuter_outcome,Purebred_outcome,NumAge_outcome,AgeUnits_outcome,AgeDays_outcome
0,A748291,*Madison,2017-05-01 14:26:00,S Pleasant Valley Rd And E Riverside Dr in Aus...,Stray,Normal,Dog,Intact Female,10 months,Pit Bull Mix,...,9,2017,Female,1.0,0.0,Yes,0,1.0,year,365.0
1,A750529,,2017-05-28 13:22:00,8312 North Ih 35 in Austin (TX),Stray,Normal,Dog,Intact Female,5 months,Miniature Schnauzer Mix,...,6,2017,Female,1.0,0.0,Yes,0,5.0,month,150.0
2,A730601,,2016-07-07 12:11:00,1109 Shady Ln in Austin (TX),Stray,Normal,Cat,Intact Male,7 months,Domestic Shorthair Mix,...,7,2016,Male,0.0,1.0,Yes,0,7.0,month,210.0
3,A748238,,2017-05-01 10:53:00,Airport Blvd And Oak Springs Dr in Austin (TX),Stray,Normal,Dog,Intact Male,3 years,Bichon Frise Mix,...,5,2017,Male,0.0,1.0,Yes,0,3.0,year,1095.0
4,A683644,*Zoey,2014-07-13 11:02:00,Austin (TX),Owner Surrender,Nursing,Dog,Intact Female,4 weeks,Border Collie Mix,...,11,2014,Female,1.0,0.0,Yes,0,4.0,month,120.0


In [27]:
print(animalData_df.columns)

Index(['Animal ID', 'Name_intake', 'DateTime_intake', 'Found Location',
       'Intake Type', 'Intake Condition', 'Animal Type_intake',
       'Sex upon Intake', 'Age upon Intake', 'Breed_intake', 'Color_intake',
       'Month_intake', 'Year_intake', 'Sex_intake', 'Female_intake',
       'Male_intake', 'SpayNeuter_intake', 'Purebred_intake', 'NumAge_intake',
       'AgeUnits_intake', 'AgeDays_intake', 'Name_outcome', 'DateTime_outcome',
       'Date of Birth', 'Outcome Type', 'Outcome Subtype',
       'Animal Type_outcome', 'Sex upon Outcome', 'Age upon Outcome',
       'Breed_outcome', 'Color_outcome', 'Month_outcome', 'Year_outcome',
       'Sex_outcome', 'Female_outcome', 'Male_outcome', 'SpayNeuter_outcome',
       'Purebred_outcome', 'NumAge_outcome', 'AgeUnits_outcome',
       'AgeDays_outcome'],
      dtype='object')


In [28]:
#Remove rows with duplicate animal IDs
animalData_df=animalData_df.drop_duplicates("Animal ID",keep=False)
len(animalData_df)

62049

In [29]:
#Test to see if any other columns are redundant
animalData_df["Color_intake"].equals(animalData_df["Color_outcome"])

False

In [30]:
#Filter for only Dogs and Cats
animalData_df = animalData_df.loc[animalData_df["Animal Type_intake"].isin(["Dog","Cat"])]
len(animalData_df)

57593

In [31]:
# Calc date diff between intakes and outcomes
animalData_df["Intake Outcome Days"] = animalData_df["DateTime_outcome"]-animalData_df["DateTime_intake"]
#animalData_df.head()

In [32]:
animalData_df["IODays"] = animalData_df['Intake Outcome Days'].apply(get_days_length)
animalData_df["IODays"]=pd.to_numeric(animalData_df["IODays"])
animalData_df.head()

Unnamed: 0,Animal ID,Name_intake,DateTime_intake,Found Location,Intake Type,Intake Condition,Animal Type_intake,Sex upon Intake,Age upon Intake,Breed_intake,...,Sex_outcome,Female_outcome,Male_outcome,SpayNeuter_outcome,Purebred_outcome,NumAge_outcome,AgeUnits_outcome,AgeDays_outcome,Intake Outcome Days,IODays
0,A748291,*Madison,2017-05-01 14:26:00,S Pleasant Valley Rd And E Riverside Dr in Aus...,Stray,Normal,Dog,Intact Female,10 months,Pit Bull Mix,...,Female,1.0,0.0,Yes,0,1.0,year,365.0,125 days 09:34:00,125.0
1,A750529,,2017-05-28 13:22:00,8312 North Ih 35 in Austin (TX),Stray,Normal,Dog,Intact Female,5 months,Miniature Schnauzer Mix,...,Female,1.0,0.0,Yes,0,5.0,month,150.0,4 days 03:20:00,4.0
2,A730601,,2016-07-07 12:11:00,1109 Shady Ln in Austin (TX),Stray,Normal,Cat,Intact Male,7 months,Domestic Shorthair Mix,...,Male,0.0,1.0,Yes,0,7.0,month,210.0,0 days 20:49:00,0.0
3,A748238,,2017-05-01 10:53:00,Airport Blvd And Oak Springs Dr in Austin (TX),Stray,Normal,Dog,Intact Male,3 years,Bichon Frise Mix,...,Male,0.0,1.0,Yes,0,3.0,year,1095.0,5 days 05:04:00,5.0
4,A683644,*Zoey,2014-07-13 11:02:00,Austin (TX),Owner Surrender,Nursing,Dog,Intact Female,4 weeks,Border Collie Mix,...,Female,1.0,0.0,Yes,0,4.0,month,120.0,115 days 23:04:00,115.0


#### Export merged file to CSV

In [33]:
#Export to CSV
animalData_df.to_csv("raw data/MergedData.csv", encoding="utf-8", index=False)

## Cleanup of Austin vs Louisville Data
### Computing time deltas for Louisville data

In [34]:
louisville_data = pd.read_csv("raw data/Louisville.csv")
louisville_data.head()

Unnamed: 0,AnimalID,AnimalType,IntakeDate,IntakeType,IntakeSubtype,PrimaryColor,PrimaryBreed,SecondaryBreed,Gender,SecondaryColor,...,IntakeInternalStatus,IntakeAsilomarStatus,ReproductiveStatusAtIntake,OutcomeDate,OutcomeType,OutcomeSubtype,OutcomeReason,OutcomeInternalStatus,OutcomeAsilomarStatus,ReproductiveStatusAtOutcome
0,A366370,CAT,2008-11-07 10:50:00,STRAY,OTC,WHITE,DOMESTIC SHORTHAIR,,NEUTERED MALE,BROWN,...,FEARFUL,HEALTHY,ALTERED,2008-11-12 15:46:00,EUTH,FERAL,,,UNHEALTHY/UNTREATABLE,ALTERED
1,A366531,CAT,2008-11-10 10:20:00,STRAY,OTC,BLACK,DOMESTIC SHORTHAIR,DOMESTIC SHORTHAIR,UNKNOWN,,...,NORMAL,HEALTHY,UNKNOWN,2008-11-19 20:10:00,EUTH,CONTAG DIS,,SICK,HEALTHY,UNKNOWN
2,A532367,BIRD,2014-07-23 23:21:00,CONFISCATE,CRUELTY,RED,CHICKEN,,MALE,BLACK,...,OTHER,HEALTHY,FERTILE,2014-11-05 15:49:00,TRANSFER,,,,HEALTHY,FERTILE
3,A532474,OTHER,2014-07-24 18:29:00,ET REQUEST,,BROWN,BAT,,UNKNOWN,,...,OTHER,HEALTHY,UNKNOWN,2014-07-24 23:59:00,EUTH,MEDICAL,,OTHER,HEALTHY,UNKNOWN
4,A281756,DOG,2006-09-11 18:10:00,OWNER SUR,OTC,WHITE,PIT BULL TERRIER,,MALE,BROWN,...,NORMAL,HEALTHY,FERTILE,2006-09-12 13:44:00,EUTH,TIME/SPACE,,,HEALTHY,FERTILE


In [35]:
louisville_data['Days from Intake to Outcome']=''

for row in range(0,len(louisville_data)):
    #print(row)
    try: 
        intake_date=str(louisville_data.iloc[row,2])
        outcome_date=str(louisville_data.iloc[row,15])                 
        intake_datetime = datetime.strptime(intake_date, '%Y-%m-%d %H:%M:%S')
        outcome_datetime = datetime.strptime(outcome_date, '%Y-%m-%d %H:%M:%S')                 
        days_to_outcome=(outcome_datetime-intake_datetime).total_seconds()/86400
        louisville_data.iloc[row,22]=days_to_outcome
    except ValueError:
        continue

In [36]:
print(louisville_data.columns)

Index(['AnimalID', 'AnimalType', 'IntakeDate', 'IntakeType', 'IntakeSubtype',
       'PrimaryColor', 'PrimaryBreed', 'SecondaryBreed', 'Gender',
       'SecondaryColor', 'DOB', 'IntakeReason', 'IntakeInternalStatus',
       'IntakeAsilomarStatus', 'ReproductiveStatusAtIntake', 'OutcomeDate',
       'OutcomeType', 'OutcomeSubtype', 'OutcomeReason',
       'OutcomeInternalStatus', 'OutcomeAsilomarStatus',
       'ReproductiveStatusAtOutcome', 'Days from Intake to Outcome'],
      dtype='object')


In [37]:
louisville_data.rename(columns={"Time from Intake to Outcome":"Days between Intake and Outcome"})

Unnamed: 0,AnimalID,AnimalType,IntakeDate,IntakeType,IntakeSubtype,PrimaryColor,PrimaryBreed,SecondaryBreed,Gender,SecondaryColor,...,IntakeAsilomarStatus,ReproductiveStatusAtIntake,OutcomeDate,OutcomeType,OutcomeSubtype,OutcomeReason,OutcomeInternalStatus,OutcomeAsilomarStatus,ReproductiveStatusAtOutcome,Days from Intake to Outcome
0,A366370,CAT,2008-11-07 10:50:00,STRAY,OTC,WHITE,DOMESTIC SHORTHAIR,,NEUTERED MALE,BROWN,...,HEALTHY,ALTERED,2008-11-12 15:46:00,EUTH,FERAL,,,UNHEALTHY/UNTREATABLE,ALTERED,5.20556
1,A366531,CAT,2008-11-10 10:20:00,STRAY,OTC,BLACK,DOMESTIC SHORTHAIR,DOMESTIC SHORTHAIR,UNKNOWN,,...,HEALTHY,UNKNOWN,2008-11-19 20:10:00,EUTH,CONTAG DIS,,SICK,HEALTHY,UNKNOWN,9.40972
2,A532367,BIRD,2014-07-23 23:21:00,CONFISCATE,CRUELTY,RED,CHICKEN,,MALE,BLACK,...,HEALTHY,FERTILE,2014-11-05 15:49:00,TRANSFER,,,,HEALTHY,FERTILE,104.686
3,A532474,OTHER,2014-07-24 18:29:00,ET REQUEST,,BROWN,BAT,,UNKNOWN,,...,HEALTHY,UNKNOWN,2014-07-24 23:59:00,EUTH,MEDICAL,,OTHER,HEALTHY,UNKNOWN,0.229167
4,A281756,DOG,2006-09-11 18:10:00,OWNER SUR,OTC,WHITE,PIT BULL TERRIER,,MALE,BROWN,...,HEALTHY,FERTILE,2006-09-12 13:44:00,EUTH,TIME/SPACE,,,HEALTHY,FERTILE,0.815278
5,A451184,BIRD,2012-01-29 15:25:00,OWNER SUR,FIELD,BLACK,CHICKEN,,UNKNOWN,WHITE,...,HEALTHY,UNKNOWN,2012-02-22 23:59:00,TRANSFER,RESCUE GRP,,NORMAL,HEALTHY,UNKNOWN,24.3569
6,A278854,CAT,2006-08-17 14:28:00,STRAY,OTC,BROWN TIGER,DOMESTIC SHORTHAIR,,FEMALE,,...,HEALTHY,FERTILE,2006-08-28 11:30:00,EUTH,MEDICAL,,,HEALTHY,FERTILE,10.8764
7,A278862,CAT,2006-08-17 14:28:00,STRAY,OTC,GRAY,DOMESTIC SHORTHAIR,,MALE,,...,HEALTHY,FERTILE,2006-08-28 09:20:00,DIED,IN KENNEL,,,HEALTHY,FERTILE,10.7861
8,A278889,CAT,2006-08-17 16:53:00,STRAY,FIELD,WHITE,DOMESTIC SHORTHAIR,,FEMALE,GRAY,...,HEALTHY,FERTILE,2006-08-22 11:15:00,EUTH,MEDICAL,,,HEALTHY,FERTILE,4.76528
9,A279309,CAT,2006-08-22 18:36:00,STRAY,FIELD,BLACK,DOMESTIC SHORTHAIR,,FEMALE,,...,HEALTHY,FERTILE,2006-09-07 15:34:00,DIED,IN KENNEL,,,HEALTHY,FERTILE,15.8736


In [38]:
#louisville_data=louisville_data.drop(columns=['Days from Intake to Outcome'])

In [39]:
louisville_data=louisville_data.rename(columns={"Time from Intake to Outcome":"Days between Intake and Outcome"})
louisville_data.head()

Unnamed: 0,AnimalID,AnimalType,IntakeDate,IntakeType,IntakeSubtype,PrimaryColor,PrimaryBreed,SecondaryBreed,Gender,SecondaryColor,...,IntakeAsilomarStatus,ReproductiveStatusAtIntake,OutcomeDate,OutcomeType,OutcomeSubtype,OutcomeReason,OutcomeInternalStatus,OutcomeAsilomarStatus,ReproductiveStatusAtOutcome,Days from Intake to Outcome
0,A366370,CAT,2008-11-07 10:50:00,STRAY,OTC,WHITE,DOMESTIC SHORTHAIR,,NEUTERED MALE,BROWN,...,HEALTHY,ALTERED,2008-11-12 15:46:00,EUTH,FERAL,,,UNHEALTHY/UNTREATABLE,ALTERED,5.20556
1,A366531,CAT,2008-11-10 10:20:00,STRAY,OTC,BLACK,DOMESTIC SHORTHAIR,DOMESTIC SHORTHAIR,UNKNOWN,,...,HEALTHY,UNKNOWN,2008-11-19 20:10:00,EUTH,CONTAG DIS,,SICK,HEALTHY,UNKNOWN,9.40972
2,A532367,BIRD,2014-07-23 23:21:00,CONFISCATE,CRUELTY,RED,CHICKEN,,MALE,BLACK,...,HEALTHY,FERTILE,2014-11-05 15:49:00,TRANSFER,,,,HEALTHY,FERTILE,104.686
3,A532474,OTHER,2014-07-24 18:29:00,ET REQUEST,,BROWN,BAT,,UNKNOWN,,...,HEALTHY,UNKNOWN,2014-07-24 23:59:00,EUTH,MEDICAL,,OTHER,HEALTHY,UNKNOWN,0.229167
4,A281756,DOG,2006-09-11 18:10:00,OWNER SUR,OTC,WHITE,PIT BULL TERRIER,,MALE,BROWN,...,HEALTHY,FERTILE,2006-09-12 13:44:00,EUTH,TIME/SPACE,,,HEALTHY,FERTILE,0.815278


In [40]:
louisville_data.to_csv('raw data/test/Louisville_with_time_deltas.csv')

In [41]:
louisville_data_dogs_cats_only=louisville_data[(louisville_data['AnimalType']=='CAT') | (louisville_data['AnimalType']=='DOG')]

In [42]:
#confirm it worked
louisville_data_dogs_cats_only['AnimalType'].value_counts()

DOG    76687
CAT    68212
Name: AnimalType, dtype: int64

In [43]:
louisville_data_dogs_cats_only.to_csv('raw data/test/Louisville_with_time_deltas_dogs_cats_only.csv')

### Computing time deltas for Austin data

In [44]:
#Read in the data
austin_data = pd.read_csv("raw data/MergedData.csv")

In [45]:
austin_data['Days from Intake to Outcome']=''

for row in range(len(austin_data)):
    #print(f"Processing row {row}")
    try: 
        intake_date=str(austin_data.loc[row,'DateTime_intake'])
        outcome_date=str(austin_data.loc[row,'DateTime_outcome'])    
        intake_datetime = datetime.strptime(intake_date, '%m/%d/%Y %H:%M:%S %p')
        outcome_datetime = datetime.strptime(outcome_date, '%m/%d/%Y %H:%M:%S %p')                 
        days_to_outcome=(outcome_datetime-intake_datetime).total_seconds()/86400
        austin_data.loc[row,'Days from Intake to Outcome']=days_to_outcome
    except ValueError:
        continue

In [46]:
#confirm there are only cats and dogs
austin_data['Animal Type_intake'].value_counts()

Dog    31697
Cat    25896
Name: Animal Type_intake, dtype: int64

In [47]:
austin_data.to_csv('raw data/test/Austin_merged_data_with_time_deltas.csv')

## Animal Shelter Intake Analysis in Austin, TX by Location

#### Build animal / location dataframes

In [48]:
#
# Filtered Dataframe: Only intakes from 1/1/16, 12/1/16, 6/1/17, or 9/1/17 and forward 
#  (depending on how many API calls we can make)
#

# *** Below datasets contain larger datasets and should only be loaded if you have ample API calls to make against Google Maps API ***
# df_intakes_2016_and_on = pd.read_csv('raw data/Austin_Animal_Center_Intakes_2016_and_on.csv', encoding='latin-1')
# df_intakes_2017_and_on = pd.read_csv('raw data/Austin_Animal_Center_Intakes_2017_and_on.csv', encoding='latin-1')
# df_intakes_2017_and_on = pd.read_csv('raw data/Austin_Animal_Center_Intakes_092017_and_on.csv', encoding='latin-1')

# *** Below dataset only contains 1000ish records - use this one for testing purposes ***
df_intakes_2017_and_on = pd.read_csv('raw data/Austin_Animal_Center_Intakes_2017-11_and_on.csv', encoding='latin-1')

# Create clean dataframe to populate only rows with applicable addresses
df_intakes_clean = pd.DataFrame(columns=["DateTime", "Found Address", "Intake Type", "Intake Condition",
                                        "Animal Type", "Sex upon Intake", "Age upon Intake", "Breed", "Color"])

In [49]:
print(f"Length of dataset: {len(df_intakes_2017_and_on)}")
df_intakes_2017_and_on.head()

Length of dataset: 981


Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color
0,A761394,Bella,11/4/2017 8:14,11/4/2017 8:14,12034 Research Boulevard in Austin (TX),Stray,Injured,Dog,Intact Female,3 years,German Shepherd Mix,White
1,A757964,Duke,11/4/2017 8:22,11/4/2017 8:22,Austin (TX),Stray,Injured,Dog,Intact Male,1 year,Great Pyrenees Mix,White
2,A761396,*Barney,11/4/2017 8:22,11/4/2017 8:22,Austin (TX),Stray,Injured,Dog,Intact Male,1 year,Australian Cattle Dog Mix,Tan/White
3,A754170,Diva,11/4/2017 10:11,11/4/2017 10:11,7211 Ritchie in Austin (TX),Stray,Normal,Dog,Spayed Female,6 months,Pit Bull Mix,Blue/White
4,A724545,Chloe,11/4/2017 10:11,11/4/2017 10:11,7211 Ritchie in Austin (TX),Stray,Normal,Dog,Spayed Female,1 year,Pit Bull Mix,Brown/White


In [50]:
# *********************************************
# *** Function to clean address column data ***
# *********************************************
def clean_address(addr):
    '''
    Function: clean_address
    Argument: address
    Return values: tuple final address (string), is_full_address (boolean)
    '''
    # variable determining whether or not this is a full address (defaults to False)
    is_full_address = False

    # Initialize address variable by cleaning off the (TX) part
    addressclean = addr.replace(" (TX)","")

    # Split the address from the city
    address = addressclean.split(" in ")

    # Clean up address
    address_words = addressclean.split(" ")

    # First find out if this address is not applicable
    if addressclean == "Outside Jurisdiction":
        address_final = "NA"
    
    # Next, find out if this is an actual street address
    elif (len(address) > 1):
        is_full_address = True

        # Street address (raw)
        address_street = address[0]
        address_city = address[1]
        
        # Clean up addresses with "/" characters into [street1 and street2] syntax
        address_corner = address_street.split("/")
        if len(address_corner) > 1:
            address_street = f"{address_corner[0]} and {address_corner[1]}"
        else:
            address_street = address_corner[0]
            
        address_final = f"{address_street},{address_city},TX"
        
    # Finally, for non-address strings...single-city listing
    else:
        address_final = f"{addressclean},TX"
        
    return (address_final, is_full_address)

In [51]:
#
# Loop through last-1-year or last-2-years dataset, and only insert rows with clean addresses into clean dataset
#
for index, row in df_intakes_2017_and_on.iterrows():
    # Call function to clean up address into something we can pass to Google API
    address_tuple = clean_address(row["Found Location"])
    address = address_tuple[0]
    is_full_address = address_tuple[1]
    
    if address == "NA":
        print("Outside jurisdiction - skipping")
        continue
    elif is_full_address == False:
        print("No actual address - skipping")
        continue
    else:
        # Fill empty "clean" dataframe with rows we actually want to process
        df_intakes_clean = df_intakes_clean.append({"DateTime": row["DateTime"],
                                "Found Address": address,
                                "Intake Type": row["Intake Type"],
                                "Intake Condition": row["Intake Condition"],        
                                "Animal Type": row["Animal Type"],
                                "Sex upon Intake": row["Sex upon Intake"],
                                "Age upon Intake": row["Age upon Intake"],
                                "Breed": row["Breed"],
                                "Color": row["Color"]}, ignore_index=True)
        

No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
Outside jurisdiction - skipping
Outside jurisdiction - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actual address - skipping
No actua

In [52]:
# Visualize cleaned dataset
print(f"Length of entire dataset: {len(df_intakes_clean)}")
df_intakes_clean["Intake Type"].value_counts()
df_intakes_clean.head()

Length of entire dataset: 699


Unnamed: 0,DateTime,Found Address,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color
0,11/4/2017 8:14,"12034 Research Boulevard,Austin,TX",Stray,Injured,Dog,Intact Female,3 years,German Shepherd Mix,White
1,11/4/2017 10:11,"7211 Ritchie,Austin,TX",Stray,Normal,Dog,Spayed Female,6 months,Pit Bull Mix,Blue/White
2,11/4/2017 10:11,"7211 Ritchie,Austin,TX",Stray,Normal,Dog,Spayed Female,1 year,Pit Bull Mix,Brown/White
3,11/4/2017 10:25,"900 Thompson Street,Austin,TX",Stray,Normal,Dog,Spayed Female,2 years,Pit Bull Mix,White
4,11/4/2017 11:11,"26Th Street And Rio Grande,Austin,TX",Stray,Normal,Dog,Intact Male,4 months,Rat Terrier/Dachshund,Black/Tan


### Google Maps API integration

In [53]:
# Create summary dataframe to house address, lat, long, and boolean indicating if this pet lived in a home
df_summary = pd.DataFrame(columns=["Address", "Latitude", "Longitude", "Pet at Home", "Animal Type"])

#### Function and For-loop to grab lat/lng from Google API

In [54]:
# If we have already populated the summary dataframe, drop any rows where Lat/Long values are zero, 
#  so appending below will be clean
df_summary = df_summary[df_summary.Latitude != 0]

# Visualize the trimmed dataframe
print(f"Length of dataset: {len(df_summary)}")
df_summary.head()

Length of dataset: 0


Unnamed: 0,Address,Latitude,Longitude,Pet at Home,Animal Type


In [55]:
# **************************************************************
# *** Function to pull lat / lng values from Google Maps API ***
# **************************************************************
def get_lat_long(address):
    '''
    Function: get_lat_long
    Purpose:  Get lat and long codes from Google maps API, given an address string
    Argument: address
    Returns:  lat, long values
    '''
    # Create endpoint URL
    endpoint_url = f"https://maps.googleapis.com/maps/api/geocode/json?address={address}&key={gkey}"

    # Run request to grab the JSON at the requested URL
    google_api_json = requests.get(endpoint_url).json()

    # Append the lat/lng to the appropriate columns (use try / except to skip addresses with errors)
    try: 
        lat = google_api_json["results"][0]["geometry"]["location"]["lat"]
        lng = google_api_json["results"][0]["geometry"]["location"]["lng"]
        retval = (lat, lng)

    except IndexError:
        retval = (0, 0)

    return retval
# Initialize loop variables
row_count = 0 
processed_addresses = []

#
# Loop through cleaned dataset and determine lat/lng using Google maps geocoding API
#
# ** NOTE: this loop must not exceed 25,000 calls to the Google API in a 24 hour period, per the Google free API terms ***
#
for index, row in df_intakes_clean.iterrows():
    row_count += 1
    
    # Set address and pet_at_home boolean variable
    addr = row["Found Address"]
    pet_at_home = False
    
    # If we've already done a lookup for this address, no need to call Maps API
    if (addr in processed_addresses):
        print(f"Address already processed: {addr}")
        continue
    else:
        # Verify if this address already has a lat/long value in the table. If so, continue. 
        is_address_in_df = df_summary[df_summary.Address == addr].count()["Address"]
       
        # If this address isn't already in the dataframe, call Google API to populate lat/lon
        if (is_address_in_df == 0):
            print(f"New address being processed: {addr}: {str(row_count)}")
            (latitude, longitude) = get_lat_long(addr)
            
            # Append to addresses array to mark this address as processed
            processed_addresses.append(addr)
        else:
            # Address was found, but the Latitude value is populated
            print(f"Address already populated: {addr}")
            continue
       
    # Set variables for "Pet at Home", "Animal Type"
    animal_type = row["Animal Type"]
    if row["Intake Type"] == "Owner Surrender" or row["Intake Type"] == "Euthanasia Request" or row["Intake Type"] == "Public Assist":
        pet_at_home = True

    # Append values to our summary dataframe
    df_summary = df_summary.append({"Address": addr,
                                    "Latitude": latitude,
                                    "Longitude": longitude,
                                    "Pet at Home": pet_at_home,
                                    "Animal Type": animal_type},
                                    ignore_index=True)

New address being processed: 12034 Research Boulevard,Austin,TX: 1
New address being processed: 7211 Ritchie,Austin,TX: 2
Address already processed: 7211 Ritchie,Austin,TX
New address being processed: 900 Thompson Street,Austin,TX: 4
New address being processed: 26Th Street And Rio Grande,Austin,TX: 5
New address being processed: Wally Ave,Austin,TX: 6
New address being processed: 7806 South 1St Street,Austin,TX: 7
New address being processed: 5309 Spring Meadow Rd,Austin,TX: 8
New address being processed: 4602 East Stassney Lane,Austin,TX: 9
New address being processed: 1016 Camino La Costa,Austin,TX: 10
Address already processed: 1016 Camino La Costa,Austin,TX
Address already processed: 1016 Camino La Costa,Austin,TX
Address already processed: 1016 Camino La Costa,Austin,TX
New address being processed: 12362 Ballerstedt Road,Travis,TX: 14
New address being processed: 12Th Street And Chicon,Austin,TX: 15
New address being processed: 11904 Garden Gate,Austin,TX: 16
New address being pr

New address being processed: 40 N Ih 35,Austin,TX: 130
New address being processed: 12117 Manchaca,Travis,TX: 131
New address being processed: Manchaca Road  And Glen Allen Street,Austin,TX: 132
New address being processed: 2323 Wells Branch Parkway,Austin,TX: 133
New address being processed: 20409 Crooked Stick,Pflugerville,TX: 134
New address being processed: Grove Blvd & Hogan Avenue,Austin,TX: 135
New address being processed: Fm 2222 And River Place Blouevard,Austin,TX: 136
New address being processed: 9400 Petrichor,Manor,TX: 137
New address being processed: 305 E Yager,Austin,TX: 138
New address being processed: 2000 S Ih 35,Austin,TX: 139
New address being processed: 18208 Great Falls Dr,Manor,TX: 140
New address being processed: 7610  Cameron,Austin,TX: 141
New address being processed: South Oltorf And Durwood Street,Austin,TX: 142
New address being processed: 5209 Coppermead Lane,Austin,TX: 143
Address already processed: 5209 Coppermead Lane,Austin,TX
Address already processed

New address being processed: Bluff Springs Road And Quicksilver Drive,Austin,TX: 254
Address already processed: Bluff Springs Road And Quicksilver Drive,Austin,TX
New address being processed: 3401 West Parmer Lane,Austin,TX: 256
Address already processed: 3401 West Parmer Lane,Austin,TX
New address being processed: 12001 Hispana Court,Austin,TX: 258
New address being processed: 5808 Mckinney Falls Parkway,Austin,TX: 259
New address being processed: 8400 Old Bee Caves,Austin,TX: 260
Address already processed: 8400 Old Bee Caves,Austin,TX
New address being processed: 908 Cedar Glen,Austin,TX: 262
Address already processed: 908 Cedar Glen,Austin,TX
New address being processed: Donald Drive And Claudia June Avenue,Austin,TX: 264
New address being processed: 4705 Leather Leaf,Austin,TX: 265
New address being processed: Metric And Cedar Bend Drive,Austin,TX: 266
New address being processed: North Lamar And Rundberg,Austin,TX: 267
New address being processed: Escarpment And Convict Hill,Austi

New address being processed: 2300 Cesar Chavez,Austin,TX: 381
New address being processed: 404 Ledgeway,Travis,TX: 382
New address being processed: 1729 Constantino Circle,Austin,TX: 383
New address being processed: South Pleasant Valley And Teri Road,Austin,TX: 384
New address being processed: 19400 Block Callan Court,Manor,TX: 385
New address being processed: 12179 Running Bird Ln,Austin,TX: 386
Address already processed: 19400 Block Callan Court,Manor,TX
New address being processed: West Braker Lane And Austin Park Lane,Austin,TX: 388
New address being processed: Franklin Neightborhood Park Trail,Austin,TX: 389
Address already processed: Franklin Neightborhood Park Trail,Austin,TX
Address already processed: Franklin Neightborhood Park Trail,Austin,TX
New address being processed: 13730 N Fm 620,Austin,TX: 392
New address being processed: 19518 Englemann Ln,Travis,TX: 393
New address being processed: Tollway 130 And Fm 973,Austin,TX: 394
Address already processed: 10505 S Ih 35,Austin

New address being processed: 1700 Burton Drive,Austin,TX: 509
New address being processed: North Imperial And Wayside,Austin,TX: 510
New address being processed: 6701 Burnet Road,Austin,TX: 511
Address already processed: 6701 Burnet Road,Austin,TX
New address being processed: Ih35 And Onion Creek Overpass,Austin,TX: 513
New address being processed: 300 Carmen Court,Austin,TX: 514
New address being processed: 912 Keith Ln,Austin,TX: 515
New address being processed: 4900 Barkbridge Trl,Austin,TX: 516
Address already processed: 4900 Barkbridge Trl,Austin,TX
New address being processed: 1704 Nelms Drive,Austin,TX: 518
New address being processed: 8328 Sassman,Austin,TX: 519
New address being processed: 7706 Village Creek,Austin,TX: 520
New address being processed: 5310 Prock Lane,Austin,TX: 521
Address already processed: 5310 Prock Lane,Austin,TX
New address being processed: 124 Bolles Circle,Austin,TX: 523
New address being processed: 12800 Chime Drive,Manor,TX: 524
New address being proc

New address being processed: 1906 Willow Creek,Austin,TX: 635
New address being processed: 2901 West Slaughter Lane,Austin,TX: 636
New address being processed: Union Lee Road,Manor,TX: 637
New address being processed: 2200 Palmera,Austin,TX: 638
New address being processed: Murray Lane,Manor,TX: 639
New address being processed: 1163 Lott Avenue,Austin,TX: 640
New address being processed: 4802 Flicker Cove,Austin,TX: 641
New address being processed: 2203 Leah Cv A,Austin,TX: 642
New address being processed: 8105 Linden Road,Del Valle,TX: 643
Address already processed: 8105 Linden Road,Del Valle,TX
New address being processed: 3500 Windsor Road,Austin,TX: 645
New address being processed: Woodward And East Ben White Boulevard,Austin,TX: 646
New address being processed: 6118 Fairway Street,Austin,TX: 647
New address being processed: 8103 Seeling Drive,Austin,TX: 648
New address being processed: 207 Beaver,Austin,TX: 649
New address being processed: Barwood Park,Austin,TX: 650
New address b

In [56]:
print(f"Length of summary dataset: {len(df_summary)}")
#df_summary.head()

# Save the DataFrame as a csv
df_summary.to_csv("raw data/animal_shelter_analysis_summary_clean_LocationData.csv", encoding="utf-8", index=False)

Length of summary dataset: 543


### Create summary dataframes: 2016 to Present and Animals from Homes
#### Create filtered datasets (animals in homes, cats, dogs)

In [58]:
# *** Filter original dataframe into smaller datasets ***

# All intakes not 'Wildlife' or 'Stray' Intake Type: includes "Intake Type" of:
#     - Euthanasia request
#     - Owner surrender
#     - Public assist
df_animals_homes = df_intakes_clean.loc[((df_intakes_clean["Intake Type"] =="Owner Surrender") | 
                                         (df_intakes_clean["Intake Type"] == "Euthanasia Request") |
                                         (df_intakes_clean["Intake Type"] == "Public Assist")),]

# Dogs only
df_animals_dogs = df_intakes_clean.loc[(df_intakes_clean["Animal Type"] =="Dog"),]

# Cats only 
df_animals_cats = df_intakes_clean.loc[(df_intakes_clean["Animal Type"] =="Cat"),]

# Visualize homes dataset
df_animals_homes.head()

Unnamed: 0,DateTime,Found Address,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color
33,11/5/2017 9:29,"2901 South Capital Of Texas Highway,Austin,TX",Public Assist,Sick,Dog,Intact Female,4 weeks,Border Collie Mix,Black/White
34,11/5/2017 9:29,"2901 South Capital Of Texas Highway,Austin,TX",Public Assist,Normal,Dog,Intact Male,4 weeks,Border Collie Mix,Black/White
69,11/5/2017 22:34,"Hwy 130 And Hwy 183,Travis,TX",Public Assist,Normal,Dog,Intact Male,7 years,Chihuahua Shorthair Mix,Red/Tan
125,11/7/2017 13:16,"Jacks Pass,Travis,TX",Public Assist,Normal,Dog,Neutered Male,10 years,Maltese Mix,White/White
137,11/7/2017 19:32,"305 E Yager,Austin,TX",Public Assist,Normal,Cat,Spayed Female,3 years,Domestic Shorthair Mix,Tortie


In [59]:
df_animals_dogs.head()

Unnamed: 0,DateTime,Found Address,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color
0,11/4/2017 8:14,"12034 Research Boulevard,Austin,TX",Stray,Injured,Dog,Intact Female,3 years,German Shepherd Mix,White
1,11/4/2017 10:11,"7211 Ritchie,Austin,TX",Stray,Normal,Dog,Spayed Female,6 months,Pit Bull Mix,Blue/White
2,11/4/2017 10:11,"7211 Ritchie,Austin,TX",Stray,Normal,Dog,Spayed Female,1 year,Pit Bull Mix,Brown/White
3,11/4/2017 10:25,"900 Thompson Street,Austin,TX",Stray,Normal,Dog,Spayed Female,2 years,Pit Bull Mix,White
4,11/4/2017 11:11,"26Th Street And Rio Grande,Austin,TX",Stray,Normal,Dog,Intact Male,4 months,Rat Terrier/Dachshund,Black/Tan


In [60]:
# Strays
df_animals_strays = df_intakes_clean.loc[(df_intakes_clean["Intake Type"] =="Stray"),]
print(f"Length of strays dataset: {len(df_animals_strays)}")

Length of strays dataset: 640


#### Create filtered dataframes of unique address lists

In [61]:
# Calculate address counts for master dataset
address_counts_all = df_intakes_clean["Found Address"].value_counts()
df_address_counts_all = pd.Series.to_frame(address_counts_all).reset_index()
df_address_counts_all = df_address_counts_all.rename(columns={'index': 'Address', 'Found Address': 'Count'})
df_address_counts_all.head()

Unnamed: 0,Address,Count
0,"10505 S Ih 35,Austin,TX",9
1,"11906 Bronco Circle,Austin,TX",8
2,"La Paz And Paloma,Del Valle,TX",8
3,"815 Christopher Street,Austin,TX",7
4,"Jollyville Road And Pavillion Boulevard,Austin,TX",6


In [62]:
# Function to create cleaned dataframe each filtered addresses dataset
def convert_address_counts_to_df(address_counts):
    '''
    Function: convert_address_counts_to_df
    Description: Convert address value counts to dataframe
    Arguments: Series
    Returns:  Dataframe
    '''
    df_address_counts = pd.Series.to_frame(address_counts).reset_index()
    df_address_counts = df_address_counts.rename(columns={'index': 'Address', 'Found Address': 'Count'})
    return df_address_counts

# Create pets-in-homes-specific counts dataframe
address_counts_homes = df_animals_homes["Found Address"].value_counts()
df_address_counts_homes = convert_address_counts_to_df(address_counts_homes)

# Create dogs-specific counts dataframe
address_counts_dogs = df_animals_dogs["Found Address"].value_counts()
df_address_counts_dogs = convert_address_counts_to_df(address_counts_dogs)

# Create cats-specific counts dataframe 
address_counts_cats = df_animals_cats["Found Address"].value_counts()
df_address_counts_cats = convert_address_counts_to_df(address_counts_cats)

# Create strays-specific counts dataframe 
address_counts_strays = df_animals_strays["Found Address"].value_counts()
df_address_counts_strays = convert_address_counts_to_df(address_counts_strays)

# Visualize Strays address dataframe
df_address_counts_strays.head()

Unnamed: 0,Address,Count
0,"La Paz And Paloma,Del Valle,TX",8
1,"11906 Bronco Circle,Austin,TX",8
2,"815 Christopher Street,Austin,TX",7
3,"Jollyville Road And Pavillion Boulevard,Austin,TX",6
4,"1016 Camino La Costa,Austin,TX",5


In [63]:
# Visualize pets-in-homes addresses counts
df_address_counts_homes.head()

Unnamed: 0,Address,Count
0,"10505 S Ih 35,Austin,TX",8
1,"2901 South Capital Of Texas Highway,Austin,TX",2
2,"8311 Citation,Travis,TX",2
3,"7109 South Brook Dr,Austin,TX",2
4,"11901 Pavilion,Austin,TX",2


#### Create filtered summary dataframes with only valid Latitude/Longitude values

In [64]:
# All animals
df_animals_summary_all = df_summary.loc[(df_summary["Latitude"] != 0),]

# All animals in homes
df_animals_summary_homes = df_summary.loc[(df_summary["Pet at Home"] == True) &
                                                         (df_summary["Latitude"] != 0),]

# Dogs only
df_animals_summary_dogs = df_summary.loc[(df_summary["Animal Type"] == "Dog") &
                                                         (df_summary["Latitude"] != 0),]

# Cats only 
df_animals_summary_cats = df_summary.loc[(df_summary["Animal Type"] == "Cat") & 
                                                         (df_summary["Latitude"] != 0),]

In [65]:
# Visualize filtered dataframes
print(f"Length of 'All Animals' summary dataframe: {len(df_animals_summary_all)}")
print(f"Length of 'Animals in Homes' summary dataframe: {len(df_animals_summary_homes)}")
df_animals_summary_all.head()

Length of 'All Animals' summary dataframe: 533
Length of 'Animals in Homes' summary dataframe: 25


Unnamed: 0,Address,Latitude,Longitude,Pet at Home,Animal Type
0,"12034 Research Boulevard,Austin,TX",30.425374,-97.752882,False,Dog
1,"7211 Ritchie,Austin,TX",30.304242,-97.638796,False,Dog
2,"900 Thompson Street,Austin,TX",30.270169,-97.70847,False,Dog
3,"26Th Street And Rio Grande,Austin,TX",30.290664,-97.744405,False,Dog
4,"Wally Ave,Austin,TX",30.281282,-97.684354,False,Dog


In [66]:
df_address_counts_cats.head()

Unnamed: 0,Address,Count
0,"815 Christopher Street,Austin,TX",7
1,"16120 Jacobson Road,Del Valle,TX",5
2,"7337 Manchaca Road,Austin,TX",5
3,"1016 Camino La Costa,Austin,TX",5
4,"1209 East 52Nd Street,Austin,TX",5


#### Merge intake address counts into summary dataframes

In [67]:
# Merge address count into master summary dataset
df_summary_all = pd.merge(df_animals_summary_all, df_address_counts_all, on="Address")

In [68]:
# Export and visualize summary of all address counts
df_summary_all.to_csv('raw data/animal_shelter_analysis_address_counts_ALL.csv', encoding='latin-1', index=False)
df_summary_all.head()

Unnamed: 0,Address,Latitude,Longitude,Pet at Home,Animal Type,Count
0,"12034 Research Boulevard,Austin,TX",30.425374,-97.752882,False,Dog,1
1,"7211 Ritchie,Austin,TX",30.304242,-97.638796,False,Dog,2
2,"900 Thompson Street,Austin,TX",30.270169,-97.70847,False,Dog,1
3,"26Th Street And Rio Grande,Austin,TX",30.290664,-97.744405,False,Dog,1
4,"Wally Ave,Austin,TX",30.281282,-97.684354,False,Dog,1


In [69]:
# Merge address count into pets-in-homes dataset
df_summary_homes = pd.merge(df_animals_summary_homes, df_address_counts_homes, on="Address")

In [70]:
# Export and visualize summary of all address counts
df_summary_homes.to_csv('raw data/animal_shelter_analysis_address_counts_HOMES.csv', encoding='latin-1', index=False)
df_summary_homes.head()

Unnamed: 0,Address,Latitude,Longitude,Pet at Home,Animal Type,Count
0,"2901 South Capital Of Texas Highway,Austin,TX",30.257339,-97.807188,True,Dog,2
1,"Hwy 130 And Hwy 183,Travis,TX",30.05035,-97.691024,True,Dog,1
2,"Jacks Pass,Travis,TX",30.374948,-97.919534,True,Dog,1
3,"305 E Yager,Austin,TX",30.394662,-97.667902,True,Cat,1
4,"2000 S Ih 35,Austin,TX",30.237388,-97.739782,True,Dog,1


In [71]:
# Merge address count into dogs & cats dataset
df_summary_dogs = pd.merge(df_animals_summary_dogs, df_address_counts_dogs, on="Address")
df_summary_cats = pd.merge(df_animals_summary_cats, df_address_counts_cats, on="Address")

In [72]:
# Export and visualize summary of Dogs address counts
df_summary_dogs.to_csv('raw data/animal_shelter_analysis_address_counts_DOGS.csv', encoding='latin-1', index=False)
df_summary_dogs.head()

Unnamed: 0,Address,Latitude,Longitude,Pet at Home,Animal Type,Count
0,"12034 Research Boulevard,Austin,TX",30.425374,-97.752882,False,Dog,1
1,"7211 Ritchie,Austin,TX",30.304242,-97.638796,False,Dog,2
2,"900 Thompson Street,Austin,TX",30.270169,-97.70847,False,Dog,1
3,"26Th Street And Rio Grande,Austin,TX",30.290664,-97.744405,False,Dog,1
4,"Wally Ave,Austin,TX",30.281282,-97.684354,False,Dog,1


In [73]:
# Export and visualize summary of Cats address counts
df_summary_cats.to_csv('raw data/animal_shelter_analysis_address_counts_CATS.csv', encoding='latin-1', index=False)
df_summary_cats.head()

Unnamed: 0,Address,Latitude,Longitude,Pet at Home,Animal Type,Count
0,"5309 Spring Meadow Rd,Austin,TX",30.19616,-97.735726,False,Cat,1
1,"1016 Camino La Costa,Austin,TX",30.326758,-97.701259,False,Cat,5
2,"11904 Garden Gate,Austin,TX",30.225535,-97.627627,False,Cat,1
3,"2301 Quicksilver,Austin,TX",30.176523,-97.766594,False,Cat,4
4,"443 Mesa Drive,Del Valle,TX",30.113526,-97.580517,False,Cat,3


In [74]:
# Merge address count into strays dataset
df_summary_strays = pd.merge(df_summary, df_address_counts_strays, on="Address")

In [75]:
# Export and visualize summary of Dogs address counts
df_summary_strays.to_csv('raw data/animal_shelter_analysis_address_counts_STRAYS.csv', encoding='latin-1', index=False)
df_summary_strays.head()

Unnamed: 0,Address,Latitude,Longitude,Pet at Home,Animal Type,Count
0,"12034 Research Boulevard,Austin,TX",30.425374,-97.752882,False,Dog,1
1,"7211 Ritchie,Austin,TX",30.304242,-97.638796,False,Dog,2
2,"900 Thompson Street,Austin,TX",30.270169,-97.70847,False,Dog,1
3,"26Th Street And Rio Grande,Austin,TX",30.290664,-97.744405,False,Dog,1
4,"Wally Ave,Austin,TX",30.281282,-97.684354,False,Dog,1


### Dataframes for plotting number of veterinarians vs. number of pet intakes

In [76]:
# Add necessary column to plotting datasets
df_animals_summary_all["Vet Count"] = ""
df_animals_summary_homes["Vet Count"] = ""
df_animals_summary_dogs["Vet Count"] = ""
df_animals_summary_cats["Vet Count"] = ""

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the cavea

#### Loop through dataset and assign Vet count values by calling Google Radarsearch API

In [77]:
df_animals_summary_all.head()

Unnamed: 0,Address,Latitude,Longitude,Pet at Home,Animal Type,Vet Count
0,"12034 Research Boulevard,Austin,TX",30.425374,-97.752882,False,Dog,
1,"7211 Ritchie,Austin,TX",30.304242,-97.638796,False,Dog,
2,"900 Thompson Street,Austin,TX",30.270169,-97.70847,False,Dog,
3,"26Th Street And Rio Grande,Austin,TX",30.290664,-97.744405,False,Dog,
4,"Wally Ave,Austin,TX",30.281282,-97.684354,False,Dog,


In [78]:
# Counter
row_count = 0

# Loop through and run Google search to get all banks in 5 mile radius (8000 meters)
for index, row in df_animals_summary_all.iterrows():
    
    # Create endpoint url using Google Places Radar and the lat/lng we identified earlier
    #  - Radius search of roughly 1 mile
    #  - Places type "veterinary_care" only
    target_url =f"https://maps.googleapis.com/maps/api/place/radarsearch/json?location={row['Latitude']},{row['Longitude']}&radius=1700&type=veterinary_care&key={gkey}"

    # This link helps to handily see the JSON generated for each query
    print(f"Now retrieving address #{row_count}: {df_animals_summary_all.loc[index]['Address']}")
    row_count += 1 
    
    # Run request to retrieve JSON from target URL (only if it hasn't been set yet)
    if df_animals_summary_all.loc[index]['Vet Count'] == 0 or df_animals_summary_all.loc[index]['Vet Count'] == "":
        vet_data = requests.get(target_url).json()
        
        # Retrieve vet count via number of results within the radius (2500 meters)
        vet_count = len(vet_data["results"])  
        print(f"Final Vet Count for address '{row['Address']}': {str(vet_count)}")
        print("")    
    
        # Store the vet count into the Data Frame
        df_animals_summary_all.set_value(index, "Vet Count", vet_count)
    else:
        print(f"Vet Count already set for this address: {row['Address']}")
        
    # Reset vet_count, so a previous record cannot influence a later one
    vet_count = 0

# Visualize the new dataset
df_animals_summary_all.head()

Now retrieving address #0: 12034 Research Boulevard,Austin,TX
Final Vet Count for address '12034 Research Boulevard,Austin,TX': 14

Now retrieving address #1: 7211 Ritchie,Austin,TX
Final Vet Count for address '7211 Ritchie,Austin,TX': 0

Now retrieving address #2: 900 Thompson Street,Austin,TX
Final Vet Count for address '900 Thompson Street,Austin,TX': 0

Now retrieving address #3: 26Th Street And Rio Grande,Austin,TX
Final Vet Count for address '26Th Street And Rio Grande,Austin,TX': 4

Now retrieving address #4: Wally Ave,Austin,TX
Final Vet Count for address 'Wally Ave,Austin,TX': 0

Now retrieving address #5: 7806 South 1St Street,Austin,TX
Final Vet Count for address '7806 South 1St Street,Austin,TX': 1

Now retrieving address #6: 5309 Spring Meadow Rd,Austin,TX
Final Vet Count for address '5309 Spring Meadow Rd,Austin,TX': 1

Now retrieving address #7: 4602 East Stassney Lane,Austin,TX
Final Vet Count for address '4602 East Stassney Lane,Austin,TX': 1

Now retrieving address #8

Final Vet Count for address '611 Carpenter Avenue,Austin,TX': 1

Now retrieving address #67: Boyce Lane And Kelton,Austin,TX
Final Vet Count for address 'Boyce Lane And Kelton,Austin,TX': 0

Now retrieving address #68: 1631 E 2Nd St,Austin,TX
Final Vet Count for address '1631 E 2Nd St,Austin,TX': 2

Now retrieving address #69: Homestead Village Circle And Avery Ranch,Austin,TX
Final Vet Count for address 'Homestead Village Circle And Avery Ranch,Austin,TX': 1

Now retrieving address #70: East William Cannon Drive And Mckinney Falls Parkway,Austin,TX
Final Vet Count for address 'East William Cannon Drive And Mckinney Falls Parkway,Austin,TX': 0

Now retrieving address #71: 900 Barton Springs Rd,Austin,TX
Final Vet Count for address '900 Barton Springs Rd,Austin,TX': 2

Now retrieving address #72: Caldwell Lane And Albert Brown Drive,Del Valle,TX
Final Vet Count for address 'Caldwell Lane And Albert Brown Drive,Del Valle,TX': 0

Now retrieving address #73: 1515 Wickersham Lane,Austin,TX


Final Vet Count for address 'Willow Street And Perdanales Street,Austin,TX': 1

Now retrieving address #130: 1720 Bunche Road,Austin,TX
Final Vet Count for address '1720 Bunche Road,Austin,TX': 0

Now retrieving address #131: Airport Blvd At Oak Springs,Austin,TX
Final Vet Count for address 'Airport Blvd At Oak Springs,Austin,TX': 0

Now retrieving address #132: 13412 Guarnere,Austin,TX
Final Vet Count for address '13412 Guarnere,Austin,TX': 0

Now retrieving address #133: 51St Street And Berkman,Austin,TX
Final Vet Count for address '51St Street And Berkman,Austin,TX': 4

Now retrieving address #134: 14420 Bois D Arc,Manor,TX
Final Vet Count for address '14420 Bois D Arc,Manor,TX': 0

Now retrieving address #135: 5319 Presidio Road,Austin,TX
Final Vet Count for address '5319 Presidio Road,Austin,TX': 5

Now retrieving address #136: Atterbury Lane And Shropshire,Austin,TX
Final Vet Count for address 'Atterbury Lane And Shropshire,Austin,TX': 2

Now retrieving address #137: 13512 Fm 812

Final Vet Count for address 'Metric And Cedar Bend Drive,Austin,TX': 17

Now retrieving address #194: North Lamar And Rundberg,Austin,TX
Final Vet Count for address 'North Lamar And Rundberg,Austin,TX': 0

Now retrieving address #195: Escarpment And Convict Hill,Austin,TX
Final Vet Count for address 'Escarpment And Convict Hill,Austin,TX': 4

Now retrieving address #196: 11005 Little Thicket Road,Austin,TX
Final Vet Count for address '11005 Little Thicket Road,Austin,TX': 0

Now retrieving address #197: 7000 Comanche Trail,Austin,TX
Final Vet Count for address '7000 Comanche Trail,Austin,TX': 0

Now retrieving address #198: Chinook Drive And Pinto Path,Austin,TX
Final Vet Count for address 'Chinook Drive And Pinto Path,Austin,TX': 5

Now retrieving address #199: 183 And Montopolis,Austin,TX
Final Vet Count for address '183 And Montopolis,Austin,TX': 1

Now retrieving address #200: 10505 S Ih 35,Austin,TX
Final Vet Count for address '10505 S Ih 35,Austin,TX': 1

Now retrieving address #

Final Vet Count for address '6711 Johnny Morris Rd,Austin,TX': 0

Now retrieving address #260: 7337 Manchaca Road,Austin,TX
Final Vet Count for address '7337 Manchaca Road,Austin,TX': 1

Now retrieving address #261: 603 Davis,Austin,TX
Final Vet Count for address '603 Davis,Austin,TX': 2

Now retrieving address #262: 12207 Timber Arch,Manor,TX
Final Vet Count for address '12207 Timber Arch,Manor,TX': 0

Now retrieving address #263: 11611 Raymond C Ewry Lane,Austin,TX
Final Vet Count for address '11611 Raymond C Ewry Lane,Austin,TX': 3

Now retrieving address #264: 3313 East 12Th Street,Austin,TX
Final Vet Count for address '3313 East 12Th Street,Austin,TX': 0

Now retrieving address #265: 710 E Ben White Blvd,Austin,TX
Final Vet Count for address '710 E Ben White Blvd,Austin,TX': 3

Now retrieving address #266: 6801 River Place Blvd,Austin,TX
Final Vet Count for address '6801 River Place Blvd,Austin,TX': 1

Now retrieving address #267: 10081 Old Manchaca Road,Austin,TX
Final Vet Count 

Final Vet Count for address '3218 Park Hills Drive,Austin,TX': 7

Now retrieving address #324: Rutland And Mountain Quail,Austin,TX
Final Vet Count for address 'Rutland And Mountain Quail,Austin,TX': 1

Now retrieving address #325: 2511 Glenn Springs,Austin,TX
Final Vet Count for address '2511 Glenn Springs,Austin,TX': 2

Now retrieving address #326: William Cannon And Salt Springs,Austin,TX
Final Vet Count for address 'William Cannon And Salt Springs,Austin,TX': 0

Now retrieving address #327: Parker Lane And Woodland Avenue,Austin,TX
Final Vet Count for address 'Parker Lane And Woodland Avenue,Austin,TX': 2

Now retrieving address #328: 135 And Stassney,Austin,TX
Final Vet Count for address '135 And Stassney,Austin,TX': 6

Now retrieving address #329: Oak Knoll And Jollyville,Austin,TX
Final Vet Count for address 'Oak Knoll And Jollyville,Austin,TX': 9

Now retrieving address #330: 8228 Research Blvd,Austin,TX
Final Vet Count for address '8228 Research Blvd,Austin,TX': 2

Now retriev

Final Vet Count for address '841 Airport Boulevard,Austin,TX': 1

Now retrieving address #389: 1088 Park Plz,Austin,TX
Final Vet Count for address '1088 Park Plz,Austin,TX': 1

Now retrieving address #390: Fm 620 And Anderson Mill Road,Austin,TX
Final Vet Count for address 'Fm 620 And Anderson Mill Road,Austin,TX': 3

Now retrieving address #391: Highway 183 And Bolm Road,Austin,TX
Final Vet Count for address 'Highway 183 And Bolm Road,Austin,TX': 1

Now retrieving address #392: 3431 West William Cannon Drive,Austin,TX
Final Vet Count for address '3431 West William Cannon Drive,Austin,TX': 11

Now retrieving address #393: 4413 Quicksilver,Austin,TX
Final Vet Count for address '4413 Quicksilver,Austin,TX': 0

Now retrieving address #394: 5004 Creek Bend,Austin,TX
Final Vet Count for address '5004 Creek Bend,Austin,TX': 1

Now retrieving address #395: 1704 Nelms,Austin,TX
Final Vet Count for address '1704 Nelms,Austin,TX': 1

Now retrieving address #396: 7201 Levander Loop,Austin,TX
Fina

Final Vet Count for address '8900 Mesa Dr,Austin,TX': 3

Now retrieving address #453: Slaughter Ln And Westgate,Austin,TX
Final Vet Count for address 'Slaughter Ln And Westgate,Austin,TX': 3

Now retrieving address #454: 1625 And 183,Austin,TX
Final Vet Count for address '1625 And 183,Austin,TX': 0

Now retrieving address #455: Coastal Dr And Alexandria,Austin,TX
Final Vet Count for address 'Coastal Dr And Alexandria,Austin,TX': 5

Now retrieving address #456: 701 Center Ridge Drive,Austin,TX
Final Vet Count for address '701 Center Ridge Drive,Austin,TX': 3

Now retrieving address #457: 3901 East Stassney Lane,Austin,TX
Final Vet Count for address '3901 East Stassney Lane,Austin,TX': 1

Now retrieving address #458: 8028 El Roy Road,Del Valle,TX
Final Vet Count for address '8028 El Roy Road,Del Valle,TX': 0

Now retrieving address #459: 529 West Oltorf Street,Austin,TX
Final Vet Count for address '529 West Oltorf Street,Austin,TX': 12

Now retrieving address #460: 7104 Berkman Drive,Aus

Final Vet Count for address '8515 S Ih 35,Austin,TX': 2

Now retrieving address #519: 220 Foremost,Austin,TX
Final Vet Count for address '220 Foremost,Austin,TX': 1

Now retrieving address #520: 10812 N Ih 35,Austin,TX
Final Vet Count for address '10812 N Ih 35,Austin,TX': 1

Now retrieving address #521: East St Johns Avenue And Meador Avenue,Austin,TX
Final Vet Count for address 'East St Johns Avenue And Meador Avenue,Austin,TX': 5

Now retrieving address #522: 2125 Boyds Way,Austin,TX
Final Vet Count for address '2125 Boyds Way,Austin,TX': 0

Now retrieving address #523: 5909 Reicher Dr,Austin,TX
Final Vet Count for address '5909 Reicher Dr,Austin,TX': 1

Now retrieving address #524: 120 Mist Flower,Travis,TX
Final Vet Count for address '120 Mist Flower,Travis,TX': 2

Now retrieving address #525: 20670 Cameron,Travis,TX
Final Vet Count for address '20670 Cameron,Travis,TX': 0

Now retrieving address #526: 2007 Nightview Dr,Austin,TX
Final Vet Count for address '2007 Nightview Dr,Aust

Unnamed: 0,Address,Latitude,Longitude,Pet at Home,Animal Type,Vet Count
0,"12034 Research Boulevard,Austin,TX",30.425374,-97.752882,False,Dog,14
1,"7211 Ritchie,Austin,TX",30.304242,-97.638796,False,Dog,0
2,"900 Thompson Street,Austin,TX",30.270169,-97.70847,False,Dog,0
3,"26Th Street And Rio Grande,Austin,TX",30.290664,-97.744405,False,Dog,4
4,"Wally Ave,Austin,TX",30.281282,-97.684354,False,Dog,0


In [79]:
# Save the dataframe to CSV
df_animals_summary_all.to_csv('raw data/animal_shelter_analysis_with_Vet_data.csv', encoding='latin-1', index=False)

#### Add number of intake addresses within each vet's lat/long combination to dataframe

In [80]:
import math

In [81]:
# Add new tracking column for number of intakes within radius
df_animals_summary_all["Intakes within Radius"] = ""

# Function to calculate if a given lat/long point is contained in the Google place's kilometer radius
def is_location_within_1700meters(check_point_lat, check_point_long, center_point_lat, center_point_long, radius_km):
    '''
    Function: is_location_within_1700meters
    Purpose:  Given lat long values for center point and check point, figure out whether or not check points are within
                X kilometers (in our case, 1.7)
    '''
    
    km_lat = 40000 / 360
    km_lng = math.cos(math.pi * center_point_lat/180) * km_lat
    dst_x = math.fabs(center_point_long - check_point_long) * km_lng
    dst_y = math.fabs(center_point_lat - check_point_lat) * km_lat
    
    return math.sqrt(dst_x * dst_x + dst_y * dst_y) <= radius_km;

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [82]:
# Radius should be 1.7km, to match 1700 meter Google Radarsearch lookup radius
radius = 1.7 

# Loop through dataset and assign how many intake locations are within given lat/long combination
row_count = 0
for index, row in df_animals_summary_all.iterrows():
    # This link helps to handily see the JSON generated for each query
    print(f"Now retrieving address #{row_count}: {df_animals_summary_all.loc[index]['Address']}")
    row_count += 1 

    # Reset inner loop count / boolean variables
    is_found = False
    num_found = 0
    
    # Loop through dataframe again, and determine how many lat/long combinations are within the current lat/long's 1700 meter radius
    for i_inner, r_inner in df_animals_summary_all.iterrows():    
        is_found = is_location_within_1700meters(r_inner["Latitude"], r_inner["Longitude"], row["Latitude"], row["Longitude"], radius)
        if is_found == True:
            num_found += 1
            
    # Set the number of found intakes for this center point 
    df_animals_summary_all.set_value(index, "Intakes within Radius", num_found)

Now retrieving address #0: 12034 Research Boulevard,Austin,TX
Now retrieving address #1: 7211 Ritchie,Austin,TX
Now retrieving address #2: 900 Thompson Street,Austin,TX
Now retrieving address #3: 26Th Street And Rio Grande,Austin,TX
Now retrieving address #4: Wally Ave,Austin,TX
Now retrieving address #5: 7806 South 1St Street,Austin,TX
Now retrieving address #6: 5309 Spring Meadow Rd,Austin,TX
Now retrieving address #7: 4602 East Stassney Lane,Austin,TX
Now retrieving address #8: 1016 Camino La Costa,Austin,TX
Now retrieving address #9: 12362 Ballerstedt Road,Travis,TX
Now retrieving address #10: 12Th Street And Chicon,Austin,TX
Now retrieving address #11: 11904 Garden Gate,Austin,TX
Now retrieving address #12: East Cesar Chavez And Willow Street,Austin,TX
Now retrieving address #13: 12221 North Mopac Express Way,Austin,TX
Now retrieving address #14: 1104 Tillery,Austin,TX
Now retrieving address #15: Laurelleaf Drive And Black Willow Street,Pflugerville,TX
Now retrieving address #16: 

Now retrieving address #135: 5319 Presidio Road,Austin,TX
Now retrieving address #136: Atterbury Lane And Shropshire,Austin,TX
Now retrieving address #137: 13512 Fm 812,Austin,TX
Now retrieving address #138: Riverside Drive And South Congress Avenue,Austin,TX
Now retrieving address #139: Arboretum Boulevard,Austin,TX
Now retrieving address #140: 400 Las Lomas Drive,West Lake Hills,TX
Now retrieving address #141: Berkman Drive And Briarcliff Drive,Austin,TX
Now retrieving address #142: 511 East 46Th Street,Austin,TX
Now retrieving address #143: West Anderson Lane And Georgian Drive,Austin,TX
Now retrieving address #144: 8027 Birmingham Drive,Austin,TX
Now retrieving address #145: 1617 North Interstate 35 Frontage Road,Austin,TX
Now retrieving address #146: 7109 South Brook Dr,Austin,TX
Now retrieving address #147: 1209 East 52Nd Street,Austin,TX
Now retrieving address #148: Stassney And South 1St,Austin,TX
Now retrieving address #149: 10560 Bilbrook Place,Austin,TX
Now retrieving addres

Now retrieving address #272: Wichersham And Oltorf Street,Austin,TX
Now retrieving address #273: 1902 W 37Th Street,Austin,TX
Now retrieving address #274: 11000 Block Of Research Blvd,Austin,TX
Now retrieving address #275: Wedgewood Dr & Braker Lane,Austin,TX
Now retrieving address #276: 1101 Wheatley Avenue,Austin,TX
Now retrieving address #277: East Ben White And Burleson Road,Austin,TX
Now retrieving address #278: 2417 W Ben White,Austin,TX
Now retrieving address #279: 1709 East 38Th,Austin,TX
Now retrieving address #280: 5300 Apple Orchard Lane,Austin,TX
Now retrieving address #281: 8004 Seminary Ridge,Austin,TX
Now retrieving address #282: 8311 Citation,Travis,TX
Now retrieving address #283: 12113 Stoney Meadow Drive,Del Valle,TX
Now retrieving address #284: 1601 South Mopac Express Way,Austin,TX
Now retrieving address #285: 303 East Pheasant Drive,Austin,TX
Now retrieving address #286: 2300 Cesar Chavez,Austin,TX
Now retrieving address #287: 404 Ledgeway,Travis,TX
Now retrieving 

Now retrieving address #407: 6211 Manor Rd #120,Austin,TX
Now retrieving address #408: Seldaia Trail,Travis,TX
Now retrieving address #409: 311 W William Cannon,Austin,TX
Now retrieving address #410: South Congress Avenue And East Ben White Blvd,Austin,TX
Now retrieving address #411: Perez Street And East 16Th,Austin,TX
Now retrieving address #412: Howard Lane And Ih 35,Austin,TX
Now retrieving address #413: 11923 Highway 290,Manor,TX
Now retrieving address #414: Rosewood Avenue And Chicon Street,Austin,TX
Now retrieving address #415: 5952 Hammerhill Run,Austin,TX
Now retrieving address #416: Old Bee Caves Road And Highway 71,Austin,TX
Now retrieving address #417: 13904 Joyce Lane,Travis,TX
Now retrieving address #418: 10705 Lambert Cir,Austin,TX
Now retrieving address #419: 15000 Parrish Lane,Austin,TX
Now retrieving address #420: 6005 Pino Lane,Austin,TX
Now retrieving address #421: Fm 969 And Johnny Morrison Road,Austin,TX
Now retrieving address #422: 1414 W Ben White Blvd,Austin,TX

In [83]:
# Save the dataframe to CSV, and visualize it
df_animals_summary_all.to_csv('raw data/animal_shelter_analysis_with_VetAndRadius_data.csv', encoding='latin-1', index=False)
df_animals_summary_all.head()

Unnamed: 0,Address,Latitude,Longitude,Pet at Home,Animal Type,Vet Count,Intakes within Radius
0,"12034 Research Boulevard,Austin,TX",30.425374,-97.752882,False,Dog,14,5
1,"7211 Ritchie,Austin,TX",30.304242,-97.638796,False,Dog,0,6
2,"900 Thompson Street,Austin,TX",30.270169,-97.70847,False,Dog,0,20
3,"26Th Street And Rio Grande,Austin,TX",30.290664,-97.744405,False,Dog,4,7
4,"Wally Ave,Austin,TX",30.281282,-97.684354,False,Dog,0,13


## Other Factor Cleanup

In [84]:
austin = "raw data/MergedData.csv"
#austin_df = pd.read_csv(austin)
austin_df = pd.read_csv(austin, encoding = 'latin-1')
#austin_df.head()

In [85]:
dog_df = austin_df.loc[(austin_df['Animal Type_intake'] == "Dog"), : ]
dog_df = dog_df.rename(columns={"Outcome Type" : "OutcomeType"})
#df.rename(columns={'oldName1': 'newName1', 'oldName2': 'newName2'}, inplace=True)
#dog_df

In [86]:
#create a dataframe for popular dog breeds
dog_df["Breed_intake"].value_counts()
breed_df = dog_df.loc[(dog_df['Breed_intake'] == ("Pit Bull Mix", "Labrador Retriever Mix", "German Shepherd Mix", "Chihuahua Shorthair Mix")), : ]
breed_df = dog_df[dog_df["Breed_intake"].isin(["Pit Bull Mix", "German Shepherd Mix", "Labrador Retriever Mix", "Chihuahua Shorthair Mix"])]
breed_df["Breed_intake"].value_counts()
breed_df["Breed_intake"] = breed_df['Breed_intake'].replace({'Pit Bull Mix': 'Pit Bull Terrier', 'Labrador Retriever Mix': 'Labrador Retriever', 'German Shepherd Mix': 'German Shepherd', 'Chihuahua Shorthair Mix' : 'Chihuahua'})
breed_df = breed_df.rename(columns={"Breed_intake" : "Dog Breed"})
#breed_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [87]:
years = round(dog_df["AgeDays_outcome"] / 365,2)
dog_df = dog_df.assign(AgeYears= years)
age_bins = [0,1,3,5,7,9, 25]
age_labels = ["<1", "1-3", "3-5", "5-7", "7-9", ">9"]
dog_df["Age Range"] = pd.cut(dog_df["AgeYears"], age_bins, labels=age_labels)
dog_df.head()

#Make Bins for Age
#hp_bins = [180, 200, 350, 400]
#hp_labels = ["Slow", "Decent", "Fast"]

Unnamed: 0,Animal ID,Name_intake,DateTime_intake,Found Location,Intake Type,Intake Condition,Animal Type_intake,Sex upon Intake,Age upon Intake,Breed_intake,...,Male_outcome,SpayNeuter_outcome,Purebred_outcome,NumAge_outcome,AgeUnits_outcome,AgeDays_outcome,Intake Outcome Days,IODays,AgeYears,Age Range
0,A748291,*Madison,2017-05-01 14:26:00,S Pleasant Valley Rd And E Riverside Dr in Aus...,Stray,Normal,Dog,Intact Female,10 months,Pit Bull Mix,...,0.0,Yes,0,1.0,year,365.0,125 days 09:34:00.000000000,125.0,1.0,<1
1,A750529,,2017-05-28 13:22:00,8312 North Ih 35 in Austin (TX),Stray,Normal,Dog,Intact Female,5 months,Miniature Schnauzer Mix,...,0.0,Yes,0,5.0,month,150.0,4 days 03:20:00.000000000,4.0,0.41,<1
3,A748238,,2017-05-01 10:53:00,Airport Blvd And Oak Springs Dr in Austin (TX),Stray,Normal,Dog,Intact Male,3 years,Bichon Frise Mix,...,1.0,Yes,0,3.0,year,1095.0,5 days 05:04:00.000000000,5.0,3.0,1-3
4,A683644,*Zoey,2014-07-13 11:02:00,Austin (TX),Owner Surrender,Nursing,Dog,Intact Female,4 weeks,Border Collie Mix,...,0.0,Yes,0,4.0,month,120.0,115 days 23:04:00.000000000,115.0,0.33,<1
5,A676515,Rico,2014-04-11 08:45:00,615 E. Wonsley in Austin (TX),Stray,Normal,Dog,Intact Male,2 months,Pit Bull Mix,...,1.0,Yes,0,3.0,month,90.0,3 days 09:53:00.000000000,3.0,0.25,<1


In [88]:
#Totals Age
total = dog_df.groupby("Age Range")
total_counts = total.OutcomeType.agg(["count"])
total_counts = total_counts.rename(columns={"count" : "Adoptions"})
total_list = total_counts["Adoptions"]

In [89]:
#Euthanizations Age
dog_euth_df = dog_df.loc[(dog_df['OutcomeType'] == "Euthanasia"), : ]
euth = dog_euth_df.groupby("Age Range")
euths = euth.OutcomeType.agg(["count"])
euths = euths.rename(columns={"count" : "Euthanizations"})
euths_list = euths['Euthanizations']

In [90]:
#adopt Age and DF
dog_adopt_df = dog_df.loc[(dog_df['OutcomeType'] == "Adoption"), : ]
group = dog_adopt_df.groupby("Age Range")
counts = group.OutcomeType.agg(["count"])
counts = counts.rename(columns={"count" : "Adoptions"})
counts["Euthanizations"] = euths_list
counts["Total Dogs"] = total_list
counts["% of Animals Adopted"] = round(counts["Adoptions"] / counts["Total Dogs"] * 100,0)
counts["% of Animals Euthenized"] = round(counts["Euthanizations"] / counts["Total Dogs"] * 100,0)
counts_austin = counts
counts_austin.to_csv("raw data/Austin_Counts.csv")
counts_austin

Unnamed: 0_level_0,Adoptions,Euthanizations,Total Dogs,% of Animals Adopted,% of Animals Euthenized
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1-3,3119,394,7954,39.0,5.0
3-5,953,155,3003,32.0,5.0
5-7,495,92,1844,27.0,5.0
7-9,330,92,1290,26.0,7.0
<1,8642,309,15697,55.0,2.0
>9,291,217,1844,16.0,12.0


In [91]:
#Totals Breed
total_breed = breed_df.groupby("Dog Breed")
breed_count = total_breed.OutcomeType.agg(["count"])
breed_count = breed_count.rename(columns={"count" : "Total Dogs"})
breed_total= breed_count["Total Dogs"]

In [92]:
#Euthanizations breed
breed_euth_df = breed_df.loc[(breed_df['OutcomeType'] == "Euthanasia"), : ]
breed_euth_df = breed_euth_df.groupby("Dog Breed")
breed_euths = breed_euth_df.OutcomeType.agg(["count"])
breed_euths = breed_euths.rename(columns={"count" : "Euthanizations"})
breed_euths_list = breed_euths['Euthanizations']

In [93]:
breed_adopt_df = breed_df.loc[(breed_df['OutcomeType'] == "Adoption"), : ]
breed_adopt_df = breed_adopt_df.groupby("Dog Breed")
breed_adopts = breed_adopt_df.OutcomeType.agg(["count"])
breed_adopts = breed_adopts.rename(columns={"count" : "Adoptions"})
breed_adopts["Euthanizations"] = breed_euths_list
breed_adopts["Total Dogs"] = breed_total
breed_adopts["% of Animals Adopted"] = round(breed_adopts["Adoptions"] / breed_adopts["Total Dogs"] * 100,0)
breed_adopts["% of Animals Euthenized"] = round(breed_adopts["Euthanizations"] / breed_adopts["Total Dogs"] * 100,0)
breed_adopts_austin = breed_adopts
breed_adopts_austin.to_csv("raw data/Austin_Counts_Breed.csv")
breed_adopts_austin

Unnamed: 0_level_0,Adoptions,Euthanizations,Total Dogs,% of Animals Adopted,% of Animals Euthenized
Dog Breed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chihuahua,1718,146,3788,45.0,4.0
German Shepherd,596,44,1335,45.0,3.0
Labrador Retriever,1580,104,3330,47.0,3.0
Pit Bull Terrier,1299,344,3793,34.0,9.0


In [94]:
# Cat Analysis
cat_df = austin_df.loc[(austin_df['Animal Type_intake'] == "Cat"), : ]
cat_df = cat_df.rename(columns={"Outcome Type" : "OutcomeType"})
#df.rename(columns={'oldName1': 'newName1', 'oldName2': 'newName2'}, inplace=True)
#cat_df

In [95]:
#Binning for Cats
years = round(cat_df["AgeDays_outcome"] / 365,2)
cat_df = cat_df.assign(AgeYears= years)
age_bins = [0,1,3,5,7,9, 25]
age_labels = ["<1", "1-3", "3-5", "5-7", "7-9", ">9"]
cat_df["Age Range"] = pd.cut(cat_df["AgeYears"], age_bins, labels=age_labels)
cat_df.head()

Unnamed: 0,Animal ID,Name_intake,DateTime_intake,Found Location,Intake Type,Intake Condition,Animal Type_intake,Sex upon Intake,Age upon Intake,Breed_intake,...,Male_outcome,SpayNeuter_outcome,Purebred_outcome,NumAge_outcome,AgeUnits_outcome,AgeDays_outcome,Intake Outcome Days,IODays,AgeYears,Age Range
2,A730601,,2016-07-07 12:11:00,1109 Shady Ln in Austin (TX),Stray,Normal,Cat,Intact Male,7 months,Domestic Shorthair Mix,...,1.0,Yes,0,7.0,month,210.0,0 days 20:49:00.000000000,0.0,0.58,<1
6,A679549,*Gilbert,2014-05-22 15:43:00,124 W Anderson in Austin (TX),Stray,Normal,Cat,Intact Male,1 month,Domestic Shorthair Mix,...,1.0,Yes,0,2.0,month,60.0,24 days 22:11:00.000000000,24.0,0.16,<1
7,A683656,,2014-07-13 13:20:00,8238 Research Blvd in Austin (TX),Stray,Normal,Cat,Intact Male,2 months,Snowshoe Mix,...,1.0,Yes,0,3.0,month,90.0,4 days 03:37:00.000000000,4.0,0.25,<1
8,A709749,*Janeane,2015-08-12 18:29:00,4800 Weletka Dr in Austin (TX),Stray,Normal,Cat,Intact Female,1 year,Domestic Shorthair Mix,...,0.0,Yes,0,1.0,year,365.0,40 days 18:20:00.000000000,40.0,1.0,<1
10,A733551,*Phillip,2016-08-23 14:35:00,183 And Cameron in Austin (TX),Stray,Normal,Cat,Intact Male,1 month,Domestic Shorthair Mix,...,1.0,No,0,2.0,month,60.0,8 days 09:25:00.000000000,8.0,0.16,<1


In [96]:
#Cat DataFrame Manipulation (Total Cats)
total_cat = cat_df.groupby("Age Range")
total_counts_cat = total_cat.OutcomeType.agg(["count"])
total_counts_cat = total_counts_cat.rename(columns={"count" : "Adoptions"})
total_list_cat = total_counts_cat["Adoptions"]
total_counts_cat

Unnamed: 0_level_0,Adoptions
Age Range,Unnamed: 1_level_1
1-3,2903
3-5,906
5-7,554
7-9,447
<1,20176
>9,759


In [97]:
#Euthanizations Cats
cat_euth_df = cat_df.loc[(cat_df['OutcomeType'] == "Euthanasia"), : ]
euth_cat = cat_euth_df.groupby("Age Range")
euths_cat = euth_cat.OutcomeType.agg(["count"])
euths_cat = euths_cat.rename(columns={"count" : "Euthanizations"})
euths_list_cat = euths_cat['Euthanizations']
euths_cat

Unnamed: 0_level_0,Euthanizations
Age Range,Unnamed: 1_level_1
1-3,242
3-5,110
5-7,43
7-9,63
<1,750
>9,177


In [98]:
#Adoptions and DataFrame
cat_adopt_df = cat_df.loc[(cat_df['OutcomeType'] == "Adoption"), : ]
group_cat = cat_adopt_df.groupby("Age Range")
counts_cat = group_cat.OutcomeType.agg(["count"])
counts_cat = counts_cat.rename(columns={"count" : "Adoptions"})
counts_cat["Euthanizations"] = euths_list_cat
counts_cat["Total Cats"] = total_list_cat
counts_cat["% of Cats Adopted"] = round(counts_cat["Adoptions"] / counts_cat["Total Cats"] * 100,0)
counts_cat["% of Cats Euthenized"] = round(counts_cat["Euthanizations"] / counts_cat["Total Cats"] * 100,0)
counts_austin_cat = counts_cat
counts_austin_cat.to_csv("raw data/Austin_Counts_Cat.csv")
counts_austin_cat

Unnamed: 0_level_0,Adoptions,Euthanizations,Total Cats,% of Cats Adopted,% of Cats Euthenized
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1-3,715,242,2903,25.0,8.0
3-5,268,110,906,30.0,12.0
5-7,203,43,554,37.0,8.0
7-9,192,63,447,43.0,14.0
<1,8903,750,20176,44.0,4.0
>9,227,177,759,30.0,23.0


In [99]:
#Select 4 Popular Cat Colors
cat_df["Color_intake"].value_counts()
color_df = cat_df[cat_df["Color_intake"].isin(["Brown Tabby", "Black", "Calico", "Orange Tabby"])]
color_df["Color_intake"].value_counts()
#color_df["Breed_intake"] = color_df['Breed_intake'].replace({'Pit Bull Mix': 'Pit Bull Terrier', 'Labrador Retriever Mix': 'Labrador Retriever', 'German Shepherd Mix': 'German Shepherd', 'Chihuahua Shorthair Mix' : 'Chihuahua'})
color_df = color_df.rename(columns={"Color_intake" : "Cat Color"})
#color_df

In [100]:
#Totals color
total_color = color_df.groupby("Cat Color")
color_count = total_color.OutcomeType.agg(["count"])
color_count = color_count.rename(columns={"count" : "Total Cats"})
color_total= color_count["Total Cats"]
color_count

Unnamed: 0_level_0,Total Cats
Cat Color,Unnamed: 1_level_1
Black,3428
Brown Tabby,3971
Calico,1146
Orange Tabby,1914


In [101]:
#Euthanizations color
color_euth_df = color_df.loc[(color_df['OutcomeType'] == "Euthanasia"), : ]
color_euth_df = color_euth_df.groupby("Cat Color")
color_euths = color_euth_df.OutcomeType.agg(["count"])
color_euths = color_euths.rename(columns={"count" : "Euthanizations"})
color_euths_list = color_euths['Euthanizations']
color_euths

Unnamed: 0_level_0,Euthanizations
Cat Color,Unnamed: 1_level_1
Black,185
Brown Tabby,206
Calico,51
Orange Tabby,109


In [102]:
#Colors Total
color_adopt_df = color_df.loc[(color_df['OutcomeType'] == "Adoption"), : ]
color_adopt_df = color_adopt_df.groupby("Cat Color")
color_adopts = color_adopt_df.OutcomeType.agg(["count"])
color_adopts = color_adopts.rename(columns={"count" : "Adoptions"})
color_adopts["Euthanizations"] = color_euths_list
color_adopts["Total Cats"] = color_total
color_adopts["% of Cats Adopted"] = round(color_adopts["Adoptions"] / color_adopts["Total Cats"] * 100,0)
color_adopts["% of Cats Euthenized"] = round(color_adopts["Euthanizations"] / color_adopts["Total Cats"] * 100,0)
color_adopts_austin = color_adopts
color_adopts_austin.to_csv("raw data/Austin_Counts_Color.csv")
color_adopts_austin

Unnamed: 0_level_0,Adoptions,Euthanizations,Total Cats,% of Cats Adopted,% of Cats Euthenized
Cat Color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Black,1332,185,3428,39.0,5.0
Brown Tabby,1580,206,3971,40.0,5.0
Calico,504,51,1146,44.0,4.0
Orange Tabby,715,109,1914,37.0,6.0


### Louisville Data

In [104]:
#map csv
austin = "raw data/Louisville.csv"
#austin_df = pd.read_csv(austin)
louisville_df = pd.read_csv(austin, encoding = 'latin-1')
louisville_df.head()

Unnamed: 0,AnimalID,AnimalType,IntakeDate,IntakeType,IntakeSubtype,PrimaryColor,PrimaryBreed,SecondaryBreed,Gender,SecondaryColor,...,IntakeInternalStatus,IntakeAsilomarStatus,ReproductiveStatusAtIntake,OutcomeDate,OutcomeType,OutcomeSubtype,OutcomeReason,OutcomeInternalStatus,OutcomeAsilomarStatus,ReproductiveStatusAtOutcome
0,A366370,CAT,2008-11-07 10:50:00,STRAY,OTC,WHITE,DOMESTIC SHORTHAIR,,NEUTERED MALE,BROWN,...,FEARFUL,HEALTHY,ALTERED,2008-11-12 15:46:00,EUTH,FERAL,,,UNHEALTHY/UNTREATABLE,ALTERED
1,A366531,CAT,2008-11-10 10:20:00,STRAY,OTC,BLACK,DOMESTIC SHORTHAIR,DOMESTIC SHORTHAIR,UNKNOWN,,...,NORMAL,HEALTHY,UNKNOWN,2008-11-19 20:10:00,EUTH,CONTAG DIS,,SICK,HEALTHY,UNKNOWN
2,A532367,BIRD,2014-07-23 23:21:00,CONFISCATE,CRUELTY,RED,CHICKEN,,MALE,BLACK,...,OTHER,HEALTHY,FERTILE,2014-11-05 15:49:00,TRANSFER,,,,HEALTHY,FERTILE
3,A532474,OTHER,2014-07-24 18:29:00,ET REQUEST,,BROWN,BAT,,UNKNOWN,,...,OTHER,HEALTHY,UNKNOWN,2014-07-24 23:59:00,EUTH,MEDICAL,,OTHER,HEALTHY,UNKNOWN
4,A281756,DOG,2006-09-11 18:10:00,OWNER SUR,OTC,WHITE,PIT BULL TERRIER,,MALE,BROWN,...,NORMAL,HEALTHY,FERTILE,2006-09-12 13:44:00,EUTH,TIME/SPACE,,,HEALTHY,FERTILE


In [105]:
#Select Dogs Only for this Analysis
dog_df = louisville_df.loc[(louisville_df['AnimalType'] == "DOG"), : ]
dog_df.head()

Unnamed: 0,AnimalID,AnimalType,IntakeDate,IntakeType,IntakeSubtype,PrimaryColor,PrimaryBreed,SecondaryBreed,Gender,SecondaryColor,...,IntakeInternalStatus,IntakeAsilomarStatus,ReproductiveStatusAtIntake,OutcomeDate,OutcomeType,OutcomeSubtype,OutcomeReason,OutcomeInternalStatus,OutcomeAsilomarStatus,ReproductiveStatusAtOutcome
4,A281756,DOG,2006-09-11 18:10:00,OWNER SUR,OTC,WHITE,PIT BULL TERRIER,,MALE,BROWN,...,NORMAL,HEALTHY,FERTILE,2006-09-12 13:44:00,EUTH,TIME/SPACE,,,HEALTHY,FERTILE
11,A279338,DOG,2006-08-23 08:22:00,STRAY,FIELD,WHITE,PIT BULL TERRIER,,MALE,BLACK,...,INJURED,HEALTHY,FERTILE,2006-08-30 09:18:00,EUTH,MEDICAL,,,HEALTHY,FERTILE
13,A288560,DOG,2006-10-15 22:09:00,STRAY,FIELD,BROWN,BLOODHOUND,MIX,MALE,,...,NORMAL,HEALTHY,FERTILE,2006-10-24 13:46:00,EUTH,TIME/SPACE,,,HEALTHY,FERTILE
16,A296878,DOG,2007-01-29 18:07:00,STRAY,OTC,CHOCOLATE,LABRADOR RETRIEVER,,FEMALE,,...,NORMAL,HEALTHY,FERTILE,2007-02-08 12:27:00,TRANSFER,RESCUE GRP,,,HEALTHY,FERTILE
17,A296987,DOG,2007-01-30 18:03:00,OWNER SUR,OTC,TAN,PIT BULL TERRIER,,MALE,WHITE,...,NORMAL,HEALTHY,FERTILE,2007-02-01 11:37:00,RTO,,,,HEALTHY,FERTILE


In [106]:
#Drop all entries without a DOB
dog_df['DOB'].replace('', np.nan, inplace=True)
dog_df.dropna(subset=['DOB'], inplace=True)
dog_df.dropna(subset=['OutcomeDate'], inplace=True)
dog_df.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


Unnamed: 0,AnimalID,AnimalType,IntakeDate,IntakeType,IntakeSubtype,PrimaryColor,PrimaryBreed,SecondaryBreed,Gender,SecondaryColor,...,IntakeInternalStatus,IntakeAsilomarStatus,ReproductiveStatusAtIntake,OutcomeDate,OutcomeType,OutcomeSubtype,OutcomeReason,OutcomeInternalStatus,OutcomeAsilomarStatus,ReproductiveStatusAtOutcome
4,A281756,DOG,2006-09-11 18:10:00,OWNER SUR,OTC,WHITE,PIT BULL TERRIER,,MALE,BROWN,...,NORMAL,HEALTHY,FERTILE,2006-09-12 13:44:00,EUTH,TIME/SPACE,,,HEALTHY,FERTILE
17,A296987,DOG,2007-01-30 18:03:00,OWNER SUR,OTC,TAN,PIT BULL TERRIER,,MALE,WHITE,...,NORMAL,HEALTHY,FERTILE,2007-02-01 11:37:00,RTO,,,,HEALTHY,FERTILE
27,A440609,DOG,2012-01-04 15:56:00,STRAY,OTC,BROWN BRINDLE,PLOTT HOUND,MIX,SPAYED FEMALE,WHITE,...,NORMAL,HEALTHY,ALTERED,2012-02-09 16:40:00,FOSTER,RES WAGON,,,HEALTHY,ALTERED
40,A000399,DOG,2005-02-18 11:37:00,STRAY,OTC,TAN,GOLDEN RETRIEVER,MIX,NEUTERED MALE,,...,NORMAL,HEALTHY,ALTERED,2005-04-15 15:03:00,ADOPTION,,,,HEALTHY,ALTERED
47,A318940,DOG,2007-07-19 08:58:00,OUTSURGERY,,BROWN BRINDLE,MASTIFF,,NEUTERED MALE,,...,NORMAL,HEALTHY,ALTERED,2007-07-19 04:28:00,RELEASED,IN SURGERY,,,HEALTHY,ALTERED


In [107]:
#Crearte lists four outcome date and dob to find the Age of the pet

time_outcome = dog_df["OutcomeDate"].tolist()
time_dob = dog_df["DOB"].tolist()
outcome_list = []
dob_list = []

len(time_dob)

#age_list
#age = map(operator.sub, time_outcome, time_dob)
#print(age)

53104

In [108]:
#For loops for datetime conversions
for time in time_outcome:
    
    converted_outcome = datetime.strptime(time, "%Y-%m-%d %H:%M:%S")
    outcome_list.append(converted_outcome)

for time in time_dob:
    converted_dob = datetime.strptime(time, "%Y-%m-%d %H:%M:%S")
    dob_list.append(converted_dob)

#louisville_df["AdoptionAge"] = louisville_df["OutcomeDate"] - louisville_df["DOB"]

In [109]:
#list comprehension to find the age
zip(outcome_list, dob_list) # Just to demonstrate
age = [x - y for x, y in zip(outcome_list, dob_list)]

In [110]:
#Create a dataframe with delta
ages_df = pd.DataFrame(
    {'DOB': dob_list,
     'AdoptionDate': outcome_list,
     'Delta': age
    })
ages_df.head()

Unnamed: 0,AdoptionDate,DOB,Delta
0,2006-09-12 13:44:00,2005-09-11,366 days 13:44:00
1,2007-02-01 11:37:00,2006-07-30,186 days 11:37:00
2,2012-02-09 16:40:00,2010-02-16,723 days 16:40:00
3,2005-04-15 15:03:00,2003-03-18,759 days 15:03:00
4,2007-07-19 04:28:00,2006-06-11,403 days 04:28:00


In [111]:
#use itterows and relative delta to find the Age in years
from dateutil.relativedelta import relativedelta
years = []
for index, row in ages_df.iterrows():
    difference_in_years = relativedelta(row["AdoptionDate"], row["DOB"]).years
    years.append(difference_in_years)

In [112]:
len(years)

53104

In [114]:
#Assign Age to dog dataframe
dog_df = dog_df.assign(Age= years)
#dog_df
#dog_df["Age"] = years
#dog_df.head()

In [115]:
#create Age bins
age_bins = [-1,1,3,5,7,9, 25]
age_labels = ["<1", "1-3", "3-5", "5-7", "7-9", ">9"]
dog_df["Age Range"] = pd.cut(dog_df["Age"], age_bins, labels=age_labels)
#dog_adopt= dog_df.loc[(dog_df['OutcomeType'] == "Adoption"), : ]
#dog_group = dog_df.groupby("Age Range")
#dogs_groups = dog_group["OutcomeType"].value_counts()
#dogs_groups
#dogs_groups_df = pd.DataFrame(dogs_groups)
#dogs_groups_df
#dogs_groups_df.unstack(level = 0)

In [116]:
# Get total number of dogs for each age range
total = dog_df.groupby("Age Range")
total_counts = total.OutcomeType.agg(["count"])
total_counts = total_counts.rename(columns={"count" : "Adoptions"})
total_list = total_counts["Adoptions"]

In [117]:
#Euthanizations
dog_euth_df = dog_df.loc[(dog_df['OutcomeType'] == "EUTH"), : ]
euth = dog_euth_df.groupby("Age Range")
euths = euth.OutcomeType.agg(["count"])
euths = euths.rename(columns={"count" : "Euthanizations"})
euths_list = euths['Euthanizations']

In [118]:
#Adoptions for Each age range, create a new counts data frame
dog_adopt_df = dog_df.loc[(dog_df['OutcomeType'] == "ADOPTION"), : ]
group = dog_adopt_df.groupby("Age Range")
counts = group.OutcomeType.agg(["count"])
counts = counts.rename(columns={"count" : "Adoptions"})
counts["Euthanizations"] = euths_list
counts["Total Dogs"] = total_list
counts["% of Animals Adopted"] = round(counts["Adoptions"] / counts["Total Dogs"] * 100,0)
counts["% of Animals Euthenized"] = round(counts["Euthanizations"] / counts["Total Dogs"] * 100,0)
counts_louisville = counts
counts_louisville.to_csv("raw data/Louisville_Counts.csv")
counts_louisville
#value_counts()
#counts_df = pd.DataFrame(counts)
#counts_df

Unnamed: 0_level_0,Adoptions,Euthanizations,Total Dogs,% of Animals Adopted,% of Animals Euthenized
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1-3,3039,3038,12166,25.0,25.0
3-5,1161,1243,5460,21.0,23.0
5-7,465,862,3202,15.0,27.0
7-9,207,775,2417,9.0,32.0
<1,8349,5607,24596,34.0,23.0
>9,186,3027,5150,4.0,59.0


In [119]:
#Totals Breed
total_breed = breed_df.groupby("Dog Breed")
breed_count = total_breed.OutcomeType.agg(["count"])
breed_count = breed_count.rename(columns={"count" : "Total Dogs"})
breed_total= breed_count["Total Dogs"]
breed_count

Unnamed: 0_level_0,Total Dogs
Dog Breed,Unnamed: 1_level_1
Chihuahua,3788
German Shepherd,1335
Labrador Retriever,3330
Pit Bull Terrier,3793


In [120]:
#Euthanizations breed
breed_euth_df = breed_df.loc[(breed_df['OutcomeType'] == "EUTH"), : ]
breed_euth_df = breed_euth_df.groupby("Dog Breed")
breed_euths = breed_euth_df.OutcomeType.agg(["count"])
breed_euths = breed_euths.rename(columns={"count" : "Euthanizations"})
breed_euths_list = breed_euths['Euthanizations']

In [121]:
breed_adopt_df = breed_df.loc[(breed_df['OutcomeType'] == "ADOPTION"), : ]
breed_adopt_df = breed_adopt_df.groupby("Dog Breed")
breed_adopts = breed_adopt_df.OutcomeType.agg(["count"])
breed_adopts = breed_adopts.rename(columns={"count" : "Adoptions"})
breed_adopts["Euthanizations"] = breed_euths_list
breed_adopts["Total Dogs"] = breed_total
breed_adopts["% of Animals Adopted"] = round(breed_adopts["Adoptions"] / breed_adopts["Total Dogs"] * 100,0)
breed_adopts["% of Animals Euthenized"] = round(breed_adopts["Euthanizations"] / breed_adopts["Total Dogs"] * 100,0)
breed_adopts_louisville = breed_adopts
breed_adopts_louisville.to_csv("raw data/Louisville_Counts_Breed.csv")
breed_adopts_louisville

Unnamed: 0_level_0,Adoptions,Euthanizations,Total Dogs,% of Animals Adopted,% of Animals Euthenized
Dog Breed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chihuahua,,,3788,,
German Shepherd,,,1335,,
Labrador Retriever,,,3330,,
Pit Bull Terrier,,,3793,,


In [122]:
#CAT ANALYSIS
cat_df = louisville_df.loc[(louisville_df['AnimalType'] == "CAT"), : ]
cat_df = cat_df.rename(columns={"Outcome Type" : "OutcomeType"})
#df.rename(columns={'oldName1': 'newName1', 'oldName2': 'newName2'}, inplace=True)
#cat_df

In [123]:
#Drop all entries without a DOB
cat_df['DOB'].replace('', np.nan, inplace=True)
cat_df.dropna(subset=['DOB'], inplace=True)
cat_df.dropna(subset=['OutcomeDate'], inplace=True)
cat_df.head()

Unnamed: 0,AnimalID,AnimalType,IntakeDate,IntakeType,IntakeSubtype,PrimaryColor,PrimaryBreed,SecondaryBreed,Gender,SecondaryColor,...,IntakeInternalStatus,IntakeAsilomarStatus,ReproductiveStatusAtIntake,OutcomeDate,OutcomeType,OutcomeSubtype,OutcomeReason,OutcomeInternalStatus,OutcomeAsilomarStatus,ReproductiveStatusAtOutcome
100,A317457,CAT,2007-07-06 16:44:00,OWNER SUR,OTC,GRAY,DOMESTIC SHORTHAIR,,UNKNOWN,WHITE,...,NORMAL,HEALTHY,UNKNOWN,2007-07-06 20:52:00,EUTH,TIME/SPACE,,,HEALTHY,UNKNOWN
123,A521349,CAT,2014-03-24 12:37:00,OWNER SUR,EUTH REQ,ORANGE,DOMESTIC LONGHAIR,,NEUTERED MALE,,...,TERITORIAL,HEALTHY,ALTERED,2014-03-27 10:53:00,TRANSFER,RESCUE GRP,,,HEALTHY,ALTERED
234,A528376,CAT,2014-06-06 12:13:00,STRAY,OTC,BROWN TABBY,DOMESTIC SHORTHAIR,,MALE,,...,NORMAL,HEALTHY,FERTILE,2014-07-03 23:59:00,TNR,,,,HEALTHY,FERTILE
252,A359696,CAT,2008-08-15 20:43:00,STRAY,OTC,GRAY,DOMESTIC SHORTHAIR,,UNKNOWN,,...,FERAL,UNHEALTHY/UNTREATABLE,UNKNOWN,2008-08-22 23:59:00,EUTH,BEHAV OBSV,,,HEALTHY,UNKNOWN
261,A244435,CAT,2005-07-15 20:23:00,STRAY,FIELD,GRAY,BRITISH SHORTHAIR,,NEUTERED MALE,,...,NORMAL,HEALTHY,ALTERED,2005-07-26 09:04:00,RTO,,,,HEALTHY,ALTERED


In [124]:
#Crearte lists four outcome date and dob to find the Age of the cat
time_outcome_cat = cat_df["OutcomeDate"].tolist()
time_dob_cat = cat_df["DOB"].tolist()
outcome_list_cat = []
dob_list_cat = []

len(time_dob_cat)

38773

In [125]:
#For loops for datetime conversions
for time in time_outcome_cat:
    
    converted_outcome_cat = datetime.strptime(time, "%Y-%m-%d %H:%M:%S")
    outcome_list_cat.append(converted_outcome_cat)

for time in time_dob_cat:
    converted_dob_cat = datetime.strptime(time, "%Y-%m-%d %H:%M:%S")
    dob_list_cat.append(converted_dob_cat)

In [126]:
#list comprehension to find the age Cat
zip(outcome_list_cat, dob_list_cat) # Just to demonstrate
age_cat = [x - y for x, y in zip(outcome_list_cat, dob_list_cat)]

In [127]:
#Create a dataframe with delta cat
ages_cat_df = pd.DataFrame(
    {'DOB': dob_list_cat,
     'AdoptionDate': outcome_list_cat,
     'Delta': age_cat
    })
ages_cat_df.head()

Unnamed: 0,AdoptionDate,DOB,Delta
0,2007-07-06 20:52:00,2007-05-06,61 days 20:52:00
1,2014-03-27 10:53:00,2008-03-24,2194 days 10:53:00
2,2014-07-03 23:59:00,2014-02-06,147 days 23:59:00
3,2008-08-22 23:59:00,2007-10-15,312 days 23:59:00
4,2005-07-26 09:04:00,1995-07-26,3653 days 09:04:00


In [128]:
#use itterows and relative delta to find the Age in years cat
from dateutil.relativedelta import relativedelta
years_cat = []
for index, row in ages_cat_df.iterrows():
    difference_in_years_cat = relativedelta(row["AdoptionDate"], row["DOB"]).years
    years_cat.append(difference_in_years_cat)

In [129]:
len(years_cat)

38773

In [131]:
#Assign Age to dog dataframe
cat_df = cat_df.assign(Age= years_cat)
#cat_df
#dog_df["Age"] = years
#dog_df.head()

In [132]:
#Use Boolean mask to find dates from 2013-2017 to get a direct comparision with Austin Datae
cat_df['OutcomeDate'] = pd.to_datetime(cat_df["OutcomeDate"])
mask = (cat_df['OutcomeDate'] > '2013-1-1') & (cat_df['OutcomeDate'] <= '2017-12-31')
cat_df = cat_df.loc[mask]
#cat_df
#df['date'] = pd.to_datetime(df['date']) 
#mask = (df['date'] > start_date) & (df['date'] <= end_date)

In [133]:
#create Age bins
age_bins = [-1,1,3,5,7,9, 25]
age_labels = ["<1", "1-3", "3-5", "5-7", "7-9", ">9"]
cat_df["Age Range"] = pd.cut(cat_df["Age"], age_bins, labels=age_labels)
cat_df.head()

Unnamed: 0,AnimalID,AnimalType,IntakeDate,IntakeType,IntakeSubtype,PrimaryColor,PrimaryBreed,SecondaryBreed,Gender,SecondaryColor,...,ReproductiveStatusAtIntake,OutcomeDate,OutcomeType,OutcomeSubtype,OutcomeReason,OutcomeInternalStatus,OutcomeAsilomarStatus,ReproductiveStatusAtOutcome,Age,Age Range
123,A521349,CAT,2014-03-24 12:37:00,OWNER SUR,EUTH REQ,ORANGE,DOMESTIC LONGHAIR,,NEUTERED MALE,,...,ALTERED,2014-03-27 10:53:00,TRANSFER,RESCUE GRP,,,HEALTHY,ALTERED,6,5-7
234,A528376,CAT,2014-06-06 12:13:00,STRAY,OTC,BROWN TABBY,DOMESTIC SHORTHAIR,,MALE,,...,FERTILE,2014-07-03 23:59:00,TNR,,,,HEALTHY,FERTILE,0,<1
293,A528461,CAT,2014-07-02 10:26:00,FOSTER,RETURN,CALICO,DOMESTIC SHORTHAIR,,SPAYED FEMALE,,...,ALTERED,2014-07-02 23:59:00,FOSTER,,,,HEALTHY,ALTERED,0,<1
317,A522686,CAT,2014-06-04 09:42:00,FOSTER,RETURN,GRAY,DOMESTIC SHORTHAIR,,UNKNOWN,WHITE,...,UNKNOWN,2014-06-04 09:43:00,EUTH,MEDICAL,,SICK,HEALTHY,UNKNOWN,0,<1
393,A506007,CAT,2013-10-07 14:18:00,STRAY,OTC,BROWN TABBY,DOMESTIC MEDIUMHAIR,,UNKNOWN,,...,UNKNOWN,2013-10-08 23:59:00,TNR,,,,HEALTHY,UNKNOWN,0,<1


In [134]:
# Get total number of dogs for each age range
total_cat = cat_df.groupby("Age Range")
total_cat_counts = total_cat.OutcomeType.agg(["count"])
total_cat_counts = total_cat_counts.rename(columns={"count" : "Adoptions"})
total_cat_list = total_cat_counts["Adoptions"]
total_cat_counts

Unnamed: 0_level_0,Adoptions
Age Range,Unnamed: 1_level_1
1-3,3280
3-5,1149
5-7,574
7-9,405
<1,13708
>9,836


In [135]:
#Euthanizations
cat_euth_df = cat_df.loc[(cat_df['OutcomeType'] == "EUTH"), : ]
cat_euth = cat_euth_df.groupby("Age Range")
cat_euths = cat_euth.OutcomeType.agg(["count"])
cat_euths = cat_euths.rename(columns={"count" : "Euthanizations"})
cat_euths_list = cat_euths['Euthanizations']
cat_euths

Unnamed: 0_level_0,Euthanizations
Age Range,Unnamed: 1_level_1
1-3,559
3-5,259
5-7,200
7-9,181
<1,2038
>9,554


In [137]:
#Adoptions for Each age range, create a new counts data frame
cat_adopt_df = cat_df.loc[(cat_df['OutcomeType'] == "ADOPTION"), : ]
cat_group = cat_adopt_df.groupby("Age Range")
cat_counts = cat_group.OutcomeType.agg(["count"])
cat_counts = cat_counts.rename(columns={"count" : "Adoptions"})
cat_counts["Euthanizations"] = cat_euths_list
cat_counts["Total Cats"] = total_cat_list
cat_counts["% of Cats Adopted"] = round(cat_counts["Adoptions"] / cat_counts["Total Cats"] * 100,0)
cat_counts["% of Cats Euthenized"] = round(cat_counts["Euthanizations"] / cat_counts["Total Cats"] * 100,0)
cat_counts_louisville = cat_counts
cat_counts_louisville.to_csv("raw data/Louisville_Counts_Cat.csv")
cat_counts_louisville
#value_counts()
#counts_df = pd.DataFrame(counts)
#counts_df

Unnamed: 0_level_0,Adoptions,Euthanizations,Total Cats,% of Cats Adopted,% of Cats Euthenized
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1-3,732,559,3280,22.0,17.0
3-5,315,259,1149,27.0,23.0
5-7,155,200,574,27.0,35.0
7-9,96,181,405,24.0,45.0
<1,3515,2038,13708,26.0,15.0
>9,80,554,836,10.0,66.0


In [138]:
#Select 4 Popular Cat Colors
cat_df["PrimaryColor"].value_counts()
color_df = cat_df[cat_df["PrimaryColor"].isin(["BROWN TABBY", "BLACK", "CALICO", "ORANGE TABBY"])]
color_df["PrimaryColor"].value_counts()
color_df["PrimaryColor"] = color_df["PrimaryColor"].replace({"BROWN TABBY": 'Brown Tabby', 'BLACK': 'Black', 'CALICO': 'Calico', 'ORANGE TABBY' : 'Orange Tabby'})
color_df = color_df.rename(columns={"PrimaryColor" : "Cat Color"})
#color_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


Unnamed: 0,AnimalID,AnimalType,IntakeDate,IntakeType,IntakeSubtype,Cat Color,PrimaryBreed,SecondaryBreed,Gender,SecondaryColor,...,ReproductiveStatusAtIntake,OutcomeDate,OutcomeType,OutcomeSubtype,OutcomeReason,OutcomeInternalStatus,OutcomeAsilomarStatus,ReproductiveStatusAtOutcome,Age,Age Range
234,A528376,CAT,2014-06-06 12:13:00,STRAY,OTC,Brown Tabby,DOMESTIC SHORTHAIR,,MALE,,...,FERTILE,2014-07-03 23:59:00,TNR,,,,HEALTHY,FERTILE,0,<1
293,A528461,CAT,2014-07-02 10:26:00,FOSTER,RETURN,Calico,DOMESTIC SHORTHAIR,,SPAYED FEMALE,,...,ALTERED,2014-07-02 23:59:00,FOSTER,,,,HEALTHY,ALTERED,0,<1
393,A506007,CAT,2013-10-07 14:18:00,STRAY,OTC,Brown Tabby,DOMESTIC MEDIUMHAIR,,UNKNOWN,,...,UNKNOWN,2013-10-08 23:59:00,TNR,,,,HEALTHY,UNKNOWN,0,<1
693,A533266,CAT,2014-07-30 12:29:00,DISPOSAL,OWNER,Brown Tabby,DOMESTIC SHORTHAIR,,NEUTERED MALE,,...,ALTERED,2014-07-30 23:59:00,DISPOSAL,DEAD ARRIV,,DEAD,HEALTHY,ALTERED,2,1-3
696,A528456,CAT,2014-06-07 13:49:00,OWNER SUR,OTC,Black,DOMESTIC SHORTHAIR,,MALE,,...,FERTILE,2014-06-09 15:26:00,TRANSFER,KHS,,,HEALTHY,FERTILE,0,<1
955,A523707,CAT,2014-04-23 09:00:00,FOSTER,RETURN,Black,DOMESTIC SHORTHAIR,,FEMALE,WHITE,...,FERTILE,2014-04-23 23:59:00,TRANSFER,KHS,,,HEALTHY,FERTILE,1,<1
1137,A513038,CAT,2013-12-18 12:42:00,OWNER SUR,EUTH REQ,Brown Tabby,MAINE COON,MIX,FEMALE,BLACK,...,FERTILE,2013-12-18 13:44:00,EUTH,REQUESTED,,SICK,HEALTHY,FERTILE,1,<1
1216,A566623,CAT,2015-07-27 14:24:00,OWNER SUR,OTC,Black,DOMESTIC SHORTHAIR,,MALE,WHITE,...,FERTILE,2015-11-22 23:59:00,FOSTER,TIME/SPACE,,NORMAL,HEALTHY,FERTILE,0,<1
1317,A532690,CAT,2014-07-28 12:03:00,STRAY,OTC,Brown Tabby,DOMESTIC SHORTHAIR,,FEMALE,,...,FERTILE,2014-07-29 06:25:00,TNR,,,NORMAL,HEALTHY,FERTILE,1,<1
1401,A566473,CAT,2015-07-25 11:53:00,OWNER SUR,OTC,Brown Tabby,DOMESTIC SHORTHAIR,,NEUTERED MALE,,...,ALTERED,2015-12-10 12:56:00,ADOPTION,PS OUTER,,NORMAL,HEALTHY,ALTERED,11,>9


In [139]:
#Totals color
total_color = color_df.groupby("Cat Color")
color_count = total_color.OutcomeType.agg(["count"])
color_count = color_count.rename(columns={"count" : "Total Cats"})
color_total= color_count["Total Cats"]
color_count

Unnamed: 0_level_0,Total Cats
Cat Color,Unnamed: 1_level_1
Black,4861
Brown Tabby,3096
Calico,1057
Orange Tabby,1212


In [140]:
#Euthanizations color
color_euth_df = color_df.loc[(color_df['OutcomeType'] == "EUTH"), : ]
color_euth_df = color_euth_df.groupby("Cat Color")
color_euths = color_euth_df.OutcomeType.agg(["count"])
color_euths = color_euths.rename(columns={"count" : "Euthanizations"})
color_euths_list = color_euths['Euthanizations']
color_euths

Unnamed: 0_level_0,Euthanizations
Cat Color,Unnamed: 1_level_1
Black,979
Brown Tabby,494
Calico,197
Orange Tabby,216


In [142]:
#Colors Total
color_adopt_df = color_df.loc[(color_df['OutcomeType'] == "ADOPTION"), : ]
color_adopt_df = color_adopt_df.groupby("Cat Color")
color_adopts = color_adopt_df.OutcomeType.agg(["count"])
color_adopts = color_adopts.rename(columns={"count" : "Adoptions"})
color_adopts["Euthanizations"] = color_euths_list
color_adopts["Total Cats"] = color_total
color_adopts["% of Cats Adopted"] = round(color_adopts["Adoptions"] / color_adopts["Total Cats"] * 100,0)
color_adopts["% of Cats Euthenized"] = round(color_adopts["Euthanizations"] / color_adopts["Total Cats"] * 100,0)
color_adopts_louisville = color_adopts
color_adopts_louisville.to_csv("raw data/Louisville_Counts_Color.csv")
color_adopts_louisville

Unnamed: 0_level_0,Adoptions,Euthanizations,Total Cats,% of Cats Adopted,% of Cats Euthenized
Cat Color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Black,1110,979,4861,23.0,20.0
Brown Tabby,834,494,3096,27.0,16.0
Calico,271,197,1057,26.0,19.0
Orange Tabby,303,216,1212,25.0,18.0
