# Dependencies 

In [38]:
# Dependencies
import pandas as pd
import numpy as np
import datetime
import re
from datetime import datetime

# Clean up raw City of Austin Animal Shelter Data

#### Marya Crigler, Group 2 Team 

## Intake and Outcome data files downloaded from the City of Austin on November 29, 2017

In [2]:
# Set filepaths (relative paths, resolved against the notebook's working directory)
# csv_filepath1 -> raw intake records, csv_filepath2 -> raw outcome records
csv_filepath1 = "raw data/Austin_Animal_Center_Intakes.csv"
csv_filepath2 = "raw data/Austin_Animal_Center_Outcomes.csv"

In [3]:
#Define reusable functions for cleanse

#Determine purebred status
def GetPurebred(x):
    """Flag a row as purebred (1) or mixed breed (0).

    The row's "Breed" text counts as mixed when it contains "Mix" or a "/"
    (two breeds listed together); any other breed text counts as purebred.
    """
    breed = x["Breed"]
    looks_mixed = ("Mix" in breed) or ("/" in breed)
    return 0 if looks_mixed else 1


#Convert the Age to a consistent unit (days)
def GetAgeDays(x):
    """Express a row's age in days.

    Reads the row's "NumAge" and "AgeUnits" entries. Weeks count as 7 days,
    months as 30 and years as 365; an age already in days is returned as is.
    Any other unit value (including a missing one) yields 0.
    """
    unit = x["AgeUnits"]
    if unit == "day":
        return x["NumAge"]
    # Approximate day counts for the remaining recognised units
    for unit_name, days_per_unit in (("week", 7), ("month", 30), ("year", 365)):
        if unit == unit_name:
            return x["NumAge"] * days_per_unit
    return 0

#Get intake outcome day length
def get_days_length(val):
    """Return the whole-day part of a timedelta's text form, as a string.

    `val` is converted with str() first, so a timedelta such as
    "125 days 09:34:00" gives "125". A leading minus sign is kept, so
    "-1 days +23:00:00" gives "-1" instead of an empty string. Text that does
    not start with a number (for example "NaT") gives "".
    """
    text = str(val)
    # Raw string: "\d" is an invalid escape in an ordinary string literal.
    # The optional "-" keeps negative durations from collapsing to "".
    leading_number = re.match(r'-?\d+', text)
    return leading_number.group(0) if leading_number else ''


In [4]:
#  ****    Cleanse the Intake file  ****
# Read the raw intake CSV into a new dataframe.
# The file is decoded as iso-8859-1; low_memory=False has pandas process the
# file in one pass instead of in chunks.
intakes_df = pd.read_csv(csv_filepath1, encoding="iso-8859-1", low_memory=False)
# Preview the first rows
intakes_df.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color
0,A748291,*Madison,05/01/2017 02:26:00 PM,05/01/2017 02:26:00 PM,S Pleasant Valley Rd And E Riverside Dr in Aus...,Stray,Normal,Dog,Intact Female,10 months,Pit Bull Mix,Black
1,A750529,,05/28/2017 01:22:00 PM,05/28/2017 01:22:00 PM,8312 North Ih 35 in Austin (TX),Stray,Normal,Dog,Intact Female,5 months,Miniature Schnauzer Mix,White/Cream
2,A730601,,07/07/2016 12:11:00 PM,07/07/2016 12:11:00 PM,1109 Shady Ln in Austin (TX),Stray,Normal,Cat,Intact Male,7 months,Domestic Shorthair Mix,Blue Tabby
3,A748238,,05/01/2017 10:53:00 AM,05/01/2017 10:53:00 AM,Airport Blvd And Oak Springs Dr in Austin (TX),Stray,Normal,Dog,Intact Male,3 years,Bichon Frise Mix,White
4,A683644,*Zoey,07/13/2014 11:02:00 AM,07/13/2014 11:02:00 AM,Austin (TX),Owner Surrender,Nursing,Dog,Intact Female,4 weeks,Border Collie Mix,Brown/White


In [5]:
# MonthYear is treated as redundant with DateTime, so it is removed here
intakes_df = intakes_df.drop(columns=['MonthYear'])

In [6]:
# Parse the intake timestamp once, then derive the Month and Year columns from it
intake_timestamps = pd.to_datetime(intakes_df["DateTime"])
intakes_df["DateTime"] = intake_timestamps
intakes_df["Month"] = intake_timestamps.dt.month
intakes_df["Year"] = intake_timestamps.dt.year

In [7]:
# Identify unique values in the Sex upon Intake column.
# These are the labels the mapping dictionaries in the next cell have to cover.
intakes_df["Sex upon Intake"].unique()

array(['Intact Female', 'Intact Male', 'Spayed Female', 'Unknown',
       'Neutered Male', nan], dtype=object)

In [8]:
# Derive the Sex, Female, Male and SpayNeuter columns from Sex upon Intake.
# Each entry is: new column name -> lookup from the raw label to the new value.
# Labels absent from a lookup (e.g. missing values) come out as NaN.
intake_sex_lookups = {
    "Sex": {
        "Intact Female": "Female",
        "Intact Male": "Male",
        "Spayed Female": "Female",
        "Neutered Male": "Male",
        "Unknown": "Unknown Sex",
    },
    "Female": {
        "Intact Female": 1,
        "Intact Male": 0,
        "Spayed Female": 1,
        "Neutered Male": 0,
        "Unknown": 0,
    },
    "Male": {
        "Intact Female": 0,
        "Intact Male": 1,
        "Spayed Female": 0,
        "Neutered Male": 1,
        "Unknown": 0,
    },
    "SpayNeuter": {
        "Intact Female": "No",
        "Intact Male": "No",
        "Spayed Female": "Yes",
        "Neutered Male": "Yes",
        "Unknown": "No",
    },
}
for new_column, label_lookup in intake_sex_lookups.items():
    intakes_df[new_column] = intakes_df["Sex upon Intake"].map(label_lookup)

In [9]:
# Set Purebred Status: apply GetPurebred to every row (axis=1), giving 1 for
# purebred and 0 for mixed breeds
intakes_df['Purebred'] = intakes_df.apply(GetPurebred, axis=1)
    #intakes_df.head()

In [10]:
# Split Age upon Intake (e.g. "10 months") at the first space into the number
# and the unit. expand=True returns the pieces as separate columns; unpacking
# the .str accessor directly is not supported by current pandas releases.
intake_age_parts = intakes_df['Age upon Intake'].str.split(' ', n=1, expand=True)
intakes_df['NumAge'] = intake_age_parts[0]
intakes_df['AgeUnits'] = intake_age_parts[1]

In [11]:
# Make NumAge a numeric column for later calculations (the split above leaves it as text)
intakes_df["NumAge"] = pd.to_numeric(intakes_df["NumAge"])
    #intakes_df.head()

In [12]:
# Identify unique values in the intake age units.
# Singular and plural spellings both occur and are unified in the next cell.
intakes_df["AgeUnits"].unique()

array(['months', 'years', 'weeks', 'month', 'year', 'week', 'days', 'day',
       nan], dtype=object)

In [13]:
# Make the intake age unit values consistent by mapping plurals to singulars.
# The result is assigned back to the column: an in-place replace on a selected
# column is a chained assignment and is not guaranteed to update intakes_df.
intakes_df["AgeUnits"] = intakes_df["AgeUnits"].replace(
    {"days": "day", "weeks": "week", "months": "month", "years": "year"}
)

In [14]:
# Set the Age Days column: GetAgeDays converts NumAge/AgeUnits to days for each row.
# Note that GetAgeDays returns 0 (not NaN) when the unit is missing or unrecognised.
intakes_df['AgeDays'] = intakes_df.apply(GetAgeDays, axis=1)
    #intakes_df.head()

In [15]:
# Get metrics on the raw intake file (summary statistics for the numeric columns)
intakes_df.describe()

Unnamed: 0,Month,Year,Female,Male,Purebred,NumAge,AgeDays
count,75577.0,75577.0,75576.0,75576.0,75577.0,75576.0,75577.0
mean,6.689601,2015.309009,0.437388,0.47654,0.067322,3.4453,761.731559
std,3.257848,1.213225,0.496067,0.499453,0.250581,2.908907,1052.134727
min,1.0,2013.0,0.0,0.0,0.0,0.0,0.0
25%,4.0,2014.0,0.0,0.0,0.0,1.0,60.0
50%,7.0,2015.0,0.0,0.0,0.0,2.0,365.0
75%,10.0,2016.0,1.0,1.0,0.0,5.0,1095.0
max,12.0,2017.0,1.0,1.0,1.0,25.0,9125.0


In [16]:
#Metrics cont'd: record count, repeated Animal IDs and a breakdown by animal type
intake_recs = intakes_df.shape[0]
intake_dups = intakes_df.duplicated(subset="Animal ID").sum()
intake_types = intakes_df.groupby(["Animal Type"]).size()

# Breed counts within the "Other" animal type, most frequent first
intakeOther = intakes_df.loc[intakes_df["Animal Type"].eq("Other")]
intakeOtherGrp = intakeOther.groupby(["Breed"], as_index=False)
intakeOtherCnts = intakeOtherGrp["Animal ID"].count()
sort_intakeOtherCnts = intakeOtherCnts.sort_values(by="Animal ID", ascending=False)

print(f"Number of records in Intakes file: {intake_recs}")
print(f"Duplicate Intake Animal IDs: {intake_dups}")
print(f"Intakes by Animal Type: {intake_types}")
#print(f"Intake of Other group by Animal Type: {sort_intakeOtherCnts}")

Number of records in Intakes file: 75577
Duplicate Intake Animal IDs: 7031
Intakes by Animal Type: Animal Type
Bird           328
Cat          28489
Dog          42590
Livestock        8
Other         4162
dtype: int64


In [17]:
#  *** Cleanse the Outcomes file  ***
# Read the raw outcomes CSV into a new dataframe, with the same decoding and
# single-pass settings used for the intake file.
outcomes_df = pd.read_csv(csv_filepath2, encoding="iso-8859-1", low_memory=False)
# Preview the first rows
outcomes_df.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
0,A741715,*Pebbles,01/11/2017 06:17:00 PM,01/11/2017 06:17:00 PM,03/07/2016,Adoption,,Cat,Spayed Female,10 months,Domestic Shorthair Mix,Calico
1,A658751,Benji,11/13/2016 01:38:00 PM,11/13/2016 01:38:00 PM,07/14/2011,Return to Owner,,Dog,Neutered Male,5 years,Border Terrier Mix,Tan
2,A721285,,02/24/2016 02:42:00 PM,02/24/2016 02:42:00 PM,02/24/2014,Euthanasia,Suffering,Other,Unknown,2 years,Raccoon Mix,Black/Gray
3,A746650,Rose,04/07/2017 11:58:00 AM,04/07/2017 11:58:00 AM,04/06/2016,Return to Owner,,Dog,Intact Female,1 year,Labrador Retriever/Jack Russell Terrier,Yellow
4,A750122,Happy Camper,05/24/2017 06:36:00 PM,05/24/2017 06:36:00 PM,04/08/2017,Transfer,Partner,Dog,Intact Male,1 month,Labrador Retriever Mix,Black


In [18]:
# MonthYear is treated as redundant with DateTime, so it is removed here
outcomes_df = outcomes_df.drop(columns=['MonthYear'])

In [19]:
# Parse the outcome timestamp once, then derive the Month and Year columns from it
outcome_timestamps = pd.to_datetime(outcomes_df["DateTime"])
outcomes_df["DateTime"] = outcome_timestamps
outcomes_df["Month"] = outcome_timestamps.dt.month
outcomes_df["Year"] = outcome_timestamps.dt.year

In [20]:
# Identify unique values in the Sex upon Outcome column.
# These are the labels the mapping dictionaries in the next cell have to cover.
outcomes_df["Sex upon Outcome"].unique()

array(['Spayed Female', 'Neutered Male', 'Unknown', 'Intact Female',
       'Intact Male', nan], dtype=object)

In [21]:
# Derive the Sex, Female, Male and SpayNeuter columns from Sex upon Outcome.
# Each entry is: new column name -> lookup from the raw label to the new value.
# Labels absent from a lookup (e.g. missing values) come out as NaN.
outcome_sex_lookups = {
    "Sex": {
        "Intact Female": "Female",
        "Intact Male": "Male",
        "Spayed Female": "Female",
        "Neutered Male": "Male",
        "Unknown": "Unknown Sex",
    },
    "Female": {
        "Intact Female": 1,
        "Intact Male": 0,
        "Spayed Female": 1,
        "Neutered Male": 0,
        "Unknown": 0,
    },
    "Male": {
        "Intact Female": 0,
        "Intact Male": 1,
        "Spayed Female": 0,
        "Neutered Male": 1,
        "Unknown": 0,
    },
    "SpayNeuter": {
        "Intact Female": "No",
        "Intact Male": "No",
        "Spayed Female": "Yes",
        "Neutered Male": "Yes",
        "Unknown": "No",
    },
}
for new_column, label_lookup in outcome_sex_lookups.items():
    outcomes_df[new_column] = outcomes_df["Sex upon Outcome"].map(label_lookup)

In [22]:
# Get purebred status: apply GetPurebred to every row (axis=1), giving 1 for
# purebred and 0 for mixed breeds
outcomes_df['Purebred'] = outcomes_df.apply(GetPurebred, axis=1)

In [23]:
# Split Age upon Outcome (e.g. "10 months") at the first space into the number
# and the unit. expand=True returns the pieces as separate columns; unpacking
# the .str accessor directly is not supported by current pandas releases.
outcome_age_parts = outcomes_df['Age upon Outcome'].str.split(' ', n=1, expand=True)

# Make NumAge a numeric column for later calculations
outcomes_df["NumAge"] = pd.to_numeric(outcome_age_parts[0])

# Make the outcome age unit values consistent by mapping plurals to singulars.
# The cleaned values are assigned to the column rather than replaced in place,
# because an in-place replace on a selected column may not update outcomes_df.
outcomes_df["AgeUnits"] = outcome_age_parts[1].replace(
    {"days": "day", "weeks": "week", "months": "month", "years": "year"}
)

# Set the Age Days column (GetAgeDays returns 0 when the unit is missing)
outcomes_df['AgeDays'] = outcomes_df.apply(GetAgeDays, axis=1)

In [24]:
# Get metrics on the raw outcomes file (summary statistics for the numeric columns)
outcomes_df.describe()

Unnamed: 0,Month,Year,Female,Male,Purebred,NumAge,AgeDays
count,75508.0,75508.0,75506.0,75506.0,75508.0,75502.0,75508.0
mean,6.799902,2015.29377,0.437767,0.476492,0.067198,3.528211,772.159175
std,3.301991,1.219788,0.496115,0.49945,0.250367,2.901128,1052.357298
min,1.0,2013.0,0.0,0.0,0.0,0.0,0.0
25%,4.0,2014.0,0.0,0.0,0.0,2.0,90.0
50%,7.0,2015.0,0.0,0.0,0.0,2.0,365.0
75%,10.0,2016.0,1.0,1.0,0.0,5.0,1095.0
max,12.0,2017.0,1.0,1.0,1.0,25.0,9125.0


In [25]:
#Metrics cont'd: record count, repeated Animal IDs and a breakdown by animal type
outcome_recs = outcomes_df.shape[0]
outcome_dups = outcomes_df.duplicated(subset="Animal ID").sum()
outcome_types = outcomes_df.groupby(["Animal Type"]).size()

print(f"Number of records in Outcomes file: {outcome_recs}")
print(f"Duplicate Outcome Animal IDs: {outcome_dups}")
print(f"Outcomes by Animal Type: {outcome_types}")

Number of records in Outcomes file: 75508
Duplicate Outcome Animal IDs: 6975
Outcomes by Animal Type: Animal Type
Bird           327
Cat          28519
Dog          42498
Livestock        9
Other         4155
dtype: int64


In [26]:
# **** Merge intakes and outcomes files
# Inner join on Animal ID only, so animals present in just one file are dropped.
# Both files contain repeated Animal IDs, so an animal with several visits is
# paired with every one of its outcomes (many-to-many), not only the matching visit.
# Columns present in both files get the _intake / _outcome suffixes.
animalData_df = pd.merge(intakes_df, outcomes_df, on="Animal ID", how="inner", suffixes=('_intake', '_outcome'))
animalData_df.head()

Unnamed: 0,Animal ID,Name_intake,DateTime_intake,Found Location,Intake Type,Intake Condition,Animal Type_intake,Sex upon Intake,Age upon Intake,Breed_intake,...,Month_outcome,Year_outcome,Sex_outcome,Female_outcome,Male_outcome,SpayNeuter_outcome,Purebred_outcome,NumAge_outcome,AgeUnits_outcome,AgeDays_outcome
0,A748291,*Madison,2017-05-01 14:26:00,S Pleasant Valley Rd And E Riverside Dr in Aus...,Stray,Normal,Dog,Intact Female,10 months,Pit Bull Mix,...,9,2017,Female,1.0,0.0,Yes,0,1.0,year,365.0
1,A750529,,2017-05-28 13:22:00,8312 North Ih 35 in Austin (TX),Stray,Normal,Dog,Intact Female,5 months,Miniature Schnauzer Mix,...,6,2017,Female,1.0,0.0,Yes,0,5.0,month,150.0
2,A730601,,2016-07-07 12:11:00,1109 Shady Ln in Austin (TX),Stray,Normal,Cat,Intact Male,7 months,Domestic Shorthair Mix,...,7,2016,Male,0.0,1.0,Yes,0,7.0,month,210.0
3,A748238,,2017-05-01 10:53:00,Airport Blvd And Oak Springs Dr in Austin (TX),Stray,Normal,Dog,Intact Male,3 years,Bichon Frise Mix,...,5,2017,Male,0.0,1.0,Yes,0,3.0,year,1095.0
4,A683644,*Zoey,2014-07-13 11:02:00,Austin (TX),Owner Surrender,Nursing,Dog,Intact Female,4 weeks,Border Collie Mix,...,11,2014,Female,1.0,0.0,Yes,0,4.0,month,120.0


In [27]:
# List the merged column names to check the _intake / _outcome suffixes
print(animalData_df.columns)

Index(['Animal ID', 'Name_intake', 'DateTime_intake', 'Found Location',
       'Intake Type', 'Intake Condition', 'Animal Type_intake',
       'Sex upon Intake', 'Age upon Intake', 'Breed_intake', 'Color_intake',
       'Month_intake', 'Year_intake', 'Sex_intake', 'Female_intake',
       'Male_intake', 'SpayNeuter_intake', 'Purebred_intake', 'NumAge_intake',
       'AgeUnits_intake', 'AgeDays_intake', 'Name_outcome', 'DateTime_outcome',
       'Date of Birth', 'Outcome Type', 'Outcome Subtype',
       'Animal Type_outcome', 'Sex upon Outcome', 'Age upon Outcome',
       'Breed_outcome', 'Color_outcome', 'Month_outcome', 'Year_outcome',
       'Sex_outcome', 'Female_outcome', 'Male_outcome', 'SpayNeuter_outcome',
       'Purebred_outcome', 'NumAge_outcome', 'AgeUnits_outcome',
       'AgeDays_outcome'],
      dtype='object')


In [28]:
# Remove every animal whose Animal ID appears more than once in the merged data.
# keep=False discards all rows sharing an ID (not just the extra copies), so only
# animals with exactly one merged intake/outcome row remain.
animalData_df=animalData_df.drop_duplicates("Animal ID",keep=False)
len(animalData_df)

62049

In [29]:
#Test to see if any other columns are redundant: True would mean the intake
#and outcome colour columns are identical and one could be dropped
animalData_df["Color_intake"].equals(animalData_df["Color_outcome"])

False

In [30]:
# Filter for only Dogs and Cats (by the animal type recorded at intake).
# .copy() makes the result an independent frame, so adding columns to it in
# later cells does not raise SettingWithCopyWarning.
animalData_df = animalData_df.loc[animalData_df["Animal Type_intake"].isin(["Dog","Cat"])].copy()
len(animalData_df)

57593

In [31]:
# Calc date diff between intakes and outcomes (outcome minus intake, stored as a timedelta)
animalData_df["Intake Outcome Days"] = animalData_df["DateTime_outcome"]-animalData_df["DateTime_intake"]
#animalData_df.head()

In [32]:
# Whole-day component of the intake-to-outcome timedelta.
# Reading it through the .dt accessor avoids the round trip through text and
# keeps the sign, so a row whose outcome precedes its intake shows up as a
# negative value instead of being turned into a missing one.
animalData_df["IODays"] = animalData_df["Intake Outcome Days"].dt.days
animalData_df.head()

Unnamed: 0,Animal ID,Name_intake,DateTime_intake,Found Location,Intake Type,Intake Condition,Animal Type_intake,Sex upon Intake,Age upon Intake,Breed_intake,...,Sex_outcome,Female_outcome,Male_outcome,SpayNeuter_outcome,Purebred_outcome,NumAge_outcome,AgeUnits_outcome,AgeDays_outcome,Intake Outcome Days,IODays
0,A748291,*Madison,2017-05-01 14:26:00,S Pleasant Valley Rd And E Riverside Dr in Aus...,Stray,Normal,Dog,Intact Female,10 months,Pit Bull Mix,...,Female,1.0,0.0,Yes,0,1.0,year,365.0,125 days 09:34:00,125.0
1,A750529,,2017-05-28 13:22:00,8312 North Ih 35 in Austin (TX),Stray,Normal,Dog,Intact Female,5 months,Miniature Schnauzer Mix,...,Female,1.0,0.0,Yes,0,5.0,month,150.0,4 days 03:20:00,4.0
2,A730601,,2016-07-07 12:11:00,1109 Shady Ln in Austin (TX),Stray,Normal,Cat,Intact Male,7 months,Domestic Shorthair Mix,...,Male,0.0,1.0,Yes,0,7.0,month,210.0,0 days 20:49:00,0.0
3,A748238,,2017-05-01 10:53:00,Airport Blvd And Oak Springs Dr in Austin (TX),Stray,Normal,Dog,Intact Male,3 years,Bichon Frise Mix,...,Male,0.0,1.0,Yes,0,3.0,year,1095.0,5 days 05:04:00,5.0
4,A683644,*Zoey,2014-07-13 11:02:00,Austin (TX),Owner Surrender,Nursing,Dog,Intact Female,4 weeks,Border Collie Mix,...,Female,1.0,0.0,Yes,0,4.0,month,120.0,115 days 23:04:00,115.0


In [33]:
#Export to CSV (UTF-8, without the dataframe index column)
animalData_df.to_csv("raw data/MergedData.csv", encoding="utf-8", index=False)

# Cleanup of Austin vs Louisville Data

## Computing time deltas for Louisville data

In [36]:
# Read the raw Louisville shelter data and preview the first rows
louisville_data = pd.read_csv("raw data/Louisville.csv")
louisville_data.head()

Unnamed: 0,AnimalID,AnimalType,IntakeDate,IntakeType,IntakeSubtype,PrimaryColor,PrimaryBreed,SecondaryBreed,Gender,SecondaryColor,...,IntakeInternalStatus,IntakeAsilomarStatus,ReproductiveStatusAtIntake,OutcomeDate,OutcomeType,OutcomeSubtype,OutcomeReason,OutcomeInternalStatus,OutcomeAsilomarStatus,ReproductiveStatusAtOutcome
0,A366370,CAT,2008-11-07 10:50:00,STRAY,OTC,WHITE,DOMESTIC SHORTHAIR,,NEUTERED MALE,BROWN,...,FEARFUL,HEALTHY,ALTERED,2008-11-12 15:46:00,EUTH,FERAL,,,UNHEALTHY/UNTREATABLE,ALTERED
1,A366531,CAT,2008-11-10 10:20:00,STRAY,OTC,BLACK,DOMESTIC SHORTHAIR,DOMESTIC SHORTHAIR,UNKNOWN,,...,NORMAL,HEALTHY,UNKNOWN,2008-11-19 20:10:00,EUTH,CONTAG DIS,,SICK,HEALTHY,UNKNOWN
2,A532367,BIRD,2014-07-23 23:21:00,CONFISCATE,CRUELTY,RED,CHICKEN,,MALE,BLACK,...,OTHER,HEALTHY,FERTILE,2014-11-05 15:49:00,TRANSFER,,,,HEALTHY,FERTILE
3,A532474,OTHER,2014-07-24 18:29:00,ET REQUEST,,BROWN,BAT,,UNKNOWN,,...,OTHER,HEALTHY,UNKNOWN,2014-07-24 23:59:00,EUTH,MEDICAL,,OTHER,HEALTHY,UNKNOWN
4,A281756,DOG,2006-09-11 18:10:00,OWNER SUR,OTC,WHITE,PIT BULL TERRIER,,MALE,BROWN,...,NORMAL,HEALTHY,FERTILE,2006-09-12 13:44:00,EUTH,TIME/SPACE,,,HEALTHY,FERTILE


In [39]:
# Days between intake and outcome for the Louisville data, as fractional days.
# Both timestamp columns are parsed in one vectorised pass and referenced by
# name rather than by position. Values that do not match the expected layout
# (including missing dates) become NaT, so the result for those rows is NaN
# instead of the row being silently skipped.
louisville_timestamp_format = '%Y-%m-%d %H:%M:%S'
louisville_intake_dt = pd.to_datetime(
    louisville_data['IntakeDate'], format=louisville_timestamp_format, errors='coerce'
)
louisville_outcome_dt = pd.to_datetime(
    louisville_data['OutcomeDate'], format=louisville_timestamp_format, errors='coerce'
)
# 86400 seconds per day
louisville_data['Days from Intake to Outcome'] = (
    (louisville_outcome_dt - louisville_intake_dt).dt.total_seconds() / 86400
)

In [42]:
# List the Louisville column names; 'Days from Intake to Outcome' should now be the last one
print(louisville_data.columns)

Index(['AnimalID', 'AnimalType', 'IntakeDate', 'IntakeType', 'IntakeSubtype',
       'PrimaryColor', 'PrimaryBreed', 'SecondaryBreed', 'Gender',
       'SecondaryColor', 'DOB', 'IntakeReason', 'IntakeInternalStatus',
       'IntakeAsilomarStatus', 'ReproductiveStatusAtIntake', 'OutcomeDate',
       'OutcomeType', 'OutcomeSubtype', 'OutcomeReason',
       'OutcomeInternalStatus', 'OutcomeAsilomarStatus',
       'ReproductiveStatusAtOutcome', 'Days from Intake to Outcome'],
      dtype='object')


In [43]:
#louisville_data.rename(columns={"Time from Intake to Outcome":"Days between Intake and Outcome"})

In [46]:
#louisville_data=louisville_data.drop(columns=['Days from Intake to Outcome'])

In [47]:
#louisville_data=louisville_data.rename(columns={"Time from Intake to Outcome":"Days between Intake and Outcome"})
# Preview the data with the new 'Days from Intake to Outcome' column
louisville_data.head()

Unnamed: 0,AnimalID,AnimalType,IntakeDate,IntakeType,IntakeSubtype,PrimaryColor,PrimaryBreed,SecondaryBreed,Gender,SecondaryColor,...,IntakeAsilomarStatus,ReproductiveStatusAtIntake,OutcomeDate,OutcomeType,OutcomeSubtype,OutcomeReason,OutcomeInternalStatus,OutcomeAsilomarStatus,ReproductiveStatusAtOutcome,Days from Intake to Outcome
0,A366370,CAT,2008-11-07 10:50:00,STRAY,OTC,WHITE,DOMESTIC SHORTHAIR,,NEUTERED MALE,BROWN,...,HEALTHY,ALTERED,2008-11-12 15:46:00,EUTH,FERAL,,,UNHEALTHY/UNTREATABLE,ALTERED,5.20556
1,A366531,CAT,2008-11-10 10:20:00,STRAY,OTC,BLACK,DOMESTIC SHORTHAIR,DOMESTIC SHORTHAIR,UNKNOWN,,...,HEALTHY,UNKNOWN,2008-11-19 20:10:00,EUTH,CONTAG DIS,,SICK,HEALTHY,UNKNOWN,9.40972
2,A532367,BIRD,2014-07-23 23:21:00,CONFISCATE,CRUELTY,RED,CHICKEN,,MALE,BLACK,...,HEALTHY,FERTILE,2014-11-05 15:49:00,TRANSFER,,,,HEALTHY,FERTILE,104.686
3,A532474,OTHER,2014-07-24 18:29:00,ET REQUEST,,BROWN,BAT,,UNKNOWN,,...,HEALTHY,UNKNOWN,2014-07-24 23:59:00,EUTH,MEDICAL,,OTHER,HEALTHY,UNKNOWN,0.229167
4,A281756,DOG,2006-09-11 18:10:00,OWNER SUR,OTC,WHITE,PIT BULL TERRIER,,MALE,BROWN,...,HEALTHY,FERTILE,2006-09-12 13:44:00,EUTH,TIME/SPACE,,,HEALTHY,FERTILE,0.815278


In [48]:
#louisville_data.to_csv('raw data/Louisville_with_time_deltas.csv')

In [49]:
# Keep only the cat and dog records
louisville_data_dogs_cats_only = louisville_data[louisville_data['AnimalType'].isin(['CAT', 'DOG'])]

In [50]:
#confirm it worked: only DOG and CAT should be listed
louisville_data_dogs_cats_only['AnimalType'].value_counts()

DOG    76687
CAT    68212
Name: AnimalType, dtype: int64

In [51]:
# Export the cat/dog subset. index=False is not passed, so the dataframe index
# is written as an extra first column.
louisville_data_dogs_cats_only.to_csv('raw data/Louisville_with_time_deltas_dogs_cats_only.csv')

##  Computing time deltas for Austin data

In [54]:
#Read in the merged Austin data. No parse_dates argument is given, so the
#timestamp columns are loaded as text.
austin_data = pd.read_csv("raw data/MergedData.csv")

In [55]:
# Days between intake and outcome for the Austin data, as fractional days.
# pd.to_datetime detects the timestamp layout itself, so it handles the
# ISO-style timestamps ("2017-05-01 14:26:00") stored in MergedData.csv.
# A fixed strptime pattern such as '%m/%d/%Y %H:%M:%S %p' does not match that
# layout (every row would be skipped), and %p has no effect when combined with %H.
# Values that cannot be parsed become NaT and give NaN for that row.
austin_intake_dt = pd.to_datetime(austin_data['DateTime_intake'], errors='coerce')
austin_outcome_dt = pd.to_datetime(austin_data['DateTime_outcome'], errors='coerce')
# 86400 seconds per day
austin_data['Days from Intake to Outcome'] = (
    (austin_outcome_dt - austin_intake_dt).dt.total_seconds() / 86400
)

In [56]:
#confirm there are only cats and dogs (by the animal type recorded at intake)
austin_data['Animal Type_intake'].value_counts()

Dog    31697
Cat    25896
Name: Animal Type_intake, dtype: int64

In [58]:
# Export the Austin data with the time-delta column. index=False is not passed,
# so the dataframe index is written as an extra first column.
austin_data.to_csv('raw data/Austin_merged_data_with_time_deltas.csv')