# Case Studies in Machine Learning Final Paper

## Topic: Predict animal length-of-stay at adoption center

## Paper Overview

1) Abstract
2) Introduction
3) Literature Review
4) Data Background and Description
5) Data Preliminary Analysis
6) Model Objective and Training
7) Results and Explanations
8) Conclusions

# Import Data from data.austintexas.gov
## AAC Outcomes: https://data.austintexas.gov/Health-and-Community-Services/Austin-Animal-Center-Outcomes/9t4d-g238/about_data
## AAC Intakes: https://data.austintexas.gov/Health-and-Community-Services/Austin-Animal-Center-Intakes/wter-evkm/about_data

In [1]:
from datetime import date, datetime
import os
import pandas as pd
# Create a date for november 11 2024
download_date = date(2024, 11, 5)

# format download_date into a string with YYYYMMDD format
download_date = download_date.strftime('%Y%m%d')

# Insert date string in YYYYMMDD format into the filename
outcomes_filename = os.path.join('data', f'Austin_Animal_Center_Outcomes_{download_date}.csv')
intakes_filename = os.path.join('data', f'Austin_Animal_Center_Intakes_{download_date}.csv')

df_outcomes = pd.read_csv(outcomes_filename)
df_intakes = pd.read_csv(intakes_filename)

display(df_intakes)
display(df_outcomes)

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color
0,A786884,*Brock,01/03/2019 04:19:00 PM,January 2019,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor
1,A706918,Belle,07/05/2015 12:59:00 PM,July 2015,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver
2,A724273,Runster,04/14/2016 06:43:00 PM,April 2016,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White
3,A665644,,10/21/2013 07:59:00 AM,October 2013,Austin (TX),Stray,Sick,Cat,Intact Female,4 weeks,Domestic Shorthair Mix,Calico
4,A857105,Johnny Ringo,05/12/2022 12:23:00 AM,May 2022,4404 Sarasota Drive in Austin (TX),Public Assist,Normal,Cat,Neutered Male,2 years,Domestic Shorthair,Orange Tabby
...,...,...,...,...,...,...,...,...,...,...,...,...
168579,A917267,Betsy,11/05/2024 10:05:00 AM,November 2024,1015 W William Cannon Dr in Austin (TX),Public Assist,Normal,Dog,Spayed Female,2 years,American Pit Bull Terrier,White/Blue
168580,A916486,*Ricky Bobby,10/27/2024 12:49:00 PM,October 2024,1319 Fm 973 in Austin (TX),Stray,Normal,Dog,Intact Male,2 years,Labrador Retriever Mix,Tan/White
168581,A915878,*Maple,10/21/2024 12:54:00 PM,October 2024,7206 Fence Line Drive in Austin (TX),Stray,Normal,Cat,Intact Female,1 month,Domestic Shorthair,Brown Tabby
168582,A916667,*Michael Scott,10/29/2024 12:02:00 PM,October 2024,5530 Killingsworth Ln in Travis (TX),Stray,Unknown,Dog,Intact Male,2 years,Alaskan Husky,Black/White


Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
0,A882831,*Hamilton,07/01/2023 06:12:00 PM,Jul 2023,03/25/2023,Adoption,,Cat,Neutered Male,3 months,Domestic Shorthair Mix,Black/White
1,A794011,Chunk,05/08/2019 06:20:00 PM,May 2019,05/02/2017,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White
2,A776359,Gizmo,07/18/2018 04:02:00 PM,Jul 2018,07/12/2017,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown
3,A821648,,08/16/2020 11:38:00 AM,Aug 2020,08/16/2019,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray
4,A720371,Moose,02/13/2016 05:59:00 PM,Feb 2016,10/08/2015,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff
...,...,...,...,...,...,...,...,...,...,...,...,...
168506,A916289,Shirley Temple,11/04/2024 06:15:00 PM,Nov 2024,10/24/2022,Adoption,,Dog,Spayed Female,2 years,American Bulldog Mix,Tan
168507,A916990,,11/05/2024 04:27:00 PM,Nov 2024,11/01/2022,Euthanasia,Rabies Risk,Other,Unknown,2 years,Bat,Brown/Brown
168508,A869055,Peanut Butter Cup,11/17/2022 12:14:00 PM,Nov 2022,06/12/2022,Transfer,Partner,Dog,Neutered Male,5 months,Pit Bull,Brown
168509,A915460,*Toro,11/05/2024 09:31:00 AM,Nov 2024,09/01/2024,Adoption,Foster,Cat,Spayed Female,2 months,Domestic Shorthair,Brown Tabby/White


In [2]:
# Join dataframes on Animal ID
pd.options.display.max_columns = 50
df_joined = pd.merge(df_intakes,df_outcomes,on=["Animal ID"],suffixes=('_intake','_outcome'))

# Drop duplicate columns
cols_intakes = df_intakes.columns
cols_outcomes = df_outcomes.columns

duplicate_prefixes = ["Name", "Animal Type", "Breed", "Color"]

for pref in duplicate_prefixes:
    if (df_joined[pref + "_intake"].dropna() == df_joined[pref + "_outcome"].dropna()).all():
        df_joined[pref] = df_joined[pref + "_intake"]
        df_joined = df_joined.drop(columns=[pref + "_intake", pref + "_outcome"])


# Calculate the duration between intake and outcome
df_joined["DateTime_outcome"] = pd.to_datetime(df_joined["DateTime_outcome"], format="%m/%d/%Y %I:%M:%S %p")
df_joined["DateTime_intake"] = pd.to_datetime(df_joined["DateTime_intake"], format="%m/%d/%Y %I:%M:%S %p")
df_joined["duration_in_shelter"] = df_joined["DateTime_outcome"] - df_joined["DateTime_intake"]


Unnamed: 0,Animal ID,DateTime_intake,MonthYear_intake,Found Location,Intake Type,Intake Condition,Sex upon Intake,Age upon Intake,DateTime_outcome,MonthYear_outcome,Date of Birth,Outcome Type,Outcome Subtype,Sex upon Outcome,Age upon Outcome,Name,Animal Type,Breed,Color,duration_in_shelter
0,A786884,2019-01-03 16:19:00,January 2019,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Neutered Male,2 years,2019-01-08 15:11:00,Jan 2019,01/03/2017,Transfer,Partner,Neutered Male,2 years,*Brock,Dog,Beagle Mix,Tricolor,4 days 22:52:00
1,A706918,2015-07-05 12:59:00,July 2015,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Spayed Female,8 years,2015-07-05 15:13:00,Jul 2015,07/05/2007,Return to Owner,,Spayed Female,8 years,Belle,Dog,English Springer Spaniel,White/Liver,0 days 02:14:00
2,A724273,2016-04-14 18:43:00,April 2016,2818 Palomino Trail in Austin (TX),Stray,Normal,Intact Male,11 months,2016-04-21 17:17:00,Apr 2016,04/17/2015,Return to Owner,,Neutered Male,1 year,Runster,Dog,Basenji Mix,Sable/White,6 days 22:34:00
3,A665644,2013-10-21 07:59:00,October 2013,Austin (TX),Stray,Sick,Intact Female,4 weeks,2013-10-21 11:39:00,Oct 2013,09/21/2013,Transfer,Partner,Intact Female,4 weeks,,Cat,Domestic Shorthair Mix,Calico,0 days 03:40:00
4,A857105,2022-05-12 00:23:00,May 2022,4404 Sarasota Drive in Austin (TX),Public Assist,Normal,Neutered Male,2 years,2022-05-12 14:35:00,May 2022,05/12/2020,Transfer,Partner,Neutered Male,2 years,Johnny Ringo,Cat,Domestic Shorthair,Orange Tabby,0 days 14:12:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
214840,A915292,2024-10-14 12:27:00,October 2024,9009 Galewood Dr in Austin (TX),Stray,Normal,Intact Male,1 month,2024-11-04 17:20:00,Nov 2024,09/04/2024,Adoption,,Neutered Male,1 month,Chozen Toguchi,Cat,Domestic Shorthair,Black/White,21 days 04:53:00
214841,A916560,2024-10-28 11:25:00,October 2024,Lavaca St in Austin (TX),Stray,Normal,Intact Female,3 months,2024-11-04 17:57:00,Nov 2024,07/28/2024,Adoption,,Spayed Female,3 months,Fajr,Cat,Domestic Shorthair,Brown Tabby,7 days 06:32:00
214842,A915792,2024-10-19 18:41:00,October 2024,183 And Loyola in Austin (TX),Stray,Normal,Intact Female,1 month,2024-11-04 18:02:00,Nov 2024,08/29/2024,Adoption,,Spayed Female,2 months,Muffin,Cat,Domestic Shorthair,Brown Tabby/White,15 days 23:21:00
214843,A869055,2024-10-31 11:36:00,October 2024,9323 Menchaca Road in Austin (TX),Stray,Medical,Neutered Male,2 years,2022-11-17 12:14:00,Nov 2022,06/12/2022,Transfer,Partner,Neutered Male,5 months,Peanut Butter Cup,Dog,Pit Bull,Brown,-714 days +00:38:00


In [37]:
# Drop rows where sex upon outcome is nan
df_joined = df_joined.dropna(axis=0,subset=["Sex upon Outcome","Sex upon Intake"])
df_joined = df_joined.loc[df_joined["Sex upon Intake"].str.contains("Male|Female")]

# Display Unique Values of categorical variables
categorical_columns = ["Intake Type", "Intake Condition", "Sex upon Intake", "Age upon Intake", "Outcome Type", "Outcome Subtype", "Sex upon Outcome", "Age upon Outcome", "Animal Type", "Breed"]

for col in categorical_columns:
    print(f"Unique values for {col}: {df_joined[col].unique()}")
    print("\n")

# Display the number of missing values in each column
print(df_joined.isna().sum())


display(df_joined.loc[df_joined['Sex upon Outcome'] == "Unknown"])

Unique values for Intake Type: ['Stray' 'Public Assist' 'Owner Surrender' 'Abandoned' 'Wildlife'
 'Euthanasia Request']


Unique values for Intake Condition: ['Normal' 'Sick' 'Injured' 'Pregnant' 'Neonatal' 'Nursing' 'Aged'
 'Unknown' 'Med Attn' 'Medical' 'Other' 'Feral' 'Behavior' 'Med Urgent'
 'Parvo' 'Space' 'Agonal' 'Neurologic' 'Panleuk' 'Congenital']


Unique values for Sex upon Intake: ['Neutered Male' 'Spayed Female' 'Intact Male' 'Intact Female']


Unique values for Age upon Intake: ['2 years' '8 years' '11 months' '4 weeks' '4 years' '6 years' '6 months'
 '5 months' '1 month' '14 years' '2 weeks' '1 week' '2 months' '18 years'
 '9 years' '4 months' '1 day' '1 year' '3 years' '5 years' '15 years'
 '8 months' '6 days' '7 years' '3 months' '12 years' '3 weeks' '9 months'
 '10 years' '10 months' '7 months' '0 years' '1 weeks' '5 days' '17 years'
 '11 years' '4 days' '2 days' '3 days' '13 years' '5 weeks' '16 years'
 '19 years' '20 years' '-1 years' '-3 years' '-4 years' '22 years

Unnamed: 0,Animal ID,DateTime_intake,MonthYear_intake,Found Location,Intake Type,Intake Condition,Sex upon Intake,Age upon Intake,DateTime_outcome,MonthYear_outcome,Date of Birth,Outcome Type,Outcome Subtype,Sex upon Outcome,Age upon Outcome,Name,Animal Type,Breed,Color,duration_in_shelter,age_upon_intake_years,fixed,fixed Intake,fixed Outcome


In [29]:
# TODO:
### 1) Convert Age upon intake from string to numeric, parse the strings of the form X (years/months) 

age_strings = df_joined['Age upon Intake'].str.split(' ', expand=True)
age_strings.columns = ['age', 'unit']
age_strings['age'] = pd.to_numeric(age_strings['age'])
age_strings['unit'] = age_strings['unit'].str.replace('s', '')

# Convert to years
age_strings.loc[age_strings['unit'] == 'month', 'age'] /= 12
age_strings.loc[age_strings['unit'] == 'week', 'age'] /= 52
age_strings.loc[age_strings['unit'] == 'day', 'age'] /= 365

df_joined['age_upon_intake_years'] = age_strings['age']

### 2) Convert NaN Names to Unknown name or stay as nan

### 3) Create binary indicator flags for fixed/intact, male/female
df_joined['fixed Intake'] = df_joined['Sex upon Intake'].str.contains('Neutered|Spayed')
df_joined['fixed Outcome'] = df_joined['Sex upon Outcome'].str.contains('Neutered|Spayed')

# OR fixed Intake and fixed Outcome columns together
df_joined['fixed'] = df_joined['fixed Intake'] | df_joined['fixed Outcome']

df_joined[]
display(df_joined)
# Drop nan sex upon intake and age upon intake rows
# Figure out how many durations are negative and drop from dataframe
# Convert Found location to coordinates

Unnamed: 0,Animal ID,DateTime_intake,MonthYear_intake,Found Location,Intake Type,Intake Condition,Sex upon Intake,Age upon Intake,DateTime_outcome,MonthYear_outcome,Date of Birth,Outcome Type,Outcome Subtype,Sex upon Outcome,Age upon Outcome,Name,Animal Type,Breed,Color,duration_in_shelter,age_upon_intake_years,fixed,fixed Intake,fixed Outcome
0,A786884,2019-01-03 16:19:00,January 2019,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Neutered Male,2 years,2019-01-08 15:11:00,Jan 2019,01/03/2017,Transfer,Partner,Neutered Male,2 years,*Brock,Dog,Beagle Mix,Tricolor,4 days 22:52:00,2.000000,True,True,True
1,A706918,2015-07-05 12:59:00,July 2015,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Spayed Female,8 years,2015-07-05 15:13:00,Jul 2015,07/05/2007,Return to Owner,,Spayed Female,8 years,Belle,Dog,English Springer Spaniel,White/Liver,0 days 02:14:00,8.000000,True,True,True
2,A724273,2016-04-14 18:43:00,April 2016,2818 Palomino Trail in Austin (TX),Stray,Normal,Intact Male,11 months,2016-04-21 17:17:00,Apr 2016,04/17/2015,Return to Owner,,Neutered Male,1 year,Runster,Dog,Basenji Mix,Sable/White,6 days 22:34:00,0.916667,True,False,True
3,A665644,2013-10-21 07:59:00,October 2013,Austin (TX),Stray,Sick,Intact Female,4 weeks,2013-10-21 11:39:00,Oct 2013,09/21/2013,Transfer,Partner,Intact Female,4 weeks,,Cat,Domestic Shorthair Mix,Calico,0 days 03:40:00,0.076923,False,False,False
4,A857105,2022-05-12 00:23:00,May 2022,4404 Sarasota Drive in Austin (TX),Public Assist,Normal,Neutered Male,2 years,2022-05-12 14:35:00,May 2022,05/12/2020,Transfer,Partner,Neutered Male,2 years,Johnny Ringo,Cat,Domestic Shorthair,Orange Tabby,0 days 14:12:00,2.000000,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
214840,A915292,2024-10-14 12:27:00,October 2024,9009 Galewood Dr in Austin (TX),Stray,Normal,Intact Male,1 month,2024-11-04 17:20:00,Nov 2024,09/04/2024,Adoption,,Neutered Male,1 month,Chozen Toguchi,Cat,Domestic Shorthair,Black/White,21 days 04:53:00,0.083333,True,False,True
214841,A916560,2024-10-28 11:25:00,October 2024,Lavaca St in Austin (TX),Stray,Normal,Intact Female,3 months,2024-11-04 17:57:00,Nov 2024,07/28/2024,Adoption,,Spayed Female,3 months,Fajr,Cat,Domestic Shorthair,Brown Tabby,7 days 06:32:00,0.250000,True,False,True
214842,A915792,2024-10-19 18:41:00,October 2024,183 And Loyola in Austin (TX),Stray,Normal,Intact Female,1 month,2024-11-04 18:02:00,Nov 2024,08/29/2024,Adoption,,Spayed Female,2 months,Muffin,Cat,Domestic Shorthair,Brown Tabby/White,15 days 23:21:00,0.083333,True,False,True
214843,A869055,2024-10-31 11:36:00,October 2024,9323 Menchaca Road in Austin (TX),Stray,Medical,Neutered Male,2 years,2022-11-17 12:14:00,Nov 2022,06/12/2022,Transfer,Partner,Neutered Male,5 months,Peanut Butter Cup,Dog,Pit Bull,Brown,-714 days +00:38:00,2.000000,True,True,True
