In [66]:
import pandas as pd
import numpy as np

In [47]:
# Load the CSV file
file_path = 'Austin_Animal_Center_Outcomes_20241205.csv'
data = pd.read_csv(file_path)

In [48]:
# Display data types of each column
data_types = data.dtypes
print("Data types of each column:")
print(data_types)

Data types of each column:
Animal ID           object
Name                object
DateTime            object
MonthYear           object
Date of Birth       object
Outcome Type        object
Outcome Subtype     object
Animal Type         object
Sex upon Outcome    object
Age upon Outcome    object
Breed               object
Color               object
dtype: object


In [49]:
# Count missing entries for each column
missing_entries = data.isnull().sum()
print("\nCount of missing entries for each column:")
print(missing_entries)



Count of missing entries for each column:
Animal ID               0
Name                48588
DateTime                0
MonthYear               0
Date of Birth           0
Outcome Type           45
Outcome Subtype     91639
Animal Type             0
Sex upon Outcome        3
Age upon Outcome        7
Breed                   0
Color                   0
dtype: int64


In [50]:
# Remove rows with missing values in 'Outcome Type', 'Sex upon Outcome', and 'Age upon Outcome'
data = data.dropna(subset=['Outcome Type', 'Sex upon Outcome', 'Age upon Outcome'])

# Verify if there are still missing values in the dataset after cleaning
missing_summary_after_cleaning = cleaned_data.isnull().sum()
print(missing_summary_after_cleaning)

Animal ID               0
Name                48566
DateTime                0
MonthYear               0
Date of Birth           0
Outcome Type            0
Outcome Subtype     91594
Animal Type             0
Sex upon Outcome        0
Age upon Outcome        0
Breed                   0
Color                   0
dtype: int64


In [51]:
unique_age = data['Age upon Outcome'].unique()
unique_age


array(['3 months', '2 years', '1 year', '4 months', '6 days', '7 years',
       '2 months', '3 years', '2 days', '3 weeks', '11 years', '9 months',
       '4 weeks', '2 weeks', '6 months', '9 years', '10 years', '1 month',
       '8 years', '7 months', '6 years', '4 years', '12 years', '5 years',
       '1 day', '1 weeks', '5 months', '5 days', '15 years', '10 months',
       '4 days', '16 years', '8 months', '13 years', '1 week', '14 years',
       '11 months', '3 days', '0 years', '5 weeks', '17 years',
       '18 years', '20 years', '22 years', '-2 years', '19 years',
       '28 years', '-3 years', '23 years', '25 years', '30 years',
       '-1 years', '24 years', '21 years', '-4 years'], dtype=object)

In [52]:
# Drop rows with negative values in the "Age upon Outcome" column
data= data[~data['Age upon Outcome'].str.contains('-', na=False)]
unique_age

array(['3 months', '2 years', '1 year', '4 months', '6 days', '7 years',
       '2 months', '3 years', '2 days', '3 weeks', '11 years', '9 months',
       '4 weeks', '2 weeks', '6 months', '9 years', '10 years', '1 month',
       '8 years', '7 months', '6 years', '4 years', '12 years', '5 years',
       '1 day', '1 weeks', '5 months', '5 days', '15 years', '10 months',
       '4 days', '16 years', '8 months', '13 years', '1 week', '14 years',
       '11 months', '3 days', '0 years', '5 weeks', '17 years',
       '18 years', '20 years', '22 years', '-2 years', '19 years',
       '28 years', '-3 years', '23 years', '25 years', '30 years',
       '-1 years', '24 years', '21 years', '-4 years'], dtype=object)

In [53]:
# Show the number of different outcome types
num_unique_outcome_types = data['Outcome Type'].nunique()
print("\nNumber of different outcome types:")
print(num_unique_outcome_types)


Number of different outcome types:
11


In [54]:
# Show unique values in 'Outcome Type' column
unique_outcome_types = data['Outcome Type'].unique()
print("\nUnique values in 'Outcome Type' column:")
print(unique_outcome_types)


Unique values in 'Outcome Type' column:
['Adoption' 'Rto-Adopt' 'Euthanasia' 'Transfer' 'Return to Owner' 'Died'
 'Disposal' 'Missing' 'Relocate' 'Stolen' 'Lost']


In [55]:
# Create a new column with 2 outcome types: 'Adopted' or 'Not Adopted'
simplified_outcome = []
for outcome in data['Outcome Type']:
    if outcome in ['Adoption', 'Rto-Adopt', 'Return to Owner']:
        simplified_outcome.append('Adopted')
    else:
        simplified_outcome.append('Not Adopted')

data['Simplified Outcome'] = simplified_outcome

In [56]:
data.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color,Simplified Outcome
0,A882831,*Hamilton,07/01/2023 06:12:00 PM,Jul 2023,03/25/2023,Adoption,,Cat,Neutered Male,3 months,Domestic Shorthair Mix,Black/White,Adopted
1,A794011,Chunk,05/08/2019 06:20:00 PM,May 2019,05/02/2017,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White,Adopted
2,A776359,Gizmo,07/18/2018 04:02:00 PM,Jul 2018,07/12/2017,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown,Adopted
3,A821648,,08/16/2020 11:38:00 AM,Aug 2020,08/16/2019,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray,Not Adopted
4,A720371,Moose,02/13/2016 05:59:00 PM,Feb 2016,10/08/2015,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff,Adopted


In [57]:
# Save the cleaned data to a new CSV file
data.to_csv('Austin_Animal_Center_Outcomes_Cleaned.csv', index=False)


In [58]:
# unique values in Animal Type
unique_animal_types = data['Animal Type'].unique()
unique_animal_types

array(['Cat', 'Dog', 'Other', 'Bird', 'Livestock'], dtype=object)

In [59]:
other_breed_counts = data[data['Animal Type'] == 'Other']['Breed'].value_counts()
other_breed_counts

Breed
Bat               2267
Bat Mix           1754
Raccoon            800
Raccoon Mix        539
Guinea Pig         536
                  ... 
Lop-Mini/Hotot       1
Ringtail             1
Otter Mix            1
Deer Mix             1
Frog                 1
Name: count, Length: 137, dtype: int64

In [60]:
#Count total simplified outcome and group by 'Animal Type', 'Sex upon Outcome', and 'Age upon Outcome'
simplified_outcome_counts = data['Simplified Outcome'].value_counts()
counts_by_animal_type = data.groupby(['Animal Type', 'Simplified Outcome']).size()
counts_by_sex = data.groupby(['Sex upon Outcome', 'Simplified Outcome']).size()
counts_by_age = data.groupby(['Age upon Outcome', 'Simplified Outcome']).size()

simplified_outcome_counts


Simplified Outcome
Adopted        108384
Not Adopted     60917
Name: count, dtype: int64

In [61]:
counts_by_animal_type

Animal Type  Simplified Outcome
Bird         Adopted                 366
             Not Adopted             487
Cat          Adopted               37502
             Not Adopted           29883
Dog          Adopted               69455
             Not Adopted           22823
Livestock    Adopted                  23
             Not Adopted              11
Other        Adopted                1038
             Not Adopted            7713
dtype: int64

In [62]:
counts_by_sex

Sex upon Outcome  Simplified Outcome
Intact Female     Adopted                5221
                  Not Adopted           16105
Intact Male       Adopted                6524
                  Not Adopted           15257
Neutered Male     Adopted               49943
                  Not Adopted            9377
Spayed Female     Adopted               46165
                  Not Adopted            7590
Unknown           Adopted                 531
                  Not Adopted           12588
dtype: int64

In [63]:
counts_by_age

Age upon Outcome  Simplified Outcome
0 years           Adopted                 16
                  Not Adopted            251
1 day             Adopted                 18
                  Not Adopted            474
1 month           Adopted               3210
                                        ... 
8 years           Not Adopted            794
9 months          Adopted               1228
                  Not Adopted            483
9 years           Adopted               1252
                  Not Adopted            365
Length: 95, dtype: int64

In [68]:
# Define a mapping of age units to days
age_mapping = {
    'year': 365,
    'years': 365,
    'month': 30,
    'months': 30,
    'week': 7,
    'weeks': 7,
    'day': 1,
    'days': 1
}
# Function to convert age to days
def convert_age_to_days(age_str):
    if pd.isnull(age_str):
        return np.nan
    parts = age_str.split()
    if len(parts) != 2:
        return np.nan
    number, unit = int(parts[0]), parts[1].lower()
    return number * age_mapping.get(unit, np.nan)
# Apply the function to the "Age upon Outcome" column
data['Age in Days'] = data['Age upon Outcome'].apply(convert_age_to_days)
data.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color,Simplified Outcome,Age in Days
0,A882831,*Hamilton,07/01/2023 06:12:00 PM,Jul 2023,03/25/2023,Adoption,,Cat,Neutered Male,3 months,Domestic Shorthair Mix,Black/White,Adopted,90
1,A794011,Chunk,05/08/2019 06:20:00 PM,May 2019,05/02/2017,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White,Adopted,730
2,A776359,Gizmo,07/18/2018 04:02:00 PM,Jul 2018,07/12/2017,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown,Adopted,365
3,A821648,,08/16/2020 11:38:00 AM,Aug 2020,08/16/2019,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray,Not Adopted,365
4,A720371,Moose,02/13/2016 05:59:00 PM,Feb 2016,10/08/2015,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff,Adopted,120


In [69]:
# Save the cleaned data to a new CSV file again
data.to_csv('Austin_Animal_Center_Outcomes_Cleaned.csv', index=False)
