# Imports

In [1]:
import pandas as pd
import numpy as np
import plotly as plt
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

# Reading Raw Data

In [2]:
# Load the shelter outcomes export; the path is relative, so it resolves
# against the directory the notebook kernel is running in.
aac_df = pd.read_csv("../raw_data/aac_shelter_outcomes.csv")

In [3]:
# Number of rows that exactly repeat an earlier row (all columns compared).
aac_df.duplicated().sum()

10

In [4]:
# Keep the first occurrence of every fully duplicated row and rebind the name
# to the de-duplicated frame.
aac_df = aac_df.drop_duplicates()

In [5]:
# Give the generic timestamp columns an explicit "outcome_" prefix so they
# remain unambiguous next to the intake timestamp columns.
outcome_column_names = {
    'datetime': 'outcome_datetime',
    'monthyear': 'outcome_monthyear',
}
aac_df = aac_df.rename(columns=outcome_column_names)

In [6]:
# Number of outcome rows per animal type.
aac_df['animal_type'].value_counts()

Dog          44240
Cat          29415
Other         4248
Bird           334
Livestock        9
Name: animal_type, dtype: int64

In [None]:
# Load the shelter intakes export (relative path, resolved against the
# kernel's working directory).
intakes_df = pd.read_csv("../raw_data/aac_shelter_intakes.csv")

In [None]:
# Positional rename: the list must hold exactly one name per column of the
# intakes file, in file order, otherwise pandas raises a length-mismatch error.
intake_column_names = [
    'animal_id',
    'name',
    'intake_datetime',
    'intake_monthyear',
    'found_location',
    'intake_type',
    'intake_condition',
    'animal_type',
    'sex_upon_intake',
    'age_upon_intake',
    'breed',
    'color',
]
intakes_df.columns = intake_column_names

In [None]:
# Remove rows that exactly repeat an earlier intake row; rebind to the result.
intakes_df = intakes_df.drop_duplicates()

In [None]:
# Discard descriptive columns from the intake side before joining.
# NOTE(review): presumably the outcomes table already carries these fields
# (animal_type and breed are read from the joined frame later) — confirm that
# name and color are available there as well.
intakes_df = intakes_df.drop(columns=['name', 'animal_type', 'breed', 'color'])

In [None]:
# Attach intake details to the outcome rows; a left join keeps every outcome
# even when no intake record matches.
# NOTE(review): if an animal_id appears more than once in both tables (e.g.
# repeat shelter visits), this join pairs every outcome with every intake of
# that animal and multiplies rows — confirm animal_id uniqueness in
# intakes_df or join on a per-visit key.
aac_new_df = pd.merge(aac_df, intakes_df, how='left', on='animal_id')

In [None]:
# Missing values per column after the join; intake columns are NaN for
# outcome rows that found no matching intake record.
aac_new_df.isna().sum()

# Extracting Dog Data Only

In [None]:
# Keep only the dog rows and renumber them from 0. drop=True discards the old
# index directly instead of materialising it as an 'index' column that then
# has to be deleted again.
dogs_df = aac_new_df[aac_new_df['animal_type'] == 'Dog'].reset_index(drop=True)

In [None]:
# Preview the first rows of the dog-only frame.
dogs_df.head()

In [None]:
# Missing values per column, restricted to dogs.
dogs_df.isna().sum()

# Feature Engineering

Each row represents one outcome, i.e. a dog leaving the shelter. Adding a constant `count` column (always 1) makes it easy to total the number of observations in each group when aggregating.

In [None]:
# Constant 1 per row, so summing 'count' within a group yields the number of
# rows in that group.
dogs_df = dogs_df.assign(count=1)

The sex_upon_outcome column combines two pieces of information: the sex of the dog and whether it was spayed/neutered. This variable can be split in two to allow for more granular data analysis.

In [None]:
# Split sex_upon_outcome into two independent features.
# Entries that are missing or contain none of the expected keywords are
# labelled 'Unknown' instead of silently defaulting to 'Female' / 'Yes'.
# na=False makes missing entries evaluate to False in every condition.
sex_upon_outcome = dogs_df['sex_upon_outcome']

dogs_df['sex'] = np.select(
    [
        # Matching is case-sensitive, so 'Female' does not match 'Male'.
        sex_upon_outcome.str.contains('Female', regex=False, na=False),
        sex_upon_outcome.str.contains('Male', regex=False, na=False),
    ],
    ['Female', 'Male'],
    default='Unknown',
)
dogs_df['sterilized'] = np.select(
    [
        sex_upon_outcome.str.contains('Intact', regex=False, na=False),
        sex_upon_outcome.str.contains('Spayed|Neutered', regex=True, na=False),
    ],
    ['No', 'Yes'],
    default='Unknown',
)

The dog's age upon outcome is given in a format that is not well suited to data analysis: it is an estimate expressed in "days", "weeks", "months" or "years".

In [None]:
# Show a few raw age strings to illustrate the format.
dogs_df['age_upon_outcome'].head()

To make it more suitable for analysis, the data needs to be converted to total number of days and then can be converted to months or years as needed.

In [None]:
# Drop rows whose age is the literal string 'NULL'. .copy() makes the filtered
# frame independent of the original, so the column assignments below do not
# trigger pandas' SettingWithCopyWarning.
dogs_df = dogs_df[dogs_df['age_upon_outcome'] != 'NULL'].copy()

# Split "<number> <unit>" once and reuse the tokens for both new columns.
age_tokens = dogs_df['age_upon_outcome'].str.split(' ')
# Numeric part; a missing age becomes 0 periods.
dogs_df['periods'] = age_tokens.str[0].fillna(0).astype(int)
# Unit text ('days', 'weeks', ...); 0 marks a missing unit.
dogs_df['period_range'] = age_tokens.str[1].fillna(0)

In [None]:
# List the distinct unit tokens that the conversion to days has to handle.
dogs_df['period_range'].unique()

In [None]:
# Approximate length in days of each unit keyword. Substring matching covers
# singular and plural forms ('day' / 'days', ...). Dict order is the match
# priority: the first keyword found wins.
DAYS_PER_UNIT = {'day': 1, 'week': 7, 'month': 30, 'year': 365}

unit_text = dogs_df['period_range']
# na=False: entries that are not strings (such as the 0 used for a missing
# unit) match no keyword and receive the default multiplier 0, instead of
# depending on how np.where coerces NaN conditions.
unit_matches = [
    unit_text.str.contains(unit, regex=False, na=False)
    for unit in DAYS_PER_UNIT
]
dogs_df['period_range'] = np.select(
    unit_matches,
    list(DAYS_PER_UNIT.values()),
    default=0,
).astype(int)

# Age in days = number of periods x days per period; years use a 365-day year.
dogs_df['outcome_age_days'] = dogs_df['period_range'] * dogs_df['periods']
dogs_df['outcome_age_years'] = dogs_df['outcome_age_days'] / DAYS_PER_UNIT['year']

In [None]:
# Inspect the frame with the newly derived age columns.
dogs_df

In [None]:
# Histogram of dog ages, in years, at the time of outcome.
fig = px.histogram(
    dogs_df,
    x="outcome_age_years",
    nbins=15,
)
fig.show()

In the shelter environment, puppies, defined as dogs of approximately 1 year of age or less, are adopted much more quickly than adult dogs.

In [None]:
# Age bucket at outcome: one year or more -> 'Dog', anything below -> 'Puppy'.
# NOTE(review): the accompanying text describes puppies as "1 year of age or
# less", but a dog recorded as exactly 1 year is labelled 'Dog' here —
# confirm which boundary is intended.
is_adult = dogs_df['outcome_age_years'] >= 1
dogs_df['classification_upon_outcome'] = np.where(is_adult, 'Dog', 'Puppy')

In [None]:
# Inspect the frame including the new classification column.
dogs_df

Dog breed

In [None]:
# Row counts per raw breed label, before any normalisation.
dogs_df['breed'].value_counts()

In [None]:
# Row count per breed label, one bar per distinct value.
fig = px.histogram(dogs_df, x="breed")
fig.show()

In [None]:
# Normalise breed labels: lower-case, remove the standalone word "mix"
# together with the whitespace before it, then trim both ends.
# Without the trim, "x mix" became "x " (trailing space) and was counted
# separately from a plain "x"; the word boundaries keep labels that merely
# contain the letters "mix" inside a longer word intact.
dogs_df['breed'] = (
    dogs_df['breed']
    .str.lower()
    .str.replace(r'\s*\bmix\b', '', regex=True)
    .str.strip()
)

In [None]:
# Row counts per breed label after normalisation.
dogs_df['breed'].value_counts()

In [None]:
# Split "first/second" breed labels on '/' into separate columns; the first
# two parts are named breed1 and breed2 (any further parts keep pandas'
# default integer column labels).
breed_parts = (
    dogs_df['breed']
    .str.split('/', expand=True)
    .rename(columns={0: 'breed1', 1: 'breed2'})
)
# Guarantee breed2 exists even when no label contains a '/'.
if 'breed2' not in breed_parts.columns:
    breed_parts['breed2'] = None

# Drop columns left over from a previous run of this cell, so re-executing it
# does not append duplicate 'breed1' / 'breed2' columns.
dogs_df = pd.concat(
    [dogs_df.drop(columns=breed_parts.columns, errors='ignore'), breed_parts],
    axis=1,
)

In [None]:
# Row counts per first-listed breed.
dogs_df['breed1'].value_counts()

In [None]:
# Row counts per second-listed breed (rows without a second breed are excluded
# because value_counts skips missing values by default).
dogs_df['breed2'].value_counts()