# Austin Animal Center Shelter - Plotly Summer Challenge

## Imports

In [81]:
import pandas as pd
import numpy as np
from AAC_challenge import data
from AAC_challenge import plots

import plotly as plt
import seaborn as sns
import plotly.express as px

import warnings
# Suppresses every warning for the rest of the session: no category or module
# filter is passed, so nothing is exempt.
warnings.filterwarnings('ignore')

# autoreload mode 2 re-imports modules before each cell runs, so edits to the
# imported project helpers are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Data loading and basic cleaning

In [89]:
# Outcomes frame that the plotting cells below read from. The 'cats' argument
# presumably selects the species subset — confirm in AAC_challenge.data.
aac_eng_df = data.get_clean_cat_dataset('cats')

In [91]:
# Bare expression so the notebook renders the frame (the recorded output is
# truncated to its first and last rows).
aac_eng_df

Unnamed: 0,animal_id,breed,color,date_of_birth,outcome_datetime,outcome_type,sex,sterilized,periods,period_range,...,cfa_breed,domestic_breed,coat_pattern,main_color,coat,has_name,top_breeds,top_coats,adopted_or_not,outcome_year_month
0,A684346,domestic shorthair,orange,2014-07-07,2014-07-22,Transfer,0,0,2,7,...,0,1,tabby,orange,orange,0,domestic shorthair,orange,0,2014-07-01
1,A685067,domestic shorthair,blue /white,2014-06-16,2014-08-14,Adoption,1,0,1,30,...,0,1,tabby,blue,blue,1,domestic shorthair,blue,1,2014-08-01
2,A678580,domestic shorthair,white/black,2014-03-26,2014-06-29,Adoption,1,1,3,30,...,0,1,tabby,white,white,1,domestic shorthair,white,1,2014-06-01
3,A675405,domestic mediumhair,black/white,2013-03-27,2014-03-28,Return to Owner,1,1,1,365,...,0,1,tabby,black,black,1,domestic mediumhair,black,0,2014-03-01
4,A670420,domestic shorthair,black/white,2013-12-16,2014-01-09,Transfer,0,0,3,7,...,0,1,tabby,black,black,0,domestic shorthair,black,0,2014-01-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29406,A758112,american shorthair,blue /white,2017-09-14,2017-12-10,Adoption,1,0,2,30,...,1,0,tabby,blue,blue,1,american shorthair,blue,1,2017-12-01
29407,A758569,domestic shorthair,brown /white,2017-08-03,2017-09-24,Adoption,0,0,1,30,...,0,1,tabby,brown,brown,1,domestic shorthair,brown,1,2017-09-01
29408,A765938,domestic shorthair,brown,2017-01-30,2018-02-01,Transfer,0,1,1,365,...,0,1,tabby,brown,brown,0,domestic shorthair,brown,0,2018-02-01
29409,A765832,domestic shorthair,brown,2017-07-28,2018-02-01,Adoption,1,1,6,30,...,0,1,tortie,Breed Specific,tortie,1,domestic shorthair,tortie,1,2018-02-01


## Get and clean intake data

In [92]:
# Read the raw intake records and clean them in a single expression, then
# preview the first rows of the cleaned frame.
intakes_df = data.clean_intake_dataset(data.load_csv('intakes'), 'cats')
intakes_df.head()

Unnamed: 0,animal_id,intake_datetime,intake_month_year,found_location,intake_type,intake_condition,age_upon_intake,has_name,sex_intake,sterilized_intake
3,A665644,10/21/2013 07:59:00 AM,October 2013,Austin (TX),Stray,Sick,4 weeks,0,1,0
7,A844350,10/15/2021 11:40:00 AM,October 2021,2112 East William Cannon Drive in Austin (TX),Stray,Normal,6 months,1,1,0
9,A818975,06/18/2020 02:53:00 PM,June 2020,Braker Lane And Metric in Travis (TX),Stray,Normal,4 weeks,0,0,0
10,A774147,06/11/2018 07:45:00 AM,June 2018,6600 Elm Creek in Austin (TX),Stray,Injured,4 weeks,0,1,0
11,A731435,08/08/2016 05:52:00 PM,August 2016,Austin (TX),Owner Surrender,Normal,5 months,1,0,1


## Merge outcomes to intakes on animal_id

In [115]:
# Left join: every outcome row is kept and gains the intake columns that share
# its animal_id. The trailing expression displays (rows, columns).
merged_df = pd.merge(
    left=aac_eng_df,
    right=intakes_df,
    how='left',
    on='animal_id',
)
merged_df.shape

(32373, 35)

In [116]:
# Rebinds merged_df to the result of the project helper, replacing the frame
# produced by the manual merge in the previous cell.
merged_df = data.merge_intakes_outcomes()
# Bare expression so the notebook renders the merged frame.
merged_df

Unnamed: 0,animal_id,breed,color,date_of_birth,outcome_datetime,outcome_type,sex,sterilized_outcome,periods,period_range,...,top_coats,adopted_or_not,outcome_year_month,intake_datetime,intake_month_year,found_location,intake_type,intake_condition,intake_age_days,sterilized_intake
0,A684346,domestic shorthair,orange,2014-07-07,2014-07-22,Transfer,0,0,2,7,...,orange,0,2014-07-01,07/22/2014 12:00:00 PM,July 2014,45065 Felicity Ln in Austin (TX),Stray,Nursing,14.0,0.0
1,A685067,domestic shorthair,blue /white,2014-06-16,2014-08-14,Adoption,1,0,1,30,...,blue,1,2014-08-01,08/01/2014 04:52:00 PM,August 2014,1901 Crossing Place in Austin (TX),Stray,Normal,30.0,0.0
2,A678580,domestic shorthair,white/black,2014-03-26,2014-06-29,Adoption,1,1,3,30,...,white,1,2014-06-01,05/10/2014 01:54:00 PM,May 2014,7707 S Ih 35 in Austin (TX),Stray,Normal,30.0,0.0
3,A675405,domestic mediumhair,black/white,2013-03-27,2014-03-28,Return to Owner,1,1,1,365,...,black,0,2014-03-01,03/27/2014 11:06:00 AM,March 2014,Ohlen And Research in Austin (TX),Stray,Normal,365.0,1.0
4,A670420,domestic shorthair,black/white,2013-12-16,2014-01-09,Transfer,0,0,3,7,...,black,0,2014-01-01,01/07/2014 06:27:00 PM,January 2014,4400 S Ih 35 in Austin (TX),Stray,Normal,21.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32368,A758112,american shorthair,blue /white,2017-09-14,2017-12-10,Adoption,1,0,2,30,...,blue,1,2017-12-01,09/10/2017 04:21:00 PM,September 2017,5600 Abilene Trail in Austin (TX),Stray,Normal,0.0,0.0
32369,A758569,domestic shorthair,brown /white,2017-08-03,2017-09-24,Adoption,0,0,1,30,...,brown,1,2017-09-01,09/18/2017 05:13:00 PM,September 2017,727 West Stassney Lane in Austin (TX),Stray,Normal,30.0,0.0
32370,A765938,domestic shorthair,brown,2017-01-30,2018-02-01,Transfer,0,1,1,365,...,brown,0,2018-02-01,01/30/2018 12:28:00 PM,January 2018,3462 Willowrun in Austin (TX),Stray,Sick,365.0,0.0
32371,A765832,domestic shorthair,brown,2017-07-28,2018-02-01,Adoption,1,1,6,30,...,tortie,1,2018-02-01,01/28/2018 11:52:00 AM,January 2018,Burton Drive And Oltorf Lane in Austin (TX),Stray,Normal,180.0,0.0


In [117]:
# Show the column labels of the merged frame.
merged_df.columns

Index(['animal_id', 'breed', 'color', 'date_of_birth', 'outcome_datetime',
       'outcome_type', 'sex', 'sterilized_outcome', 'periods', 'period_range',
       'outcome_age_(days)', 'outcome_age_(years)', 'kitten_outcome',
       'outcome_month', 'outcome_year', 'outcome_weekday', 'cfa_breed',
       'domestic_breed', 'coat_pattern', 'main_color', 'coat', 'has_name',
       'top_breeds', 'top_coats', 'adopted_or_not', 'outcome_year_month',
       'intake_datetime', 'intake_month_year', 'found_location', 'intake_type',
       'intake_condition', 'intake_age_days', 'sterilized_intake'],
      dtype='object')

## Basic data visualization

### `coat` and `sex`


The four most common coats are `black`, `brown`, `blue`, and `orange`, and each is fairly evenly split between male and female cats.

Cats with `tortie` and `calico` coats are exclusively female.

In [49]:
# Per-coat row counts (non-null `sex` entries per coat), most frequent first.
coats_sorted = (
    aac_eng_df
    .groupby('coat', as_index=False)
    .count()[['coat', 'sex']]
    .sort_values('sex', ascending=False)
)
# Coats that appear on at least 100 rows.
top_coats = coats_sorted.loc[coats_sorted['sex'] >= 100, 'coat'].tolist()

In [53]:
# All outcomes: distribution of coats, split by sex. Plots the `coat` column
# (the original plotted `top_breeds`) so the figure matches this section's
# narrative and is directly comparable with the adoptions-only coat histogram.
fig = px.histogram(aac_eng_df, x="coat", color='sex', title="All outcomes")
fig.show()

In [52]:
# Adoptions-only subset, derived here so the cell runs on a fresh kernel: the
# name was previously used without being defined anywhere in the notebook.
# adopted_or_not is 1 on rows whose outcome_type is Adoption.
aac_eng_adopted = aac_eng_df[aac_eng_df['adopted_or_not'] == 1]

# Adoptions only: distribution of coats, split by sex.
fig = px.histogram(aac_eng_adopted, x="coat", color='sex', title="Adoptions only")
fig.show()

In [11]:
# Coat pattern counts, coloured by the adopted_or_not flag.
fig = px.histogram(
    aac_eng_df,
    x="coat_pattern",
    color='adopted_or_not',
    title="Adopted or not",
)
fig.show()

In [12]:
# cfa_breed flag counts, coloured by the adopted_or_not flag.
fig = px.histogram(
    aac_eng_df,
    x="cfa_breed",
    color='adopted_or_not',
    title="Adopted or not",
)
fig.show()

In [13]:
# domestic_breed flag counts, coloured by the adopted_or_not flag.
fig = px.histogram(
    aac_eng_df,
    x="domestic_breed",
    color='adopted_or_not',
    title="adopted_or_not",
)
fig.show()

### `age`

Most outcomes involve cats less than `6 months` old, and the number of outcomes decreases exponentially with age. Across all ages, outcomes are fairly evenly distributed between male and female.

In [14]:
# Age histogram from the project plotting helper. adoptions_only is not passed,
# so this presumably covers all outcomes — confirm the helper's default.
plots.get_age_histogram(aac_eng_df)

In [15]:
# Same helper as the previous cell, this time with adoptions_only=True.
plots.get_age_histogram(aac_eng_df, adoptions_only=True)

### `sterilized` 

Outcomes are primarily transfers and adoptions. For adoptions, 90% of the cats are spayed/neutered.

In [16]:
# Sterilization histogram from the project plotting helper; see
# AAC_challenge.plots for the exact grouping it uses.
plots.get_sterilized_histogram(aac_eng_df)

### `outcome_weekday`

In [17]:
# Outcome-weekday histogram from the project plotting helper.
plots.get_weekday_histogram(aac_eng_df)

**Overall, there are more adoptions on Saturdays (and Sundays) and fewer adoptions on Thursdays.**

### `breed`

In [18]:
# Top-breeds pie chart from the project plotting helper, with adoptions_only=True.
# NOTE(review): the recorded output for this cell contains pandas'
# "value is trying to be set on a copy of a slice" warning text, which would
# originate inside the helper — presumably it assigns into a filtered frame
# without .copy(); confirm in AAC_challenge.plots.
plots.get_top_breeds_pie(aac_eng_df, adoptions_only=True)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



### `outcome_datetime`

Adoptions periodically peak in `July` and are the lowest in `March/April`.

In [19]:
# Outcome time series from the project plotting helper.
plots.get_outcome_timeseries(aac_eng_df)

### `has_name`

**Most cats adopted have names!!**

In [20]:
# has_name histogram from the project plotting helper.
plots.get_has_name_histogram(aac_eng_df)

## EDA summary

* There are as many males as there are females;
* Adoptions represent around 43% of all outcomes;
* The great majority of adopted cats are spayed/neutered and have a name;
* Most cats (and therefore most adopted cats) have black, brown, or blue coats;
* The great majority of cats (and therefore of adopted cats) are 0 to 6 months old;
* There are more adoptions on Saturdays (and Sundays) and fewer on Thursdays;
* The most represented breeds of cats are domestic short hair, medium hair, long hair, and siamese;
* Adoptions peak in June/July and are the lowest in March/April;

**Next step -> transfer cleaning and plotting code to VS code** OK

**Next step -> suggest actions to increase chances of adoption**

**Next step -> predict outcome 1 if adopted else 0**