# Basic classification model of outcome

This notebook performs a simple binary classification on the outcomes of cats using a `logistic regression`.

## Imports

In [28]:
import datetime
import pandas as pd
import numpy as np
import plotly as plt
import seaborn as sns
import plotly.express as px

from sklearn.impute import SimpleImputer

## Data loading and cleaning

In [47]:
aac_eng_df = pd.read_csv("../raw_data/aac_data/aac_shelter_cat_outcome_eng.csv")

In [48]:
# Quick cleaning of aac_eng_df
aac_eng_df.drop(columns=['count'], inplace = True)
aac_eng_df.drop(columns = ['breed2', 'color2'], inplace = True)
aac_eng_df.coat.replace('brown ', 'brown', inplace = True)
rows_to_drop = aac_eng_df[aac_eng_df.outcome_type.isna()]['outcome_type'].index
aac_eng_df.drop(index=rows_to_drop, inplace = True)
aac_eng_df = aac_eng_df.drop_duplicates(inplace=False).reset_index()

def has_name(x):
    if x != 0:
        return 1
    return x

#aac_eng_df.name = aac_eng_df.name.replace(np.nan, 0)
aac_eng_df['has_name'] = aac_eng_df.name
aac_eng_df['has_name'] = aac_eng_df['has_name'].replace(np.nan, 0)
aac_eng_df['has_name'] = aac_eng_df['has_name'].apply(has_name)

# Converting True False to 1 0
aac_eng_df.cfa_breed = aac_eng_df.cfa_breed.apply(lambda x: 0 if (x == False) else 1)

# Converting True False to 1 0
aac_eng_df.domestic_breed = aac_eng_df.domestic_breed.apply(lambda x: 0 if (x == False) else 1)

In [49]:
# Get aac_eng_binary
aac_eng_binary = aac_eng_df

def get_adopted_or_not(x):
    if x == 'Adoption':
        return 1
    return 0

aac_eng_binary.outcome_type = aac_eng_binary.outcome_type.apply(get_adopted_or_not)
aac_eng_binary.drop(columns='outcome_subtype', inplace = True)

## Data preparation for modelling

### Imputing missing values with most frequent

In [50]:
aac_eng_binary.isna().sum()

index                       0
age_upon_outcome            0
animal_id                   0
animal_type                 0
breed                       0
color                    3625
date_of_birth               0
datetime                    0
monthyear                   0
name                    12771
outcome_type                0
sex_upon_outcome            0
sex                         0
Spay/Neuter                 0
Periods                     0
Period Range                0
outcome_age_(days)          0
outcome_age_(years)         0
Cat/Kitten (outcome)        0
sex_age_outcome             0
age_group                   0
dob_year                    0
dob_month                   0
dob_monthyear               0
outcome_month               0
outcome_year                0
outcome_weekday             0
outcome_hour                0
breed1                      0
cfa_breed                   0
domestic_breed              0
coat_pattern            10261
color1                      0
coat      

In [51]:
# Remove name column for now
aac_eng_binary.drop(columns='name', inplace = True)
columns = aac_eng_binary.columns

In [52]:
imp_most_fq = SimpleImputer(missing_values = np.nan, strategy='most_frequent')

In [53]:
aac_eng_binary = pd.DataFrame(imp_most_fq.fit_transform(aac_eng_binary), columns=columns)

In [54]:
aac_eng_binary

Unnamed: 0,index,age_upon_outcome,animal_id,animal_type,breed,color,date_of_birth,datetime,monthyear,outcome_type,...,outcome_year,outcome_weekday,outcome_hour,breed1,cfa_breed,domestic_breed,coat_pattern,color1,coat,has_name
0,0,2 weeks,A684346,Cat,domestic shorthair,orange,2014-07-07 00:00:00,2014-07-22 16:04:00,2014-07-22T16:04:00,0,...,2014,Tuesday,16,domestic shorthair,0,1,tabby,orange,orange,0
1,1,1 month,A685067,Cat,domestic shorthair,blue /white,2014-06-16 00:00:00,2014-08-14 18:45:00,2014-08-14T18:45:00,1,...,2014,Thursday,18,domestic shorthair,0,1,tabby,blue,blue,1
2,2,3 months,A678580,Cat,domestic shorthair,white/black,2014-03-26 00:00:00,2014-06-29 17:45:00,2014-06-29T17:45:00,1,...,2014,Sunday,17,domestic shorthair,0,1,tabby,white,white,1
3,3,1 year,A675405,Cat,domestic mediumhair,black/white,2013-03-27 00:00:00,2014-03-28 14:55:00,2014-03-28T14:55:00,0,...,2014,Friday,14,domestic mediumhair,0,1,tabby,black,black,1
4,4,3 weeks,A670420,Cat,domestic shorthair,black/white,2013-12-16 00:00:00,2014-01-09 19:29:00,2014-01-09T19:29:00,0,...,2014,Thursday,19,domestic shorthair,0,1,tabby,black,black,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29406,29416,2 months,A758112,Cat,american shorthair,blue /white,2017-09-14 00:00:00,2017-12-10 17:44:00,2017-12-10T17:44:00,1,...,2017,Sunday,17,american shorthair,1,0,tabby,blue,blue,1
29407,29417,1 month,A758569,Cat,domestic shorthair,brown /white,2017-08-03 00:00:00,2017-09-24 11:57:00,2017-09-24T11:57:00,1,...,2017,Sunday,11,domestic shorthair,0,1,tabby,brown,brown,1
29408,29418,1 year,A765938,Cat,domestic shorthair,brown,2017-01-30 00:00:00,2018-02-01 17:51:00,2018-02-01T17:51:00,0,...,2018,Thursday,17,domestic shorthair,0,1,tabby,brown,brown,0
29409,29419,6 months,A765832,Cat,domestic shorthair,brown,2017-07-28 00:00:00,2018-02-01 16:37:00,2018-02-01T16:37:00,1,...,2018,Thursday,16,domestic shorthair,0,1,tortie,Breed Specific,tortie,1


In [55]:
aac_eng_binary.isna().sum()

index                   0
age_upon_outcome        0
animal_id               0
animal_type             0
breed                   0
color                   0
date_of_birth           0
datetime                0
monthyear               0
outcome_type            0
sex_upon_outcome        0
sex                     0
Spay/Neuter             0
Periods                 0
Period Range            0
outcome_age_(days)      0
outcome_age_(years)     0
Cat/Kitten (outcome)    0
sex_age_outcome         0
age_group               0
dob_year                0
dob_month               0
dob_monthyear           0
outcome_month           0
outcome_year            0
outcome_weekday         0
outcome_hour            0
breed1                  0
cfa_breed               0
domestic_breed          0
coat_pattern            0
color1                  0
coat                    0
has_name                0
dtype: int64

### Dropping useless columns

In [56]:
aac_eng_binary.drop(columns=['index', 'animal_id', 'animal_type'], inplace = True)

### Handle datetime columns

In [57]:
aac_eng_binary.dtypes

age_upon_outcome        object
breed                   object
color                   object
date_of_birth           object
datetime                object
monthyear               object
outcome_type            object
sex_upon_outcome        object
sex                     object
Spay/Neuter             object
Periods                 object
Period Range            object
outcome_age_(days)      object
outcome_age_(years)     object
Cat/Kitten (outcome)    object
sex_age_outcome         object
age_group               object
dob_year                object
dob_month               object
dob_monthyear           object
outcome_month           object
outcome_year            object
outcome_weekday         object
outcome_hour            object
breed1                  object
cfa_breed               object
domestic_breed          object
coat_pattern            object
color1                  object
coat                    object
has_name                object
dtype: object