  ![ImdbIcon](images/pets.jpg)
# Petfinder: Predicting Adoption Speed 

In [41]:
import petpy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.feature_extraction import text

from nltk.sentiment.vader import SentimentIntensityAnalyzer

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import scipy.stats as stats
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

In [2]:
pd.set_option('display.max_columns', 500)

In [3]:
# Using petpy wrapper with petfinder API keys
#pf = petpy.Petfinder(key='mVBo96VLQBNZjBQJrO37FI52Rh32Hzl7x01Ie9VOGsuTBHdlga', secret='voFRNPPlmWoCIz8BtGaiLFlJgD58j6NaOu3kVqYp')

In [4]:
# Requesting my data from 2019 through 2020
#status3 = pf.animals(animal_type='dog', status='adopted', results_per_page=50, pages=100, location='CA', before_date = '2019-01-10 18:29:10', after_date='2018-12-31', return_df=True)

In [5]:
#status2.drop(columns=['primary_photo_cropped'], inplace=True)

In [6]:
# Had to make multiple requests, so concatenating them
#dogs1 = pd.concat((status2, status3))

In [7]:
#dogs.sort_values(by=['published_at'], inplace=True)

---------

## Imports and Data Cleaning

id: Unique pet ID \
organization_id: ID of shelter \
url: unique petfinder post url \
age: puppy(0-6 months), young(6-24 months), Adult(2-8 years), Senior(>8 years) \
gender: male(), female() \
size: small(0-25lbs), medium(26-60lbs), large(61-100lbs), extra large(>101lbs) \
coat: short, medium, long, wire, curly, hairless \
tags: key word descriptors for animal \
name \
description: unique description of dog, sometimes contains background, health, etc. \
photos: \
videos: has video (n:0, y:1) \
breeds.primary: primary dog breed \ 
breeds.secondary: secondary breed if applicable \
breeds.mixed: if a dog is purebred
colors.primary: black, white/cream, brown/chocolate, bicolor, apricot/beige, tricolor(brown, black, white), yellow/tan/blond/fawn, gray/blue/silver, brindle, red/chestnut/orange, golden, sable, merle(blue), merle(red), harlequin \
colors.secondary: coat color, dog is seen to have secondary (usually less dominant) color \
colors.tertiary: coat color, if dog is seen to have third (usually least dominant) color \
fixed: spayed/neutered n:0, y:1 \
house_trained: n:0, y:1 \
special_needs: n:0, y:1 \
shots_current: n:0, y:1 \
good_with_kids: n:0, y:1 \
good_with_dogs: n:0, y:1 \
good_with_cats: n:0, y:1 \
primary_photo_cropped.small:
primary_photo_cropped.medium:
primary_photo_cropped.large:
primary_photo_cropped.full: \
city: \
days_on_petfinder: 


In [8]:
dogs = pd.read_csv('./petfinder_data/dogsca.csv')

In [9]:
dogs.head()

Unnamed: 0,id,organization_id,url,age,gender,size,coat,tags,name,description,photos,videos,status,status_changed_at,published_at,breeds.primary,breeds.secondary,breeds.mixed,breeds.unknown,colors.primary,colors.secondary,colors.tertiary,attributes.spayed_neutered,attributes.house_trained,attributes.special_needs,attributes.shots_current,environment.children,environment.dogs,environment.cats,primary_photo_cropped.small,primary_photo_cropped.medium,primary_photo_cropped.large,primary_photo_cropped.full,contact.address.city,contact.address.state,contact.address.postcode,animal_id,organization_id.1
0,46980182,CA2604,https://www.petfinder.com/dog/candy-46980182/c...,Baby,Female,Small,Short,['Active playful sweet'],Candy,"Sweet and playful, loves to cuddle. Fixed utd ...",[{'small': 'https://dl5zpyw5k3jeb.cloudfront.n...,[],adopted,2020-01-20T16:29:25+0000,2019-12-31T17:41:05+0000,Chihuahua,Miniature Pinscher,True,False,Black,,,True,False,False,True,True,True,,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,Clovis,CA,93612,46980182,ca2604
1,46980481,CA2604,https://www.petfinder.com/dog/scott-46980481/c...,Baby,Male,Small,Short,['Loving playful'],Scott,Playful loving pup. \nLoves to snuggle \nFixed...,[{'small': 'https://dl5zpyw5k3jeb.cloudfront.n...,[],adopted,2020-04-01T05:09:17+0000,2019-12-31T17:53:52+0000,Chihuahua,Miniature Pinscher,True,False,Black,,,True,False,False,True,True,True,,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,Clovis,CA,93612,46980481,ca2604
2,46980586,CA2604,https://www.petfinder.com/dog/holly-46980586/c...,Baby,Female,Small,Short,['Loving playful active'],Holly,"Fun loving, sweet, loves to cuddle. \nFixed, ...",[{'small': 'https://dl5zpyw5k3jeb.cloudfront.n...,[],adopted,2020-01-20T16:28:46+0000,2019-12-31T18:10:09+0000,Chihuahua,Miniature Pinscher,True,False,Black,,,True,False,False,True,True,True,,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,Clovis,CA,93612,46980586,ca2604
3,47000121,CA487,https://www.petfinder.com/dog/chevy-chase-4700...,Adult,Male,Medium,Short,[],Chevy Chase!,Chevy will be at Petsmart in Tracy from 12-3 o...,[{'small': 'https://dl5zpyw5k3jeb.cloudfront.n...,"[{'embed': '<iframe title=""Video"" src=""https:/...",adopted,2020-01-22T22:12:34+0000,2020-01-01T03:53:13+0000,Pit Bull Terrier,,True,False,Gray / Blue / Silver,White / Cream,,True,True,False,True,True,True,,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,Tracy,CA,95304,47000121,ca487
4,47002335,CA2678,https://www.petfinder.com/dog/mocha-47002335/c...,Baby,Female,Medium,Short,"['Playfull', 'active', 'loving']",Mocha,Mocha is very much a puppy! We are very happy...,[{'small': 'https://dl5zpyw5k3jeb.cloudfront.n...,[],adopted,2020-03-27T19:05:06+0000,2020-01-01T18:04:17+0000,Boxer,Shepherd,True,False,"Tricolor (Brown, Black, & White)",,,True,False,False,True,,True,True,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,modesto,CA,95358,47002335,ca2678


In [10]:
#dogs.drop(columns=['type', 'species', 'organization_animal_id', 'distance', 'attributes.declawed', 'contact.email', 'contact.phone', 'contact.address.address1',
                 # 'contact.address.address2', 'contact.address.country', 'animal_type', 'animal_id'], inplace=True)

In [11]:
#dogs.to_csv('./petfinder_data/dogsca.csv', index = False)

In [12]:
dogs['colors.tertiary'].value_counts()

White / Cream                       91
Yellow / Tan / Blond / Fawn         61
Brown / Chocolate                   36
Red / Chestnut / Orange              8
Tricolor (Brown, Black, & White)     6
Gray / Blue / Silver                 4
Golden                               4
Black                                3
Merle (Blue)                         2
Brindle                              1
Sable                                1
Harlequin                            1
Name: colors.tertiary, dtype: int64

In [13]:
dogs['published_at']= pd.to_datetime(dogs['published_at'])
dogs['status_changed_at']= pd.to_datetime(dogs['status_changed_at'])

In [14]:
dogs.status.value_counts()

adopted    4538
Name: status, dtype: int64

In [42]:
#dogs.isnull().sum()

In [16]:
dogs.rename(columns={"attributes.spayed_neutered": "fixed", "attributes.house_trained": "house_trained", 'attributes.special_needs': 'special_needs', 'attributes.shots_current': 'shots_current',
                  'environment.children': 'good_with_kids', 'environment.dogs': 'good_with_dogs', 'environment.cats': 'good_with_cats', 'contact.address.city': 'city', 'contact.address.postcode': 'zipcode' }, inplace=True)
dogs.head(1)

Unnamed: 0,id,organization_id,url,age,gender,size,coat,tags,name,description,photos,videos,status,status_changed_at,published_at,breeds.primary,breeds.secondary,breeds.mixed,breeds.unknown,colors.primary,colors.secondary,colors.tertiary,fixed,house_trained,special_needs,shots_current,good_with_kids,good_with_dogs,good_with_cats,primary_photo_cropped.small,primary_photo_cropped.medium,primary_photo_cropped.large,primary_photo_cropped.full,city,contact.address.state,zipcode,animal_id,organization_id.1
0,46980182,CA2604,https://www.petfinder.com/dog/candy-46980182/c...,Baby,Female,Small,Short,['Active playful sweet'],Candy,"Sweet and playful, loves to cuddle. Fixed utd ...",[{'small': 'https://dl5zpyw5k3jeb.cloudfront.n...,[],adopted,2020-01-20 16:29:25+00:00,2019-12-31 17:41:05+00:00,Chihuahua,Miniature Pinscher,True,False,Black,,,True,False,False,True,True,True,,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,Clovis,CA,93612,46980182,ca2604


In [17]:
dogs['city']=dogs['city'].replace('modesto','Modesto')

In [18]:
dogs.age.value_counts()

Adult     1761
Baby      1515
Young      994
Senior     268
Name: age, dtype: int64

In [19]:
#dogs.drop(columns=['animal_id', 'organization_id.1', 'status', 'status_changed_at', 'published_at', 'breeds.unknown', 'contact.address.state'], inplace=True)

## Feature Engineering

In [20]:
# Converting videos column to 1 if post has a video, 0 if not
dogs['videos'] = np.where(dogs['videos']!= '[]', 1, 0)

In [21]:
# Adding a column, 1 if post has photos, 0 if not
dogs['has_photo'] = np.where(dogs['photos']!= '[]', 1, 0)

In [22]:
# Dummifying categorical columns
pd.get_dummies(dogs, columns=['age', 'gender', 'size', 'coat', 'city'])

Unnamed: 0,id,organization_id,url,tags,name,description,photos,videos,status,status_changed_at,published_at,breeds.primary,breeds.secondary,breeds.mixed,breeds.unknown,colors.primary,colors.secondary,colors.tertiary,fixed,house_trained,special_needs,shots_current,good_with_kids,good_with_dogs,good_with_cats,primary_photo_cropped.small,primary_photo_cropped.medium,primary_photo_cropped.large,primary_photo_cropped.full,contact.address.state,zipcode,animal_id,organization_id.1,has_photo,age_Adult,age_Baby,age_Senior,age_Young,gender_Female,gender_Male,size_Extra Large,size_Large,size_Medium,size_Small,coat_Curly,coat_Hairless,coat_Long,coat_Medium,coat_Short,coat_Wire,city_Atwater,city_Cantua Creek,city_Chowchilla,city_Clovis,city_Escalon,city_Fowler,city_Fresno,city_Gustine,city_Hollister,city_Jackson,city_Jamestown,city_Kingsburg,city_LINDSAY,city_Lemoore,city_Los Banos,city_Madera,city_Manteca,city_Mariposa,city_Merced,city_Modesto,city_Modesto.1,city_Oakdale,city_Oakhurst,city_Paicines,city_Parlier,city_Patterson,city_San Andreas,city_Sonora,city_Stockton,city_Stratford,city_Tracy,city_Tres Pinos,city_Tulare,city_Turlock,city_Visalia,city_Waterford
0,46980182,CA2604,https://www.petfinder.com/dog/candy-46980182/c...,['Active playful sweet'],Candy,"Sweet and playful, loves to cuddle. Fixed utd ...",[{'small': 'https://dl5zpyw5k3jeb.cloudfront.n...,0,adopted,2020-01-20 16:29:25+00:00,2019-12-31 17:41:05+00:00,Chihuahua,Miniature Pinscher,True,False,Black,,,True,False,False,True,True,True,,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,CA,93612,46980182,ca2604,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,46980481,CA2604,https://www.petfinder.com/dog/scott-46980481/c...,['Loving playful'],Scott,Playful loving pup. \nLoves to snuggle \nFixed...,[{'small': 'https://dl5zpyw5k3jeb.cloudfront.n...,0,adopted,2020-04-01 05:09:17+00:00,2019-12-31 17:53:52+00:00,Chihuahua,Miniature Pinscher,True,False,Black,,,True,False,False,True,True,True,,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,CA,93612,46980481,ca2604,1,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,46980586,CA2604,https://www.petfinder.com/dog/holly-46980586/c...,['Loving playful active'],Holly,"Fun loving, sweet, loves to cuddle. \nFixed, ...",[{'small': 'https://dl5zpyw5k3jeb.cloudfront.n...,0,adopted,2020-01-20 16:28:46+00:00,2019-12-31 18:10:09+00:00,Chihuahua,Miniature Pinscher,True,False,Black,,,True,False,False,True,True,True,,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,CA,93612,46980586,ca2604,1,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,47000121,CA487,https://www.petfinder.com/dog/chevy-chase-4700...,[],Chevy Chase!,Chevy will be at Petsmart in Tracy from 12-3 o...,[{'small': 'https://dl5zpyw5k3jeb.cloudfront.n...,1,adopted,2020-01-22 22:12:34+00:00,2020-01-01 03:53:13+00:00,Pit Bull Terrier,,True,False,Gray / Blue / Silver,White / Cream,,True,True,False,True,True,True,,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,CA,95304,47000121,ca487,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
4,47002335,CA2678,https://www.petfinder.com/dog/mocha-47002335/c...,"['Playfull', 'active', 'loving']",Mocha,Mocha is very much a puppy! We are very happy...,[{'small': 'https://dl5zpyw5k3jeb.cloudfront.n...,0,adopted,2020-03-27 19:05:06+00:00,2020-01-01 18:04:17+00:00,Boxer,Shepherd,True,False,"Tricolor (Brown, Black, & White)",,,True,False,False,True,,True,True,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,CA,95358,47002335,ca2678,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4533,43699493,CA249,https://www.petfinder.com/dog/lightning-436994...,[],LIGHTNING,"New FOAC girl, just coming in. Currently in f...",[{'small': 'https://dl5zpyw5k3jeb.cloudfront.n...,0,adopted,2019-01-12 00:52:40+00:00,2019-01-05 19:03:26+00:00,Bluetick Coonhound,Border Collie,True,False,"Tricolor (Brown, Black, & White)",,,False,False,False,True,,,,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,CA,95370,43699493,ca249,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
4534,43699482,CA249,https://www.petfinder.com/dog/bunny-43699482/c...,[],BUNNY,"Bunny is a new FOAC girl, just 4 1/2 months o...",[{'small': 'https://dl5zpyw5k3jeb.cloudfront.n...,0,adopted,2019-01-14 14:41:47+00:00,2019-01-05 19:01:00+00:00,Bluetick Coonhound,Border Collie,True,False,White / Cream,,,False,False,False,True,,,,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,CA,95370,43699482,ca249,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
4535,43699181,CA569,https://www.petfinder.com/dog/kandi-in-ohio-43...,"['loving', 'active', 'playful']",Kandi in OHIO,"Hi everyone, meet Kandi! Kandi is looking for ...",[{'small': 'https://dl5zpyw5k3jeb.cloudfront.n...,0,adopted,2019-01-31 16:45:48+00:00,2019-01-05 17:30:37+00:00,Toy Fox Terrier,,False,False,Bicolor,,,True,True,False,True,True,True,,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,CA,93274,43699181,ca569,1,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
4536,43694712,CA156,https://www.petfinder.com/dog/toby-43694712/ca...,[],TOBY,Look at this boy he is simply an adorable guy ...,[{'small': 'https://dl5zpyw5k3jeb.cloudfront.n...,0,adopted,2019-03-07 03:33:34+00:00,2019-01-05 05:52:19+00:00,Chihuahua,,False,False,Apricot / Beige,,,True,False,False,True,True,True,,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,CA,93726,43694712,ca156,1,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [23]:
dogs['good_with_kids'] = dogs['good_with_kids'].fillna(2)
dogs['good_with_dogs'] = dogs['good_with_dogs'].fillna(2)
dogs['good_with_cats'] = dogs['good_with_cats'].fillna(2)

dogs['fixed'] = (dogs['fixed'] * 1).astype(int)
dogs['house_trained'] = (dogs['house_trained'] * 1).astype(int)
dogs['shots_current'] = (dogs['shots_current'] * 1).astype(int)
dogs['good_with_kids'] = (dogs['good_with_kids'] * 1).astype(int)
dogs['good_with_dogs'] = (dogs['good_with_dogs'] * 1).astype(int)
dogs['good_with_cats'] = (dogs['good_with_cats'] * 1).astype(int)
dogs['special_needs'] = (dogs['special_needs'] * 1).astype(int)

In [24]:
def dummy(df, label):
    cols = [col for col in df if label in col]
    dummy_dict = {}
    main_df = pd.get_dummies(df[cols[0]],prefix = label)
    for col in cols[1:]:
        dummy_dict[col] = pd.get_dummies(df[col],prefix = label)
    for col in dummy_dict:
        main_df = main_df.add(dummy_dict[col], fill_value = 0).gt(0)*1
    return main_df

In [25]:
# Running function and merging dummified df back to original df

dummy_color_dog = dummy(dogs, 'color') 
dogs = pd.merge(dogs, dummy_color_dog, left_index=True, right_index=True)

In [26]:
dogs.drop(columns=['breeds.unknown'], inplace=True) # All breeds are known or guessed at so dropping this column
dummy_color_dog = dummy(dogs, 'breeds') 
dogs = pd.merge(dogs, dummy_color_dog, left_index=True, right_index=True)

In [27]:
dogs['days_on_petfinder'] = dogs['status_changed_at'] - dogs['published_at']
dogs['days_on_petfinder'] = dogs['days_on_petfinder'] / np.timedelta64(1, 'D')
dogs['days_on_petfinder'] = dogs['days_on_petfinder'].round()

In [28]:
dogs.drop(columns=['animal_id', 'organization_id.1', 'status', 'status_changed_at', 'published_at', 'contact.address.state', 'tags', 'status', 
                   'colors.primary', 'colors.secondary', 'colors.tertiary',
                  'breeds.primary', 'breeds.secondary', 'breeds.mixed', 'age', 'gender', 'size', 'coat', 'breeds_True', 'city' ], inplace=True)

In [29]:
dogs.drop(columns=['url', 'photos', 'name', 'photos', 'primary_photo_cropped.small', 'primary_photo_cropped.medium', 'primary_photo_cropped.large', 'primary_photo_cropped.full', 
               'zipcode'], inplace=True)

In [35]:
dogs.loc[dogs['days_on_petfinder'] <= 10, 'days_on_petfinder'] = 1

In [36]:
dogs.loc[dogs['days_on_petfinder'] > 10, 'days_on_petfinder'] = 0

In [37]:
dogs.days_on_petfinder.value_counts()

0.0    2312
1.0    2226
Name: days_on_petfinder, dtype: int64

In [40]:
dogs.to_csv('./petfinder_data/cleandogs.csv', index = False)

In [43]:
dogs.head()

Unnamed: 0,id,organization_id,description,videos,fixed,house_trained,special_needs,shots_current,good_with_kids,good_with_dogs,good_with_cats,has_photo,color_Apricot / Beige,color_Bicolor,color_Black,color_Brindle,color_Brown / Chocolate,color_Golden,color_Gray / Blue / Silver,color_Harlequin,color_Merle (Blue),color_Merle (Red),color_Red / Chestnut / Orange,color_Sable,"color_Tricolor (Brown, Black, & White)",color_White / Cream,color_Yellow / Tan / Blond / Fawn,breeds_Affenpinscher,breeds_Airedale Terrier,breeds_Akbash,breeds_Akita,breeds_Alaskan Malamute,breeds_American Bulldog,breeds_American Bully,breeds_American Eskimo Dog,breeds_American Foxhound,breeds_American Hairless Terrier,breeds_American Staffordshire Terrier,breeds_Anatolian Shepherd,breeds_Australian Cattle Dog / Blue Heeler,breeds_Australian Kelpie,breeds_Australian Shepherd,breeds_Australian Terrier,breeds_Basenji,breeds_Basset Hound,breeds_Beagle,breeds_Bearded Collie,breeds_Belgian Shepherd / Malinois,breeds_Belgian Shepherd / Sheepdog,breeds_Belgian Shepherd / Tervuren,breeds_Bernese Mountain Dog,breeds_Bichon Frise,breeds_Black Labrador Retriever,breeds_Bloodhound,breeds_Bluetick Coonhound,breeds_Border Collie,breeds_Border Terrier,breeds_Boston Terrier,breeds_Boxer,breeds_Brittany Spaniel,breeds_Brussels Griffon,breeds_Bull Terrier,breeds_Bullmastiff,breeds_Cairn Terrier,breeds_Canaan Dog,breeds_Cane Corso,breeds_Cardigan Welsh Corgi,breeds_Catahoula Leopard Dog,breeds_Cattle Dog,breeds_Cavalier King Charles Spaniel,breeds_Chihuahua,breeds_Chinese Crested Dog,breeds_Chiweenie,breeds_Chocolate Labrador Retriever,breeds_Chow Chow,breeds_Cockapoo,breeds_Cocker Spaniel,breeds_Collie,breeds_Coonhound,breeds_Corgi,breeds_Dachshund,breeds_Dalmatian,breeds_Doberman Pinscher,breeds_Dogo Argentino,breeds_Dogue de Bordeaux,breeds_Dutch Shepherd,breeds_English Bulldog,breeds_English Cocker Spaniel,breeds_English Foxhound,breeds_English Springer Spaniel,breeds_English Toy Spaniel,breeds_Eskimo Dog,breeds_False,breeds_Finnish Spitz,breeds_Fox Terrier,breeds_French Bulldog,breeds_German Shepherd Dog,breeds_German Shorthaired Pointer,breeds_German Spitz,breeds_German Wirehaired Pointer,breeds_Giant Schnauzer,breeds_Golden Retriever,breeds_Great Dane,breeds_Great Pyrenees,breeds_Greyhound,breeds_Harrier,breeds_Havanese,breeds_Hound,breeds_Husky,breeds_Ibizan Hound,breeds_Irish Setter,breeds_Irish Terrier,breeds_Irish Wolfhound,breeds_Italian Greyhound,breeds_Jack Russell Terrier,breeds_Japanese Chin,breeds_Jindo,breeds_Klee Kai,breeds_Labradoodle,breeds_Labrador Retriever,breeds_Lancashire Heeler,breeds_Lhasa Apso,breeds_Maltese,breeds_Maltipoo,breeds_Manchester Terrier,breeds_Mastiff,breeds_McNab,breeds_Miniature Dachshund,breeds_Miniature Pinscher,breeds_Miniature Poodle,breeds_Miniature Schnauzer,breeds_Mixed Breed,breeds_Newfoundland Dog,breeds_Norfolk Terrier,breeds_Norwich Terrier,breeds_Nova Scotia Duck Tolling Retriever,breeds_Papillon,breeds_Parson Russell Terrier,breeds_Pekingese,breeds_Pharaoh Hound,breeds_Pit Bull Terrier,breeds_Plott Hound,breeds_Pointer,breeds_Pomeranian,breeds_Poodle,breeds_Portuguese Water Dog,breeds_Pug,breeds_Puggle,breeds_Pyrenean Shepherd,breeds_Rat Terrier,breeds_Redbone Coonhound,breeds_Retriever,breeds_Rhodesian Ridgeback,breeds_Rottweiler,breeds_Saint Bernard,breeds_Samoyed,breeds_Schipperke,breeds_Schnauzer,breeds_Schnoodle,breeds_Scottish Terrier,breeds_Shar-Pei,breeds_Shepherd,breeds_Shetland Sheepdog / Sheltie,breeds_Shiba Inu,breeds_Shih Tzu,breeds_Shih poo,breeds_Siberian Husky,breeds_Silky Terrier,breeds_Sloughi,breeds_Smooth Collie,breeds_Smooth Fox Terrier,breeds_Spaniel,breeds_Staffordshire Bull Terrier,breeds_Standard Poodle,breeds_Standard Schnauzer,breeds_Terrier,breeds_Tibetan Spaniel,breeds_Tibetan Terrier,breeds_Toy Fox Terrier,breeds_Treeing Walker Coonhound,breeds_Vizsla,breeds_Weimaraner,breeds_Welsh Terrier,breeds_West Highland White Terrier / Westie,breeds_Wheaten Terrier,breeds_Whippet,breeds_Wire Fox Terrier,breeds_Wirehaired Dachshund,breeds_Wirehaired Pointing Griffon,breeds_Wirehaired Terrier,breeds_Xoloitzcuintli / Mexican Hairless,breeds_Yellow Labrador Retriever,breeds_Yorkshire Terrier,days_on_petfinder
0,46980182,CA2604,"Sweet and playful, loves to cuddle. Fixed utd ...",0,1,0,0,1,1,1,2,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0
1,46980481,CA2604,Playful loving pup. \nLoves to snuggle \nFixed...,0,1,0,0,1,1,1,2,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0
2,46980586,CA2604,"Fun loving, sweet, loves to cuddle. \nFixed, ...",0,1,0,0,1,1,1,2,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0
3,47000121,CA487,Chevy will be at Petsmart in Tracy from 12-3 o...,1,1,1,0,1,1,1,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0
4,47002335,CA2678,Mocha is very much a puppy! We are very happy...,0,1,0,0,1,2,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0


In [55]:
# Creating list of columns to drop
drops = ['id', 'organization_id', 'description', 'days_on_petfinder']

# Dropping columns from data frame and dummifying categorical columns
X = dogs.drop(columns=drops)
y = dogs['days_on_petfinder']

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    stratify=y,
    random_state=42)

In [56]:
ss =StandardScaler()
Z_train = ss.fit_transform(X_train)
Z_test = ss.transform(X_test)

In [57]:
%%time
lr = LogisticRegression(max_iter=5000)
lr.fit(X_train, y_train)
lr_train = lr.score(X_train, y_train)
lr_test = lr.score(X_test, y_test)

CPU times: user 445 ms, sys: 7.3 ms, total: 453 ms
Wall time: 80.8 ms


In [58]:
print(f'train score: {lr_train}')
print(f'test score: {lr_test}')

train score: 0.6464105793450882
test score: 0.591042584434655


In [59]:
# Baseline
dogs['days_on_petfinder'].value_counts(normalize=True)

0.0    0.509476
1.0    0.490524
Name: days_on_petfinder, dtype: float64

In [61]:
y_pred=lr.predict(X_test)
y_pred[0:20]

array([0., 1., 0., 1., 1., 1., 1., 0., 0., 1., 0., 1., 1., 0., 1., 0., 0.,
       0., 0., 1.])

In [62]:
confusion = confusion_matrix(y_test, y_pred)
print('Confusion Matrix\n')
print(confusion)

Confusion Matrix

[[453 241]
 [316 352]]
