In [1]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
import time
import seaborn as sns
import numpy as np

%matplotlib inline
# Do not truncate columns when displaying wide DataFrames
pd.options.display.max_columns = None

In [2]:
# Load the pets dataset into `df`; the path is relative to the notebook's working directory.
# NOTE(review): the saved cell output echoes this line, which looks like the tail of a
# pandas warning raised by read_csv — confirm which warning it was and address it if needed.
df = pd.read_csv('..//data/cleaned_pets.csv')

  df = pd.read_csv('..//data/cleaned_pets.csv')


In [3]:
# Display the (rows, columns) dimensions of the loaded frame
df.shape

(34009, 42)

Calculate adoption time

In [None]:
# Parse both date columns (stored as month/day/two-digit-year text) into datetimes
for date_col in ('published_date', 'status_change_date'):
    df[date_col] = pd.to_datetime(df[date_col], format='%m/%d/%y')

# Adoption time = whole days between publication and the last status change
listing_duration = df['status_change_date'] - df['published_date']
df['adoption_time'] = listing_duration.dt.days

In [None]:
# Split the rows whose status changed in 2023 into two frames by status value
changed_in_2023 = df['year_status_change'].eq(2023)

adopted = df[changed_in_2023 & df['status'].eq('adopted')]
adoptable = df[changed_in_2023 & df['status'].eq('adoptable')]

# All Pets Analysis

How many pets were published/put up for adoption in 2023? 

In [None]:
# Keep only the rows whose publication year is 2023
is_published_2023 = df['year_published'].eq(2023)
published = df[is_published_2023]

# Count non-null ids in the subset
n_published = published['id'].count()
print('Number of pets placed for adoption: ', n_published)

How many pets were adopted in 2023?

In [None]:
# Rows with status 'adopted' whose status changed in 2023
status_is_adopted = df['status'].eq('adopted')
status_changed_2023 = df['year_status_change'].eq(2023)
adopted = df[status_changed_2023 & status_is_adopted]

n_adopted_2023 = adopted['id'].count()
print('Number of pets adopted in 2023: ', n_adopted_2023)

How many pets were published but NOT adopted in 2023? 

In [None]:
# Pets published in 2023 that were NOT adopted during 2023.
#
# The previous filter (`year_status_change != 2023`) ignored both the status and
# the publication year: it dropped every pet whose status changed in 2023 even
# when that status was not 'adopted', and it kept pets regardless of when they
# were published. Here a pet counts as "adopted in 2023" only when its status is
# 'adopted' AND the status change happened in 2023; everything else published in
# 2023 is treated as not adopted in 2023.
published_in_2023 = df['year_published'] == 2023
adopted_in_2023 = (df['year_status_change'] == 2023) & (df['status'] == 'adopted')
available = df[published_in_2023 & ~adopted_in_2023]

print('Pets published but not adopted in 2023: ', available['id'].count())

In [None]:
# Check 
#available.groupby('year_status_change')['id'].count()

How many pets of each species were placed for adoption in 2023?

In [None]:
# Count ids per species, largest group first.
# NOTE(review): this groups every row of `df`, not the 2023-only `published`
# subset — confirm that `df` is the intended population for "placed for adoption in 2023".
species_counts = df.groupby('species')['id'].count()
published_species = (
    species_counts
    .reset_index(name='count published')
    .sort_values('count published', ascending=False)
)

published_species

How many pets of each species were adopted in 2023?

In [None]:
# Count adopted ids per species, largest group first
adopted_counts = adopted.groupby('species')['id'].count()
adopted_species = (
    adopted_counts
    .reset_index(name='count adopted')
    .sort_values('count adopted', ascending=False)
)

adopted_species

What is the overall adoption rate for 2023?

In [None]:
# Overall adoption rate (%) = adopted ids / all ids in df, times 100
n_adopted = adopted['id'].count()
n_listed = df['id'].count()
adoption_rate = (n_adopted / n_listed) * 100

print('Overall Adoption Rate: ', round(adoption_rate, 2),'%')

What is the adoption rate by species? 

In [None]:
# Outer-join the per-species published and adopted counts; a species missing
# from one side gets NaN, which is replaced with zero
adoption_rate = published_species.merge(adopted_species, how='outer', on='species').fillna(0)

# The fill can leave 'count adopted' as float; store it as integer
adoption_rate['count adopted'] = adoption_rate['count adopted'].astype(int)

# Adoption rate (%) per species, rounded to two decimals
adopted_share = adoption_rate['count adopted'] / adoption_rate['count published']
adoption_rate['adoption rate %'] = (adopted_share * 100).round(2)

adoption_rate.sort_values('adoption rate %', ascending=False)

What is the adoption rate by age? What age group has the highest and lowest adoption rate? 

Answer: Highest - Baby, Lowest - Seniors 

In [None]:
# Count ids per age group: all rows of `df` vs. the `adopted` subset
pub_age_count = df.groupby('age')['id'].count().reset_index(name='count published')
adopted_age_count = adopted.groupby('age')['id'].count().reset_index(name='count adopted')

# Merge the two counts on age (default inner join: only age groups present in both)
age_adoption_rate = pd.merge(pub_age_count, adopted_age_count, on='age')

# Calculate the adoption rate from THIS frame's own counts.
# (The species-level `adoption_rate` frame was used here before, so each age row
# received a species rate matched up by index label instead of its own rate.)
age_adoption_rate['adoption rate %'] = round(
    (age_adoption_rate['count adopted'] / age_adoption_rate['count published']) * 100, 2
)

age_adoption_rate.sort_values('adoption rate %', ascending=False)

In [None]:
# Display the column labels of df
df.columns

What is the adoption rate by month?

In [None]:
# Create column for adopted months
#
# 'adopted_month' holds the month the status changed for rows that are
# 'adopted' with a 2023 status change, and 0 for every other row.
# This is computed with a boolean mask instead of looping over rows with
# iterrows()/df.at, which gives the same values without per-row Python overhead.
adopted_in_2023 = (df['status'] == 'adopted') & (df['year_status_change'] == 2023)

# Series.where keeps the original value where the mask is True and substitutes 0 elsewhere
df['adopted_month'] = df['month_status_change'].where(adopted_in_2023, 0)

What is the average adoption time (in days) per species? Which species has the longest adoption time?

In [None]:
# Calculate average adoption time (days) by species, longest first
avg_time = (
    adopted.groupby('species')['adoption_time']
    .mean()
    .reset_index(name='avg_adoption_time')
    .sort_values('avg_adoption_time', ascending=False)
)

# Report the species with the longest average adoption time.
# The species label is read from the top row instead of being hard-coded, so the
# printed answer always matches the number shown next to it.
if avg_time.empty:
    print('Answer: no adopted pets to summarise')
else:
    slowest = avg_time.iloc[0]  # first row after the descending sort
    print('Answer:', f"{slowest['species']}:", round(slowest['avg_adoption_time']), 'days')
avg_time

In which TN cities are pets available?

# Dog Analysis

In [None]:
# Keep only the rows whose species is 'Dog'
is_dog = df['species'].eq('Dog')
dogs = df[is_dog]

print(dogs.shape)

# Count ids per value of the 'type' column within the dog subset
dogs.groupby('type')['id'].count()

How many full breeds vs mixed breeds were listed for adoption?

In [None]:
# Split the dogs by the 'breeds.mixed' flag
mixed_flag = dogs['breeds.mixed']
full = dogs[mixed_flag.eq(False)]
mixed = dogs[mixed_flag.eq(True)]

# Report the number of non-null ids in each group
n_full = full['id'].count()
n_mixed = mixed['id'].count()
print('Full Breed Dogs: ', n_full)
print('Mixed Breed Dogs: ', n_mixed)

What are the top 20 breeds listed for adoption? Are they full or mixed breeds?

In [None]:
# Count ids for each (primary breed, mixed flag) pair and keep the 20 largest groups
breed_counts = dogs.groupby(['breeds.primary', 'breeds.mixed'])['id'].count()
top_20 = (
    breed_counts
    .reset_index(name='count')
    .sort_values('count', ascending=False)
    .head(20)
)

top_20

What are the top 20 full breeds and top 20 mixed breeds (include secondary breed)?

In [None]:
# Top 20 full breeds

# Count full-breed ids per primary breed, most common first
fb_counts = full.groupby('breeds.primary')['id'].count().reset_index(name='count')
fb_ranked = fb_counts.sort_values('count', ascending=False)

# Keep the first 21 rows.
# NOTE(review): presumably one extra row is kept so an irrelevant entry can be
# dropped afterwards; that drop is disabled below, so 21 rows are shown — confirm intent.
top_20_fb = fb_ranked.head(21)

# Drop index 100 because it is not relevant
#top_20_fb = top_20_fb.drop(100)
top_20_fb