## Which animals/breeds are the most adopted? Statistical Analysis

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the merged Dallas CSV. The three listed columns are read with pandas'
# 'string' dtype instead of letting read_csv infer a type for them.
dallas_data = pd.read_csv(
    'Resources/DallasMergedClean.csv',
    dtype=dict.fromkeys(['Council District', 'Intake Total', 'Outcome Subtype'], 'string'),
)
# Disabled load of a second CSV; nothing in this section reads pf_data.
#pf_data = pd.read_csv('Project1/Resources/Animals.csv')

In [None]:
# Rows whose outcome is an adoption or a return to the owner, reduced to the
# three columns the rest of the analysis uses.
# NOTE(review): 'RETURNED TO OWNER' is counted as "adopted" here — confirm that is intended.
dal_adopted = dallas_data.loc[
    dallas_data['Outcome Type'].isin(['RETURNED TO OWNER', 'ADOPTION']),
    ['Outcome Type', 'Animal Type', 'Animal Breed'],
]
dal_adopted

In [None]:
# Per-animal-type summary table: adopted count, available count, percent adopted.

# Number of adopted rows for each animal type
adopt_count = (
    dal_adopted
    .groupby('Animal Type')[['Outcome Type']]
    .count()
    .rename(columns={'Outcome Type': 'Total Adopted'})
)

# Number of rows with a recorded outcome for each animal type, over the whole dataset
dal_count = dallas_data.groupby('Animal Type').count()
adopt_count = adopt_count.assign(**{'Total Available': dal_count['Outcome Type']})

# Share adopted, stored as display text such as "12.34%" (a string, not a number)
adopt_count['Percent Adopted'] = (
    adopt_count['Total Adopted']
    .div(adopt_count['Total Available'])
    .mul(100)
    .map("{:.2f}%".format)
)
adopt_count

In [None]:
# Compare adopted vs. available counts for each animal type.
# Only the two numeric count columns are plotted; 'Percent Adopted' holds
# formatted strings and cannot be drawn. The bars are grouped side by side
# rather than stacked: the adopted animals are a subset of the available ones,
# so stacking would pile them on top of the total and overstate every bar.
ax = adopt_count[['Total Adopted', 'Total Available']].plot(kind="bar", rot=45, figsize=(12,6))
ax.set_title("Adopted vs. Available Animals by Type")
ax.set_xlabel("Animal Type")
ax.set_ylabel("Number of Animals")
plt.tight_layout()
plt.show()

In [None]:
# Side-by-side pie charts: animal types available vs. animal types adopted.
colors = ['r', 'b', 'g', 'c', 'y']

# Take the wedge labels from the table's own index so every label is guaranteed
# to match its wedge, whatever animal types (and ordering) the grouped data holds.
labels = [str(animal_type).title() for animal_type in adopt_count.index]

# Pull the dog wedge slightly out of each pie; one entry per wedge.
explode = [0.1 if label == 'Dog' else 0 for label in labels]

# plt.subplots(1, 2) is 1 row, 2 columns: ax1 is the left pie, ax2 the right one
# (2, 1 would be 2 rows, 1 column).
fig, (ax1,ax2) = plt.subplots(1,2,figsize=(15,15))

# Pie showing total number of animal types available for adoption
ax1.pie(adopt_count['Total Available'], colors=colors, labels=labels, labeldistance=1.05,
        explode=explode, autopct="%1.1f%%", pctdistance=1.5)
ax1.set_title("Animal Types Available for Adoption")

# Pie showing total number of animal types adopted
ax2.pie(adopt_count['Total Adopted'], colors=colors, labels=labels, labeldistance=1.05,
        explode=explode, autopct="%1.1f%%", pctdistance=1.5)
ax2.set_title("Animal Types Adopted")

plt.show()

## Dogs are by far the most common animal available for adoption (72.71%), followed by cats (22.87%).

In [None]:
# Adopted dogs only, counted per breed
dogs_only = dal_adopted[dal_adopted['Animal Type'] == 'DOG']

# Rank breeds by number of adoptions and keep the ten largest
dog_group = (
    dogs_only
    .groupby(['Animal Breed'])
    .count()
    .sort_values(by='Outcome Type', ascending=False)
    .rename(columns={'Outcome Type': 'Total Adopted'})
    .drop(columns='Animal Type')
    .head(10)
)
dog_group

In [None]:
# Every dog in the dataset (any outcome), restricted to the columns needed for counting
avail_dogs_only = dallas_data.loc[
    dallas_data['Animal Type'] == 'DOG',
    ['Animal Type', 'Animal Breed', 'Outcome Type'],
]

# Rank breeds by number of dogs and keep the ten largest
avail_dog_group = (
    avail_dogs_only
    .groupby(['Animal Breed'])
    .count()
    .sort_values(by='Animal Type', ascending=False)
    .rename(columns={'Animal Type': 'Total Available'})
    .drop(columns='Outcome Type')
    .head(10)
)
avail_dog_group

In [None]:
# Merge dogs available and dogs adopted data frames.
# Both inputs are top-10 lists, so the outer merge leaves a missing count for
# any breed that appears in only one of them. No adoption percentage can be
# computed for those rows, so they are dropped (the same treatment the cat
# breed merge applies) instead of being displayed as "nan%".
dog_merge = pd.merge(dog_group, avail_dog_group, on='Animal Breed', how='outer')
dog_merge = dog_merge.dropna(subset=['Total Adopted', 'Total Available'])

# Missing values from the outer merge turn the count columns into floats; restore integers.
dog_merge = dog_merge.astype({'Total Adopted': int, 'Total Available': int})

# Share adopted, stored as display text such as "12.34%"
dog_merge['Percent Adopted'] = (dog_merge['Total Adopted'] / dog_merge['Total Available']) * 100
dog_merge['Percent Adopted'] = dog_merge['Percent Adopted'].map("{:.2f}%".format)

dog_merge

In [None]:
# Bar chart of the ten most-adopted dog breeds
ax = dog_group.plot.bar(rot=45, figsize=(12,6))
ax.set_title("Dog Breed Adoptions")
ax.set_xlabel("Dog Breed")
ax.set_ylabel("Total Adopted")

ax.figure.tight_layout()
plt.show()

In [None]:
# Adopted cats only, counted per breed
cats_only = dal_adopted[dal_adopted['Animal Type'] == 'CAT']

# Rank breeds by number of adoptions and keep the ten largest
cat_group = (
    cats_only
    .groupby(['Animal Breed'])
    .count()
    .sort_values(by='Outcome Type', ascending=False)
    .rename(columns={'Outcome Type': 'Total Adopted'})
    .drop(columns='Animal Type')
    .head(10)
)
cat_group

In [None]:
# Every cat in the dataset (any outcome), restricted to the columns needed for counting
avail_cats_only = dallas_data.loc[
    dallas_data['Animal Type'] == 'CAT',
    ['Animal Type', 'Animal Breed', 'Outcome Type'],
]

# Rank breeds by number of cats and keep the ten largest
avail_cat_group = (
    avail_cats_only
    .groupby(['Animal Breed'])
    .count()
    .sort_values(by='Animal Type', ascending=False)
    .rename(columns={'Animal Type': 'Total Available'})
    .drop(columns='Outcome Type')
    .head(10)
)
avail_cat_group

In [None]:
# Combine the top-10 adopted and top-10 available cat breed tables
cat_merge = pd.merge(cat_group, avail_cat_group, how='outer', on='Animal Breed')

# Share adopted, stored as display text such as "12.34%"
cat_merge['Percent Adopted'] = (
    cat_merge['Total Adopted']
    .div(cat_merge['Total Available'])
    .mul(100)
    .map("{:.2f}%".format)
)

# Breeds present in only one of the two lists have a missing count; discard
# them, then turn the remaining counts back into integers.
cat_merge = (
    cat_merge
    .dropna()
    .astype({'Total Adopted': int, 'Total Available': int})
)

cat_merge

In [None]:
# Bar chart of the ten most-adopted cat breeds
ax = cat_group.plot.bar(rot=45, figsize=(12,6))
ax.set_title("Cat Breed Adoptions")
ax.set_xlabel("Cat Breed")
ax.set_ylabel("Total Adopted")

ax.figure.tight_layout()
plt.show()

## The most adopted dog breeds are Pit Bulls, Chihuahuas, German Shepherds and Labrador Retrievers. The most adopted cat breed, by far, is the Domestic Shorthair.

### Null Hypothesis:
### Alternate Hypothesis: