# Zoo Extended Data Merge

In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np
from pathlib import Path

# Path to the cleaned extended zoo CSV (relative to the current working directory)
zoo_data_load = Path("Clean_Data/zoo_extended_list_cleaned.csv")

# Parse the CSV into a DataFrame and preview its first five rows
zoo_data = pd.read_csv(filepath_or_buffer=zoo_data_load)
zoo_data.head()

Unnamed: 0,animal_name,class_type,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,air_breather,water_breather,venomous,fins,tail,legs
0,aardvark,1,True,False,False,True,False,False,True,True,True,True,False,False,False,False,4
1,aardwolf,1,True,False,False,True,False,False,True,True,True,True,False,False,False,True,4
2,african_elephant,1,True,False,False,True,False,False,False,True,True,True,False,False,False,True,4
3,alligator,3,True,False,True,False,False,True,True,True,True,True,False,False,True,True,4
4,alpaca,1,True,False,False,True,False,False,False,True,True,True,False,False,False,True,4


In [2]:
# Path to the cleaned class lookup CSV (relative to the current working directory)
class_data_load = Path("Clean_Data/class_cleaned.csv")

# Parse the CSV into a DataFrame and preview its first five rows
class_data = pd.read_csv(filepath_or_buffer=class_data_load)
class_data.head()

Unnamed: 0,Class_Number,Number_Of_Animal_Species_In_Class,Class_Type,Animal_Names
0,1,40,Mammal,"aardvark, antelope, bear, boar, buffalo, calf,..."
1,2,20,Bird,"chicken, crow, dove, duck, flamingo, gull, haw..."
2,3,5,Reptile,"pitviper, seasnake, slowworm, tortoise, tuatara"
3,4,13,Fish,"bass, carp, catfish, chub, dogfish, haddock, h..."
4,5,4,Amphibian,"newt, toad, frog, salamander"


In [3]:
# Align class_data's column names with zoo_data before merging:
#   - the numeric class id takes the name zoo_data uses for it ('class_type')
#   - the text label moves to 'Class_Name' so it is not confused with the numeric id
column_renames = {'Class_Number': 'class_type', 'Class_Type': 'Class_Name'}
class_data = class_data.rename(columns=column_renames)
class_data.head()

Unnamed: 0,class_type,Number_Of_Animal_Species_In_Class,Class_Name,Animal_Names
0,1,40,Mammal,"aardvark, antelope, bear, boar, buffalo, calf,..."
1,2,20,Bird,"chicken, crow, dove, duck, flamingo, gull, haw..."
2,3,5,Reptile,"pitviper, seasnake, slowworm, tortoise, tuatara"
3,4,13,Fish,"bass, carp, catfish, chub, dogfish, haddock, h..."
4,5,4,Amphibian,"newt, toad, frog, salamander"


In [4]:
# Attach the class metadata to every animal, joining on the shared 'class_type' key.
# how='left' keeps every row of zoo_data; an animal whose class_type has no match in
# class_data gets NaN in the class columns.
# validate='many_to_one' makes pandas raise MergeError if class_type is duplicated in
# class_data, which would otherwise silently duplicate animal rows.
merged_data = pd.merge(
    zoo_data,
    class_data,
    how='left',
    on='class_type',
    validate='many_to_one',
)
merged_data.head()

Unnamed: 0,animal_name,class_type,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,air_breather,water_breather,venomous,fins,tail,legs,Number_Of_Animal_Species_In_Class,Class_Name,Animal_Names
0,aardvark,1,True,False,False,True,False,False,True,True,True,True,False,False,False,False,4,40,Mammal,"aardvark, antelope, bear, boar, buffalo, calf,..."
1,aardwolf,1,True,False,False,True,False,False,True,True,True,True,False,False,False,True,4,40,Mammal,"aardvark, antelope, bear, boar, buffalo, calf,..."
2,african_elephant,1,True,False,False,True,False,False,False,True,True,True,False,False,False,True,4,40,Mammal,"aardvark, antelope, bear, boar, buffalo, calf,..."
3,alligator,3,True,False,True,False,False,True,True,True,True,True,False,False,True,True,4,5,Reptile,"pitviper, seasnake, slowworm, tortoise, tuatara"
4,alpaca,1,True,False,False,True,False,False,False,True,True,True,False,False,False,True,4,40,Mammal,"aardvark, antelope, bear, boar, buffalo, calf,..."


In [5]:
# Remove the two class-level columns brought in by the merge; Class_Name is kept.
# Note: re-running this cell without re-running the merge raises KeyError, because
# the columns are already gone.
columns_to_drop = ['Number_Of_Animal_Species_In_Class', 'Animal_Names']
merged_data = merged_data.drop(labels=columns_to_drop, axis='columns')
merged_data.head()


Unnamed: 0,animal_name,class_type,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,air_breather,water_breather,venomous,fins,tail,legs,Class_Name
0,aardvark,1,True,False,False,True,False,False,True,True,True,True,False,False,False,False,4,Mammal
1,aardwolf,1,True,False,False,True,False,False,True,True,True,True,False,False,False,True,4,Mammal
2,african_elephant,1,True,False,False,True,False,False,False,True,True,True,False,False,False,True,4,Mammal
3,alligator,3,True,False,True,False,False,True,True,True,True,True,False,False,True,True,4,Reptile
4,alpaca,1,True,False,False,True,False,False,False,True,True,True,False,False,False,True,4,Mammal


In [6]:
# Reorder columns: identifying columns first, then the trait columns.
# Plain column selection is used instead of reindex(): reindex() would silently create
# an all-NaN column for any name that is misspelled or missing, whereas selection
# raises KeyError so the mistake is caught immediately.
new_columns = [
    'animal_name', 'Class_Name', 'class_type',
    'hair', 'feathers', 'eggs', 'milk',
    'airborne', 'aquatic', 'predator', 'toothed', 'backbone',
    'air_breather', 'water_breather', 'venomous',
    'fins', 'tail', 'legs',
]
merged_data = merged_data[new_columns]
merged_data.head()

Unnamed: 0,animal_name,Class_Name,class_type,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,air_breather,water_breather,venomous,fins,tail,legs
0,aardvark,Mammal,1,True,False,False,True,False,False,True,True,True,True,False,False,False,False,4
1,aardwolf,Mammal,1,True,False,False,True,False,False,True,True,True,True,False,False,False,True,4
2,african_elephant,Mammal,1,True,False,False,True,False,False,False,True,True,True,False,False,False,True,4
3,alligator,Reptile,3,True,False,True,False,False,True,True,True,True,True,False,False,True,True,4
4,alpaca,Mammal,1,True,False,False,True,False,False,False,True,True,True,False,False,False,True,4


In [7]:
# Add animal_name and Class_Name to a new dataframe.
# Select the two columns directly instead of round-tripping them through Python lists;
# .copy() makes animal_list_df independent of merged_data.
animal_list_df = merged_data[['animal_name', 'Class_Name']].copy()
animal_list_df.head()


Unnamed: 0,animal_name,Class_Name
0,aardvark,Mammal
1,aardwolf,Mammal
2,african_elephant,Mammal
3,alligator,Reptile
4,alpaca,Mammal


In [8]:
# Drop animal_name from merged_data (currently disabled, so animal_name stays in the exported merged_data)
# merged_data = merged_data.drop(columns=['animal_name'])
# merged_data.head()

In [9]:
# Output locations, built with Path like the input paths in the load cells
merged_data_out = Path('Clean_Data/merged_extended_data.csv')
animal_list_out = Path('Clean_Data/animal_extended_list.csv')

# Write the full merged table; index=False leaves the DataFrame index out of the file
merged_data.to_csv(merged_data_out, index=False)

# Write the animal_name / Class_Name pairs, again without the index
animal_list_df.to_csv(animal_list_out, index=False)