### Starter Section
- [x] set up imports
- [x] API - set-up, call, display
- [x] verify at least 100 records

In [30]:
import requests
import pandas as pd
from pprint import pprint

In [31]:
# API KEY - though it's not needed?
api_key = 'eeec1e46-9491-4fc5-aada-f8cd1955cf46'

In [32]:
#API URL
url = 'https://api.thedogapi.com/v1/breeds'

In [33]:
# get API results
doggos = requests.get(url).json()

In [34]:
# display raw json

doggos

[{'weight': {'imperial': '6 - 13', 'metric': '3 - 6'},
  'height': {'imperial': '9 - 11.5', 'metric': '23 - 29'},
  'id': 1,
  'name': 'Affenpinscher',
  'bred_for': 'Small rodent hunting, lapdog',
  'breed_group': 'Toy',
  'life_span': '10 - 12 years',
  'temperament': 'Stubborn, Curious, Playful, Adventurous, Active, Fun-loving',
  'origin': 'Germany, France',
  'reference_image_id': 'BJa4kxc4X',
  'image': {'id': 'BJa4kxc4X',
   'width': 1600,
   'height': 1199,
   'url': 'https://cdn2.thedogapi.com/images/BJa4kxc4X.jpg'}},
 {'weight': {'imperial': '50 - 60', 'metric': '23 - 27'},
  'height': {'imperial': '25 - 27', 'metric': '64 - 69'},
  'id': 2,
  'name': 'Afghan Hound',
  'country_code': 'AG',
  'bred_for': 'Coursing and hunting',
  'breed_group': 'Hound',
  'life_span': '10 - 13 years',
  'temperament': 'Aloof, Clownish, Dignified, Independent, Happy',
  'origin': 'Afghanistan, Iran, Pakistan',
  'reference_image_id': 'hMyT4CDXR',
  'image': {'id': 'hMyT4CDXR',
   'width': 606,

In [35]:
# checking that records meet 100 minimum requirement

len(doggos)

172

### Data Cleaning
- [x] get list of all attributes used and count to evaluate columns to be used
- [x] reference image id VS image id (resolved - deleted image id)

In [36]:
# create a list of all attributes for dataframe to evaluate appropriate columns for dataframe
all_attributes = []
for group in doggos:
    for line in group:
        if line not in all_attributes:
            all_attributes.append(line)
# all_attributes

In [37]:
# count occurances of attributes
att_count = {}
for attribute in all_attributes:
    count = 0
    for group in doggos:
        if attribute in group:
            count += 1
            att_count[attribute] = count
            
# att_count

In [38]:
# check reference image id VS image id to see if either are missing
# RESULT: none were missing (counter = 0), so deleted image id 

# OLD CODE taken out of dogs list  --  img_id = puppers['image'].get('id','none')

# counter = 0
# for index, row in dogs_df.iterrows():
#     if row[9] != row[10]:
#         counter += 1
# print(counter)

### Create Main Dataframe
- [x] set-up: for loops to create list of lists
- [x] dataframe: assign columns
- [x] save dataframe as csv

In [39]:
# Set-up Dog Dataframe (list of lists)
dogs = []

for puppers in doggos:
    entry = []
    name = puppers.get('name','none')
    id_code = puppers.get('id', 'none')
    breedgroup = puppers.get('breed_group','none')
    temperament = puppers.get('temperament', 'none')
    weight = puppers['weight'].get('imperial','none')
    height = puppers['height'].get('imperial', 'none')
    bred_for = puppers.get('bred_for','none')
    life_span = puppers.get('life_span','none')
    origin = puppers.get('origin','none')
#     country_code = puppers.get('country_code')
    ref_img_id = puppers.get('reference_image_id', 'none')
    
    entry.append(name)
    entry.append(id_code)
    entry.append(breedgroup)
    entry.append(temperament)
    entry.append(weight)
    entry.append(height)
    entry.append(bred_for)
    entry.append(life_span)
    entry.append(origin)
#     entry.append(country_code)
    entry.append(ref_img_id)
    dogs.append(entry)





# THESE are attributes I have not included
#  'description'
#  'history'
# 'image id'

In [40]:
# create dataframe
dogs_df = pd.DataFrame(dogs, columns =['Dog Breed', 
                                       'ID Code',
                                       'Breed Group', 
                                       'Temperament', 
                                       'Weight', 
                                       'Height', 
                                       'Bred For', 
                                       'Life Span', 
                                       'Origin', 
#                                        'Country Code',
                                       'Reference Image ID'
                                      ])
# dogs_df.head(20)

In [41]:
# save dogs dataframe to csv file
dogs_df.to_csv(r'C:\Users\chels\OneDrive\Desktop\UNC Boot Camp\PROJECTS\Project-2\Input\dogs_df.csv')

PermissionError: [Errno 13] Permission denied: 'C:\\Users\\chels\\OneDrive\\Desktop\\UNC Boot Camp\\PROJECTS\\Project-2\\Input\\dogs_df.csv'

### Temperaments
- [x] create list of all temperaments (resolved: 124 unique temperaments used)
- [x] count the occurance of each temperament in dataset
- [x] sort occurances from greatest occurance to least
- [x] create temperament (columns) V dog breeds (rows) table

In [42]:
# create list of all temperaments (listed once)
temp_list = []
for dog in doggos:
    if 'temperament' in dog:
        temp = dog['temperament'].split(',')
        for att in temp:
            att = att.strip()
            if att not in temp_list:
                temp_list.append(att)
                
# print(len(temp_list))
# pprint(temp_list)

In [43]:
# count number of times temperaments appear in dataset
temp_counts = {}

for temperament in temp_list:
    counter = 0
    for dog in doggos:
        if 'temperament' in dog:
            if temperament in dog['temperament']:
                counter += 1
                temp_counts[temperament] = counter

# temp_counts

In [44]:
# sort temp_counts from greatest to least
temp_count_sort = pd.DataFrame.from_dict(temp_counts, 
                       orient='index', 
                       columns=['Temperament Count']).sort_values(by='Temperament Count', 
                                                                  ascending=False)
# temp_count_sort.head(30)

In [45]:
## create dataframe of temperaments, each dog as rows temps as columns
# list of lists, dog name as first list value, t/f for each temperament
all_dogs = []
for dog in doggos:
    dog_list = [dog['name']]
    if 'temperament' in dog:
        for temperament in temp_list:
            if temperament in dog['temperament']:
                dog_list.append('True')
            else:
                dog_list.append('False')
    all_dogs.append(dog_list)

# create table with list of lists, add 'Name' to temp_list to accommodate first list value
column_names = ['Name'] + temp_list
temp_dogs_table = pd.DataFrame(all_dogs, columns= column_names)

### Group-Bys
- [x]  group by breed group
- [ ]  group by weight
    - split string into min/max, store as min max vals (if 1 value min/max same, if 0 'no age' or NaN)
    - create list of weights based on lowest highest 
    - create new table with weights as columns V names as rows, t/f values 
    - count weights 
- [ ]  group by height
    - split string into min/max, store as min max vals (if 1 value then min/max same, if 0 then 'no age' or NaN)
    - create list of heights based on lowest + highest
    - create new table with heights as columns V names as rows, t/f values
    - count heights 

In [57]:
# Breed group
gb_breed_group = dogs_df.groupby('Breed Group')
breed_groups = pd.DataFrame(gb_breed_group['Dog Breed'].count()).reset_index().drop([0])

# breed_groups