In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

intakes_file_path = os.path.join("center_data", "Austin_Animal_Center_Intakes.csv")

# Load the intake export and reduce it to dog records in a single pipeline:
#   1. snake_case every column label ("Animal ID" -> "animal_id"),
#   2. drop the "monthyear" column,
#   3. keep only rows whose animal_type is 'Dog',
#   4. shorten the intake-specific column names to type / sex / age,
#   5. flag every row with is_intake=True and parse `datetime` into real
#      timestamps,
#   6. renumber the rows from 0.
dog_intakes_df = (
    pd.read_csv(intakes_file_path)
    .rename(columns=lambda label: label.lower().replace(" ", "_"))
    .drop(columns="monthyear")
    .loc[lambda frame: frame["animal_type"] == 'Dog']
    .rename(
        columns={
            "intake_type": "type",
            "sex_upon_intake": "sex",
            "age_upon_intake": "age",
        }
    )
    .assign(
        is_intake=True,
        datetime=lambda frame: pd.to_datetime(frame["datetime"]),
    )
    .reset_index(drop=True)
)

# Optional export (disabled): dog_intakes_df.to_csv('dog_intakes.csv')
dog_intakes_df.head()

Unnamed: 0,animal_id,name,datetime,found_location,type,intake_condition,animal_type,sex,age,breed,color,is_intake
0,A786884,*Brock,2019-01-03 16:19:00,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor,True
1,A706918,Belle,2015-07-05 12:59:00,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver,True
2,A724273,Runster,2016-04-14 18:43:00,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White,True
3,A682524,Rio,2014-06-29 10:38:00,800 Grove Blvd in Austin (TX),Stray,Normal,Dog,Neutered Male,4 years,Doberman Pinsch/Australian Cattle Dog,Tan/Gray,True
4,A743852,Odin,2017-02-18 12:46:00,Austin (TX),Owner Surrender,Normal,Dog,Neutered Male,2 years,Labrador Retriever Mix,Chocolate,True


In [5]:
outcomes_file_path = os.path.join("center_data", "Austin_Animal_Center_Outcomes.csv")

# Load the outcome export and reduce it to dog adoptions in a single pipeline.
# Only rows with outcome_type == 'Adoption' survive the filter, so despite its
# name this frame holds adoptions only, not every kind of outcome.
dog_outcomes_df = (
    pd.read_csv(outcomes_file_path)
    # snake_case every column label ("Outcome Type" -> "outcome_type")
    .rename(columns=lambda label: label.lower().replace(" ", "_"))
    .drop(columns="monthyear")
    .loc[
        lambda frame: (frame["animal_type"] == 'Dog')
        & (frame["outcome_type"] == 'Adoption')
    ]
    # shorten the outcome-specific column names to type / sex / age
    .rename(
        columns={
            "outcome_type": "type",
            "sex_upon_outcome": "sex",
            "age_upon_outcome": "age",
        }
    )
    # flag every row as "not an intake" and parse `datetime` into timestamps
    .assign(
        is_intake=False,
        datetime=lambda frame: pd.to_datetime(frame["datetime"]),
    )
    .reset_index(drop=True)
)

dog_outcomes_df.head(5)

Unnamed: 0,animal_id,name,datetime,date_of_birth,type,outcome_subtype,animal_type,sex,age,breed,color,is_intake
0,A789027,Lennie,2019-02-17 11:44:00,02/13/2017,Adoption,,Dog,Neutered Male,2 years,Chihuahua Shorthair Mix,Cream,False
1,A720371,Moose,2016-02-13 17:59:00,10/08/2015,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff,False
2,A789027,Lennie,2019-03-10 12:25:00,02/13/2017,Adoption,,Dog,Neutered Male,2 years,Chihuahua Shorthair Mix,Cream,False
3,A794494,Zoey,2019-05-14 16:27:00,06/14/2018,Adoption,,Dog,Spayed Female,10 months,Wire Hair Fox Terrier Mix,White/Black,False
4,A764361,Astro,2018-01-03 19:15:00,12/28/2016,Adoption,,Dog,Neutered Male,1 year,Norwich Terrier Mix,Black/Tan,False


In [6]:
# Stack dog intakes and dog adoptions into one frame and order it so that each
# animal's events appear together, oldest first. Columns present in only one
# of the two inputs are kept and left empty for rows from the other input.
# Row labels are carried over from the inputs, so they are not unique here.
stacked_events = pd.concat([dog_intakes_df, dog_outcomes_df], sort=False)
dog_concat_df = stacked_events.sort_values(by=["animal_id", "datetime"])

# To inspect a single animal while debugging:
# dog_concat_df[dog_concat_df["animal_id"] == "A754989"]
dog_concat_df.head(5)

Unnamed: 0,animal_id,name,datetime,found_location,type,intake_condition,animal_type,sex,age,breed,color,is_intake,date_of_birth,outcome_subtype
39209,A006100,Scamp,2014-03-07 14:26:00,8700 Research in Austin (TX),Public Assist,Normal,Dog,Neutered Male,6 years,Spinone Italiano Mix,Yellow/White,True,,
1859,A006100,Scamp,2014-12-19 10:21:00,8700 Research Blvd in Austin (TX),Public Assist,Normal,Dog,Neutered Male,7 years,Spinone Italiano Mix,Yellow/White,True,,
8663,A006100,Scamp,2017-12-07 14:07:00,Colony Creek And Hunters Trace in Austin (TX),Stray,Normal,Dog,Neutered Male,10 years,Spinone Italiano Mix,Yellow/White,True,,
30497,A047759,Oreo,2014-04-02 15:55:00,Austin (TX),Owner Surrender,Normal,Dog,Neutered Male,10 years,Dachshund,Tricolor,True,,
41439,A134067,Bandit,2013-11-16 09:02:00,12034 Research Blvd in Austin (TX),Public Assist,Injured,Dog,Neutered Male,16 years,Shetland Sheepdog,Brown/White,True,,


In [7]:
# Summary statistics of the combined intake/adoption frame, kept in a variable
# and shown as the cell result via the bare name on the last line.
# NOTE(review): which columns describe() summarises by default depends on the
# pandas version and the column dtypes - confirm the table still matches after
# a pandas upgrade.
describe_df = dog_concat_df.describe()
describe_df

Unnamed: 0,animal_id,name,datetime,found_location,type,intake_condition,animal_type,sex,age,breed,color,is_intake,date_of_birth,outcome_subtype
count,94698,78245,94698,64554,94698,64554,94698,94696,94697,94698,94698,94698,30144,4020
unique,54763,13181,80000,32647,6,10,1,5,47,2245,369,2,4706,2
top,A721033,Max,2014-02-19 13:51:00,Austin (TX),Stray,Normal,Dog,Neutered Male,1 year,Pit Bull Mix,Black/White,True,03/03/2018,Foster
freq,33,648,49,12689,45513,59170,94698,28155,19042,11341,11418,64554,32,3754
first,,,2013-10-01 07:51:00,,,,,,,,,,,
last,,,2020-01-07 18:35:00,,,,,,,,,,,


In [24]:
# Number of rows (intakes plus adoptions) recorded for each animal, most
# frequent first. Despite the variable name the result is a two-column
# DataFrame: animal_id, num_records.
#
# Both columns are named explicitly with rename_axis / reset_index(name=...)
# rather than by renaming whatever labels reset_index() happens to produce:
# those default labels differ between pandas versions ("index"/"animal_id" in
# older releases, "animal_id"/"count" in 2.x), and a rename map written for
# one of them mislabels the columns under the other.
num_records_series = (
    dog_concat_df["animal_id"]
    .value_counts()
    .rename_axis("animal_id")
    .reset_index(name="num_records")
)

num_records_series

Unnamed: 0,animal_id,num_records
0,A721033,33
1,A754989,16
2,A718223,14
3,A770009,14
4,A737854,13
...,...,...
54758,A796220,1
54759,A758925,1
54760,A809693,1
54761,A717397,1


In [None]:
# Empty two-column frame meant to be filled by the (currently disabled) loop
# below. NOTE(review): "boomerang" presumably means animals that came back to
# the shelter after leaving it - confirm the intended definition.
boomerang_df = pd.DataFrame(columns = ["animal_id", "num_records"])

# Disabled work-in-progress sketch. It cannot be enabled as written:
#   * num_records_series is a DataFrame, and iterating a DataFrame yields its
#     column labels, not its rows, so `record` would be a string;
#   * the value returned by boomerang_df.append(...) is discarded, and append
#     does not modify boomerang_df in place.
# for record in num_records_series:
#     if (not record%2 == 0) and record>1:
#         boomerang_df.append({"animal_id":record.ix,"num_records":record})
#     else:
#         print("still in")

In [None]:
# make sure to install these packages before running:
# pip install pandas
# pip install sodapy
# pip install requests

import pandas as pd
from sodapy import Socrata
import requests

# The Socrata application token is a credential and must not be committed with
# the notebook. Export it as SODA_APP_TOKEN before starting the kernel.
# When the variable is unset the value is None; requests leaves None-valued
# query parameters out of the URL, so the request then goes out without a
# token instead of failing here.
app_token = os.environ.get("SODA_APP_TOKEN")
def get_soda_api_data(endpoint, app_token, count=1000, offset=0, return_df=True):
    """Download every page of a SODA JSON endpoint.

    Pages of ``count`` records are requested starting at ``offset`` until the
    server returns an empty page or an HTTP error.

    Parameters
    ----------
    endpoint : str
        URL of the ``.json`` resource to query.
    app_token : str or None
        Sent as the ``$$app_token`` query parameter.
    count : int, default 1000
        Page size, sent as ``$limit``.
    offset : int, default 0
        Index of the first record to fetch, sent as ``$offset``.
    return_df : bool, default True
        If true, return one DataFrame holding all records; otherwise return
        the raw pages.

    Returns
    -------
    pandas.DataFrame or list
        With ``return_df=True`` a DataFrame with one row per record and a
        fresh 0..n-1 index (empty if nothing was fetched). Otherwise a list
        with one entry per page, each entry being the decoded JSON payload.

    Notes
    -----
    An HTTP error status ends the download and whatever was collected so far
    is returned. A 404 is treated as "nothing (more) to fetch"; any other
    status code is printed first so the failure is visible.
    """
    results = []

    while True:
        params = {'$$app_token': app_token, '$limit': count, '$offset': offset}

        try:
            r = requests.get(endpoint, params=params)
            # Without this an error response would be decoded and stored as if
            # it were a page of data, and paging would never terminate.
            r.raise_for_status()
        except requests.HTTPError as err:
            status_code = err.response.status_code
            if status_code != 404:
                print(status_code)
            # Asking again with the same parameters would only repeat the
            # failure, so stop paging here.
            break

        rcontent = r.json()

        # An empty page means the previous page was the last one.
        if not rcontent:
            break

        results.append(rcontent)
        offset += count

    if not return_df:
        return results

    if not results:
        return pd.DataFrame()

    # Normalise each page once and concatenate in a single step; growing a
    # frame page by page copies all previous rows on every iteration.
    page_frames = [pd.json_normalize(page) for page in results]
    return pd.concat(page_frames, ignore_index=True, sort=False)
    
endpoint = "https://data.austintexas.gov/resource/wter-evkm.json"

# Keep the downloaded frame in a variable so later cells can use it without
# fetching everything again, and show only the first rows as the cell result.
soda_results_df = get_soda_api_data(endpoint, app_token)
soda_results_df.head()