In [6]:
import pandas as pd
import requests
import os

from socrata_token import *

In [8]:
header = {"client_id":f"{client_id}", "client_secret":f"{client_secret}"}

def get_count(api_url, credentials):
    """ Finds the cardinality of the dataset 
    
        api_url: str, in this case intakes or outcomes link
        
        credentials: dict, Socrata public and private tokens
        
        returns int
    """
    payload = {"$select":"count(*)"}
    return int(requests.get(api_url, headers=credentials, params=payload).json()[0]["count"])

def get_full_dataset(api_url,credentials, limit=10000):
    """ Recreates whole dataset in a pandas DataFrame
    
        api_url: str, in this case intakes or outcomes link
        
        credentials: dict, Socrata public and private tokens
        
        limit: int, number of "results" per "page". 10k is least amount without overloading API and getting weird feature abberations from Socrata
        
    """
    
    offset=0    
    status_code = 200
    df = pd.DataFrame()
    
    payload = {"$order":":id", "$limit":limit, "$offset":offset}

    while status_code == 200 and offset < get_count(api_url, credentials):
        
        print(f"offset={offset}, status={status_code}, shape={df.shape}")
        
        r = requests.get(intakes_api, headers=header, params=payload)
        df = pd.concat([df, pd.DataFrame(r.json())], ignore_index=True)
        
        offset+=limit
        payload.update({"$offset":offset})
        status_code = r.status_code
    
    return df

def combine_sets(in_df, out_df):
    return pd.concat([in_df, out_df], ignore_index=True)

In [4]:
# intial request
intakes_api = "https://data.austintexas.gov/resource/wter-evkm.json"
header = {"client_id":f"{client_id}", "client_secret":f"{client_secret}"}

count_payload = {"$select":"count(*)"}
entry_count = int(requests.get(intakes_api, headers=header, params=count_payload).json()[0]["count"])

limit = 10000
offset = 0
status_code = 200
intakes_df = pd.DataFrame()
payload = {"$order":":id", "$limit":limit, "$offset":offset}
# intake_r = requests.get(intakes_api, headers=header, params=payload)
# intakes_df = pd.DataFrame(intake_r.json())

while status_code == 200 and offset < entry_count:
    payload.update({"$offset":offset})
    intake_r = requests.get(intakes_api, headers=header, params=payload)
    status_code = intake_r.status_code
    intakes_df = pd.concat([intakes_df, pd.DataFrame(intake_r.json())], ignore_index=True)
    print(f"offset={offset}, status={intake_r.status_code}, shape={intakes_df.shape}")
    offset+=limit

intakes_df

offset=0, status=200, shape=(10000, 12)
offset=10000, status=200, shape=(20000, 12)
offset=20000, status=200, shape=(30000, 12)
offset=30000, status=200, shape=(40000, 12)
offset=40000, status=200, shape=(50000, 12)
offset=50000, status=200, shape=(60000, 12)
offset=60000, status=200, shape=(70000, 12)
offset=70000, status=200, shape=(80000, 12)
offset=80000, status=200, shape=(90000, 12)
offset=90000, status=200, shape=(100000, 12)
offset=100000, status=200, shape=(110000, 12)
offset=110000, status=200, shape=(117702, 12)


Unnamed: 0,animal_id,name,datetime,datetime2,found_location,intake_type,intake_condition,animal_type,sex_upon_intake,age_upon_intake,breed,color
0,A786884,*Brock,2019-01-03T16:19:00.000,2019-01-03T16:19:00.000,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor
1,A706918,Belle,2015-07-05T12:59:00.000,2015-07-05T12:59:00.000,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver
2,A724273,Runster,2016-04-14T18:43:00.000,2016-04-14T18:43:00.000,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White
3,A665644,,2013-10-21T07:59:00.000,2013-10-21T07:59:00.000,Austin (TX),Stray,Sick,Cat,Intact Female,4 weeks,Domestic Shorthair Mix,Calico
4,A682524,Rio,2014-06-29T10:38:00.000,2014-06-29T10:38:00.000,800 Grove Blvd in Austin (TX),Stray,Normal,Dog,Neutered Male,4 years,Doberman Pinsch/Australian Cattle Dog,Tan/Gray
...,...,...,...,...,...,...,...,...,...,...,...,...
117697,A818304,Sally,2020-06-05T11:38:00.000,2020-06-05T11:38:00.000,Travis (TX),Owner Surrender,Normal,Dog,Spayed Female,9 months,Labrador Retriever/Border Collie,Black/White
117698,A602060,*Blue Old Man,2020-05-15T14:24:00.000,2020-05-15T14:24:00.000,11005 American Mustang Loop in Austin (TX),Stray,Injured,Dog,Intact Male,10 years,Pit Bull Mix,Blue/White
117699,A818172,*Crabapple,2020-06-03T11:33:00.000,2020-06-03T11:33:00.000,8512 Silverthorne Street in Austin (TX),Stray,Normal,Dog,Intact Male,1 year,Pit Bull,Fawn/White
117700,A818203,*Veri-Beri,2020-06-03T15:34:00.000,2020-06-03T15:34:00.000,1602 E M Franklin Avenue in Austin (TX),Stray,Nursing,Dog,Intact Female,2 years,Pit Bull,Brown Brindle/White


In [5]:
entry_count

117702

In [9]:
header = {"client_id":f"{client_id}", "client_secret":f"{client_secret}"}
outcomes_api = "https://data.austintexas.gov/resource/9t4d-g238.json"
outcomes_df = get_full_dataset(outcomes_api, header)
outcomes_df

offset=0, status=200, shape=(0, 0)
offset=10000, status=200, shape=(10000, 12)
offset=20000, status=200, shape=(20000, 12)
offset=30000, status=200, shape=(30000, 12)
offset=40000, status=200, shape=(40000, 12)
offset=50000, status=200, shape=(50000, 12)
offset=60000, status=200, shape=(60000, 12)
offset=70000, status=200, shape=(70000, 12)
offset=80000, status=200, shape=(80000, 12)
offset=90000, status=200, shape=(90000, 12)
offset=100000, status=200, shape=(100000, 12)
offset=110000, status=200, shape=(110000, 12)


Unnamed: 0,animal_id,name,datetime,datetime2,found_location,intake_type,intake_condition,animal_type,sex_upon_intake,age_upon_intake,breed,color
0,A786884,*Brock,2019-01-03T16:19:00.000,2019-01-03T16:19:00.000,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor
1,A706918,Belle,2015-07-05T12:59:00.000,2015-07-05T12:59:00.000,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver
2,A724273,Runster,2016-04-14T18:43:00.000,2016-04-14T18:43:00.000,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White
3,A665644,,2013-10-21T07:59:00.000,2013-10-21T07:59:00.000,Austin (TX),Stray,Sick,Cat,Intact Female,4 weeks,Domestic Shorthair Mix,Calico
4,A682524,Rio,2014-06-29T10:38:00.000,2014-06-29T10:38:00.000,800 Grove Blvd in Austin (TX),Stray,Normal,Dog,Neutered Male,4 years,Doberman Pinsch/Australian Cattle Dog,Tan/Gray
...,...,...,...,...,...,...,...,...,...,...,...,...
117697,A818172,*Crabapple,2020-06-03T11:33:00.000,2020-06-03T11:33:00.000,8512 Silverthorne Street in Austin (TX),Stray,Normal,Dog,Intact Male,1 year,Pit Bull,Fawn/White
117698,A818203,*Veri-Beri,2020-06-03T15:34:00.000,2020-06-03T15:34:00.000,1602 E M Franklin Avenue in Austin (TX),Stray,Nursing,Dog,Intact Female,2 years,Pit Bull,Brown Brindle/White
117699,A818376,,2020-06-07T13:13:00.000,2020-06-07T13:13:00.000,"13332 High Sierra Street, in Manor (TX)",Stray,Normal,Dog,Neutered Male,4 years,Labrador Retriever,Black
117700,A818368,,2020-06-07T09:51:00.000,2020-06-07T09:51:00.000,Oak Springs Dr/ Gunter St. in Austin (TX),Stray,Normal,Dog,Intact Female,3 years,Queensland Heeler/Australian Shepherd,Brown Merle/White


In [12]:
intakes_df.sort_values(by=["animal_id", "datetime"])

Unnamed: 0,animal_id,name,datetime,datetime2,found_location,intake_type,intake_condition,animal_type,sex_upon_intake,age_upon_intake,breed,color
79436,A006100,Scamp,2014-03-07T14:26:00.000,2014-03-07T14:26:00.000,8700 Research in Austin (TX),Public Assist,Normal,Dog,Neutered Male,6 years,Spinone Italiano Mix,Yellow/White
3777,A006100,Scamp,2014-12-19T10:21:00.000,2014-12-19T10:21:00.000,8700 Research Blvd in Austin (TX),Public Assist,Normal,Dog,Neutered Male,7 years,Spinone Italiano Mix,Yellow/White
17612,A006100,Scamp,2017-12-07T14:07:00.000,2017-12-07T14:07:00.000,Colony Creek And Hunters Trace in Austin (TX),Stray,Normal,Dog,Neutered Male,10 years,Spinone Italiano Mix,Yellow/White
61777,A047759,Oreo,2014-04-02T15:55:00.000,2014-04-02T15:55:00.000,Austin (TX),Owner Surrender,Normal,Dog,Neutered Male,10 years,Dachshund,Tricolor
83961,A134067,Bandit,2013-11-16T09:02:00.000,2013-11-16T09:02:00.000,12034 Research Blvd in Austin (TX),Public Assist,Injured,Dog,Neutered Male,16 years,Shetland Sheepdog,Brown/White
...,...,...,...,...,...,...,...,...,...,...,...,...
117691,A818366,Cookie,2020-06-07T09:06:00.000,2020-06-07T09:06:00.000,14608 Pearce Ln in Austin (TX),Owner Surrender,Sick,Dog,Intact Male,2 months,Labrador Retriever,Black/Tan
117689,A818368,,2020-06-07T09:51:00.000,2020-06-07T09:51:00.000,Oak Springs Dr/ Gunter St. in Austin (TX),Stray,Normal,Dog,Intact Female,3 years,Queensland Heeler,Brown Merle
117695,A818369,,2020-06-07T11:59:00.000,2020-06-07T11:59:00.000,5000 Maufrais Ln in Austin (TX),Stray,Normal,Dog,Intact Female,1 year,German Shepherd,Tan
117696,A818370,,2020-06-07T12:07:00.000,2020-06-07T12:07:00.000,1215 W Slaughter in Austin (TX),Stray,Injured,Cat,Spayed Female,8 years,Domestic Shorthair,Tortie


In [None]:
# intial request
intakes_url = "https://data.austintexas.gov/resource/wter-evkm.json"
header = {"$client_id":client_id, "$client_secret":client_secret}
offset=0
limit=50000
payload = {"$order":":id", "$limit":limit, "$offset":offset}

intake_r = requests.get(intakes_url, headers=header, params=payload)
print(intake_r.url)
intakes_df = pd.DataFrame(intake_r.json())

offset+=limit
payload = {"$order":":id", "$limit":limit, "$offset":offset}
intake_r = requests.get(intakes_url, headers=header, params=payload)
print(intake_r.url)
intakes_2_df = pd.DataFrame(intake_r.json())


In [None]:
pd.concat([intakes_df, intakes_2_df],ignore_index=True).nunique()

In [None]:
intakes_2_df

In [None]:
intakes_df.sort_values(by = ["animal_id"]).to_csv("intakes_duplicates.csv")

In [22]:
pd.concat([intakes_df, outcomes_df], keys=["in", "out"])

Unnamed: 0,Unnamed: 1,animal_id,name,datetime,datetime2,found_location,intake_type,intake_condition,animal_type,sex_upon_intake,age_upon_intake,breed,color
in,0,A786884,*Brock,2019-01-03T16:19:00.000,2019-01-03T16:19:00.000,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor
in,1,A706918,Belle,2015-07-05T12:59:00.000,2015-07-05T12:59:00.000,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver
in,2,A724273,Runster,2016-04-14T18:43:00.000,2016-04-14T18:43:00.000,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White
in,3,A665644,,2013-10-21T07:59:00.000,2013-10-21T07:59:00.000,Austin (TX),Stray,Sick,Cat,Intact Female,4 weeks,Domestic Shorthair Mix,Calico
in,4,A682524,Rio,2014-06-29T10:38:00.000,2014-06-29T10:38:00.000,800 Grove Blvd in Austin (TX),Stray,Normal,Dog,Neutered Male,4 years,Doberman Pinsch/Australian Cattle Dog,Tan/Gray
...,...,...,...,...,...,...,...,...,...,...,...,...,...
out,117697,A818172,*Crabapple,2020-06-03T11:33:00.000,2020-06-03T11:33:00.000,8512 Silverthorne Street in Austin (TX),Stray,Normal,Dog,Intact Male,1 year,Pit Bull,Fawn/White
out,117698,A818203,*Veri-Beri,2020-06-03T15:34:00.000,2020-06-03T15:34:00.000,1602 E M Franklin Avenue in Austin (TX),Stray,Nursing,Dog,Intact Female,2 years,Pit Bull,Brown Brindle/White
out,117699,A818376,,2020-06-07T13:13:00.000,2020-06-07T13:13:00.000,"13332 High Sierra Street, in Manor (TX)",Stray,Normal,Dog,Neutered Male,4 years,Labrador Retriever,Black
out,117700,A818368,,2020-06-07T09:51:00.000,2020-06-07T09:51:00.000,Oak Springs Dr/ Gunter St. in Austin (TX),Stray,Normal,Dog,Intact Female,3 years,Queensland Heeler/Australian Shepherd,Brown Merle/White


In [18]:
intakes_df.columns == outcomes_df.columns

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True])