In [1]:
# Import dependencies
import os
import pandas as pd
import numpy as np
from sodapy import Socrata

# Import App Token
from config import SODAPY_APPTOKEN

# Socrata API
API endpoint: https://information.stpaul.gov/resource/fgbn-288b.json

In [10]:
# Socrata domain and dataset identifier for the API endpoint
query_url = 'information.stpaul.gov'
data_id = 'fgbn-288b'

# Authenticate the request with the app token
client = Socrata(query_url, SODAPY_APPTOKEN)

# Fetch every row of the dataset as a list of dictionaries.
# A bare client.get() returns only a single page of results, so it would
# silently truncate the data once the dataset outgrows one page. get_all()
# keeps requesting pages until the API returns a short one.
# The explicit sort keeps paging stable and puts the most recently vacated
# records first; ':id' breaks ties between rows that share a date.
results = list(client.get_all(data_id, order='vacant_as_of DESC, :id'))

# Convert the row dictionaries to a pandas DataFrame
vacant_df = pd.DataFrame.from_records(results)
pd.set_option('display.max_columns', None)
vacant_df.head()

Unnamed: 0,address,vacant_as_of,dwelling_type,sale_review_category,ward,district,census_tract,map_location,:@computed_region_jitf_9zyt,:@computed_region_dysk_fpxp,:@computed_region_crxq_iug7,:@computed_region_5wjj_5at9
0,754 VAN BUREN AVE,2020-07-30T00:00:00.000,Duplex,2,1,7,32400,"{'latitude': '44.96190507', 'longitude': '-93....",10,7,67,56
1,2132 3RD ST E,2020-07-30T00:00:00.000,Single Family Residential,1,7,1,34702,"{'latitude': '44.95653861', 'longitude': '-93....",16,2,155,44
2,197 LEXINGTON PKWY S,2020-07-27T00:00:00.000,Single Family Residential,1,2,16,35300,"{'latitude': '44.93551107', 'longitude': '-93....",8,5,44,4
3,892 JESSAMINE AVE E,2020-07-22T00:00:00.000,Duplex,1,6,5,31100,"{'latitude': '44.97466208', 'longitude': '-93....",13,1,194,72
4,1008 CONCORDIA AVE,2020-07-22T00:00:00.000,Single Family Residential,2,1,8,33500,"{'latitude': '44.95111962', 'longitude': '-93....",9,7,56,40


# Exploratory Analysis

In [5]:
# Distinct dwelling types present in the dataset, in order of first appearance
pd.unique(vacant_df['dwelling_type'])

array(['Duplex', 'Single Family Residential', 'Commercial',
       'Multi-family Residential', 'Mixed Use'], dtype=object)

In [12]:
# Prepare to find the oldest and most recent dates:
# 'vacant_as_of' is loaded as an object column, so parse it into datetimes first
vacant_df['vacant_as_of'] = vacant_df['vacant_as_of'].pipe(pd.to_datetime)

# Preview the last rows of the frame
vacant_df.tail()

Unnamed: 0,address,vacant_as_of,dwelling_type,sale_review_category,ward,district,census_tract,map_location,:@computed_region_jitf_9zyt,:@computed_region_dysk_fpxp,:@computed_region_crxq_iug7,:@computed_region_5wjj_5at9
452,1308 VICTORIA ST N,2001-10-30,Single Family Residential,2,5,10,30300,"{'latitude': '44.98060192', 'longitude': '-93....",2,3,70,76
453,466 LAUREL AVE,2000-05-03,Single Family Residential,3,1,8,35500,"{'latitude': '44.94523453', 'longitude': '-93....",9,7,87,31
454,847 HUDSON ROAD,1999-08-10,Commercial,3,7,4,34400,"{'latitude': '44.95238319', 'longitude': '-93....",15,2,197,36
455,395 MICHIGAN ST,1996-05-15,Single Family Residential,3,2,9,36900,"{'latitude': '44.93525986', 'longitude': '-93....",11,5,89,16
456,365 MICHIGAN ST,1995-07-12,Single Family Residential,2,2,9,36000,"{'latitude': '44.93495123', 'longitude': '-93....",11,5,94,17


In [18]:
# Dataset start and end dates
# Take the extremes of the date column directly instead of reading the first
# and last rows by position: this does not depend on the row order returned by
# the API or on the column order of the frame, and it avoids integer-position
# lookups on a label-indexed row (deprecated in recent pandas).
recently_vacant = vacant_df['vacant_as_of'].max()
print(f"The most recent vacant lot was recorded: {recently_vacant}")
oldest_vacant = vacant_df['vacant_as_of'].min()
print(f"The oldest recorded vacant lot is: {oldest_vacant}")

The most recent vacant lot was recorded: 2020-07-30 00:00:00
The oldest recorded vacant lot is: 1995-07-12 00:00:00
