## Connect to Chicago Data Portal API - Business Licenses Data

In [1]:
#Import dependencies
import pandas as pd
import requests
import json

# Google developer API key
from config2 import API_chi_key

In [2]:
# Build API URL
# API calls = 8000 (based on zipcode and issued search results)
# Filters: 'application type' Issued
target_URL = f"https://data.cityofchicago.org/resource/xqx5-8hwx.json?$$app_token={API_chi_key}&$limit=8000&application_type=ISSUE&zip_code="

In [3]:
# Create list of zipcodes we are examining based
# on three different businesses of interest
zipcodes = ["60610","60607","60606","60661",
            "60614","60622","60647","60654"]

In [4]:
# Create a request to get json data on business licences
responses = []
for zipcode in zipcodes:
    license_response = requests.get(target_URL + zipcode).json()
    responses.append(license_response)

In [5]:
len(responses)

8

In [6]:
# Create sepearte variables for the 8 responses for zipcodes
# Data loaded in nested gropus based on zipcodes, so
# needed to make them separate
zip_60610 = responses[0]
zip_60607 = responses[1]
zip_60606 = responses[2]
zip_60661 = responses[3]
zip_60614 = responses[4]
zip_60622 = responses[5]
zip_60647 = responses[6]
zip_60654 = responses[7]

In [7]:
# Read zipcode_responses_busi.json files into pd DF
zip_60610_data = pd.DataFrame(zip_60610)

In [8]:
# Create list of the json object variables
# excluding zip_60610 bc that will start as a DF
zip_data = [zip_60607, zip_60606, zip_60661, zip_60614,
           zip_60622, zip_60647, zip_60654]

In [9]:
# Create a new DF to save compiled business data into
all_7_zipcodes = zip_60610_data

In [10]:
# Append json objects to all_7_zipcode DF
# Print length of all_7_zipcode to check adding correctly
for zipcodes_df in zip_data:
    all_7_zipcodes = all_7_zipcodes.append(zipcodes_df)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)


In [11]:
len(all_7_zipcodes)

37679

In [12]:
# Get list of headers of all_7_zipcodes
list(all_7_zipcodes)

['account_number',
 'address',
 'application_created_date',
 'application_requirements_complete',
 'application_type',
 'business_activity',
 'business_activity_id',
 'city',
 'conditional_approval',
 'date_issued',
 'doing_business_as_name',
 'expiration_date',
 'id',
 'latitude',
 'legal_name',
 'license_approved_for_issuance',
 'license_code',
 'license_description',
 'license_id',
 'license_number',
 'license_start_date',
 'license_status',
 'license_status_change_date',
 'location',
 'longitude',
 'payment_date',
 'police_district',
 'precinct',
 'site_number',
 'ssa',
 'state',
 'ward',
 'ward_precinct',
 'zip_code']

In [13]:
# Select certain columns to show 
core_info_busi_licences = all_7_zipcodes[['legal_name', 'doing_business_as_name',
                                        'zip_code', 'license_description', 
                                        'business_activity', 'application_type', 
                                        'license_start_date', 'latitude', 'longitude']]

In [14]:
# Get an idea of the number of null values in each column
core_info_busi_licences.isna().sum()

legal_name                    0
doing_business_as_name        0
zip_code                      0
license_description           0
business_activity         15953
application_type              0
license_start_date          519
latitude                    622
longitude                   622
dtype: int64

In [15]:
# Add sepearate column for just the start year
# Will use later when selecting year businesess were created
core_info_busi_licences['start_year'] = core_info_busi_licences['license_start_date']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [16]:
# Edit 'start_year' to just include year from date information
core_info_busi_licences['start_year'] = core_info_busi_licences['start_year'].str[0:4]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [17]:
# Explore what kinds of businesses are missing "latitude" and "longitude"
# Also, the 'business_activity' licenses have null values (limited Business Licences?)
core_info_busi_licences[core_info_busi_licences.isnull().any(axis=1)]

Unnamed: 0,legal_name,doing_business_as_name,zip_code,license_description,business_activity,application_type,license_start_date,latitude,longitude,start_year
16,"HEAVENLY DAY SPA ON DIVISION, INC.",HM DAY SPA,60610,Limited Business License,,ISSUE,2009-12-10T00:00:00.000,,,2009
17,YOUNG MEN'S CHRISTIAN ASSOCIATION OF CHICAGO,YOUNG MEN'S CHRISTIAN ASSOCIATION OF CHICAGO,60610,Limited Business License,,ISSUE,2006-04-06T00:00:00.000,,,2006
18,"ALLYU 600 WEST, INC.",ALLYU SPA,60610,Limited Business License,,ISSUE,2007-08-23T00:00:00.000,,,2007
19,"4301 N Sheridan, LLC","4301 N Sheridan, LLC",60610,Residential Real Estate Developer,,ISSUE,2004-08-20T00:00:00.000,,,2004
20,"MARWEN FOUNDATION, INC.",MARWEN,60610,Children's Activities Facilities,,ISSUE,2012-04-06T00:00:00.000,,,2012
21,"FLIGHT 001, INC.","FLIGHT 001, INC",60610,Limited Business License,,ISSUE,2008-05-22T00:00:00.000,,,2008
22,christine noelle design inc,Christine Noelle Design,60610,Limited Business License,Retail Sales of General Merchandise,ISSUE,2013-07-22T00:00:00.000,,,2013
25,"ARETE & ASSOCIATES, LLC",FLORSHEIM MANSION,60610,Regulated Business License,"Bed and Breakfast Establishment, Owner-Occupie...",ISSUE,2014-05-02T00:00:00.000,,,2014
26,"DOWNTOWN ENTERTAINMENT, LLC",HQ BEERCADE II,60610,Retail Food Establishment,Retail Sales of Perishable Foods,ISSUE,2005-03-16T00:00:00.000,,,2005
28,TRATTORIA PIZZERIA ROMA INC.,WOODIE'S FLAT,60610,Outdoor Patio,Sale of Liquor Outdoors on Private Property,ISSUE,2015-03-26T00:00:00.000,,,2015


In [18]:
# Get rid of NaN values in 'latitude' and 'license_start_date'
core_info_busi_licences.dropna(subset=['latitude'], inplace=True)
core_info_busi_licences.dropna(subset=['license_start_date'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [21]:
core_info_busi_licences['application_type'].unique()

array(['ISSUE'], dtype=object)

In [22]:
# Cast 'start_year' column as an integer
core_info_busi_licences['start_year'] = core_info_busi_licences['start_year'].astype('int64')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [23]:
# Confirm that NaN values for 'latitude' and 'license_start_date' 
# were dropped
core_info_busi_licences.isna().sum()

legal_name                    0
doing_business_as_name        0
zip_code                      0
license_description           0
business_activity         15758
application_type              0
license_start_date            0
latitude                      0
longitude                     0
start_year                    0
dtype: int64

In [24]:
# Record number of businesses licenses pulled 
len(core_info_busi_licences)

36541

## Connect to sqlite database

In [None]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy import create_engine
from config2 import mysql_password

In [None]:
# Declare a Base using `automap_base()`
Base = automap_base()

In [None]:
# Create engine using the `demographics.sqlite` database file
# engine = create_engine("sqlite://", echo=False)

engine = create_engine(f'mysql://root:coolcat1015@localhost:3306/real_tech_db')

In [None]:
# Copy 'core_info_busi_licenses' db to MySql database
core_info_busi_licences.to_sql('business_licenses', 
                               con=engine, 
                               if_exists='replace',
                               index_label=True)