## Obtain Large Datasets  
The file names used below should already be listed in the `.gitignore` file, so the downloaded files will never be pushed to the repo.

In [2]:
# Source URLs for the City of Chicago open-data portal datasets used below.
# Main website: https://data.cityofchicago.org/Health-Human-Services/Food-Inspections/4ijn-s7e5
# Each dataset gets a bulk-download URL (CSV or shapefile) and a JSON
# resource URL (the `_soda` names).

# food inspections
url_food_inspections_csv = 'https://data.cityofchicago.org/api/views/4ijn-s7e5/rows.csv?accessType=DOWNLOAD'
# The original name misspelled "csv" as "cvs"; it is kept as an alias so
# existing references to it keep working.
url_food_inspections_cvs = url_food_inspections_csv
url_food_inspections_soda = 'https://data.cityofchicago.org/resource/4ijn-s7e5.json'

# all business licenses
url_all_businesses_csv = 'https://data.cityofchicago.org/api/views/r5kz-chrr/rows.csv?accessType=DOWNLOAD'
url_all_businesses_soda = 'https://data.cityofchicago.org/resource/r5kz-chrr.json'

# active business licenses
url_active_businesses_csv = 'https://data.cityofchicago.org/api/views/uupf-x98q/rows.csv?accessType=DOWNLOAD'
url_active_businesses_soda = 'https://data.cityofchicago.org/resource/uupf-x98q.json'

# city boundaries
# NOTE(review): the shapefile export and the JSON resource use different
# dataset ids (ewy2-6yfk vs qqq8-j68g) - confirm both refer to the same
# boundaries dataset.
url_boundaries_shapefile = 'https://data.cityofchicago.org/api/geospatial/ewy2-6yfk?method=export&format=Shapefile'
url_boundaries_soda = 'https://data.cityofchicago.org/resource/qqq8-j68g.json'

### Obtain Data through CSV Download

In [23]:
# Dependencies
import os
import requests

In [24]:
# Map each local output file name to the URL it is downloaded from.
dataset_id = {
    'food_inspections.csv': url_food_inspections_cvs,
    'all_licensed_businesses.csv': url_all_businesses_csv,
    'active_licensed_businesses.csv': url_active_businesses_csv,
    'Chicago_Boundaries.zip': url_boundaries_shapefile,
}

In [25]:
# Download each dataset and save it under its file name in the current
# working directory.
for file_name, url in dataset_id.items():
    # Stream the body so a large file is never held fully in memory. The
    # timeout is (connect, read) seconds so a stalled server cannot hang
    # the cell forever.
    with requests.get(url, stream=True, timeout=(10, 300)) as response:
        # Fail loudly on an HTTP error instead of saving an error page as data.
        response.raise_for_status()
        with open(file_name, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)

### Obtain Data through Socrata API  
With this API I could add filters to my query, but for my data analysis I would like to start with all of the data.  
I am not continuing with this method because I don't want to spend time paginating the results and dealing with potential throttling issues.

In [None]:
# Dependencies
import pandas as pd
from sodapy import Socrata

In [20]:
# Map each local output file name to the Socrata dataset id it is fetched from.
dataset_id = {
    'food_inspections.csv': '4ijn-s7e5',
    'all_licensed_businesses.csv': 'r5kz-chrr',
    'active_licensed_businesses.csv': 'uupf-x98q',
    'Chicago_Boundaries.zip': 'qqq8-j68g',
}

In [22]:
# Fetch each dataset through the Socrata API and write it to a file with pandas.
# NOTE: client.get() is called once per dataset with no limit/offset, so only
# the first page of records is saved (the original note says 999 records;
# presumably this is the API's default page size - confirm in the SODA docs).
client = Socrata("data.cityofchicago.org", None)  # no app token supplied
try:
    for k, v in dataset_id.items():
        results = client.get(v)
        results_df = pd.DataFrame.from_records(results)
        results_df.to_csv(k)
finally:
    # Release the client's underlying HTTP session even if a request fails.
    client.close()



### Other interesting datasets  
I may want to add this dataset to the project. Since I only want restaurant complaints, the Socrata API's filtering might be helpful here, but I could do the same filtering just as easily with pandas.  

In [None]:
# 311 Restaurant Complaints
# URL of the 311 Service Requests dataset on the Chicago data portal.
# NOTE(review): this looks like the dataset's portal page rather than a
# direct CSV/JSON download endpoint - confirm before passing it to a downloader.
url = 'https://data.cityofchicago.org/Service-Requests/311-Service-Requests/v6vf-nfxy'