In [48]:
# Change notebook styling and add a button to show/hide code.
#
# The markup is built from two adjacent string literals that Python joins
# implicitly. The `#div.output_area pre ...` line between them is a Python
# comment, which keeps that CSS rule out of the emitted markup without
# deleting it from the notebook.
#
# Fix: `font-family` takes a comma-separated list; without the comma between
# 'Roboto' and sans-serif the whole declaration is invalid CSS.
from IPython.display import HTML
HTML("""
<link href='https://fonts.googleapis.com/css?family=Roboto' rel='stylesheet' type='text/css'>
<style>
div.text_cell_render {
    font-family: 'Roboto', sans-serif;
    text-align: justify;
    line-height: 130%;
    font-size: 115%;
    width:700px;
}
div.output_area {
    font-family: 'Roboto', sans-serif;
    line-height: 130%;
    font-size: 115%;
    margin-left: 100px;
    width:750px;
}
.dataframe * {
    border: 1px solid lightgray !important;
}
div.output_prompt {display: none;}
div.prompt {display: none;}
"""
#div.output_area pre {display: none;}
"""
</style>
<script>
code_shown=true;
function code_toggle() {
 if (code_shown){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_shown = !code_shown;
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle code"></form>
""")

Dataset: https://data.cityofnewyork.us/Business/Legally-Operating-Businesses/w7w3-xahh
DCA Licenses: http://www1.nyc.gov/site/dca/businesses/licenses-apply.page

## A Look at NYC Business Licenses

New York City requires dozens of types of businesses to obtain a license from the Department of Consumer Affairs (DCA). Licenses cover everything from sidewalk cafes to laundromats to cigarette dealers, and the data is made available through [NYC Open Data](https://data.cityofnewyork.us/Business/Legally-Operating-Businesses/w7w3-xahh). This notebook explores the data.

In [35]:
import pandas as pd

# Read the DCA license export into `df`, the frame the later cells work from,
# then preview its first three rows.
df = pd.read_csv(filepath_or_buffer='data/Legally_Operating_Businesses.csv')
df.iloc[:3]

Unnamed: 0,DCA License Number,License Type,License Expiration Date,License Category,Business Name,Business Name 2,Address Building,Address Street Name,Secondary Address Street Name,Address City,Address State,Address ZIP,Contact Phone Number,Address Borough,Detail
0,2015059-DCA,Business,07/31/2017,Secondhand Dealer - General,AGUIRRE INC.,,21,WESTCHESTER SQ,,BRONX,NY,10461,3478107977,Bronx,
1,1219822-DCA,Business,03/15/2017,Amusement Device Portable,LIFETIME VENDING &AMUSEMENTS INC,FIRE HOUSE,4410,AUSTIN BLVD,,ISLAND PARK,NY,11558,5166702294,Outside NYC,Device Name: FIRE HOUSE
2,1357603-DCA,Individual,02/28/2017,Home Improvement Salesperson,"BELFOR, HENRY",,1716,E 37TH ST,,BROOKLYN,NY,11234,9176783915,,


In [108]:
# Extracting Zip Code Map
# Tally business licenses per ZIP code and write the table out as a CSV
# (index labelled 'Zipcode', single column 'License Count').
df_zip = df.loc[df['License Type'] == 'Business']
export_df = (
    df_zip['Address ZIP']
    .value_counts()
    .rename_axis('Zipcode')
    .to_frame('License Count')
)
export_df.to_csv('data/licenses_by_zip_all.csv', index=True)


In [36]:
# Count by Zip and Type
# Number of business licenses for every (ZIP, license category) pair,
# written to CSV with columns ZIP / License Category / License Count.
df_zip = df[df['License Type'] == 'Business']
export_df = (
    df_zip.groupby(['Address ZIP', 'License Category'])
    .size()
    # Name the count column directly instead of relabelling columns by position.
    .reset_index(name='License Count')
    .rename(columns={'Address ZIP': 'ZIP'})
)
export_df.to_csv('data/licenses_by_zip_and_type.csv', index=False)


### License Overview

We exclude licenses for individuals, leaving 49,983 business licenses.

In [64]:
# Show how many licenses are of each License Type.
# print() with parentheses runs on Python 3 and prints the same text on Python 2.
print('License Types Counts:')
pd.DataFrame(df['License Type'].value_counts())

License Types Counts:


Unnamed: 0,License Type
Business,49983
Individual,27497


In [65]:
# Business licenses per borough, set against borough population.
# print() with parentheses runs on Python 3 and prints the same text on Python 2.
print('Licenses by Borough (Businesses Only):')
# Borough of every business license; blank boroughs are labelled so they are
# still counted in the tally below.
df_a = df.loc[df['License Type'] == 'Business', 'Address Borough'].fillna('Not Listed')
#Population Estimates: https://www1.nyc.gov/site/planning/data-maps/nyc-population/current-future-populations.page
populations = pd.Series({
        "Brooklyn":2636735,
        "Queens":2339150,
        "Manhattan":1644518,
        "Bronx":1455444,
        "Staten Island":474558
    })
# Name both sides explicitly so the result does not depend on the default
# column labels produced by value_counts()/to_frame().
df_l = df_a.value_counts().to_frame('licenses').join(populations.rename('population'))
# Index labels with no population entry above get NaN from the join and are dropped.
df_l = df_l.dropna()
df_l['Licenses Per 1000 Residents'] = df_l['licenses'] / (df_l['population'] / 1000)
df_l

Licenses by Borough (Businesses Only):


Unnamed: 0,licenses,population,Licenses Per 1000 Residents
Brooklyn,12775,2636735.0,4.845007
Queens,12283,2339150.0,5.251053
Manhattan,10874,1644518.0,6.612272
Bronx,6073,1455444.0,4.17261
Staten Island,2842,474558.0,5.988731


BK Top 3: Home Imp, Cig, Secondhand Dealer
Queens Top 3: Home Imp, Cig, Secondhand Dealer
Manhat Top 3: Cig, Secondhand Dealer, Home Imp

First three in varying order:
- Cigarette Retail Dealer
- Home Improvement Contractor
- Secondhand Dealer - General

#4 by count
- Manhattan: Sidewalk Cafe
- Bronx: Electronics Store
- Brooklyn: Electronics Store
- Staten Island: Amusement Device Portable
- Queens: Electronics Store


[Amusement Device Portable](http://www1.nyc.gov/site/dca/businesses/license-checklist-amusement-device-portable.page) is a license for "a truck-mounted and/or inflatable amusement device that is designed to be operated on the vehicle that transports it". Is this a bouncy house license?

When comparing licenses by borough, the three most frequent license types were always 'Cigarette Retail Dealer', 'Home Improvement Contractor', and 'Secondhand Dealer - General', although their order varied. Fourth place differs between boroughs and is shown below:

In [89]:
# Fourth most common license category in each borough, entered by hand.
boroughs = ['Manhattan', 'Brooklyn', 'Queens', 'Bronx', 'Staten Island']
categories = [
    'Sidewalk Cafe',
    'Electronics Store',
    'Electronics Store',
    'Electronics Store',
    'Amusement Device Portable',
]
fourth_place = pd.Series(categories, index=boroughs, name='Fourth Most Common License')
# Display as a one-column table, ordered by category name from Z to A.
fourth_place.sort_values(ascending=False).to_frame()

Unnamed: 0,Fourth Most Common License
Manhattan,Sidewalk Cafe
Queens,Electronics Store
Brooklyn,Electronics Store
Bronx,Electronics Store
Staten Island,Amusement Device Portable


Note: sightseeing guides and temporary street fair vendors are excluded from this dataset

In [90]:
# Tally business licenses by License Category.
df_f = df.loc[df['License Type'] == 'Business', 'License Category']
# print() with parentheses runs on Python 3 and prints the same text on Python 2.
print('Licence Type Counts (Businesses Only)')
pd.DataFrame(df_f.value_counts())

Licence Type Counts (Businesses Only)


Unnamed: 0,License Category
Home Improvement Contractor,14278
Cigarette Retail Dealer,8973
Secondhand Dealer - General,5092
Electronics Store,4379
Laundry,2371
Stoop Line Stand,2016
Laundry Jobber,1609
Electronic & Appliance Service,1599
Debt Collection Agency,1502
Sidewalk Cafe,1421


### Licenses By Zipcode

In [44]:
%%javascript
require.config({
  paths: {
      cartodb: '//libs.cartocdn.com/cartodb.js/v3/3.15/cartodb.js'
  }
});
element.append("<div id='container'><div id='map'></div></div>");

<IPython.core.display.Javascript object>

In [47]:
from IPython.display import Javascript
#runs arbitrary javascript, client-side
# The snippet assigns a window.onload handler that calls cartodb.createVis on
# the element with id 'map', limits the map zoom to 10-13 once the
# visualization is ready, and logs any error to the browser console.
# NOTE(review): assumes a global `cartodb` object and a #map element already
# exist in the page when the handler runs -- TODO confirm.
# NOTE(review): the handler only runs if the window's load event fires after
# this assignment; in an already-loaded notebook page it presumably never
# does -- confirm.
# NOTE(review): `options` is assigned but not used anywhere in this snippet.
Javascript("""
            options = {}
            window.onload = function() {
              cartodb.createVis('map', 'http://aplucche.carto.com/api/v2/viz/ba1b648a-7c45-11e6-acb6-0e98b61680bf/viz.json')
                .done(function(vis, layers) {
                // Set max zoom
                vis.map.set({
                  minZoom: 10,
                  maxZoom: 13
                });
              })
              .error(function(err) {
                console.log(err);
              });
            }
           """)

<IPython.core.display.Javascript object>

In [49]:
# Same CartoDB visualization attempted as raw HTML: a <script> tag pulling
# cartodb.js, a #map container, and an inline script that assigns a
# window.onload handler calling cartodb.createVis with zoom limited to 10-13.
# NOTE(review): both URLs use http://, while the iframe embeds later in the
# notebook use https:// -- a browser may block these as mixed content when the
# notebook is served over https; confirm.
# NOTE(review): the handler only runs if the window's load event fires after
# this markup is inserted -- confirm that it does in a live notebook.
HTML("""
<script src="http://libs.cartocdn.com/cartodb.js/v3/3.15/cartodb.js"></script>
<div id='container'>
  <div id='map'></div>
</div>
<script>
options = {}
window.onload = function() {
  cartodb.createVis('map', 'http://aplucche.carto.com/api/v2/viz/ba1b648a-7c45-11e6-acb6-0e98b61680bf/viz.json')
    .done(function(vis, layers) {
    // Set max zoom
    vis.map.set({
      minZoom: 10,
      maxZoom: 13
    });
  })
  .error(function(err) {
    console.log(err);
  });
}
</script>
""")

### Map of Counts by Zip - All License Types

In [50]:
from IPython.display import HTML
# Embed the hosted Carto map in an iframe (full cell width, 520px tall).
# The viz id in this URL is the same one used by the createVis calls earlier
# in the notebook.
HTML("""
<iframe width="100%" height="520" frameborder="0" src="https://aplucche.carto.com/viz/ba1b648a-7c45-11e6-acb6-0e98b61680bf/embed_map" allowfullscreen webkitallowfullscreen mozallowfullscreen oallowfullscreen msallowfullscreen></iframe>
""")

### Map of Counts by Zip - Sidewalk Cafes

In [51]:
from IPython.display import HTML
# Embed a second hosted Carto map in an iframe (full cell width, 520px tall);
# only the viz id in the URL differs from the previous embed.
HTML("""
<iframe width="100%" height="520" frameborder="0" src="https://aplucche.carto.com/viz/e2ed7bec-808b-11e6-aa89-0e3ff518bd15/embed_map" allowfullscreen webkitallowfullscreen mozallowfullscreen oallowfullscreen msallowfullscreen></iframe>
""")