In [None]:
# Notebook presentation cell.
# Injects custom CSS (Roboto web font, fixed-width text/output columns,
# bordered DataFrame tables, hidden prompts) and a small script plus a button
# that shows/hides the code input cells. The script relies on the `$` global
# being present on the page, and it runs code_toggle once on document ready,
# so code inputs start out hidden.
from IPython.display import HTML
HTML("""
<link href='https://fonts.googleapis.com/css?family=Roboto' rel='stylesheet' type='text/css'>
<style>
div.text_cell_render {
    font-family: 'Roboto', sans-serif;
    text-align: justify;
    line-height: 130%;
    font-size: 115%;
    width:700px;
}
div.output_area {
    font-family: 'Roboto', sans-serif;
    line-height: 130%;
    font-size: 115%;
    margin-left: 100px;
    width:750px;
}
.dataframe * {
    border: 1px solid lightgray !important;
}
div.output_prompt {display: none;}
div.prompt {display: none;}
"""
# The string literal is split here on purpose: the CSS rule below is disabled
# by keeping it as a Python comment between two adjacent literals, which Python
# concatenates into a single string.
#div.output_area pre {display: none;}
"""
</style>
<script>
code_shown=true; 
function code_toggle() {
 if (code_shown){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_shown = !code_shown
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle code"></form>
""")

## A Look at NYC Business Licenses

New York requires dozens of types of businesses to register for a license through the Department of Consumer Affairs (DCA). Licenses cover everything from sidewalk cafes to laundromats to cigarette dealers, and the data is made available through [NYC Open Data](https://data.cityofnewyork.us/Business/Legally-Operating-Businesses/w7w3-xahh). This notebook explores the data set.

Dataset: https://data.cityofnewyork.us/Business/Legally-Operating-Businesses/w7w3-xahh  
DCA License Information: http://www1.nyc.gov/site/dca/businesses/licenses-apply.page

### License Overview

Individual and Business licenses are recorded in this dataset. The majority of individual licenses (57.8%) are Home Improvement Salesperson Licenses with Tow Truck Drivers and Locksmiths taking the #2 and #3 spots.  Individual licenses are excluded from the rest of this notebook leaving 49,983 business licenses.


In [None]:
# Show how many records fall under each 'License Type' value.
# print() with a single argument produces the same output on Python 2 and
# Python 3; the bare print statement is a SyntaxError on Python 3.
print('License Types Counts:')
pd.DataFrame(df['License Type'].value_counts())

### Licenses by Borough (Businesses Only)

Manhattan has the most licenses per resident with Staten Island coming in 2nd place.

In [None]:
# Borough recorded on each business license; missing boroughs are labelled
# 'Not Listed'.
df_a = df.loc[df['License Type'] == 'Business', 'Address Borough'].fillna('Not Listed')

# Borough population estimates, taken from
# https://www1.nyc.gov/site/planning/data-maps/nyc-population/current-future-populations.page
populations = pd.Series({
    "Brooklyn": 2636735,
    "Queens": 2339150,
    "Manhattan": 1644518,
    "Bronx": 1455444,
    "Staten Island": 474558,
})

# Left-join the per-borough license counts to the populations on borough name.
# Index values with no population entry (such as 'Not Listed') get NaN from
# the join and are removed by dropna().
df_l = (
    df_a.value_counts()
    .to_frame()
    .join(populations.to_frame())
    .dropna()
)
# Columns are renamed by position: first the counts, then the joined populations.
df_l.columns = ['licenses', 'population']
df_l['Licenses Per 1000 Residents'] = df_l['licenses'] / (df_l['population'] / 1000)
df_l

*Population Estimate Source: https://www1.nyc.gov/site/planning/data-maps/nyc-population/current-future-populations.page*

### Fourth Most Common License Type by Borough

All boroughs shared the same three most common license types (although the order varied).  These were 'Cigarette Retail Dealer', 'Home Improvement Contractor', and 'Secondhand Dealer - General'.  Fourth place started to vary somewhat and is shown below:

In [None]:
# Hard-coded table of the fourth most common license category per borough.
# The Series is given an empty name so the rendered column has a blank header.
fourth_place = pd.Series(
    {
        'Manhattan': 'Sidewalk Cafe',
        'Brooklyn': 'Electronics Store',
        'Queens': 'Electronics Store',
        'Bronx': 'Electronics Store',
        'Staten Island': 'Amusement Device Portable',
    },
    name='',
)
# Display as a one-column table, ordered by category name descending.
fourth_place.sort_values(ascending=False).to_frame()

Note: sightseeing guides and temporary street fair vendors are excluded from this dataset

### All License Type Counts (Businesses Only)

In [None]:
# 'License Category' of every business license, then a table of how often
# each category occurs.
df_f = df.loc[df['License Type'] == 'Business', 'License Category']
df_f.value_counts().to_frame()

In [None]:
# Write the per-ZIP license counts consumed by the zipcode maps.
# Both exports are built from the same business-only subset of df.
df_zip = df[df['License Type'] == 'Business']

# Export 1: total business licenses per ZIP (the ZIP is written as the index column).
export_df = df_zip['Address ZIP'].value_counts().to_frame()
export_df.index.name = 'Zipcode'
export_df.columns = ['License Count']
export_df.to_csv('data/licenses_by_zip_all.csv', index=True)

# Export 2: business licenses per (ZIP, license category) pair.
export_df = (
    df_zip[['Address ZIP', 'License Category']]
    .groupby(['Address ZIP', 'License Category'])
    .size()
    .reset_index()
)
# reset_index() leaves the size column unnamed, so all three columns are
# renamed by position.
export_df.columns = ['ZIP', 'License Category', 'License Count']
export_df.to_csv('data/licenses_by_zip_and_type.csv', index=False)

### Map of Counts by Zip - All License Types

In [None]:
# Render an <iframe> whose src is an embed_map URL hosted on carto.com
# (presumably the all-license-types map named in the heading above).
# HTML was already imported in the first cell; it is re-imported here.
from IPython.display import HTML
HTML("""
<iframe width="100%" height="520" frameborder="0" src="https://aplucche.carto.com/viz/ba1b648a-7c45-11e6-acb6-0e98b61680bf/embed_map" allowfullscreen webkitallowfullscreen mozallowfullscreen oallowfullscreen msallowfullscreen></iframe>
""")

### Map of Counts by Zip - Sidewalk Cafes

In [None]:
# Render an <iframe> whose src is an embed_map URL hosted on carto.com
# (presumably the sidewalk cafe map named in the heading above).
# HTML was already imported in the first cell; it is re-imported here.
from IPython.display import HTML
HTML("""
<iframe width="100%" height="520" frameborder="0" src="https://aplucche.carto.com/viz/e2ed7bec-808b-11e6-aa89-0e3ff518bd15/embed_map" allowfullscreen webkitallowfullscreen mozallowfullscreen oallowfullscreen msallowfullscreen></iframe>
""")

### Map of Counts by Zip - Cigarette Retail Dealers

In [None]:
# Render an <iframe> whose src is an embed_map URL hosted on carto.com
# (presumably the cigarette retail dealer map named in the heading above).
# This cell has no import of its own: it relies on the
# `from IPython.display import HTML` executed in an earlier cell.
HTML("""
<iframe width="100%" height="520" frameborder="0" src="https://aplucche.carto.com/viz/39d3f738-6912-48b0-b285-16e5644507fb/embed_map" allowfullscreen webkitallowfullscreen mozallowfullscreen oallowfullscreen msallowfullscreen></iframe>
""")

### Map of Counts by Zip - Debt Collection Agencies