In [79]:
from IPython.display import HTML
# Render a "toggle code visibility" button in the cell output.
# The injected <script> defines code_toggle(), which hides or shows every
# `div.input` element and then flips the `codeVisible` flag.  code_toggle is
# also registered as the document-ready handler, and `codeVisible` starts out
# true, so that first call hides the code inputs.
# `$` is assumed to be the jQuery object provided by the notebook page -- TODO confirm
# for front-ends other than the classic notebook.
# NOTE: HTML(...) must stay the last expression in the cell so that it is displayed.
HTML("""
<script>
var codeVisible = true; 
function code_toggle() {
 if (codeVisible){
   $('div.input').hide();
 } else {
   $('div.input').show();
 }
 codeVisible = !codeVisible
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle code visibility"></form>
""")

In [3]:
import pandas as pd
# Notebook-wide pandas settings: show every column when a frame is displayed,
# and silence the chained-assignment (SettingWithCopy) warning.
pd.options.display.max_columns = None
pd.options.mode.chained_assignment = None

# Download the CSV export from the NYC open-data portal; missing values are
# replaced with empty strings.
data_url = 'https://data.cityofnewyork.us/api/views/kpav-sd4t/rows.csv?accessType=DOWNLOAD'
source_df = pd.read_csv(data_url).fillna('')

# One row per Job ID (the first occurrence is kept); `source_df` keeps all rows.
df = source_df.drop_duplicates(subset='Job ID')

Method

- We will use the midpoint of the salary range for each position.
- For the purposes of this analysis all daily and hourly positions will be shown as full time:
  - Hourly Positions will be estimated at 40 hours a week for 50 weeks
  - Daily Salaries will be estimated at 5 days a week for 50 weeks
- Time the job is left open will be used as a proxy for difficulty to fill
  - This is calculated as the time elapsed between the original posting date and the last update
- Duplicate rows are removed - only the first occurrence of each Job ID is shown
  
  
Notes:

Positions like City Seasonal Aide and City Park Worker have hundreds of low-paying open positions.

In [4]:
#df[df['Agency']=='FIRE DEPARTMENT'].head(2)

In [63]:
# Build `ordered_salaries`: one row per Agency with the position-weighted
# average annual salary, sorted from lowest to highest.
#
# Inputs (defined in an earlier cell):
#   source_df -- every posting row, including duplicate Job IDs
#   df        -- source_df de-duplicated on 'Job ID'

# Flag jobs that have at least one 'External' posting.  The IDs are taken from
# the full frame because the de-duplicated frame keeps only one row per job.
external_posted_job_ids = source_df.loc[
    source_df['Posting Type'] == 'External', 'Job ID'
].unique()
df['has_external_posting'] = df['Job ID'].isin(external_posted_job_ids)

# Work on an explicit copy so the new columns below never write into `df`.
salary = df[['Agency', '# Of Positions', 'Salary Range From', 'Salary Range To',
             'Salary Frequency', 'has_external_posting']].copy()
salary['salary_midpoint'] = (
    salary['Salary Range From']
    + (salary['Salary Range To'] - salary['Salary Range From']) / 2
)

# Annualize hourly and daily rates as full-time equivalents:
#   hourly -> 40 hours/week * 50 weeks, daily -> 5 days/week * 50 weeks.
# `.loc` replaces the `.ix` indexer, which no longer exists in pandas >= 1.0.
is_hourly = salary['Salary Frequency'] == 'Hourly'
is_daily = salary['Salary Frequency'] == 'Daily'
salary.loc[is_hourly, 'salary_midpoint'] *= 40 * 50
salary.loc[is_daily, 'salary_midpoint'] *= 5 * 50

# Total payroll represented by each posting (midpoint * number of openings).
salary['salary_total'] = salary['# Of Positions'] * salary['salary_midpoint']

# Sum per agency.  numeric_only=True keeps the numeric/boolean columns and
# skips the string column 'Salary Frequency' instead of concatenating it.
salary_grouped = salary.groupby('Agency').sum(numeric_only=True)
salary_grouped['average_salary'] = (
    salary_grouped['salary_total'] / salary_grouped['# Of Positions']
)

# Ascending order; 'Agency' is moved from the index back into a column.
ordered_salaries = (
    salary_grouped[['average_salary']]
    .sort_values('average_salary', ascending=True)
    .reset_index()
)

In [6]:
%%javascript
// Register a module path named `d3` so that later `require(['d3'], ...)` calls
// can resolve it.  The path is protocol-relative (starts with `//`), so the
// script is requested using the same scheme as the notebook page.
// NOTE(review): the path has no `.js` suffix -- presumably the loader appends it; confirm.
require.config({
    paths: {
        d3: '//d3js.org/d3.v4.min'
    }
});

<IPython.core.display.Javascript object>

In [64]:
from IPython.display import Javascript
# Hand the per-agency averages to the browser: serialise `ordered_salaries` as
# a JSON array of row records and assign it to the global `window.chartData`.
chart_records_json = ordered_salaries.to_json(orient='records')
Javascript('window.chartData={};'.format(chart_records_json))

<IPython.core.display.Javascript object>

In [86]:
from IPython.display import HTML
# Inject the stylesheet used by the rendered markdown cells and by the chart:
#   div.text_cell_render -- typography and width of markdown text
#   .bar                 -- fill colour of the chart bars
#   .axis / .x path      -- axis line styling (the x-axis domain path is hidden)
#   .toolTip             -- styling for a tooltip element
# Fix: the font-family list needs a comma between the family names; without it
# the declaration is invalid CSS and is ignored by the browser.
# NOTE: HTML(...) must stay the last expression in the cell so that it is displayed.
HTML("""
<style>
div.text_cell_render {
    font-family: 'Roboto', sans-serif;
    text-align: justify;
    line-height: 130%;
    font-size: 115%;
    width:700px;
}

.bar {
  fill: steelblue;
}
.axis path,
.axis line {
  fill: none;
  stroke: #D4D8DA;
  stroke-width: 1px;
  shape-rendering: crispEdges;
}
.x path {
  display: none;
}
.toolTip {
	position: absolute;
  display: none;
  min-width: 80px;
  height: auto;
  background: none repeat scroll 0 0 #ffffff;
  border: 1px solid #6F257F;
  padding: 14px;
  text-align: center;
}
</style>
""")

In [114]:
%%javascript
// Draw a horizontal bar chart of average salary per agency with D3.
// Reads its data from the global `window.chartData`, which an earlier cell
// sets to an array of records carrying `Agency` and `average_salary`.
require(['d3'], function(d3){
  // Drop any #chart left over from a previous run, then create a fresh
  // container.  `element` is presumably the cell's output area supplied by
  // the %%javascript magic -- TODO confirm.
  $("#chart").remove();
  element.append("<div id='chart'></div>");
  $("#chart").width("960px");
  $("#chart").height("1200px");

  // Drawing area: width = 960 - 120 - 20 = 820, height = 1100 - 30 - 20 = 1050.
  var margin = {top: 30, right: 20, bottom: 20, left: 120};
  var width = 960 - margin.left - margin.right;
  var height = 1100 - margin.top - margin.bottom;
  // The <svg> is 960 x 1100; `svg` ends up referring to the inner <g> that is
  // shifted by the left/top margins, not to the <svg> element itself.
  var svg = d3.select("#chart").append("svg")
    .attr("width", width + margin.left + margin.right)
    .attr("height", height + margin.top + margin.bottom)
    .append("g")
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")");
    
  // NOTE(review): the margins are subtracted a second time here, matching the
  // second margin translate applied to `g` below.  The plot therefore starts
  // 2 * margin.left from the left edge of the <svg>.  Possibly intentional to
  // leave room for long agency labels -- confirm before "fixing".
  var x = d3.scaleLinear().range([0, width-margin.left - margin.right]);
  // Reversed range: the first domain entry is placed at the bottom.
  var y = d3.scaleBand().range([height-margin.top - margin.bottom, 0]);
  var data = window.chartData

  // Group holding the axes and bars; translated by the margins again, relative
  // to the already-translated `svg` group.
  var g = svg.append("g")
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")");
  // x spans 0 .. largest average salary; y has one band per agency.
  x.domain([0, d3.max(data, function(d) { return d.average_salary; })]);
  y.domain(data.map(function(d) { return d.Agency; })).padding(0.1);
  // Top axis with about 5 ticks, labelled via the "$,.2s" format specifier.
  g.append("g")
    .attr("class", "x axis")
    .call(d3.axisTop(x).ticks(5).tickFormat(function(d) { return d3.format("$,.2s")(d); }));
    
  // Chart title, anchored at its middle and positioned at x = width / 2 inside
  // the first margin-translated group.
  svg.append("text")             
      .attr("transform","translate(" + (width/2) + " ," + 0 + ")")
      .style("text-anchor", "middle")
      .text("NYC City Jobs - Average Salary by Department");

  // Left axis showing the agency names.
  g.append("g")
    .attr("class", "y axis")
    .call(d3.axisLeft(y));
  // One <rect class="bar"> per record: the row comes from the band scale and
  // the length from the linear salary scale.
  g.selectAll(".bar")
    .data(data)
  .enter().append("rect")
    .attr("class", "bar")
    .attr("x", 0)
    .attr("height", y.bandwidth())
    .attr("y", function(d) { return y(d.Agency); })
    .attr("width", function(d) { return x(d.average_salary); })
});

<IPython.core.display.Javascript object>