In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from ipywidgets import widgets
import plotly.io as pio

In [2]:
payroll = pd.read_csv('/Users/Carlos/Desktop/Project_Files/Citywide_Payroll_Data__Fiscal_Year_.csv')


Columns (7,10,12,13,14,15,16) have mixed types. Specify dtype option on import or set low_memory=False.



In [3]:
pay = payroll.iloc[:500000, :]
pay['Base Salary'] = pay['Base Salary'].replace(('\,', '\.(.*)'), '', regex = True).astype(np.int64)
pay = pay.where(pay['Pay Basis'] == 'per Annum').dropna(how = 'all', axis = 0)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [4]:
mean = pay.groupby('Title Description')['Base Salary'].mean()
median = pay.groupby('Title Description')['Base Salary'].median()
minsalary = pay.groupby('Title Description')['Base Salary'].min()
maxsalary = pay.groupby('Title Description')['Base Salary'].max()
count = pay.groupby('Title Description')['Base Salary'].count()
frame = pd.DataFrame({'Mean' : mean, 'Median' : median, 'Minimum Salary' : minsalary, 'Maximum Salary' : maxsalary, 'Count' : count})
frame.reset_index(level= 'Title Description', inplace = True)
pd.DataFrame(frame)

Unnamed: 0,Title Description,Mean,Median,Minimum Salary,Maximum Salary,Count
0,* ATTENDING DENTIST,144040.0000,144040.0,144040.0,144040.0,1
1,*ADM DIR FLEET MAINTENANCE - NM,129039.0000,129039.0,129039.0,129039.0,1
2,*ADMIN SCHL SECUR MGR-MGL,143850.0000,143850.0,143850.0,143850.0,1
3,*ADMINISTRATIVE ATTORNEY,189233.0000,189233.0,172096.0,206370.0,2
4,*ASIST SYSTMS ANALYST,61546.0000,61546.0,56775.0,66317.0,2
...,...,...,...,...,...,...
1033,WATERSHED MAINTAINER,49844.0625,54243.0,35646.0,54742.0,256
1034,WIPER,85408.7500,85825.0,82495.0,85825.0,16
1035,WORKER'S COMPENSATION BENEFITS EXAMINER,46656.0000,46656.0,46656.0,46656.0,1
1036,X-RAY TECHNICIAN,55397.0000,53896.5,51566.0,62795.0,10


In [5]:
#Similar dataframe using .describe(), won't be used, just showing an alternative.
payer = pay.groupby('Title Description')['Base Salary'].apply(lambda x: pd.Series(x.values)).unstack().T
payer = payer.describe().T
payer.reset_index(level= 'Title Description', inplace = True)

In [6]:
paymath = pay.merge(frame, how = 'left')

In [7]:
fig = go.Figure()

# The code below is used to create the interactive visual, but the transition from Jupyter Notebook to GitHub removes the interactive elements. Please refer to the README.md file for more information.

In [8]:
use_date = widgets.Dropdown(
options = list(paymath['Fiscal Year'].unique()),
description = 'Year : ',
value = 2016.0)

job_list = list(paymath['Title Description'].unique())
status_list = list(paymath['Leave Status as of June 30'].unique())

container = widgets.HBox(childern = [use_date])

status = widgets.Dropdown(
    options = status_list,
    value = 'CEASED',
    description = 'Status : '
)


title = widgets.Dropdown(
    options = job_list,
    value = '* ATTENDING DENTIST',
    description = 'Job Title:   ',
)

filter_list = [i and j and k for i, j, k in zip(paymath['Fiscal Year'] == 2016.0,
                                                paymath['Leave Status as of June 30'] == 'CEASED',
                                                paymath['Title Description'] == '*ATTENDING DENTIST')]

trace1 = go.Scatter(x = paymath[filter_list]['Work Location Borough'], y = paymath[filter_list]['Base Salary'], 
                    mode = 'markers', marker_size = 10, name = 'Distribution')
trace2 = go.Bar(x = paymath[filter_list]['Mean'], y = paymath[filter_list]['Base Salary'], name = 'Mean')
trace3 = go.Bar(x = paymath[filter_list]['Median'], y = paymath[filter_list]['Base Salary'], name = 'Median')
trace4 = go.Bar(x = paymath[filter_list]['Minimum Salary'], y = paymath[filter_list]['Base Salary'], name = 'Minimum')
trace5 = go.Bar(x = paymath[filter_list]['Maximum Salary'], y = paymath[filter_list]['Base Salary'], name = 'Maximum')



g = go.FigureWidget(data = [trace1, trace2, trace3, trace4, trace5], layout = go.Layout(width = 550, height = 550, font_size = 12, 
                                                        title = dict(text = 'NYC Payroll by Borough')))

def validate():
    if title.value in paymath['Title Description'].unique():
        return True
    else:
        return False

def response(change):
    if validate():
        if use_date.value:
            filter_list = [i and j and k for i, j, k in zip(paymath['Fiscal Year'] == use_date.value, 
                                                            paymath['Leave Status as of June 30'] == status.value,
                                                            paymath['Title Description'] == title.value)]
            temp_df = paymath[filter_list]
        else:
            filter_list = [i and j for i,j in zip(paymath['Leave Status as of June 30'] == status.value,
                                                  paymath['Title Description'] == title.value)]
            temp_df = paymath[filter_list]
            
        with g.batch_update():
            g.data[0].x = temp_df['Work Location Borough']
            g.data[0].y = temp_df['Base Salary']
            g.data[1].x = ['Citywide Mean']
            g.data[1].y = temp_df['Mean']
            g.data[2].x = ['Citywide Median']
            g.data[2].y = temp_df['Median']
            g.data[3].x = ['Citywide Minimum']
            g.data[3].y = temp_df['Minimum Salary']
            g.data[4].x = ['Citywide Maximum']
            g.data[4].y = temp_df['Maximum Salary']
            g.layout.barmode = 'overlay'
            g.layout.xaxis.title = 'Location & Basic Math'
            g.layout.yaxis.title = 'Salary'
            
title.observe(response, names = "value")
status.observe(response, names = "value")
use_date.observe(response, names = "value")

container2 = widgets.HBox([title, status])
widgets.VBox([container, container2, g], renderer = 'svg')

VBox(children=(HBox(), HBox(children=(Dropdown(description='Job Title:   ', index=928, options=('COMMUNITY ASS…