In [114]:
# Put all the imports here
import folium
import pandas as pd
import plotly as ply
import plotly.graph_objs as go

# Make plotly work in offline mode
from plotly.offline import init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

In [99]:
# Read the two input tables used throughout the notebook.
# NOTE: h1b.csv is too large to ship with the repository; download it via the
# link in README.md before running this cell.
geo_frame = pd.read_csv('./data/state_geocodes.csv')
h1b_frame = pd.read_csv('./data/h1b.csv')

In [7]:
#### Heat Map of Applications by State ######
# Build a per-state table (index: state name; columns: 'code', 'count') from the
# geocode lookup, fill 'count' from the 100 most frequent worksites, and render
# the result as a choropleth saved to ./graphs/applications_by_states.html.
states = geo_frame['name'].tolist()
states_frame = geo_frame.drop('fips', axis=1)
states_frame['count'] = 0
states_frame = states_frame.set_index('name')

top_cities = h1b_frame['WORKSITE'].value_counts().head(100)

for worksite, count in top_cities.items():
    # The state name is taken from the second comma-separated field of WORKSITE.
    parts = worksite.split(',')
    if len(parts) < 2:
        # No comma in the worksite: there is no state field to read.
        continue
    state = parts[1].strip().title()
    # Skip names missing from the lookup: .at would otherwise append a new row
    # that has no 'code'. Accumulate with += because several of the top
    # worksites can belong to the same state.
    if state in states_frame.index:
        states_frame.at[state, 'count'] += count

# Keep only the states that received at least one application.
states_frame = states_frame[states_frame['count'] != 0]

# plot
map_data = './data/map/us_states.json'
m = folium.Map(location=[37, -102], zoom_start=5)

# NOTE(review): newer folium releases replace Map.choropleth with
# folium.Choropleth(...).add_to(m) -- confirm against the installed version.
m.choropleth(
    geo_data=map_data,
    name='2011-2016 H-1B Visa Applications',
    data=states_frame,
    columns=['code', 'count'],  # key column first, value column second
    key_on='feature.id',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='# of Applications'
)

folium.LayerControl().add_to(m)
m.save('./graphs/applications_by_states.html')

In [163]:
# Stacked bar chart of the number of applications per year, split into
# 'CERTIFIED' (meeting the minimum requirement) and every other case status.
is_certified = h1b_frame['CASE_STATUS'] == 'CERTIFIED'

# size() counts the rows in each YEAR group directly, so the tally does not
# depend on the incidental 'Unnamed: 0' column produced by the CSV export.
cert_per_year = h1b_frame.loc[is_certified].groupby('YEAR').size()
non_cert_per_year = h1b_frame.loc[~is_certified].groupby('YEAR').size()

trace_cert = go.Bar(
    x=cert_per_year.index.values,
    y=cert_per_year.values,
    name="Certified",
    text=cert_per_year.values,  # print the count on each bar
    textposition='auto',
)
trace_non_cert = go.Bar(
    x=non_cert_per_year.index.values,
    y=non_cert_per_year.values,
    name="Non-Certified",
    text=non_cert_per_year.values,
    textposition='auto',
)

data = [trace_cert, trace_non_cert]
layout = go.Layout(
    title="Certified vs Non-Certified H1B applications per Year",
    barmode='stack'
)
fig = go.Figure(
    data=data,
    layout=layout
)
iplot(fig,
      filename='stacked-bar',
)

In [216]:
# Certified H1Bs by State
# Tally certified applications per state and draw two bar charts: every state
# in ascending order of count, then only the ten states with the highest counts.


def _plot_state_bar(frame, title):
    """Draw one bar chart of frame['count'] labelled by frame['code'].

    frame: DataFrame with 'code' and 'count' columns, already in display order.
    title: chart title.
    """
    counts = frame['count'].values
    trace = go.Bar(x=frame['code'].values,
                   y=counts,
                   name="State",
                   text=counts,  # print the count on each bar
                   textposition='auto')
    layout = go.Layout(title=title, barmode='stack')
    iplot(go.Figure(data=[trace], layout=layout), filename='stacked-bar')


certified_worksites = (
    h1b_frame.loc[h1b_frame['CASE_STATUS'] == 'CERTIFIED', 'WORKSITE']
    .value_counts()
)

states = geo_frame['name'].tolist()
states_frame = geo_frame.drop('fips', axis=1)
states_frame['count'] = 0
states_frame = states_frame.set_index('name')

for worksite, count in certified_worksites.items():
    # The state name is taken from the second comma-separated field of WORKSITE.
    parts = worksite.split(',')
    if len(parts) < 2:
        # No comma in the worksite: there is no state field to read.
        continue
    state = parts[1].strip().title()
    # Only count states present in the geocode lookup.
    if state in states_frame.index:
        states_frame.at[state, 'count'] += count

# Ascending sort, so the largest states end up at the tail of the frame.
states_frame = states_frame.sort_values('count')

_plot_state_bar(states_frame, "Certified H1Bs by State")

# Top 10 states
_plot_state_bar(states_frame.tail(10), "Top 10 States by Certified H1Bs")

In [207]:
# Display the per-state table (index: state name; columns: 'code', 'count').
states_frame

Unnamed: 0_level_0,code,count
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama,AL,8578
Alaska,AK,1066
Arizona,AZ,37075
Arkansas,AR,13334
California,CA,482904
Colorado,CO,27078
Connecticut,CT,44883
Delaware,DE,15907
District Of Columbia,DC,19262
Florida,FL,91728


In [212]:
# Display only the 'code' column; the double brackets keep the result a DataFrame.
states_frame[['code']]

Unnamed: 0_level_0,code
name,Unnamed: 1_level_1
Wyoming,WY
Montana,MT
Alaska,AK
South Dakota,SD
Vermont,VT
West Virginia,WV
North Dakota,ND
Hawaii,HI
Idaho,ID
Maine,ME
