# Dynamic Manning Report - Doctor Data

VADM Burke, Chief of Naval Personnel, ordered LTJGs Dannelly, Calcagno, Shaw, and Kuzma to build a prototype of a job-seeker / job-owner matching algorithm in one month to aid personnel detailing. This report offers an overview of the team’s results utilizing medical corps data provided by CDR Gingo. 

## Data Ingestion

In [1]:
#Import Libraries
import pandas as pd
import pref_metrics as pm

#Path to files
#NOTE(review): absolute, machine-specific directory; the three CSVs below are read from it
file_path = '/Users/z/Documents/Data Cleaning/'

#Load the input tables in one pass; the first CSV column becomes each frame's index
#  S_df - Sailors
#  O_df - Job Owners
#  A_df - Availability
S_df, O_df, A_df = (
    pd.read_csv(file_path + csv_name, index_col=0)
    for csv_name in ("med_S.csv", "med_O.csv", "med_A.csv")
)

## Pre-Match Metrics
This section offers statistical overviews of the sailors and job owners in the given dataset.

In [2]:
#Overall numbers of Sailors and Job Owners
#(taken from the row count and column count of O_df respectively)
total_S, total_O = len(O_df.index), len(O_df.columns)

#Get unranked Job Owners and Sailors
#A row of O_df whose entries all equal total_S, or a row of S_df whose entries
#all equal total_O, is collected here as "unranked"
unranked_O = list(O_df.T.columns[(O_df.T == total_S).all()])
unranked_S = list(S_df.T.columns[(S_df.T == total_O).all()])

#Get number of rankings for each command
#pd.value_counts is deprecated since pandas 2.1; pd.Series.value_counts is the supported equivalent
#NOTE(review): the lines above compare S_df against total_O and O_df against total_S,
#but the two .drop() calls below use the opposite totals -- confirm which sentinel is intended
pref_counts_O = S_df.T.apply(pd.Series.value_counts).drop(total_S)

#Get number of rankings for each sailor
pref_counts_S = O_df.T.apply(pd.Series.value_counts).drop(total_O)

#Calculate Metrics
#pm.pref_metrics returns a pair of frames for each preference table
O_metric_df, S_sim_df = pm.pref_metrics(O_df)
S_metric_df, O_sim_df = pm.pref_metrics(S_df)

## Matching Algorithms

In [6]:
#Mixed Integer
from opt import opt
from da import da
#Run the matcher from the local `opt` module on the sailor, job-owner and availability tables
MIP_results = opt(S_df, O_df, A_df)
#Run the matcher from the local `da` module twice; the calls differ only in the `optimal` flag
#presumably 'o' = job-owner-optimal and 's' = sailor-optimal -- TODO confirm against da.py
algo_O_results = da(S_df, O_df, A_df,optimal='o')
algo_S_results = da(S_df, O_df, A_df,optimal='s')
#NOTE(review): the recorded output of this cell ends with
#"AttributeError: module 'matching' has no attribute 'Player'", so at least one call did not complete.
#Later cells read `matched_results`, which is not assigned here -- confirm which result it should be.


                Solution status: optimal_inaccurate
                Num om Assignments given: 57
                Num of Assigments expected: 57.0
                


AttributeError: module 'matching' has no attribute 'Player'

In [212]:
#Keep only the positive entries of matched_results and list their (row label, column label) pairs
#NOTE(review): matched_results is not assigned in any visible cell -- confirm where it comes from
matches = list(matched_results[matched_results > 0].stack().index)
def to_dict(touples):
    """Group (key, value) pairs by key.

    Parameters
    ----------
    touples : iterable of 2-item pairs
        Each item is unpacked as ``(key, value)``.

    Returns
    -------
    dict
        Maps every key to a tuple of its values, in the order the pairs
        were encountered; keys appear in first-seen order.
    """
    grouped = {}
    for key, value in touples:
        # Start a fresh list the first time a key shows up, then extend it
        grouped.setdefault(key, []).append(value)

    # Freeze each collected list into a tuple for the final mapping
    return {key: tuple(values) for key, values in grouped.items()}
#Group the matched pairs by their first element, then display the mapping as the cell output
match_dict = to_dict(matches)
match_dict

{'command_A': ('officer_55',),
 'command_B': ('officer_33',),
 'command_C': ('officer_06', 'officer_55'),
 'command_D': ('officer_16',),
 'command_E': ('officer_11',
  'officer_12',
  'officer_17',
  'officer_24',
  'officer_46',
  'officer_49'),
 'command_F': ('officer_16',),
 'command_G': ('officer_16',),
 'command_H': ('officer_02',
  'officer_12',
  'officer_27',
  'officer_42',
  'officer_53',
  'officer_58'),
 'command_I': ('officer_07',),
 'command_J': ('officer_10', 'officer_22'),
 'command_K': ('officer_39', 'officer_42'),
 'command_L': ('officer_16', 'officer_18', 'officer_19', 'officer_47'),
 'command_M': ('officer_15', 'officer_29', 'officer_34', 'officer_54'),
 'command_N': ('officer_04',
  'officer_08',
  'officer_30',
  'officer_40',
  'officer_56'),
 'command_O': ('officer_06', 'officer_39', 'officer_42', 'officer_55'),
 'command_P': ('officer_01', 'officer_48'),
 'command_Q': ('officer_02',
  'officer_06',
  'officer_31',
  'officer_40',
  'officer_49'),
 'command_R': 

In [197]:
#Encode every cell as "<O_df value>_<S_df value>" after masking both tables with matched_results,
#then count how often each combination occurs.
#presumably matched_results is 0 for non-matches, which is why "0_0" is discarded below -- TODO confirm
pref_counts = ((matched_results.T * O_df).astype(str) + '_' + (matched_results * S_df).T.astype(str)).stack().value_counts()
pref_df = pd.DataFrame(pref_counts).drop("0_0").reset_index()
#Split each "a_b" key into its job-owner part (column 0) and sailor part (column 1)
goals = pref_df["index"].str.split("_", n = 1, expand = True)
labels = "JO-" + goals[0]
pref_viz_df = pd.DataFrame(columns=["Source","Target","Value","Color","Labels"])
#Take the counts positionally: that column is named 0 on older pandas and "count" on pandas >= 2.0
pref_viz_df["Value"] = pref_df.iloc[:, 1]
#Series.append was removed in pandas 2.0; pd.concat builds the same combined, de-duplicated label list
#NOTE(review): this assignment aligns on the index set by "Value", so any labels beyond the
#number of link rows are silently dropped -- confirm the node list can never be longer than the link list
pref_viz_df["Labels"] = pd.concat([labels, "S-" + goals[1]], ignore_index=True).drop_duplicates().reset_index(drop=True)
pref_viz_df["Color"] = "#262C46"
pref_viz_df.fillna(0, inplace=True)
for index, row in goals.iterrows():
    #Look up the node position of each endpoint by exact label equality.
    #The earlier substring/regex match let "JO-2" also hit "JO-20" and relied on row order to pick the right one.
    pref_viz_df.at[index,"Source"] = pref_viz_df.index[pref_viz_df["Labels"] == "JO-" + row[0]].tolist()[0]
    pref_viz_df.at[index,"Target"] = pref_viz_df.index[pref_viz_df["Labels"] == "S-" + row[1]].tolist()[0]

pref_viz_df

Unnamed: 0,Source,Target,Value,Color,Labels
0,0,7,10,#262C46,JO-1
1,1,7,9,#262C46,JO-2
2,2,7,4,#262C46,JO-4
3,3,7,4,#262C46,JO-3
4,1,8,4,#262C46,JO-5
5,4,7,4,#262C46,JO-20
6,3,8,4,#262C46,JO-6
7,3,9,3,#262C46,S-1
8,5,9,2,#262C46,S-2
9,0,10,2,#262C46,S-3


In [200]:
import plotly.plotly as py

#Sankey trace: node labels/colours and link endpoints/weights all come from pref_viz_df
data_trace = {
    'type': 'sankey',
    'domain': {
        'x': [0,1],
        'y': [0,1],
    },
    'orientation': "h",
    'valueformat': ".0f",
    'node': {
        'pad': 10,
        'thickness': 30,
        'line': {
            'color': "black",
            'width': 0.5,
        },
        'label': pref_viz_df['Labels'].dropna(axis=0, how='any'),
        'color': pref_viz_df['Color'],
    },
    'link': {
        'source': pref_viz_df['Source'].dropna(axis=0, how='any'),
        'target': pref_viz_df['Target'].dropna(axis=0, how='any'),
        'value': pref_viz_df['Value'].dropna(axis=0, how='any'),
    },
}

#Figure-level settings: title, canvas size and font size
layout = {
    'title': "Results: Job Owner Preference to Sailor Preference",
    'height': 772,
    'width': 950,
    'font': {
        'size': 10,
    },
}


#Assemble the figure and render it inline, skipping plotly's figure validation
fig = {'data': [data_trace], 'layout': layout}
py.iplot(fig, validate=False)

NameError: name 'test' is not defined

Unnamed: 0,Competitiveness,Specialization
officer_01,0.9311,0.794872
officer_02,0.9321,0.89071
officer_03,0.9251,0.240506
officer_04,0.9278,0.722222
officer_05,0.9279,0.57265
officer_06,0.9298,0.798489
officer_07,0.9321,0.699248
officer_08,0.9268,0.810427
officer_09,0.9242,0.0
officer_10,0.9283,0.804401


In [217]:
#Scatter of Competitiveness (log-scaled x axis) against Specialization, one marker per row of O_metric_df
#NOTE(review): the trace name '2007' looks like a leftover from a template -- confirm the intended legend label
fig = dict(
    data=[
        dict(
            x=O_metric_df.Competitiveness,
            y=O_metric_df.Specialization,
            text=O_metric_df.index,
            mode='markers',
            name='2007',
        )
    ],
    layout=dict(
        xaxis=dict(title='Competitiveness', type='log'),
        yaxis=dict(title="Specialization"),
    ),
)

# IPython notebook
py.iplot(fig, filename='out')


In [None]:
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go

init_notebook_mode(connected=True)

import pandas as pd
import numpy as np


#Synthetic demo data: N evenly spaced x positions and four noisy series centred on 3, 6, 9 and 12
N = 20
x = np.linspace(1, 10, N)
y = np.random.randn(N)+3
y2 = np.random.randn(N)+6
y3 = np.random.randn(N)+9
y4 = np.random.randn(N)+12
df = pd.DataFrame({'x': x, 'y': y, 'y2':y2, 'y3':y3, 'y4':y4})
df.head()

#One bar trace per series, all sharing df.x so the bars stack on the same positions.
#The cell previously read `df.y1` (not a column of df) and an undefined `test` frame,
#so it could not run; every trace now uses columns that exist on df.
#NOTE(review): if a real table with 'Pref 2' / 'Pref 3' columns was intended, swap it in here
data = [
    go.Bar(
        x=df.x, # assign x as the dataframe column 'x'
        y=df.y,
        name = "Perf 1"
    ),
    go.Bar(
        x=df.x,
        y=df.y2,
        name = "Perf 2"
    ),
    go.Bar(
        x=df.x,
        y=df.y3,
        name = "Perf 3"
    )

]

#Stack the traces and format y-axis ticks as integers
layout = go.Layout(
    barmode='stack',
    title='Stacked Bar with Pandas',
    yaxis=dict(
        tickformat='d'
    )
)

fig = go.Figure(data=data, layout=layout)

# IPython notebook
py.iplot(fig, filename='pandas-bar-chart-layout')