<a href="https://colab.research.google.com/github/j-buss/wi-dpi-analysis/blob/development/eda/6.1_Salary_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Salary and Education in Wisconsin - 6.1 Salary Prediction


Salary prediction for all percentage of employees

### Import Libraries

In [0]:
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 1000)
pd.set_option('display.width', 1000)
import seaborn as sns
import matplotlib.pyplot as plt

from google.cloud import bigquery

In [0]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [0]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply matplotlib's 'bmh' style sheet to figures drawn after this point.
plt.style.use('bmh')

### Functions

In [0]:
# To help with the pie charts later
def make_autopct(values):
  """Build a label formatter that shows a wedge's percentage and its count.

  Args:
    values: Iterable of the numeric wedge sizes being plotted. It is summed
      once, so one-shot iterables (generators, iterators) are safe to pass.

  Returns:
    A function taking a percentage ``pct`` (0-100) and returning a label of
    the form ``'12.34% \\n(1,234)'``: the percentage to two decimals followed
    by the matching absolute count, rounded to the nearest integer and
    formatted with thousands separators.
  """
  # Sum once up front. Summing inside the formatter would re-walk `values`
  # for every wedge and would yield 0 on the second call for an iterator.
  total = sum(values)

  def my_autopct(pct):
    # Convert the percentage back into the absolute count it represents.
    val = int(round(pct*total/100.0))
    return '{p:.2f}% \n({v:,d})'.format(p=pct,v=val)
  return my_autopct

### Processing

In [0]:
# Authenticate this Colab session's user to Google Cloud.
# Presumably these credentials are what the BigQuery calls in later cells rely on - confirm.
# NOTE(review): google.colab is imported here rather than in the imports cell,
# presumably because it is only available when running inside Colab.
from google.colab import auth
auth.authenticate_user()

In [0]:
# Google Cloud project passed to the BigQuery client and to the query calls.
project_id = 'wi-dpi-010'

# Dataset names used to qualify table references in the SQL below.
# Only the 'refined' name is referenced by the queries visible in this notebook.
landing_dataset_name = 'landing'
refined_dataset_name = 'refined'
gold_dataset_name = 'gold'

In [0]:
# BigQuery client bound to the project configured in project_id.
# NOTE(review): the cells visible here load data through pd.read_gbq rather than this client.
bq_client = bigquery.Client(project=project_id)

## Analysis


### How Many People Have a Salary?

In [0]:
# SQL text: count the distinct research_id values that have a non-null salary
# in the 2017 all-staff report table of the refined dataset.
# NOTE(review): this cell only builds the query string; no cell visible here
# executes it - confirm whether a run/display cell is missing.
select_people_w_salary = '''
SELECT
  count(distinct research_id)
FROM
  {}.{}
WHERE
  salary is not null
'''.format(refined_dataset_name, '2017_all_staff_report')

### Load 2017 Full-Time Staff Records with a Salary

In [0]:
# SQL text: pull demographic, contract and assignment columns plus salary from
# the 2017 all-staff report, keeping only rows where assignment_fte = 1 and
# salary is present.
select_2017 = '''
SELECT
  research_id,
  gender,
  race_ethnicity_cd,
  birth_year,
  contract_high_degree_cd,
  contract_days,
  contract_local_experience,
  contract_total_experience,
  cesa_num,
  
  assignment_fte,
  assignment_requires_dpi_license,
  assignment_staff_category_cd,
  position_classification,
  assignment_position_cd,
  assignment_area_cd,
  assignment_work_school_level_cd,
  assignment_work_agency_cd,
  salary
FROM
  {}.{}
WHERE
  assignment_fte = 1 and salary is not null
'''.format(refined_dataset_name, '2017_all_staff_report')

In [10]:
# Run the 2017 staff query against BigQuery and load the result into a DataFrame.
# NOTE(review): pd.read_gbq is deprecated in pandas 2.2+ in favor of
# pandas_gbq.read_gbq; no SQL dialect is passed here, so the dialect depends on
# the installed pandas-gbq default - confirm before upgrading.
df = pd.read_gbq(select_2017, project_id=project_id)

  """Entry point for launching an IPython kernel.


In [11]:
# Summarize column dtypes and non-null counts of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55633 entries, 0 to 55632
Data columns (total 18 columns):
research_id                        55633 non-null int64
gender                             55633 non-null object
race_ethnicity_cd                  55633 non-null object
birth_year                         55633 non-null int64
contract_high_degree_cd            55628 non-null object
contract_days                      55633 non-null int64
contract_local_experience          55633 non-null float64
contract_total_experience          55633 non-null float64
cesa_num                           55572 non-null object
assignment_fte                     55633 non-null float64
assignment_requires_dpi_license    55633 non-null object
assignment_staff_category_cd       55633 non-null object
position_classification            55633 non-null object
assignment_position_cd             55633 non-null object
assignment_area_cd                 55633 non-null object
assignment_work_school_level_cd    5

In [12]:
# Preview the first 10 rows of the raw query result.
df.head(10)

Unnamed: 0,research_id,gender,race_ethnicity_cd,birth_year,contract_high_degree_cd,contract_days,contract_local_experience,contract_total_experience,cesa_num,assignment_fte,assignment_requires_dpi_license,assignment_staff_category_cd,position_classification,assignment_position_cd,assignment_area_cd,assignment_work_school_level_cd,assignment_work_agency_cd,salary
0,47079,F,W,1959,5,260,2.0,36.0,10,1.0,Y,1,Administrators,5,0,,7,108840.0
1,66962,M,W,1972,5,260,1.0,1.0,5,1.0,Y,1,Administrators,5,0,,14,118000.0
2,29934,F,W,1971,5,260,2.0,20.0,2,1.0,Y,1,Administrators,5,0,,63,92000.0
3,41977,M,W,1972,5,240,8.0,21.0,4,1.0,Y,1,Administrators,5,0,,91,109629.0
4,21817,F,W,1953,7,260,6.0,6.0,10,1.0,Y,1,Administrators,5,0,,112,136331.0
5,66608,M,W,1963,6,261,3.0,31.0,11,1.0,Y,1,Administrators,5,0,,119,145550.0
6,53446,F,W,1967,7,261,1.0,21.0,9,1.0,Y,1,Administrators,5,0,,140,150000.0
7,50365,F,W,1957,7,261,23.0,40.0,6,1.0,Y,1,Administrators,5,0,,147,185000.0
8,90990,M,W,1960,5,260,19.0,34.0,4,1.0,Y,1,Administrators,5,0,,154,124258.0
9,22531,M,W,1965,7,260,8.0,8.0,12,1.0,Y,1,Administrators,5,0,,170,145030.0


### Predict Salary for Full-Time Staff in Only One Role

#### Version 1

In [0]:
# Version 1 frame: birth year and total experience plus one-hot encoded gender,
# with salary kept as the target column. get_dummies removes the encoded column
# and appends its dummy columns after the remaining ones.
df_v1 = pd.get_dummies(
  df[['gender', 'birth_year', 'contract_total_experience', 'salary']],
  columns=['gender'],
  prefix='gender',
)

In [48]:
# Preview the Version 1 frame (gender appears as dummy columns).
df_v1.head()

Unnamed: 0,birth_year,contract_total_experience,salary,gender_F,gender_M
0,1959,36.0,108840.0,1,0
1,1972,1.0,118000.0,0,1
2,1971,20.0,92000.0,1,0
3,1972,21.0,109629.0,0,1
4,1953,6.0,136331.0,1,0


In [0]:
# Separate the Version 1 feature matrix from the salary target.
data_v1 = df_v1.drop(columns=['salary'])
target_v1 = df_v1['salary']

In [50]:
# Hold out 25% of the rows for evaluation; random_state=0 pins the shuffle so
# the split is repeatable.
# NOTE(review): the names X_train/X_test/y_train/y_test are overwritten by the
# Version 2 split further down, which also uses test_size=0.2 instead of 0.25 -
# re-run this cell before scoring model_v1, and keep the different hold-out
# sizes in mind when comparing scores.
X_train, X_test, y_train, y_test = train_test_split(data_v1, target_v1, test_size=0.25, random_state=0)
model_v1 = LinearRegression()
# Fit the linear model on the training rows; the fitted estimator is the cell output.
model_v1.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [51]:
# Score the Version 1 model on the held-out rows (R^2 for scikit-learn regressors).
model_v1.score(X_test, y_test)

0.368015985502004

#### Version 2

In [0]:
# Version 2 starts from every loaded column except the record identifier and
# the license / position / area / agency code columns. drop() returns a new
# frame, so `df` itself is left untouched.
df_v2 = df.drop(
  columns=[
    'research_id',
    'assignment_requires_dpi_license',
    'assignment_position_cd',
    'assignment_area_cd',
    'assignment_work_agency_cd',
  ]
)

In [0]:

# One-hot encode the categorical columns one at a time, in this order. Each
# column is removed and its dummy columns are appended on the right.
# A prefix of None leaves the dummy columns named after the category values.
one_hot_spec_v2 = [
  ('gender', 'gender'),
  ('race_ethnicity_cd', 'ethnicity'),
  ('contract_high_degree_cd', 'degree'),
  ('cesa_num', 'cesa'),
  ('assignment_staff_category_cd', 'assignment_staff_category_cd'),
  ('position_classification', None),
  ('assignment_work_school_level_cd', 'assignment_work_school_level_cd'),
]
for column, prefix in one_hot_spec_v2:
  dummies = pd.get_dummies(df_v2[column], prefix=prefix)
  df_v2 = pd.concat([df_v2.drop(column, axis=1), dummies], axis=1)

In [0]:
# Normalize column names: trim whitespace, lower-case, spaces -> underscores,
# and remove parentheses.
# regex=False makes every replacement a literal substring swap. Without it the
# meaning of '(' and ')' depends on the pandas version's regex default (a lone
# '(' is not a valid regular expression).
df_v2.columns = (
  df_v2.columns
  .str.strip()
  .str.lower()
  .str.replace(' ', '_', regex=False)
  .str.replace('(', '', regex=False)
  .str.replace(')', '', regex=False)
)

In [55]:
# Preview the Version 2 frame after encoding and column-name normalization.
df_v2.head()

Unnamed: 0,birth_year,contract_days,contract_local_experience,contract_total_experience,assignment_fte,salary,gender_f,gender_m,ethnicity_a,ethnicity_b,ethnicity_h,ethnicity_i,ethnicity_p,ethnicity_t,ethnicity_w,degree_3,degree_4,degree_5,degree_6,degree_7,degree_8,cesa_1,cesa_2,cesa_3,cesa_4,cesa_5,cesa_6,cesa_7,cesa_8,cesa_9,cesa_10,cesa_11,cesa_12,assignment_staff_category_cd_0,assignment_staff_category_cd_1,assignment_staff_category_cd_3,administrators,other,pupil_services,teachers,assignment_work_school_level_cd_3,assignment_work_school_level_cd_4,assignment_work_school_level_cd_5,assignment_work_school_level_cd_6,assignment_work_school_level_cd_7
0,1959,260,2.0,36.0,1.0,108840.0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0
1,1972,260,1.0,1.0,1.0,118000.0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0
2,1971,260,2.0,20.0,1.0,92000.0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0
3,1972,240,8.0,21.0,1.0,109629.0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0
4,1953,260,6.0,6.0,1.0,136331.0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0


In [0]:
# Separate the Version 2 feature matrix from the salary target.
data_v2 = df_v2.drop(columns=['salary'])
target_v2 = df_v2['salary']

In [57]:
# Hold out 20% of the rows for evaluation; random_state=0 pins the shuffle so
# the split is repeatable.
# NOTE(review): this overwrites the X_train/X_test/y_train/y_test produced by
# the Version 1 split (which used test_size=0.25), so model_v1 can no longer be
# scored with these variables and the two scores come from different hold-out sets.
X_train, X_test, y_train, y_test = train_test_split(data_v2, target_v2, test_size=0.2, random_state=0)
model_v2 = LinearRegression()
# Fit the linear model on the training rows; the fitted estimator is the cell output.
model_v2.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [58]:
# Score the Version 2 model on the held-out rows (R^2 for scikit-learn regressors).
model_v2.score(X_test, y_test)

0.7967505594073234

#### Version 3

In [59]:
# Re-display the raw frame for reference before building the Version 3 features.
df.head(10)

Unnamed: 0,research_id,gender,race_ethnicity_cd,birth_year,contract_high_degree_cd,contract_days,contract_local_experience,contract_total_experience,cesa_num,assignment_fte,assignment_requires_dpi_license,assignment_staff_category_cd,position_classification,assignment_position_cd,assignment_area_cd,assignment_work_school_level_cd,assignment_work_agency_cd,salary
0,47079,F,W,1959,5,260,2.0,36.0,10,1.0,Y,1,Administrators,5,0,,7,108840.0
1,66962,M,W,1972,5,260,1.0,1.0,5,1.0,Y,1,Administrators,5,0,,14,118000.0
2,29934,F,W,1971,5,260,2.0,20.0,2,1.0,Y,1,Administrators,5,0,,63,92000.0
3,41977,M,W,1972,5,240,8.0,21.0,4,1.0,Y,1,Administrators,5,0,,91,109629.0
4,21817,F,W,1953,7,260,6.0,6.0,10,1.0,Y,1,Administrators,5,0,,112,136331.0
5,66608,M,W,1963,6,261,3.0,31.0,11,1.0,Y,1,Administrators,5,0,,119,145550.0
6,53446,F,W,1967,7,261,1.0,21.0,9,1.0,Y,1,Administrators,5,0,,140,150000.0
7,50365,F,W,1957,7,261,23.0,40.0,6,1.0,Y,1,Administrators,5,0,,147,185000.0
8,90990,M,W,1960,5,260,19.0,34.0,4,1.0,Y,1,Administrators,5,0,,154,124258.0
9,22531,M,W,1965,7,260,8.0,8.0,12,1.0,Y,1,Administrators,5,0,,170,145030.0


In [0]:
# Version 3 keeps every loaded column except the record identifier and recodes
# the license flag from 'Y'/'N' to 1/0. assign() and drop() both return new
# frames, so `df` itself is left untouched.
df_v3 = (
  df
  .assign(
    assignment_requires_dpi_license=df['assignment_requires_dpi_license'].replace(to_replace={'Y': 1, 'N': 0})
  )
  .drop(columns=['research_id'])
)

In [0]:
# One-hot encode the categorical columns one at a time, in this order. Each
# column is removed and its dummy columns are appended on the right.
# A prefix of None leaves the dummy columns named after the category values.
one_hot_spec_v3 = [
  ('gender', 'gender'),
  ('race_ethnicity_cd', 'ethnicity'),
  ('contract_high_degree_cd', 'degree'),
  ('cesa_num', 'cesa'),
  ('assignment_staff_category_cd', 'assignment_staff_category_cd'),
  ('position_classification', None),
  ('assignment_work_school_level_cd', 'assignment_work_school_level_cd'),
  # Added in Version 3: position and work-agency codes.
  ('assignment_position_cd', 'assignment_position_cd'),
  ('assignment_work_agency_cd', 'assignment_work_agency_cd'),
]
for column, prefix in one_hot_spec_v3:
  dummies = pd.get_dummies(df_v3[column], prefix=prefix)
  df_v3 = pd.concat([df_v3.drop(column, axis=1), dummies], axis=1)

# The assignment area code is dropped rather than encoded.
df_v3 = df_v3.drop('assignment_area_cd', axis=1)

In [0]:
# Normalize column names: trim whitespace, lower-case, spaces -> underscores,
# and remove parentheses.
# regex=False makes every replacement a literal substring swap. Without it the
# meaning of '(' and ')' depends on the pandas version's regex default (a lone
# '(' is not a valid regular expression).
df_v3.columns = (
  df_v3.columns
  .str.strip()
  .str.lower()
  .str.replace(' ', '_', regex=False)
  .str.replace('(', '', regex=False)
  .str.replace(')', '', regex=False)
)

In [63]:
# Preview the Version 3 frame.
# NOTE(review): with a dummy column per position code and per work-agency code
# the frame is several hundred columns wide, so this output is very large -
# consider showing df_v3.shape or a column subset instead.
df_v3.head(10)

Unnamed: 0,birth_year,contract_days,contract_local_experience,contract_total_experience,assignment_fte,assignment_requires_dpi_license,salary,gender_f,gender_m,ethnicity_a,ethnicity_b,ethnicity_h,ethnicity_i,ethnicity_p,ethnicity_t,ethnicity_w,degree_3,degree_4,degree_5,degree_6,degree_7,degree_8,cesa_1,cesa_2,cesa_3,cesa_4,cesa_5,cesa_6,cesa_7,cesa_8,cesa_9,cesa_10,cesa_11,cesa_12,assignment_staff_category_cd_0,assignment_staff_category_cd_1,assignment_staff_category_cd_3,administrators,other,pupil_services,teachers,assignment_work_school_level_cd_3,assignment_work_school_level_cd_4,assignment_work_school_level_cd_5,assignment_work_school_level_cd_6,assignment_work_school_level_cd_7,assignment_position_cd_05,assignment_position_cd_06,assignment_position_cd_08,assignment_position_cd_09,assignment_position_cd_10,assignment_position_cd_17,assignment_position_cd_18,assignment_position_cd_19,assignment_position_cd_50,assignment_position_cd_51,assignment_position_cd_52,assignment_position_cd_53,assignment_position_cd_54,assignment_position_cd_55,assignment_position_cd_59,assignment_position_cd_62,assignment_position_cd_63,assignment_position_cd_64,assignment_position_cd_73,assignment_position_cd_75,assignment_position_cd_79,assignment_position_cd_80,assignment_position_cd_83,assignment_position_cd_84,assignment_position_cd_85,assignment_position_cd_86,assignment_position_cd_87,assignment_position_cd_88,assignment_position_cd_90,assignment_position_cd_91,assignment_position_cd_92,assignment_position_cd_93,assignment_position_cd_98,assignment_position_cd_99,assignment_work_agency_cd_0007,assignment_work_agency_cd_0014,assignment_work_agency_cd_0063,assignment_work_agency_cd_0070,assignment_work_agency_cd_0084,assignment_work_agency_cd_0091,assignment_work_agency_cd_0105,assignment_work_agency_cd_0112,assignment_work_agency_cd_0119,assignment_work_agency_cd_0126,assignment_work_agency_cd_0140,assignment_work_agency_cd_0147,assignment_work_agency_cd_0154,assignment_work_agen
cy_cd_0161,assignment_work_agency_cd_0170,assignment_work_agency_cd_0182,assignment_work_agency_cd_0196,assignment_work_agency_cd_0203,assignment_work_agency_cd_0217,assignment_work_agency_cd_0231,assignment_work_agency_cd_0238,assignment_work_agency_cd_0245,assignment_work_agency_cd_0280,assignment_work_agency_cd_0287,assignment_work_agency_cd_0308,assignment_work_agency_cd_0315,assignment_work_agency_cd_0336,assignment_work_agency_cd_0350,assignment_work_agency_cd_0364,assignment_work_agency_cd_0413,assignment_work_agency_cd_0422,assignment_work_agency_cd_0427,assignment_work_agency_cd_0434,assignment_work_agency_cd_0441,assignment_work_agency_cd_0469,assignment_work_agency_cd_0476,assignment_work_agency_cd_0485,assignment_work_agency_cd_0490,assignment_work_agency_cd_0497,assignment_work_agency_cd_0602,assignment_work_agency_cd_0609,assignment_work_agency_cd_0616,assignment_work_agency_cd_0623,assignment_work_agency_cd_0637,assignment_work_agency_cd_0657,assignment_work_agency_cd_0658,assignment_work_agency_cd_0665,assignment_work_agency_cd_0700,assignment_work_agency_cd_0714,assignment_work_agency_cd_0721,assignment_work_agency_cd_0735,assignment_work_agency_cd_0777,assignment_work_agency_cd_0840,assignment_work_agency_cd_0870,assignment_work_agency_cd_0882,assignment_work_agency_cd_0896,assignment_work_agency_cd_0903,assignment_work_agency_cd_0910,assignment_work_agency_cd_0980,assignment_work_agency_cd_0994,assignment_work_agency_cd_1015,assignment_work_agency_cd_1029,assignment_work_agency_cd_1071,assignment_work_agency_cd_1080,assignment_work_agency_cd_1085,assignment_work_agency_cd_1092,assignment_work_agency_cd_1120,assignment_work_agency_cd_1127,assignment_work_agency_cd_1134,assignment_work_agency_cd_1141,assignment_work_agency_cd_1155,assignment_work_agency_cd_1162,assignment_work_agency_cd_1169,assignment_work_agency_cd_1176,assignment_work_agency_cd_1183,assignment_work_agency_cd_1204,assignment_work_agency_cd_1218,assignment_work_agency_cd_1232,assig
nment_work_agency_cd_1246,assignment_work_agency_cd_1253,assignment_work_agency_cd_1260,assignment_work_agency_cd_1295,assignment_work_agency_cd_1309,assignment_work_agency_cd_1316,assignment_work_agency_cd_1376,assignment_work_agency_cd_1380,assignment_work_agency_cd_1407,assignment_work_agency_cd_1414,assignment_work_agency_cd_1421,assignment_work_agency_cd_1428,assignment_work_agency_cd_1449,assignment_work_agency_cd_1491,assignment_work_agency_cd_1499,assignment_work_agency_cd_1526,assignment_work_agency_cd_1540,assignment_work_agency_cd_1554,assignment_work_agency_cd_1561,assignment_work_agency_cd_1568,assignment_work_agency_cd_1582,assignment_work_agency_cd_1600,assignment_work_agency_cd_1631,assignment_work_agency_cd_1638,assignment_work_agency_cd_1645,assignment_work_agency_cd_1659,assignment_work_agency_cd_1666,assignment_work_agency_cd_1673,assignment_work_agency_cd_1687,assignment_work_agency_cd_1694,assignment_work_agency_cd_1729,assignment_work_agency_cd_1736,assignment_work_agency_cd_1813,assignment_work_agency_cd_1848,assignment_work_agency_cd_1855,assignment_work_agency_cd_1862,assignment_work_agency_cd_1870,assignment_work_agency_cd_1883,assignment_work_agency_cd_1890,assignment_work_agency_cd_1897,assignment_work_agency_cd_1900,assignment_work_agency_cd_1939,assignment_work_agency_cd_1945,assignment_work_agency_cd_1953,assignment_work_agency_cd_2009,assignment_work_agency_cd_2016,assignment_work_agency_cd_2044,assignment_work_agency_cd_2051,assignment_work_agency_cd_2058,assignment_work_agency_cd_2114,assignment_work_agency_cd_2128,assignment_work_agency_cd_2135,assignment_work_agency_cd_2142,assignment_work_agency_cd_2177,assignment_work_agency_cd_2184,assignment_work_agency_cd_2198,assignment_work_agency_cd_2212,assignment_work_agency_cd_2217,assignment_work_agency_cd_2226,assignment_work_agency_cd_2233,assignment_work_agency_cd_2240,assignment_work_agency_cd_2289,assignment_work_agency_cd_2296,assignment_work_agency_cd_2303,assignment_work_agenc
y_cd_2310,assignment_work_agency_cd_2394,assignment_work_agency_cd_2415,assignment_work_agency_cd_2420,assignment_work_agency_cd_2422,assignment_work_agency_cd_2436,assignment_work_agency_cd_2443,assignment_work_agency_cd_2450,assignment_work_agency_cd_2460,assignment_work_agency_cd_2478,assignment_work_agency_cd_2485,assignment_work_agency_cd_2525,assignment_work_agency_cd_2527,assignment_work_agency_cd_2534,assignment_work_agency_cd_2541,assignment_work_agency_cd_2562,assignment_work_agency_cd_2576,assignment_work_agency_cd_2583,assignment_work_agency_cd_2604,assignment_work_agency_cd_2605,assignment_work_agency_cd_2611,assignment_work_agency_cd_2618,assignment_work_agency_cd_2625,assignment_work_agency_cd_2632,assignment_work_agency_cd_2639,assignment_work_agency_cd_2646,assignment_work_agency_cd_2660,assignment_work_agency_cd_2695,assignment_work_agency_cd_2702,assignment_work_agency_cd_2730,assignment_work_agency_cd_2737,assignment_work_agency_cd_2744,assignment_work_agency_cd_2758,assignment_work_agency_cd_2793,assignment_work_agency_cd_2800,assignment_work_agency_cd_2814,assignment_work_agency_cd_2828,assignment_work_agency_cd_2835,assignment_work_agency_cd_2842,assignment_work_agency_cd_2849,assignment_work_agency_cd_2856,assignment_work_agency_cd_2863,assignment_work_agency_cd_2884,assignment_work_agency_cd_2885,assignment_work_agency_cd_2891,assignment_work_agency_cd_2898,assignment_work_agency_cd_2912,assignment_work_agency_cd_2940,assignment_work_agency_cd_2961,assignment_work_agency_cd_3087,assignment_work_agency_cd_3094,assignment_work_agency_cd_3122,assignment_work_agency_cd_3129,assignment_work_agency_cd_3150,assignment_work_agency_cd_3171,assignment_work_agency_cd_3206,assignment_work_agency_cd_3213,assignment_work_agency_cd_3220,assignment_work_agency_cd_3269,assignment_work_agency_cd_3276,assignment_work_agency_cd_3290,assignment_work_agency_cd_3297,assignment_work_agency_cd_3304,assignment_work_agency_cd_3311,assignment_work_agency_cd_3318,assign
ment_work_agency_cd_3325,assignment_work_agency_cd_3332,assignment_work_agency_cd_3339,assignment_work_agency_cd_3360,assignment_work_agency_cd_3367,assignment_work_agency_cd_3381,assignment_work_agency_cd_3409,assignment_work_agency_cd_3427,assignment_work_agency_cd_3428,assignment_work_agency_cd_3430,assignment_work_agency_cd_3434,assignment_work_agency_cd_3437,assignment_work_agency_cd_3444,assignment_work_agency_cd_3479,assignment_work_agency_cd_3484,assignment_work_agency_cd_3500,assignment_work_agency_cd_3510,assignment_work_agency_cd_3514,assignment_work_agency_cd_3528,assignment_work_agency_cd_3542,assignment_work_agency_cd_3549,assignment_work_agency_cd_3612,assignment_work_agency_cd_3619,assignment_work_agency_cd_3633,assignment_work_agency_cd_3640,assignment_work_agency_cd_3647,assignment_work_agency_cd_3654,assignment_work_agency_cd_3661,assignment_work_agency_cd_3668,assignment_work_agency_cd_3675,assignment_work_agency_cd_3682,assignment_work_agency_cd_3689,assignment_work_agency_cd_3696,assignment_work_agency_cd_3787,assignment_work_agency_cd_3794,assignment_work_agency_cd_3822,assignment_work_agency_cd_3850,assignment_work_agency_cd_3857,assignment_work_agency_cd_3862,assignment_work_agency_cd_3871,assignment_work_agency_cd_3892,assignment_work_agency_cd_3899,assignment_work_agency_cd_3906,assignment_work_agency_cd_3920,assignment_work_agency_cd_3925,assignment_work_agency_cd_3934,assignment_work_agency_cd_3941,assignment_work_agency_cd_3948,assignment_work_agency_cd_3955,assignment_work_agency_cd_3962,assignment_work_agency_cd_3969,assignment_work_agency_cd_3976,assignment_work_agency_cd_3983,assignment_work_agency_cd_3990,assignment_work_agency_cd_4011,assignment_work_agency_cd_4018,assignment_work_agency_cd_4025,assignment_work_agency_cd_4060,assignment_work_agency_cd_4067,assignment_work_agency_cd_4074,assignment_work_agency_cd_4088,assignment_work_agency_cd_4095,assignment_work_agency_cd_4137,assignment_work_agency_cd_4144,assignment_work_agency
_cd_4151,assignment_work_agency_cd_4165,assignment_work_agency_cd_4179,assignment_work_agency_cd_4186,assignment_work_agency_cd_4207,assignment_work_agency_cd_4221,assignment_work_agency_cd_4228,assignment_work_agency_cd_4235,assignment_work_agency_cd_4263,assignment_work_agency_cd_4270,assignment_work_agency_cd_4305,assignment_work_agency_cd_4312,assignment_work_agency_cd_4330,assignment_work_agency_cd_4347,assignment_work_agency_cd_4368,assignment_work_agency_cd_4375,assignment_work_agency_cd_4389,assignment_work_agency_cd_4459,assignment_work_agency_cd_4473,assignment_work_agency_cd_4501,assignment_work_agency_cd_4508,assignment_work_agency_cd_4515,assignment_work_agency_cd_4522,assignment_work_agency_cd_4529,assignment_work_agency_cd_4536,assignment_work_agency_cd_4543,assignment_work_agency_cd_4557,assignment_work_agency_cd_4571,assignment_work_agency_cd_4578,assignment_work_agency_cd_4606,assignment_work_agency_cd_4613,assignment_work_agency_cd_4620,assignment_work_agency_cd_4627,assignment_work_agency_cd_4634,assignment_work_agency_cd_4641,assignment_work_agency_cd_4686,assignment_work_agency_cd_4690,assignment_work_agency_cd_4753,assignment_work_agency_cd_4760,assignment_work_agency_cd_4781,assignment_work_agency_cd_4795,assignment_work_agency_cd_4802,assignment_work_agency_cd_4820,assignment_work_agency_cd_4843,assignment_work_agency_cd_4851,assignment_work_agency_cd_4865,assignment_work_agency_cd_4872,assignment_work_agency_cd_4893,assignment_work_agency_cd_4904,assignment_work_agency_cd_4956,assignment_work_agency_cd_4963,assignment_work_agency_cd_4970,assignment_work_agency_cd_5019,assignment_work_agency_cd_5026,assignment_work_agency_cd_5054,assignment_work_agency_cd_5068,assignment_work_agency_cd_5100,assignment_work_agency_cd_5124,assignment_work_agency_cd_5130,assignment_work_agency_cd_5138,assignment_work_agency_cd_5258,assignment_work_agency_cd_5264,assignment_work_agency_cd_5271,assignment_work_agency_cd_5278,assignment_work_agency_cd_5306,assignm
ent_work_agency_cd_5348,assignment_work_agency_cd_5355,assignment_work_agency_cd_5362,assignment_work_agency_cd_5369,assignment_work_agency_cd_5376,assignment_work_agency_cd_5390,assignment_work_agency_cd_5397,assignment_work_agency_cd_5432,assignment_work_agency_cd_5439,assignment_work_agency_cd_5457,assignment_work_agency_cd_5460,assignment_work_agency_cd_5467,assignment_work_agency_cd_5474,assignment_work_agency_cd_5523,assignment_work_agency_cd_5586,assignment_work_agency_cd_5593,assignment_work_agency_cd_5607,assignment_work_agency_cd_5614,assignment_work_agency_cd_5621,assignment_work_agency_cd_5628,assignment_work_agency_cd_5642,assignment_work_agency_cd_5656,assignment_work_agency_cd_5663,assignment_work_agency_cd_5670,assignment_work_agency_cd_5726,assignment_work_agency_cd_5733,assignment_work_agency_cd_5740,assignment_work_agency_cd_5747,assignment_work_agency_cd_5754,assignment_work_agency_cd_5757,assignment_work_agency_cd_5780,assignment_work_agency_cd_5810,assignment_work_agency_cd_5817,assignment_work_agency_cd_5824,assignment_work_agency_cd_5852,assignment_work_agency_cd_5859,assignment_work_agency_cd_5866,assignment_work_agency_cd_5901,assignment_work_agency_cd_5960,assignment_work_agency_cd_5985,assignment_work_agency_cd_5992,assignment_work_agency_cd_6013,assignment_work_agency_cd_6022,assignment_work_agency_cd_6027,assignment_work_agency_cd_6069,assignment_work_agency_cd_6083,assignment_work_agency_cd_6104,assignment_work_agency_cd_6113,assignment_work_agency_cd_6118,assignment_work_agency_cd_6125,assignment_work_agency_cd_6174,assignment_work_agency_cd_6181,assignment_work_agency_cd_6195,assignment_work_agency_cd_6216,assignment_work_agency_cd_6223,assignment_work_agency_cd_6230,assignment_work_agency_cd_6237,assignment_work_agency_cd_6244,assignment_work_agency_cd_6251,assignment_work_agency_cd_6293,assignment_work_agency_cd_6300,assignment_work_agency_cd_6307,assignment_work_agency_cd_6321,assignment_work_agency_cd_6328,assignment_work_agency_
cd_6335,assignment_work_agency_cd_6354,assignment_work_agency_cd_6370,assignment_work_agency_cd_6384,assignment_work_agency_cd_6412,assignment_work_agency_cd_6419,assignment_work_agency_cd_6426,assignment_work_agency_cd_6440,assignment_work_agency_cd_6461,assignment_work_agency_cd_6470,assignment_work_agency_cd_6475,assignment_work_agency_cd_6482,assignment_work_agency_cd_6545,assignment_work_agency_cd_6608,assignment_work_agency_cd_6615,assignment_work_agency_cd_6678,assignment_work_agency_cd_6685,assignment_work_agency_cd_6692,assignment_work_agency_cd_6713,assignment_work_agency_cd_6720,assignment_work_agency_cd_6734,assignment_work_agency_cd_6748,assignment_work_agency_cd_6905,assignment_work_agency_cd_6937,assignment_work_agency_cd_6964,assignment_work_agency_cd_7301,assignment_work_agency_cd_7302,assignment_work_agency_cd_8001,assignment_work_agency_cd_8101,assignment_work_agency_cd_8105,assignment_work_agency_cd_8106,assignment_work_agency_cd_8107,assignment_work_agency_cd_8109,assignment_work_agency_cd_8110,assignment_work_agency_cd_8113,assignment_work_agency_cd_8114,assignment_work_agency_cd_8123,assignment_work_agency_cd_8127,assignment_work_agency_cd_8128,assignment_work_agency_cd_8129,assignment_work_agency_cd_8131,assignment_work_agency_cd_8132,assignment_work_agency_cd_8133,assignment_work_agency_cd_8135,assignment_work_agency_cd_8136,assignment_work_agency_cd_8137,assignment_work_agency_cd_8138,assignment_work_agency_cd_8139,assignment_work_agency_cd_9901,assignment_work_agency_cd_9902,assignment_work_agency_cd_9903,assignment_work_agency_cd_9904,assignment_work_agency_cd_9905,assignment_work_agency_cd_9906,assignment_work_agency_cd_9907,assignment_work_agency_cd_9908,assignment_work_agency_cd_9909,assignment_work_agency_cd_9910,assignment_work_agency_cd_9911,assignment_work_agency_cd_9912
0,1959,260,2.0,36.0,1.0,1,108840.0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1972,260,1.0,1.0,1.0,1,118000.0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1971,260,2.0,20.0,1.0,1,92000.0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1972,240,8.0,21.0,1.0,1,109629.0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1953,260,6.0,6.0,1.0,1,136331.0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,1963,261,3.0,31.0,1.0,1,145550.0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,1967,261,1.0,21.0,1.0,1,150000.0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,1957,261,23.0,40.0,1.0,1,185000.0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,1960,260,19.0,34.0,1.0,1,124258.0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,1965,260,8.0,8.0,1.0,1,145030.0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [0]:
# Separate the v3 frame into predictors (every column except salary)
# and the salary column used as the regression target.
data_v3 = df_v3.drop(columns='salary')
target_v3 = df_v3['salary']

In [65]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps
# the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    data_v3,
    target_v3,
    random_state=0,
    test_size=0.2,
)

# Fit a plain linear regression on the training portion. The fit() call is
# left as the last expression so the cell still displays the estimator.
model_v3 = LinearRegression()
model_v3.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [66]:
# Evaluate the fitted v3 model on the held-out split; for scikit-learn
# regressors score() reports the coefficient of determination (R^2).
model_v3.score(X_test, y_test)

0.8549880829480746

### Test a new Point

In [0]:
# A single hand-built sample: one row holding four feature values.
# NOTE(review): the meaning and order of the four values is not shown here;
# confirm they line up with the feature columns of the model being queried.
test_point = [[1969,15.0,0,1]]

In [74]:
# Predict for the hand-built sample.
# NOTE(review): this queries model_v1, not the model_v3 fitted in the cells
# just before; presumably deliberate because test_point carries only four
# feature values, but confirm.
model_v1.predict(test_point)

array([64828.08996283])

### NEXT FEW CELLS SHOULD PROBABLY GO INTO 3.0 Refined Notebook

### New Dataset - Teacher Master Data

|Query Grp.|Role Count | Contract Count | Record Count | Educator Count|
|---------|------|----|------------------------|--------------------------|
|A| 1 | 1 | 114,000|114,000|
|B| Mult. | 1 | 1 | 1 |
|C|Mult. | Mult. |2|2|
|Z|||187,344|143,493|

Hypothesis: contract days are tied to a salary. Therefore, if you group a person's records by contract days, you should never find more than one salary for the same contract-days value.

In [0]:
# Query group A: counts the (research_id, first_name, last_name) combinations
# that appear on exactly one row of the 2017 all-staff report.
# The inner query groups by those three columns and keeps groups of size 1;
# the outer query counts the surviving groups.
select_grp_A = '''
SELECT
  count(*)
FROM
  (SELECT
    research_id,
    first_name,
    last_name,
    count(*)
  FROM
    `wi-dpi-010.refined.2017_all_staff_report`
  GROUP BY
    1,2,3
  HAVING count(*) = 1) A

'''

In [0]:
# TODO: placeholder only. The string holds just a newline, so there is no
# query to run yet for the group B record count.
select_grp_B_rec_cnt = '''
'''

In [0]:
# Query group Z educator count: number of distinct research_id values in the
# whole 2017 all-staff report (no filtering).
select_grp_Z_educ_cnt = '''
SELECT
  count(distinct research_id)
FROM
  `wi-dpi-010.refined.2017_all_staff_report`
'''

In [0]:
# Intended query: educators for whom every role has a salary and
# contract_days, including educators who hold multiple roles.
# TODO: placeholder only. The string holds just a newline; the query has
# not been written yet.
select_salary_educators = '''
'''

In [0]:
# MIXED educators: those with at least one salaried role and at least one
# non-salaried role.
# The report is self-joined on research_id; a person is counted when one of
# their rows has a salary (A) and another of their rows has none (B).
# count(distinct ...) collapses the multiple row pairs per person.
select_mixed_educ_cnt ='''
SELECT
  count(distinct A.research_id)
FROM
  `wi-dpi-010.refined.2017_all_staff_report` A
  INNER JOIN `wi-dpi-010.refined.2017_all_staff_report` B ON
  A.research_id = B.research_id
WHERE 
  A.salary is not null AND
  B.salary is null
'''

In [0]:
# MIXED educators: those with at least one salaried role and at least one
# non-salaried role.
# Record count for that group: subquery C is the same self-join as the
# educator count, reduced to distinct research_id values; joining C back
# to the report (D) and counting rows gives every record belonging to
# those educators.
select_mixed_rec_cnt='''
SELECT
  count(*)
FROM
  (SELECT
    distinct A.research_id
  FROM
    `wi-dpi-010.refined.2017_all_staff_report` A
    INNER JOIN `wi-dpi-010.refined.2017_all_staff_report` B ON
    A.research_id = B.research_id
  WHERE 
    A.salary is not null AND
    B.salary is null) C 
  INNER JOIN `wi-dpi-010.refined.2017_all_staff_report` D ON
  C.research_id = D.research_id
'''

### Next Data Set

In [0]:
# Teacher master data: one row per staff-report record that has both an
# assignment FTE and a salary, with person, contract and assignment columns.
# contract_days is selected exactly once (inside the contract_* group):
# listing it twice produces duplicate result column names, which BigQuery
# rejects and which would be ambiguous in a DataFrame.
select_teacher_master_data = '''
SELECT
  research_id,
  year_session,
  first_name,
  last_name,
  gender,
  race_ethnicity_cd,
  birth_year,
  
  contract_high_degree_cd,
  contract_days,
  contract_local_experience,
  contract_total_experience,
  
  cesa_num,
  assignment_fte,
  assignment_requires_dpi_license,
  assignment_staff_category_cd,
  position_classification,
  assignment_position_cd,
  assignment_area_cd,
  assignment_work_school_level_cd,
  assignment_work_agency_cd,
  salary
FROM
  {}.{}
WHERE
  assignment_fte is not null and salary is not null
'''.format(refined_dataset_name, '2017_all_staff_report')

In [0]:
# Run the query against BigQuery and load the result into a DataFrame.
# NOTE(review): this executes select_all_salaries, not the
# select_teacher_master_data query defined in the preceding cell; confirm
# which query is intended (select_all_salaries is not defined nearby).
df = pd.read_gbq(select_all_salaries, project_id=project_id)

  """Entry point for launching an IPython kernel.


In [0]:
# Centre and spread of the assignment_fte column
# (pandas' std() is the sample standard deviation, ddof=1).
assignment_mean, assignment_std = (
    df['assignment_fte'].mean(),
    df['assignment_fte'].std(),
)

In [0]:
# TODO: unfinished cell. The original statement had no right-hand side
# (`df['assign_fte_1_std'] = `), which is a SyntaxError and stops
# "Restart & Run All". It is left disabled until the definition of the
# column is decided; presumably it relates assignment_fte to
# assignment_mean / assignment_std, but that is not shown anywhere.
# df['assign_fte_1_std'] = ...

### Host a model

Evaluation: https://www.ritchieng.com/machine-learning-evaluate-linear-regression-model/

ML Engine - https://cloud.google.com/ml-engine/docs/scikit/using-pipelines

### Extra Queries

In [0]:
# Distribution of total FTE per person.
# Inner query (A): for rows with a salary, sums assignment_fte per
# research_id (rounded to 2 decimals) and also counts that person's rows.
# Outer query: counts how many people share each total FTE value, most
# common value first (ORDER BY 2 = the count column).
Query_for_Sum_of_FTE='''
SELECT
  A.assgn_fte,
  count(a.research_id)
FROM
  (SELECT
    research_id,
    count(*) as person_assgn_cnt,
    round(sum(assignment_fte),2) as assgn_fte
  FROM
    refined.2017_all_staff_report
  WHERE
    salary is not null
  GROUP BY
    research_id) A
GROUP BY a.assgn_fte
ORDER BY 2 DESC'''

In [0]:
# Scratch query (not assigned to a name): all salaried records that have an
# assignment FTE, ordered per person with the largest FTE first.
# A comma now follows assignment_work_school_level_cd. Without it, SQL
# treats the next identifier as an implicit alias, so the school-level
# column is returned under the name assignment_work_agency_cd and the real
# agency column is never selected.
'''
SELECT
  research_id,
  year_session,
  first_name,
  last_name,
  gender,
  race_ethnicity_cd,
  birth_year,
  contract_days,
  contract_local_experience, 
  contract_total_experience,
  salary,
  cesa_num,
  assignment_fte,
  assignment_requires_dpi_license,
  assignment_staff_category_cd,
  position_classification,
  assignment_position_cd,
  assignment_area_cd,
  assignment_work_school_level_cd,
  assignment_work_agency_cd
FROM
  `wi-dpi-010.refined.2017_all_staff_report`
WHERE
  salary is not null and assignment_fte is not null
ORDER BY
  research_id,
  assignment_fte desc'''