In [None]:
import pandas as pd
import numpy as np
import datetime as dt
from itertools import product

import findspark
findspark.init()
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.ml.feature import VectorAssembler, StringIndexer, StandardScaler
from pyspark.ml.clustering import *
from pyspark.ml.evaluation import *
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
from pyspark.ml.stat import Correlation
from pyspark.ml.functions import vector_to_array

# iplot won't work because I've not installed the extension
import chart_studio.plotly as ply
import plotly.offline as plyoff
import plotly.graph_objects as go
import plotly.subplots as plysub

# Switch plotly's offline mode on for this notebook session.
plyoff.init_notebook_mode(connected=True)

# Draw a tiny throw-away figure right after initialisation.
init_trace = go.Scatter(x=[1, 2], y=[42, 42])
init_layout = go.Layout(title='Init')
init = go.Figure(data=[init_trace], layout=init_layout)
plyoff.iplot(init)

# Never truncate the column list when pandas renders a DataFrame.
pd.set_option('display.max_columns', None)

In [None]:
# Initialize Spark.
# The session is obtained through the builder so that:
#   * re-running this cell reuses the live context instead of failing because
#     a second SparkContext cannot be created in the same process, and
#   * the application name is applied when the application is built; assigning
#     to sparkContext.appName afterwards only changes the Python-side attribute.
spark = SparkSession.builder.appName('cluster').getOrCreate()
# Keep the context available under its original name.
sc = spark.sparkContext
# Show the number of cores. defaultParallelism is a public API; the previous
# getExecutorMemoryStatus() lookup counted block managers (driver/executors),
# not cores, and relied on the private _jsc handle.
print('%d cores' % sc.defaultParallelism)
spark

In [None]:
''' get the data '''
# Relative path of the College Scorecard CSV used as the input file.
fil = '../../data/CollegeScorecard.csv'
schem = StructType([StructField('UNITID', IntegerType()), StructField('OPEID', IntegerType()), StructField('opeid6', IntegerType()), StructField('INSTNM', StringType()), StructField('CITY', StringType()), StructField('STABBR', StringType()), StructField('ZIP', IntegerType()), StructField('AccredAgency', StringType()), StructField('INSTURL', StringType()), StructField('NPCURL', StringType()), StructField('sch_deg', IntegerType()), StructField('main', IntegerType()), StructField('NUMBRANCH', IntegerType()), StructField('PREDDEG', IntegerType()), StructField('HIGHDEG', IntegerType()), StructField('CONTROL', IntegerType()), StructField('st_fips', IntegerType()), StructField('region', IntegerType()), StructField('LOCALE', IntegerType()), StructField('locale2', IntegerType()), StructField('LATITUDE', FloatType()), StructField('LONGITUDE', FloatType()), StructField('CCBASIC', IntegerType()), StructField('CCUGPROF', IntegerType()), StructField('CCSIZSET', IntegerType()), StructField('HBCU', IntegerType()), StructField('PBI', IntegerType()), StructField('ANNHI', IntegerType()), StructField('TRIBAL', IntegerType()), StructField('AANAPII', IntegerType()), StructField('HSI', IntegerType()), StructField('NANTI', IntegerType()), StructField('MENONLY', IntegerType()), StructField('WOMENONLY', IntegerType()), StructField('RELAFFIL', IntegerType()), StructField('ADM_RATE', FloatType()), StructField('ADM_RATE_ALL', FloatType()), StructField('SATVR25', FloatType()), StructField('SATVR75', FloatType()), StructField('SATMT25', FloatType()), StructField('SATMT75', FloatType()), StructField('SATWR25', FloatType()), StructField('SATWR75', FloatType()), StructField('SATVRMID', FloatType()), StructField('SATMTMID', FloatType()), StructField('SATWRMID', FloatType()), StructField('ACTCM25', FloatType()), StructField('ACTCM75', FloatType()), StructField('ACTEN25', FloatType()), StructField('ACTEN75', FloatType()), StructField('ACTMT25', FloatType()), StructField('ACTMT75', FloatType()), 
StructField('ACTWR25', FloatType()), StructField('ACTWR75', FloatType()), StructField('ACTCMMID', FloatType()), StructField('ACTENMID', FloatType()), StructField('ACTMTMID', FloatType()), StructField('ACTWRMID', FloatType()), StructField('SAT_AVG', FloatType()), StructField('SAT_AVG_ALL', FloatType()), StructField('PCIP01', FloatType()), StructField('PCIP03', FloatType()), StructField('PCIP04', FloatType()), StructField('PCIP05', FloatType()), StructField('PCIP09', FloatType()), StructField('PCIP10', FloatType()), StructField('PCIP11', FloatType()), StructField('PCIP12', FloatType()), StructField('PCIP13', FloatType()), StructField('PCIP14', FloatType()), StructField('PCIP15', FloatType()), StructField('PCIP16', FloatType()), StructField('PCIP19', FloatType()), StructField('PCIP22', FloatType()), StructField('PCIP23', FloatType()), StructField('PCIP24', FloatType()), StructField('PCIP25', FloatType()), StructField('PCIP26', FloatType()), StructField('PCIP27', FloatType()), StructField('PCIP29', FloatType()), StructField('PCIP30', FloatType()), StructField('PCIP31', FloatType()), StructField('PCIP38', FloatType()), StructField('PCIP39', FloatType()), StructField('PCIP40', FloatType()), StructField('PCIP41', FloatType()), StructField('PCIP42', FloatType()), StructField('PCIP43', FloatType()), StructField('PCIP44', FloatType()), StructField('PCIP45', FloatType()), StructField('PCIP46', FloatType()), StructField('PCIP47', FloatType()), StructField('PCIP48', FloatType()), StructField('PCIP49', FloatType()), StructField('PCIP50', FloatType()), StructField('PCIP51', FloatType()), StructField('PCIP52', FloatType()), StructField('PCIP54', FloatType()), StructField('CIP01CERT1', IntegerType()), StructField('CIP01CERT2', IntegerType()), StructField('CIP01ASSOC', IntegerType()), StructField('CIP01CERT4', IntegerType()), StructField('CIP01BACHL', IntegerType()), StructField('CIP03CERT1', IntegerType()), StructField('CIP03CERT2', IntegerType()), StructField('CIP03ASSOC', 
IntegerType()), StructField('CIP03CERT4', IntegerType()), StructField('CIP03BACHL', IntegerType()), StructField('CIP04CERT1', IntegerType()), StructField('CIP04CERT2', IntegerType()), StructField('CIP04ASSOC', IntegerType()), StructField('CIP04CERT4', IntegerType()), StructField('CIP04BACHL', IntegerType()), StructField('CIP05CERT1', IntegerType()), StructField('CIP05CERT2', IntegerType()), StructField('CIP05ASSOC', IntegerType()), StructField('CIP05CERT4', IntegerType()), StructField('CIP05BACHL', IntegerType()), StructField('CIP09CERT1', IntegerType()), StructField('CIP09CERT2', IntegerType()), StructField('CIP09ASSOC', IntegerType()), StructField('CIP09CERT4', IntegerType()), StructField('CIP09BACHL', IntegerType()), StructField('CIP10CERT1', IntegerType()), StructField('CIP10CERT2', IntegerType()), StructField('CIP10ASSOC', IntegerType()), StructField('CIP10CERT4', IntegerType()), StructField('CIP10BACHL', IntegerType()), StructField('CIP11CERT1', IntegerType()), StructField('CIP11CERT2', IntegerType()), StructField('CIP11ASSOC', IntegerType()), StructField('CIP11CERT4', IntegerType()), StructField('CIP11BACHL', IntegerType()), StructField('CIP12CERT1', IntegerType()), StructField('CIP12CERT2', IntegerType()), StructField('CIP12ASSOC', IntegerType()), StructField('CIP12CERT4', IntegerType()), StructField('CIP12BACHL', IntegerType()), StructField('CIP13CERT1', IntegerType()), StructField('CIP13CERT2', IntegerType()), StructField('CIP13ASSOC', IntegerType()), StructField('CIP13CERT4', IntegerType()), StructField('CIP13BACHL', IntegerType()), StructField('CIP14CERT1', IntegerType()), StructField('CIP14CERT2', IntegerType()), StructField('CIP14ASSOC', IntegerType()), StructField('CIP14CERT4', IntegerType()), StructField('CIP14BACHL', IntegerType()), StructField('CIP15CERT1', IntegerType()), StructField('CIP15CERT2', IntegerType()), StructField('CIP15ASSOC', IntegerType()), StructField('CIP15CERT4', IntegerType()), StructField('CIP15BACHL', IntegerType()), 
StructField('CIP16CERT1', IntegerType()), StructField('CIP16CERT2', IntegerType()), StructField('CIP16ASSOC', IntegerType()), StructField('CIP16CERT4', IntegerType()), StructField('CIP16BACHL', IntegerType()), StructField('CIP19CERT1', IntegerType()), StructField('CIP19CERT2', IntegerType()), StructField('CIP19ASSOC', IntegerType()), StructField('CIP19CERT4', IntegerType()), StructField('CIP19BACHL', IntegerType()), StructField('CIP22CERT1', IntegerType()), StructField('CIP22CERT2', IntegerType()), StructField('CIP22ASSOC', IntegerType()), StructField('CIP22CERT4', IntegerType()), StructField('CIP22BACHL', IntegerType()), StructField('CIP23CERT1', IntegerType()), StructField('CIP23CERT2', IntegerType()), StructField('CIP23ASSOC', IntegerType()), StructField('CIP23CERT4', IntegerType()), StructField('CIP23BACHL', IntegerType()), StructField('CIP24CERT1', IntegerType()), StructField('CIP24CERT2', IntegerType()), StructField('CIP24ASSOC', IntegerType()), StructField('CIP24CERT4', IntegerType()), StructField('CIP24BACHL', IntegerType()), StructField('CIP25CERT1', IntegerType()), StructField('CIP25CERT2', IntegerType()), StructField('CIP25ASSOC', IntegerType()), StructField('CIP25CERT4', IntegerType()), StructField('CIP25BACHL', IntegerType()), StructField('CIP26CERT1', IntegerType()), StructField('CIP26CERT2', IntegerType()), StructField('CIP26ASSOC', IntegerType()), StructField('CIP26CERT4', IntegerType()), StructField('CIP26BACHL', IntegerType()), StructField('CIP27CERT1', IntegerType()), StructField('CIP27CERT2', IntegerType()), StructField('CIP27ASSOC', IntegerType()), StructField('CIP27CERT4', IntegerType()), StructField('CIP27BACHL', IntegerType()), StructField('CIP29CERT1', IntegerType()), StructField('CIP29CERT2', IntegerType()), StructField('CIP29ASSOC', IntegerType()), StructField('CIP29CERT4', IntegerType()), StructField('CIP29BACHL', IntegerType()), StructField('CIP30CERT1', IntegerType()), StructField('CIP30CERT2', IntegerType()), StructField('CIP30ASSOC', 
IntegerType()), StructField('CIP30CERT4', IntegerType()), StructField('CIP30BACHL', IntegerType()), StructField('CIP31CERT1', IntegerType()), StructField('CIP31CERT2', IntegerType()), StructField('CIP31ASSOC', IntegerType()), StructField('CIP31CERT4', IntegerType()), StructField('CIP31BACHL', IntegerType()), StructField('CIP38CERT1', IntegerType()), StructField('CIP38CERT2', IntegerType()), StructField('CIP38ASSOC', IntegerType()), StructField('CIP38CERT4', IntegerType()), StructField('CIP38BACHL', IntegerType()), StructField('CIP39CERT1', IntegerType()), StructField('CIP39CERT2', IntegerType()), StructField('CIP39ASSOC', IntegerType()), StructField('CIP39CERT4', IntegerType()), StructField('CIP39BACHL', IntegerType()), StructField('CIP40CERT1', IntegerType()), StructField('CIP40CERT2', IntegerType()), StructField('CIP40ASSOC', IntegerType()), StructField('CIP40CERT4', IntegerType()), StructField('CIP40BACHL', IntegerType()), StructField('CIP41CERT1', IntegerType()), StructField('CIP41CERT2', IntegerType()), StructField('CIP41ASSOC', IntegerType()), StructField('CIP41CERT4', IntegerType()), StructField('CIP41BACHL', IntegerType()), StructField('CIP42CERT1', IntegerType()), StructField('CIP42CERT2', IntegerType()), StructField('CIP42ASSOC', IntegerType()), StructField('CIP42CERT4', IntegerType()), StructField('CIP42BACHL', IntegerType()), StructField('CIP43CERT1', IntegerType()), StructField('CIP43CERT2', IntegerType()), StructField('CIP43ASSOC', IntegerType()), StructField('CIP43CERT4', IntegerType()), StructField('CIP43BACHL', IntegerType()), StructField('CIP44CERT1', IntegerType()), StructField('CIP44CERT2', IntegerType()), StructField('CIP44ASSOC', IntegerType()), StructField('CIP44CERT4', IntegerType()), StructField('CIP44BACHL', IntegerType()), StructField('CIP45CERT1', IntegerType()), StructField('CIP45CERT2', IntegerType()), StructField('CIP45ASSOC', IntegerType()), StructField('CIP45CERT4', IntegerType()), StructField('CIP45BACHL', IntegerType()), 
StructField('CIP46CERT1', IntegerType()), StructField('CIP46CERT2', IntegerType()), StructField('CIP46ASSOC', IntegerType()), StructField('CIP46CERT4', IntegerType()), StructField('CIP46BACHL', IntegerType()), StructField('CIP47CERT1', IntegerType()), StructField('CIP47CERT2', IntegerType()), StructField('CIP47ASSOC', IntegerType()), StructField('CIP47CERT4', IntegerType()), StructField('CIP47BACHL', IntegerType()), StructField('CIP48CERT1', IntegerType()), StructField('CIP48CERT2', IntegerType()), StructField('CIP48ASSOC', IntegerType()), StructField('CIP48CERT4', IntegerType()), StructField('CIP48BACHL', IntegerType()), StructField('CIP49CERT1', IntegerType()), StructField('CIP49CERT2', IntegerType()), StructField('CIP49ASSOC', IntegerType()), StructField('CIP49CERT4', IntegerType()), StructField('CIP49BACHL', IntegerType()), StructField('CIP50CERT1', IntegerType()), StructField('CIP50CERT2', IntegerType()), StructField('CIP50ASSOC', IntegerType()), StructField('CIP50CERT4', IntegerType()), StructField('CIP50BACHL', IntegerType()), StructField('CIP51CERT1', IntegerType()), StructField('CIP51CERT2', IntegerType()), StructField('CIP51ASSOC', IntegerType()), StructField('CIP51CERT4', IntegerType()), StructField('CIP51BACHL', IntegerType()), StructField('CIP52CERT1', IntegerType()), StructField('CIP52CERT2', IntegerType()), StructField('CIP52ASSOC', IntegerType()), StructField('CIP52CERT4', IntegerType()), StructField('CIP52BACHL', IntegerType()), StructField('CIP54CERT1', IntegerType()), StructField('CIP54CERT2', IntegerType()), StructField('CIP54ASSOC', IntegerType()), StructField('CIP54CERT4', IntegerType()), StructField('CIP54BACHL', IntegerType()), StructField('DISTANCEONLY', IntegerType()), StructField('UGDS', IntegerType()), StructField('UG', IntegerType()), StructField('UGDS_WHITE', FloatType()), StructField('UGDS_BLACK', FloatType()), StructField('UGDS_HISP', FloatType()), StructField('UGDS_ASIAN', FloatType()), StructField('UGDS_AIAN', FloatType()), 
StructField('UGDS_NHPI', FloatType()), StructField('UGDS_2MOR', FloatType()), StructField('UGDS_NRA', FloatType()), StructField('UGDS_UNKN', FloatType()), StructField('UGDS_WHITENH', FloatType()), StructField('UGDS_BLACKNH', FloatType()), StructField('UGDS_API', FloatType()), StructField('UGDS_AIANOld', FloatType()), StructField('UGDS_HISPOld', FloatType()), StructField('UG_NRA', FloatType()), StructField('UG_UNKN', FloatType()), StructField('UG_WHITENH', FloatType()), StructField('UG_BLACKNH', FloatType()), StructField('UG_API', FloatType()), StructField('UG_AIANOld', FloatType()), StructField('UG_HISPOld', FloatType()), StructField('PPTUG_EF', FloatType()), StructField('PPTUG_EF2', FloatType()), StructField('CURROPER', IntegerType()), StructField('NPT4_PUB', IntegerType()), StructField('NPT4_PRIV', IntegerType()), StructField('NPT4_PROG', IntegerType()), StructField('NPT4_OTHER', IntegerType()), StructField('NPT41_PUB', IntegerType()), StructField('NPT42_PUB', IntegerType()), StructField('NPT43_PUB', IntegerType()), StructField('NPT44_PUB', IntegerType()), StructField('NPT45_PUB', IntegerType()), StructField('NPT41_PRIV', IntegerType()), StructField('NPT42_PRIV', IntegerType()), StructField('NPT43_PRIV', IntegerType()), StructField('NPT44_PRIV', IntegerType()), StructField('NPT45_PRIV', IntegerType()), StructField('NPT41_PROG', IntegerType()), StructField('NPT42_PROG', IntegerType()), StructField('NPT43_PROG', IntegerType()), StructField('NPT44_PROG', IntegerType()), StructField('NPT45_PROG', IntegerType()), StructField('NPT41_OTHER', IntegerType()), StructField('NPT42_OTHER', IntegerType()), StructField('NPT43_OTHER', IntegerType()), StructField('NPT44_OTHER', IntegerType()), StructField('NPT45_OTHER', IntegerType()), StructField('NPT4_048_PUB', IntegerType()), StructField('NPT4_048_PRIV', IntegerType()), StructField('NPT4_048_PROG', IntegerType()), StructField('NPT4_048_OTHER', IntegerType()), StructField('NPT4_3075_PUB', IntegerType()), 
StructField('NPT4_3075_PRIV', IntegerType()), StructField('NPT4_75UP_PUB', IntegerType()), StructField('NPT4_75UP_PRIV', IntegerType()), StructField('NPT4_3075_PROG', IntegerType()), StructField('NPT4_3075_OTHER', IntegerType()), StructField('NPT4_75UP_PROG', IntegerType()), StructField('NPT4_75UP_OTHER', IntegerType()), StructField('NUM4_PUB', IntegerType()), StructField('NUM4_PRIV', IntegerType()), StructField('NUM4_PROG', IntegerType()), StructField('NUM4_OTHER', IntegerType()), StructField('NUM41_PUB', IntegerType()), StructField('NUM42_PUB', IntegerType()), StructField('NUM43_PUB', IntegerType()), StructField('NUM44_PUB', IntegerType()), StructField('NUM45_PUB', IntegerType()), StructField('NUM41_PRIV', IntegerType()), StructField('NUM42_PRIV', IntegerType()), StructField('NUM43_PRIV', IntegerType()), StructField('NUM44_PRIV', IntegerType()), StructField('NUM45_PRIV', IntegerType()), StructField('NUM41_PROG', IntegerType()), StructField('NUM42_PROG', IntegerType()), StructField('NUM43_PROG', IntegerType()), StructField('NUM44_PROG', IntegerType()), StructField('NUM45_PROG', IntegerType()), StructField('NUM41_OTHER', IntegerType()), StructField('NUM42_OTHER', IntegerType()), StructField('NUM43_OTHER', IntegerType()), StructField('NUM44_OTHER', IntegerType()), StructField('NUM45_OTHER', IntegerType()), StructField('COSTT4_A', IntegerType()), StructField('COSTT4_P', IntegerType()), StructField('TUITIONFEE_IN', IntegerType()), StructField('TUITIONFEE_OUT', IntegerType()), StructField('TUITIONFEE_PROG', IntegerType()), StructField('TUITFTE', IntegerType()), StructField('INEXPFTE', IntegerType()), StructField('AVGFACSAL', IntegerType()), StructField('PFTFAC', FloatType()), StructField('PCTPELL', FloatType()), StructField('C150_4', FloatType()), StructField('C150_L4', FloatType()), StructField('C150_4_POOLED', FloatType()), StructField('C150_L4_POOLED', FloatType()), StructField('poolyrs', IntegerType()), StructField('PFTFTUG1_EF', FloatType()), StructField('D150_4', 
IntegerType()), StructField('D150_L4', IntegerType()), StructField('D150_4_POOLED', IntegerType()), StructField('D150_L4_POOLED', IntegerType()), StructField('C150_4_WHITE', FloatType()), StructField('C150_4_BLACK', FloatType()), StructField('C150_4_HISP', FloatType()), StructField('C150_4_ASIAN', FloatType()), StructField('C150_4_AIAN', FloatType()), StructField('C150_4_NHPI', FloatType()), StructField('C150_4_2MOR', FloatType()), StructField('C150_4_NRA', FloatType()), StructField('C150_4_UNKN', FloatType()), StructField('C150_4_WHITENH', FloatType()), StructField('C150_4_BLACKNH', FloatType()), StructField('C150_4_API', FloatType()), StructField('C150_4_AIANOld', FloatType()), StructField('C150_4_HISPOld', FloatType()), StructField('C150_L4_WHITE', FloatType()), StructField('C150_L4_BLACK', FloatType()), StructField('C150_L4_HISP', FloatType()), StructField('C150_L4_ASIAN', FloatType()), StructField('C150_L4_AIAN', FloatType()), StructField('C150_L4_NHPI', FloatType()), StructField('C150_L4_2MOR', FloatType()), StructField('C150_L4_NRA', FloatType()), StructField('C150_L4_UNKN', FloatType()), StructField('C150_L4_WHITENH', FloatType()), StructField('C150_L4_BLACKNH', FloatType()), StructField('C150_L4_API', FloatType()), StructField('C150_L4_AIANOld', FloatType()), StructField('C150_L4_HISPOld', FloatType()), StructField('RET_FT4', FloatType()), StructField('RET_FTL4', FloatType()), StructField('RET_PT4', FloatType()), StructField('RET_PTL4', FloatType()), StructField('PCTFLOAN', FloatType()), StructField('UG25abv', FloatType()), StructField('CDR2', FloatType()), StructField('CDR3', FloatType()), StructField('DEATH_YR2_RT', FloatType()), StructField('COMP_ORIG_YR2_RT', FloatType()), StructField('COMP_4YR_TRANS_YR2_RT', FloatType()), StructField('COMP_2YR_TRANS_YR2_RT', FloatType()), StructField('WDRAW_ORIG_YR2_RT', FloatType()), StructField('WDRAW_4YR_TRANS_YR2_RT', FloatType()), StructField('WDRAW_2YR_TRANS_YR2_RT', FloatType()), StructField('ENRL_ORIG_YR2_RT', 
FloatType()), StructField('ENRL_4YR_TRANS_YR2_RT', FloatType()), StructField('ENRL_2YR_TRANS_YR2_RT', FloatType()), StructField('UNKN_ORIG_YR2_RT', FloatType()), StructField('UNKN_4YR_TRANS_YR2_RT', FloatType()), StructField('UNKN_2YR_TRANS_YR2_RT', FloatType()), StructField('LO_INC_DEATH_YR2_RT', FloatType()), StructField('LO_INC_COMP_ORIG_YR2_RT', FloatType()), StructField('LO_INC_COMP_4YR_TRANS_YR2_RT', FloatType()), StructField('LO_INC_COMP_2YR_TRANS_YR2_RT', FloatType()), StructField('LO_INC_WDRAW_ORIG_YR2_RT', FloatType()), StructField('LO_INC_WDRAW_4YR_TRANS_YR2_RT', FloatType()), StructField('LO_INC_WDRAW_2YR_TRANS_YR2_RT', FloatType()), StructField('LO_INC_ENRL_ORIG_YR2_RT', FloatType()), StructField('LO_INC_ENRL_4YR_TRANS_YR2_RT', FloatType()), StructField('LO_INC_ENRL_2YR_TRANS_YR2_RT', FloatType()), StructField('LO_INC_UNKN_ORIG_YR2_RT', FloatType()), StructField('LO_INC_UNKN_4YR_TRANS_YR2_RT', FloatType()), StructField('LO_INC_UNKN_2YR_TRANS_YR2_RT', FloatType()), StructField('MD_INC_DEATH_YR2_RT', FloatType()), StructField('MD_INC_COMP_ORIG_YR2_RT', FloatType()), StructField('MD_INC_COMP_4YR_TRANS_YR2_RT', FloatType()), StructField('MD_INC_COMP_2YR_TRANS_YR2_RT', FloatType()), StructField('MD_INC_WDRAW_ORIG_YR2_RT', FloatType()), StructField('MD_INC_WDRAW_4YR_TRANS_YR2_RT', FloatType()), StructField('MD_INC_WDRAW_2YR_TRANS_YR2_RT', FloatType()), StructField('MD_INC_ENRL_ORIG_YR2_RT', FloatType()), StructField('MD_INC_ENRL_4YR_TRANS_YR2_RT', FloatType()), StructField('MD_INC_ENRL_2YR_TRANS_YR2_RT', FloatType()), StructField('MD_INC_UNKN_ORIG_YR2_RT', FloatType()), StructField('MD_INC_UNKN_4YR_TRANS_YR2_RT', FloatType()), StructField('MD_INC_UNKN_2YR_TRANS_YR2_RT', FloatType()), StructField('HI_INC_DEATH_YR2_RT', FloatType()), StructField('HI_INC_COMP_ORIG_YR2_RT', FloatType()), StructField('HI_INC_COMP_4YR_TRANS_YR2_RT', FloatType()), StructField('HI_INC_COMP_2YR_TRANS_YR2_RT', FloatType()), StructField('HI_INC_WDRAW_ORIG_YR2_RT', FloatType()), 
StructField('HI_INC_WDRAW_4YR_TRANS_YR2_RT', FloatType()), StructField('HI_INC_WDRAW_2YR_TRANS_YR2_RT', FloatType()), StructField('HI_INC_ENRL_ORIG_YR2_RT', FloatType()), StructField('HI_INC_ENRL_4YR_TRANS_YR2_RT', FloatType()), StructField('HI_INC_ENRL_2YR_TRANS_YR2_RT', FloatType()), StructField('HI_INC_UNKN_ORIG_YR2_RT', FloatType()), StructField('HI_INC_UNKN_4YR_TRANS_YR2_RT', FloatType()), StructField('HI_INC_UNKN_2YR_TRANS_YR2_RT', FloatType()), StructField('DEP_DEATH_YR2_RT', FloatType()), StructField('DEP_COMP_ORIG_YR2_RT', FloatType()), StructField('DEP_COMP_4YR_TRANS_YR2_RT', FloatType()), StructField('DEP_COMP_2YR_TRANS_YR2_RT', FloatType()), StructField('DEP_WDRAW_ORIG_YR2_RT', FloatType()), StructField('DEP_WDRAW_4YR_TRANS_YR2_RT', FloatType()), StructField('DEP_WDRAW_2YR_TRANS_YR2_RT', FloatType()), StructField('DEP_ENRL_ORIG_YR2_RT', FloatType()), StructField('DEP_ENRL_4YR_TRANS_YR2_RT', FloatType()), StructField('DEP_ENRL_2YR_TRANS_YR2_RT', FloatType()), StructField('DEP_UNKN_ORIG_YR2_RT', FloatType()), StructField('DEP_UNKN_4YR_TRANS_YR2_RT', FloatType()), StructField('DEP_UNKN_2YR_TRANS_YR2_RT', FloatType()), StructField('IND_DEATH_YR2_RT', FloatType()), StructField('IND_COMP_ORIG_YR2_RT', FloatType()), StructField('IND_COMP_4YR_TRANS_YR2_RT', FloatType()), StructField('IND_COMP_2YR_TRANS_YR2_RT', FloatType()), StructField('IND_WDRAW_ORIG_YR2_RT', FloatType()), StructField('IND_WDRAW_4YR_TRANS_YR2_RT', FloatType()), StructField('IND_WDRAW_2YR_TRANS_YR2_RT', FloatType()), StructField('IND_ENRL_ORIG_YR2_RT', FloatType()), StructField('IND_ENRL_4YR_TRANS_YR2_RT', FloatType()), StructField('IND_ENRL_2YR_TRANS_YR2_RT', FloatType()), StructField('IND_UNKN_ORIG_YR2_RT', FloatType()), StructField('IND_UNKN_4YR_TRANS_YR2_RT', FloatType()), StructField('IND_UNKN_2YR_TRANS_YR2_RT', FloatType()), StructField('FEMALE_DEATH_YR2_RT', FloatType()), StructField('FEMALE_COMP_ORIG_YR2_RT', FloatType()), StructField('FEMALE_COMP_4YR_TRANS_YR2_RT', FloatType()), 
StructField('FEMALE_COMP_2YR_TRANS_YR2_RT', FloatType()), StructField('FEMALE_WDRAW_ORIG_YR2_RT', FloatType()), StructField('FEMALE_WDRAW_4YR_TRANS_YR2_RT', FloatType()), StructField('FEMALE_WDRAW_2YR_TRANS_YR2_RT', FloatType()), StructField('FEMALE_ENRL_ORIG_YR2_RT', FloatType()), StructField('FEMALE_ENRL_4YR_TRANS_YR2_RT', FloatType()), StructField('FEMALE_ENRL_2YR_TRANS_YR2_RT', FloatType()), StructField('FEMALE_UNKN_ORIG_YR2_RT', FloatType()), StructField('FEMALE_UNKN_4YR_TRANS_YR2_RT', FloatType()), StructField('FEMALE_UNKN_2YR_TRANS_YR2_RT', FloatType()), StructField('MALE_DEATH_YR2_RT', FloatType()), StructField('MALE_COMP_ORIG_YR2_RT', FloatType()), StructField('MALE_COMP_4YR_TRANS_YR2_RT', FloatType()), StructField('MALE_COMP_2YR_TRANS_YR2_RT', FloatType()), StructField('MALE_WDRAW_ORIG_YR2_RT', FloatType()), StructField('MALE_WDRAW_4YR_TRANS_YR2_RT', FloatType()), StructField('MALE_WDRAW_2YR_TRANS_YR2_RT', FloatType()), StructField('MALE_ENRL_ORIG_YR2_RT', FloatType()), StructField('MALE_ENRL_4YR_TRANS_YR2_RT', FloatType()), StructField('MALE_ENRL_2YR_TRANS_YR2_RT', FloatType()), StructField('MALE_UNKN_ORIG_YR2_RT', FloatType()), StructField('MALE_UNKN_4YR_TRANS_YR2_RT', FloatType()), StructField('MALE_UNKN_2YR_TRANS_YR2_RT', FloatType()), StructField('PELL_DEATH_YR2_RT', FloatType()), StructField('PELL_COMP_ORIG_YR2_RT', FloatType()), StructField('PELL_COMP_4YR_TRANS_YR2_RT', FloatType()), StructField('PELL_COMP_2YR_TRANS_YR2_RT', FloatType()), StructField('PELL_WDRAW_ORIG_YR2_RT', FloatType()), StructField('PELL_WDRAW_4YR_TRANS_YR2_RT', FloatType()), StructField('PELL_WDRAW_2YR_TRANS_YR2_RT', FloatType()), StructField('PELL_ENRL_ORIG_YR2_RT', FloatType()), StructField('PELL_ENRL_4YR_TRANS_YR2_RT', FloatType()), StructField('PELL_ENRL_2YR_TRANS_YR2_RT', FloatType()), StructField('PELL_UNKN_ORIG_YR2_RT', FloatType()), StructField('PELL_UNKN_4YR_TRANS_YR2_RT', FloatType()), StructField('PELL_UNKN_2YR_TRANS_YR2_RT', FloatType()), 
StructField('NOPELL_DEATH_YR2_RT', FloatType()), StructField('NOPELL_COMP_ORIG_YR2_RT', FloatType()), StructField('NOPELL_COMP_4YR_TRANS_YR2_RT', FloatType()), StructField('NOPELL_COMP_2YR_TRANS_YR2_RT', FloatType()), StructField('NOPELL_WDRAW_ORIG_YR2_RT', FloatType()), StructField('NOPELL_WDRAW_4YR_TRANS_YR2_RT', FloatType()), StructField('NOPELL_WDRAW_2YR_TRANS_YR2_RT', FloatType()), StructField('NOPELL_ENRL_ORIG_YR2_RT', FloatType()), StructField('NOPELL_ENRL_4YR_TRANS_YR2_RT', FloatType()), StructField('NOPELL_ENRL_2YR_TRANS_YR2_RT', FloatType()), StructField('NOPELL_UNKN_ORIG_YR2_RT', FloatType()), StructField('NOPELL_UNKN_4YR_TRANS_YR2_RT', FloatType()), StructField('NOPELL_UNKN_2YR_TRANS_YR2_RT', FloatType()), StructField('LOAN_DEATH_YR2_RT', FloatType()), StructField('LOAN_COMP_ORIG_YR2_RT', FloatType()), StructField('LOAN_COMP_4YR_TRANS_YR2_RT', FloatType()), StructField('LOAN_COMP_2YR_TRANS_YR2_RT', FloatType()), StructField('LOAN_WDRAW_ORIG_YR2_RT', FloatType()), StructField('LOAN_WDRAW_4YR_TRANS_YR2_RT', FloatType()), StructField('LOAN_WDRAW_2YR_TRANS_YR2_RT', FloatType()), StructField('LOAN_ENRL_ORIG_YR2_RT', FloatType()), StructField('LOAN_ENRL_4YR_TRANS_YR2_RT', FloatType()), StructField('LOAN_ENRL_2YR_TRANS_YR2_RT', FloatType()), StructField('LOAN_UNKN_ORIG_YR2_RT', FloatType()), StructField('LOAN_UNKN_4YR_TRANS_YR2_RT', FloatType()), StructField('LOAN_UNKN_2YR_TRANS_YR2_RT', FloatType()), StructField('NOLOAN_DEATH_YR2_RT', FloatType()), StructField('NOLOAN_COMP_ORIG_YR2_RT', FloatType()), StructField('NOLOAN_COMP_4YR_TRANS_YR2_RT', FloatType()), StructField('NOLOAN_COMP_2YR_TRANS_YR2_RT', FloatType()), StructField('NOLOAN_WDRAW_ORIG_YR2_RT', FloatType()), StructField('NOLOAN_WDRAW_4YR_TRANS_YR2_RT', FloatType()), StructField('NOLOAN_WDRAW_2YR_TRANS_YR2_RT', FloatType()), StructField('NOLOAN_ENRL_ORIG_YR2_RT', FloatType()), StructField('NOLOAN_ENRL_4YR_TRANS_YR2_RT', FloatType()), StructField('NOLOAN_ENRL_2YR_TRANS_YR2_RT', FloatType()), 
StructField('NOLOAN_UNKN_ORIG_YR2_RT', FloatType()), StructField('NOLOAN_UNKN_4YR_TRANS_YR2_RT', FloatType()), StructField('NOLOAN_UNKN_2YR_TRANS_YR2_RT', FloatType()), StructField('FIRSTGEN_DEATH_YR2_RT', FloatType()), StructField('FIRSTGEN_COMP_ORIG_YR2_RT', FloatType()), StructField('FIRSTGEN_COMP_4YR_TRANS_YR2_RT', FloatType()), StructField('FIRSTGEN_COMP_2YR_TRANS_YR2_RT', FloatType()), StructField('FIRSTGEN_WDRAW_ORIG_YR2_RT', FloatType()), StructField('FIRSTGEN_WDRAW_4YR_TRANS_YR2_RT', FloatType()), StructField('FIRSTGEN_WDRAW_2YR_TRANS_YR2_RT', FloatType()), StructField('FIRSTGEN_ENRL_ORIG_YR2_RT', FloatType()), StructField('FIRSTGEN_ENRL_4YR_TRANS_YR2_RT', FloatType()), StructField('FIRSTGEN_ENRL_2YR_TRANS_YR2_RT', FloatType()), StructField('FIRSTGEN_UNKN_ORIG_YR2_RT', FloatType()), StructField('FIRSTGEN_UNKN_4YR_TRANS_YR2_RT', FloatType()), StructField('FIRSTGEN_UNKN_2YR_TRANS_YR2_RT', FloatType()), StructField('NOT1STGEN_DEATH_YR2_RT', FloatType()), StructField('NOT1STGEN_COMP_ORIG_YR2_RT', FloatType()), StructField('NOT1STGEN_COMP_4YR_TRANS_YR2_RT', FloatType()), StructField('NOT1STGEN_COMP_2YR_TRANS_YR2_RT', FloatType()), StructField('NOT1STGEN_WDRAW_ORIG_YR2_RT', FloatType()), StructField('NOT1STGEN_WDRAW_4YR_TRANS_YR2_RT', FloatType()), StructField('NOT1STGEN_WDRAW_2YR_TRANS_YR2_RT', FloatType()), StructField('NOT1STGEN_ENRL_ORIG_YR2_RT', FloatType()), StructField('NOT1STGEN_ENRL_4YR_TRANS_YR2_RT', FloatType()), StructField('NOT1STGEN_ENRL_2YR_TRANS_YR2_RT', FloatType()), StructField('NOT1STGEN_UNKN_ORIG_YR2_RT', FloatType()), StructField('NOT1STGEN_UNKN_4YR_TRANS_YR2_RT', FloatType()), StructField('NOT1STGEN_UNKN_2YR_TRANS_YR2_RT', FloatType()), StructField('DEATH_YR3_RT', FloatType()), StructField('COMP_ORIG_YR3_RT', FloatType()), StructField('COMP_4YR_TRANS_YR3_RT', FloatType()), StructField('COMP_2YR_TRANS_YR3_RT', FloatType()), StructField('WDRAW_ORIG_YR3_RT', FloatType()), StructField('WDRAW_4YR_TRANS_YR3_RT', FloatType()), 
StructField('WDRAW_2YR_TRANS_YR3_RT', FloatType()), StructField('ENRL_ORIG_YR3_RT', FloatType()), StructField('ENRL_4YR_TRANS_YR3_RT', FloatType()), StructField('ENRL_2YR_TRANS_YR3_RT', FloatType()), StructField('UNKN_ORIG_YR3_RT', FloatType()), StructField('UNKN_4YR_TRANS_YR3_RT', FloatType()), StructField('UNKN_2YR_TRANS_YR3_RT', FloatType()), StructField('LO_INC_DEATH_YR3_RT', FloatType()), StructField('LO_INC_COMP_ORIG_YR3_RT', FloatType()), StructField('LO_INC_COMP_4YR_TRANS_YR3_RT', FloatType()), StructField('LO_INC_COMP_2YR_TRANS_YR3_RT', FloatType()), StructField('LO_INC_WDRAW_ORIG_YR3_RT', FloatType()), StructField('LO_INC_WDRAW_4YR_TRANS_YR3_RT', FloatType()), StructField('LO_INC_WDRAW_2YR_TRANS_YR3_RT', FloatType()), StructField('LO_INC_ENRL_ORIG_YR3_RT', FloatType()), StructField('LO_INC_ENRL_4YR_TRANS_YR3_RT', FloatType()), StructField('LO_INC_ENRL_2YR_TRANS_YR3_RT', FloatType()), StructField('LO_INC_UNKN_ORIG_YR3_RT', FloatType()), StructField('LO_INC_UNKN_4YR_TRANS_YR3_RT', FloatType()), StructField('LO_INC_UNKN_2YR_TRANS_YR3_RT', FloatType()), StructField('MD_INC_DEATH_YR3_RT', FloatType()), StructField('MD_INC_COMP_ORIG_YR3_RT', FloatType()), StructField('MD_INC_COMP_4YR_TRANS_YR3_RT', FloatType()), StructField('MD_INC_COMP_2YR_TRANS_YR3_RT', FloatType()), StructField('MD_INC_WDRAW_ORIG_YR3_RT', FloatType()), StructField('MD_INC_WDRAW_4YR_TRANS_YR3_RT', FloatType()), StructField('MD_INC_WDRAW_2YR_TRANS_YR3_RT', FloatType()), StructField('MD_INC_ENRL_ORIG_YR3_RT', FloatType()), StructField('MD_INC_ENRL_4YR_TRANS_YR3_RT', FloatType()), StructField('MD_INC_ENRL_2YR_TRANS_YR3_RT', FloatType()), StructField('MD_INC_UNKN_ORIG_YR3_RT', FloatType()), StructField('MD_INC_UNKN_4YR_TRANS_YR3_RT', FloatType()), StructField('MD_INC_UNKN_2YR_TRANS_YR3_RT', FloatType()), StructField('HI_INC_DEATH_YR3_RT', FloatType()), StructField('HI_INC_COMP_ORIG_YR3_RT', FloatType()), StructField('HI_INC_COMP_4YR_TRANS_YR3_RT', FloatType()), 
StructField('HI_INC_COMP_2YR_TRANS_YR3_RT', FloatType()), StructField('HI_INC_WDRAW_ORIG_YR3_RT', FloatType()), StructField('HI_INC_WDRAW_4YR_TRANS_YR3_RT', FloatType()), StructField('HI_INC_WDRAW_2YR_TRANS_YR3_RT', FloatType()), StructField('HI_INC_ENRL_ORIG_YR3_RT', FloatType()), StructField('HI_INC_ENRL_4YR_TRANS_YR3_RT', FloatType()), StructField('HI_INC_ENRL_2YR_TRANS_YR3_RT', FloatType()), StructField('HI_INC_UNKN_ORIG_YR3_RT', FloatType()), StructField('HI_INC_UNKN_4YR_TRANS_YR3_RT', FloatType()), StructField('HI_INC_UNKN_2YR_TRANS_YR3_RT', FloatType()), StructField('DEP_DEATH_YR3_RT', FloatType()), StructField('DEP_COMP_ORIG_YR3_RT', FloatType()), StructField('DEP_COMP_4YR_TRANS_YR3_RT', FloatType()), StructField('DEP_COMP_2YR_TRANS_YR3_RT', FloatType()), StructField('DEP_WDRAW_ORIG_YR3_RT', FloatType()), StructField('DEP_WDRAW_4YR_TRANS_YR3_RT', FloatType()), StructField('DEP_WDRAW_2YR_TRANS_YR3_RT', FloatType()), StructField('DEP_ENRL_ORIG_YR3_RT', FloatType()), StructField('DEP_ENRL_4YR_TRANS_YR3_RT', FloatType()), StructField('DEP_ENRL_2YR_TRANS_YR3_RT', FloatType()), StructField('DEP_UNKN_ORIG_YR3_RT', FloatType()), StructField('DEP_UNKN_4YR_TRANS_YR3_RT', FloatType()), StructField('DEP_UNKN_2YR_TRANS_YR3_RT', FloatType()), StructField('IND_DEATH_YR3_RT', FloatType()), StructField('IND_COMP_ORIG_YR3_RT', FloatType()), StructField('IND_COMP_4YR_TRANS_YR3_RT', FloatType()), StructField('IND_COMP_2YR_TRANS_YR3_RT', FloatType()), StructField('IND_WDRAW_ORIG_YR3_RT', FloatType()), StructField('IND_WDRAW_4YR_TRANS_YR3_RT', FloatType()), StructField('IND_WDRAW_2YR_TRANS_YR3_RT', FloatType()), StructField('IND_ENRL_ORIG_YR3_RT', FloatType()), StructField('IND_ENRL_4YR_TRANS_YR3_RT', FloatType()), StructField('IND_ENRL_2YR_TRANS_YR3_RT', FloatType()), StructField('IND_UNKN_ORIG_YR3_RT', FloatType()), StructField('IND_UNKN_4YR_TRANS_YR3_RT', FloatType()), StructField('IND_UNKN_2YR_TRANS_YR3_RT', FloatType()), StructField('FEMALE_DEATH_YR3_RT', FloatType()), 
StructField('FEMALE_COMP_ORIG_YR3_RT', FloatType()), StructField('FEMALE_COMP_4YR_TRANS_YR3_RT', FloatType()), StructField('FEMALE_COMP_2YR_TRANS_YR3_RT', FloatType()), StructField('FEMALE_WDRAW_ORIG_YR3_RT', FloatType()), StructField('FEMALE_WDRAW_4YR_TRANS_YR3_RT', FloatType()), StructField('FEMALE_WDRAW_2YR_TRANS_YR3_RT', FloatType()), StructField('FEMALE_ENRL_ORIG_YR3_RT', FloatType()), StructField('FEMALE_ENRL_4YR_TRANS_YR3_RT', FloatType()), StructField('FEMALE_ENRL_2YR_TRANS_YR3_RT', FloatType()), StructField('FEMALE_UNKN_ORIG_YR3_RT', FloatType()), StructField('FEMALE_UNKN_4YR_TRANS_YR3_RT', FloatType()), StructField('FEMALE_UNKN_2YR_TRANS_YR3_RT', FloatType()), StructField('MALE_DEATH_YR3_RT', FloatType()), StructField('MALE_COMP_ORIG_YR3_RT', FloatType()), StructField('MALE_COMP_4YR_TRANS_YR3_RT', FloatType()), StructField('MALE_COMP_2YR_TRANS_YR3_RT', FloatType()), StructField('MALE_WDRAW_ORIG_YR3_RT', FloatType()), StructField('MALE_WDRAW_4YR_TRANS_YR3_RT', FloatType()), StructField('MALE_WDRAW_2YR_TRANS_YR3_RT', FloatType()), StructField('MALE_ENRL_ORIG_YR3_RT', FloatType()), StructField('MALE_ENRL_4YR_TRANS_YR3_RT', FloatType()), StructField('MALE_ENRL_2YR_TRANS_YR3_RT', FloatType()), StructField('MALE_UNKN_ORIG_YR3_RT', FloatType()), StructField('MALE_UNKN_4YR_TRANS_YR3_RT', FloatType()), StructField('MALE_UNKN_2YR_TRANS_YR3_RT', FloatType()), StructField('PELL_DEATH_YR3_RT', FloatType()), StructField('PELL_COMP_ORIG_YR3_RT', FloatType()), StructField('PELL_COMP_4YR_TRANS_YR3_RT', FloatType()), StructField('PELL_COMP_2YR_TRANS_YR3_RT', FloatType()), StructField('PELL_WDRAW_ORIG_YR3_RT', FloatType()), StructField('PELL_WDRAW_4YR_TRANS_YR3_RT', FloatType()), StructField('PELL_WDRAW_2YR_TRANS_YR3_RT', FloatType()), StructField('PELL_ENRL_ORIG_YR3_RT', FloatType()), StructField('PELL_ENRL_4YR_TRANS_YR3_RT', FloatType()), StructField('PELL_ENRL_2YR_TRANS_YR3_RT', FloatType()), StructField('PELL_UNKN_ORIG_YR3_RT', FloatType()), 
StructField('PELL_UNKN_4YR_TRANS_YR3_RT', FloatType()), StructField('PELL_UNKN_2YR_TRANS_YR3_RT', FloatType()), StructField('NOPELL_DEATH_YR3_RT', FloatType()), StructField('NOPELL_COMP_ORIG_YR3_RT', FloatType()), StructField('NOPELL_COMP_4YR_TRANS_YR3_RT', FloatType()), StructField('NOPELL_COMP_2YR_TRANS_YR3_RT', FloatType()), StructField('NOPELL_WDRAW_ORIG_YR3_RT', FloatType()), StructField('NOPELL_WDRAW_4YR_TRANS_YR3_RT', FloatType()), StructField('NOPELL_WDRAW_2YR_TRANS_YR3_RT', FloatType()), StructField('NOPELL_ENRL_ORIG_YR3_RT', FloatType()), StructField('NOPELL_ENRL_4YR_TRANS_YR3_RT', FloatType()), StructField('NOPELL_ENRL_2YR_TRANS_YR3_RT', FloatType()), StructField('NOPELL_UNKN_ORIG_YR3_RT', FloatType()), StructField('NOPELL_UNKN_4YR_TRANS_YR3_RT', FloatType()), StructField('NOPELL_UNKN_2YR_TRANS_YR3_RT', FloatType()), StructField('LOAN_DEATH_YR3_RT', FloatType()), StructField('LOAN_COMP_ORIG_YR3_RT', FloatType()), StructField('LOAN_COMP_4YR_TRANS_YR3_RT', FloatType()), StructField('LOAN_COMP_2YR_TRANS_YR3_RT', FloatType()), StructField('LOAN_WDRAW_ORIG_YR3_RT', FloatType()), StructField('LOAN_WDRAW_4YR_TRANS_YR3_RT', FloatType()), StructField('LOAN_WDRAW_2YR_TRANS_YR3_RT', FloatType()), StructField('LOAN_ENRL_ORIG_YR3_RT', FloatType()), StructField('LOAN_ENRL_4YR_TRANS_YR3_RT', FloatType()), StructField('LOAN_ENRL_2YR_TRANS_YR3_RT', FloatType()), StructField('LOAN_UNKN_ORIG_YR3_RT', FloatType()), StructField('LOAN_UNKN_4YR_TRANS_YR3_RT', FloatType()), StructField('LOAN_UNKN_2YR_TRANS_YR3_RT', FloatType()), StructField('NOLOAN_DEATH_YR3_RT', FloatType()), StructField('NOLOAN_COMP_ORIG_YR3_RT', FloatType()), StructField('NOLOAN_COMP_4YR_TRANS_YR3_RT', FloatType()), StructField('NOLOAN_COMP_2YR_TRANS_YR3_RT', FloatType()), StructField('NOLOAN_WDRAW_ORIG_YR3_RT', FloatType()), StructField('NOLOAN_WDRAW_4YR_TRANS_YR3_RT', FloatType()), StructField('NOLOAN_WDRAW_2YR_TRANS_YR3_RT', FloatType()), StructField('NOLOAN_ENRL_ORIG_YR3_RT', FloatType()), 
StructField('NOLOAN_ENRL_4YR_TRANS_YR3_RT', FloatType()), StructField('NOLOAN_ENRL_2YR_TRANS_YR3_RT', FloatType()), StructField('NOLOAN_UNKN_ORIG_YR3_RT', FloatType()), StructField('NOLOAN_UNKN_4YR_TRANS_YR3_RT', FloatType()), StructField('NOLOAN_UNKN_2YR_TRANS_YR3_RT', FloatType()), StructField('FIRSTGEN_DEATH_YR3_RT', FloatType()), StructField('FIRSTGEN_COMP_ORIG_YR3_RT', FloatType()), StructField('FIRSTGEN_COMP_4YR_TRANS_YR3_RT', FloatType()), StructField('FIRSTGEN_COMP_2YR_TRANS_YR3_RT', FloatType()), StructField('FIRSTGEN_WDRAW_ORIG_YR3_RT', FloatType()), StructField('FIRSTGEN_WDRAW_4YR_TRANS_YR3_RT', FloatType()), StructField('FIRSTGEN_WDRAW_2YR_TRANS_YR3_RT', FloatType()), StructField('FIRSTGEN_ENRL_ORIG_YR3_RT', FloatType()), StructField('FIRSTGEN_ENRL_4YR_TRANS_YR3_RT', FloatType()), StructField('FIRSTGEN_ENRL_2YR_TRANS_YR3_RT', FloatType()), StructField('FIRSTGEN_UNKN_ORIG_YR3_RT', FloatType()), StructField('FIRSTGEN_UNKN_4YR_TRANS_YR3_RT', FloatType()), StructField('FIRSTGEN_UNKN_2YR_TRANS_YR3_RT', FloatType()), StructField('NOT1STGEN_DEATH_YR3_RT', FloatType()), StructField('NOT1STGEN_COMP_ORIG_YR3_RT', FloatType()), StructField('NOT1STGEN_COMP_4YR_TRANS_YR3_RT', FloatType()), StructField('NOT1STGEN_COMP_2YR_TRANS_YR3_RT', FloatType()), StructField('NOT1STGEN_WDRAW_ORIG_YR3_RT', FloatType()), StructField('NOT1STGEN_WDRAW_4YR_TRANS_YR3_RT', FloatType()), StructField('NOT1STGEN_WDRAW_2YR_TRANS_YR3_RT', FloatType()), StructField('NOT1STGEN_ENRL_ORIG_YR3_RT', FloatType()), StructField('NOT1STGEN_ENRL_4YR_TRANS_YR3_RT', FloatType()), StructField('NOT1STGEN_ENRL_2YR_TRANS_YR3_RT', FloatType()), StructField('NOT1STGEN_UNKN_ORIG_YR3_RT', FloatType()), StructField('NOT1STGEN_UNKN_4YR_TRANS_YR3_RT', FloatType()), StructField('NOT1STGEN_UNKN_2YR_TRANS_YR3_RT', FloatType()), StructField('DEATH_YR4_RT', FloatType()), StructField('COMP_ORIG_YR4_RT', FloatType()), StructField('COMP_4YR_TRANS_YR4_RT', FloatType()), StructField('COMP_2YR_TRANS_YR4_RT', FloatType()), 
StructField('WDRAW_ORIG_YR4_RT', FloatType()), StructField('WDRAW_4YR_TRANS_YR4_RT', FloatType()), StructField('WDRAW_2YR_TRANS_YR4_RT', FloatType()), StructField('ENRL_ORIG_YR4_RT', FloatType()), StructField('ENRL_4YR_TRANS_YR4_RT', FloatType()), StructField('ENRL_2YR_TRANS_YR4_RT', FloatType()), StructField('UNKN_ORIG_YR4_RT', FloatType()), StructField('UNKN_4YR_TRANS_YR4_RT', FloatType()), StructField('UNKN_2YR_TRANS_YR4_RT', FloatType()), StructField('LO_INC_DEATH_YR4_RT', FloatType()), StructField('LO_INC_COMP_ORIG_YR4_RT', FloatType()), StructField('LO_INC_COMP_4YR_TRANS_YR4_RT', FloatType()), StructField('LO_INC_COMP_2YR_TRANS_YR4_RT', FloatType()), StructField('LO_INC_WDRAW_ORIG_YR4_RT', FloatType()), StructField('LO_INC_WDRAW_4YR_TRANS_YR4_RT', FloatType()), StructField('LO_INC_WDRAW_2YR_TRANS_YR4_RT', FloatType()), StructField('LO_INC_ENRL_ORIG_YR4_RT', FloatType()), StructField('LO_INC_ENRL_4YR_TRANS_YR4_RT', FloatType()), StructField('LO_INC_ENRL_2YR_TRANS_YR4_RT', FloatType()), StructField('LO_INC_UNKN_ORIG_YR4_RT', FloatType()), StructField('LO_INC_UNKN_4YR_TRANS_YR4_RT', FloatType()), StructField('LO_INC_UNKN_2YR_TRANS_YR4_RT', FloatType()), StructField('MD_INC_DEATH_YR4_RT', FloatType()), StructField('MD_INC_COMP_ORIG_YR4_RT', FloatType()), StructField('MD_INC_COMP_4YR_TRANS_YR4_RT', FloatType()), StructField('MD_INC_COMP_2YR_TRANS_YR4_RT', FloatType()), StructField('MD_INC_WDRAW_ORIG_YR4_RT', FloatType()), StructField('MD_INC_WDRAW_4YR_TRANS_YR4_RT', FloatType()), StructField('MD_INC_WDRAW_2YR_TRANS_YR4_RT', FloatType()), StructField('MD_INC_ENRL_ORIG_YR4_RT', FloatType()), StructField('MD_INC_ENRL_4YR_TRANS_YR4_RT', FloatType()), StructField('MD_INC_ENRL_2YR_TRANS_YR4_RT', FloatType()), StructField('MD_INC_UNKN_ORIG_YR4_RT', FloatType()), StructField('MD_INC_UNKN_4YR_TRANS_YR4_RT', FloatType()), StructField('MD_INC_UNKN_2YR_TRANS_YR4_RT', FloatType()), StructField('HI_INC_DEATH_YR4_RT', FloatType()), StructField('HI_INC_COMP_ORIG_YR4_RT', 
FloatType()), StructField('HI_INC_COMP_4YR_TRANS_YR4_RT', FloatType()), StructField('HI_INC_COMP_2YR_TRANS_YR4_RT', FloatType()), StructField('HI_INC_WDRAW_ORIG_YR4_RT', FloatType()), StructField('HI_INC_WDRAW_4YR_TRANS_YR4_RT', FloatType()), StructField('HI_INC_WDRAW_2YR_TRANS_YR4_RT', FloatType()), StructField('HI_INC_ENRL_ORIG_YR4_RT', FloatType()), StructField('HI_INC_ENRL_4YR_TRANS_YR4_RT', FloatType()), StructField('HI_INC_ENRL_2YR_TRANS_YR4_RT', FloatType()), StructField('HI_INC_UNKN_ORIG_YR4_RT', FloatType()), StructField('HI_INC_UNKN_4YR_TRANS_YR4_RT', FloatType()), StructField('HI_INC_UNKN_2YR_TRANS_YR4_RT', FloatType()), StructField('DEP_DEATH_YR4_RT', FloatType()), StructField('DEP_COMP_ORIG_YR4_RT', FloatType()), StructField('DEP_COMP_4YR_TRANS_YR4_RT', FloatType()), StructField('DEP_COMP_2YR_TRANS_YR4_RT', FloatType()), StructField('DEP_WDRAW_ORIG_YR4_RT', FloatType()), StructField('DEP_WDRAW_4YR_TRANS_YR4_RT', FloatType()), StructField('DEP_WDRAW_2YR_TRANS_YR4_RT', FloatType()), StructField('DEP_ENRL_ORIG_YR4_RT', FloatType()), StructField('DEP_ENRL_4YR_TRANS_YR4_RT', FloatType()), StructField('DEP_ENRL_2YR_TRANS_YR4_RT', FloatType()), StructField('DEP_UNKN_ORIG_YR4_RT', FloatType()), StructField('DEP_UNKN_4YR_TRANS_YR4_RT', FloatType()), StructField('DEP_UNKN_2YR_TRANS_YR4_RT', FloatType()), StructField('IND_DEATH_YR4_RT', FloatType()), StructField('IND_COMP_ORIG_YR4_RT', FloatType()), StructField('IND_COMP_4YR_TRANS_YR4_RT', FloatType()), StructField('IND_COMP_2YR_TRANS_YR4_RT', FloatType()), StructField('IND_WDRAW_ORIG_YR4_RT', FloatType()), StructField('IND_WDRAW_4YR_TRANS_YR4_RT', FloatType()), StructField('IND_WDRAW_2YR_TRANS_YR4_RT', FloatType()), StructField('IND_ENRL_ORIG_YR4_RT', FloatType()), StructField('IND_ENRL_4YR_TRANS_YR4_RT', FloatType()), StructField('IND_ENRL_2YR_TRANS_YR4_RT', FloatType()), StructField('IND_UNKN_ORIG_YR4_RT', FloatType()), StructField('IND_UNKN_4YR_TRANS_YR4_RT', FloatType()), 
StructField('IND_UNKN_2YR_TRANS_YR4_RT', FloatType()), StructField('FEMALE_DEATH_YR4_RT', FloatType()), StructField('FEMALE_COMP_ORIG_YR4_RT', FloatType()), StructField('FEMALE_COMP_4YR_TRANS_YR4_RT', FloatType()), StructField('FEMALE_COMP_2YR_TRANS_YR4_RT', FloatType()), StructField('FEMALE_WDRAW_ORIG_YR4_RT', FloatType()), StructField('FEMALE_WDRAW_4YR_TRANS_YR4_RT', FloatType()), StructField('FEMALE_WDRAW_2YR_TRANS_YR4_RT', FloatType()), StructField('FEMALE_ENRL_ORIG_YR4_RT', FloatType()), StructField('FEMALE_ENRL_4YR_TRANS_YR4_RT', FloatType()), StructField('FEMALE_ENRL_2YR_TRANS_YR4_RT', FloatType()), StructField('FEMALE_UNKN_ORIG_YR4_RT', FloatType()), StructField('FEMALE_UNKN_4YR_TRANS_YR4_RT', FloatType()), StructField('FEMALE_UNKN_2YR_TRANS_YR4_RT', FloatType()), StructField('MALE_DEATH_YR4_RT', FloatType()), StructField('MALE_COMP_ORIG_YR4_RT', FloatType()), StructField('MALE_COMP_4YR_TRANS_YR4_RT', FloatType()), StructField('MALE_COMP_2YR_TRANS_YR4_RT', FloatType()), StructField('MALE_WDRAW_ORIG_YR4_RT', FloatType()), StructField('MALE_WDRAW_4YR_TRANS_YR4_RT', FloatType()), StructField('MALE_WDRAW_2YR_TRANS_YR4_RT', FloatType()), StructField('MALE_ENRL_ORIG_YR4_RT', FloatType()), StructField('MALE_ENRL_4YR_TRANS_YR4_RT', FloatType()), StructField('MALE_ENRL_2YR_TRANS_YR4_RT', FloatType()), StructField('MALE_UNKN_ORIG_YR4_RT', FloatType()), StructField('MALE_UNKN_4YR_TRANS_YR4_RT', FloatType()), StructField('MALE_UNKN_2YR_TRANS_YR4_RT', FloatType()), StructField('PELL_DEATH_YR4_RT', FloatType()), StructField('PELL_COMP_ORIG_YR4_RT', FloatType()), StructField('PELL_COMP_4YR_TRANS_YR4_RT', FloatType()), StructField('PELL_COMP_2YR_TRANS_YR4_RT', FloatType()), StructField('PELL_WDRAW_ORIG_YR4_RT', FloatType()), StructField('PELL_WDRAW_4YR_TRANS_YR4_RT', FloatType()), StructField('PELL_WDRAW_2YR_TRANS_YR4_RT', FloatType()), StructField('PELL_ENRL_ORIG_YR4_RT', FloatType()), StructField('PELL_ENRL_4YR_TRANS_YR4_RT', FloatType()), 
StructField('PELL_ENRL_2YR_TRANS_YR4_RT', FloatType()), StructField('PELL_UNKN_ORIG_YR4_RT', FloatType()), StructField('PELL_UNKN_4YR_TRANS_YR4_RT', FloatType()), StructField('PELL_UNKN_2YR_TRANS_YR4_RT', FloatType()), StructField('NOPELL_DEATH_YR4_RT', FloatType()), StructField('NOPELL_COMP_ORIG_YR4_RT', FloatType()), StructField('NOPELL_COMP_4YR_TRANS_YR4_RT', FloatType()), StructField('NOPELL_COMP_2YR_TRANS_YR4_RT', FloatType()), StructField('NOPELL_WDRAW_ORIG_YR4_RT', FloatType()), StructField('NOPELL_WDRAW_4YR_TRANS_YR4_RT', FloatType()), StructField('NOPELL_WDRAW_2YR_TRANS_YR4_RT', FloatType()), StructField('NOPELL_ENRL_ORIG_YR4_RT', FloatType()), StructField('NOPELL_ENRL_4YR_TRANS_YR4_RT', FloatType()), StructField('NOPELL_ENRL_2YR_TRANS_YR4_RT', FloatType()), StructField('NOPELL_UNKN_ORIG_YR4_RT', FloatType()), StructField('NOPELL_UNKN_4YR_TRANS_YR4_RT', FloatType()), StructField('NOPELL_UNKN_2YR_TRANS_YR4_RT', FloatType()), StructField('LOAN_DEATH_YR4_RT', FloatType()), StructField('LOAN_COMP_ORIG_YR4_RT', FloatType()), StructField('LOAN_COMP_4YR_TRANS_YR4_RT', FloatType()), StructField('LOAN_COMP_2YR_TRANS_YR4_RT', FloatType()), StructField('LOAN_WDRAW_ORIG_YR4_RT', FloatType()), StructField('LOAN_WDRAW_4YR_TRANS_YR4_RT', FloatType()), StructField('LOAN_WDRAW_2YR_TRANS_YR4_RT', FloatType()), StructField('LOAN_ENRL_ORIG_YR4_RT', FloatType()), StructField('LOAN_ENRL_4YR_TRANS_YR4_RT', FloatType()), StructField('LOAN_ENRL_2YR_TRANS_YR4_RT', FloatType()), StructField('LOAN_UNKN_ORIG_YR4_RT', FloatType()), StructField('LOAN_UNKN_4YR_TRANS_YR4_RT', FloatType()), StructField('LOAN_UNKN_2YR_TRANS_YR4_RT', FloatType()), StructField('NOLOAN_DEATH_YR4_RT', FloatType()), StructField('NOLOAN_COMP_ORIG_YR4_RT', FloatType()), StructField('NOLOAN_COMP_4YR_TRANS_YR4_RT', FloatType()), StructField('NOLOAN_COMP_2YR_TRANS_YR4_RT', FloatType()), StructField('NOLOAN_WDRAW_ORIG_YR4_RT', FloatType()), StructField('NOLOAN_WDRAW_4YR_TRANS_YR4_RT', FloatType()), 
StructField('NOLOAN_WDRAW_2YR_TRANS_YR4_RT', FloatType()), StructField('NOLOAN_ENRL_ORIG_YR4_RT', FloatType()), StructField('NOLOAN_ENRL_4YR_TRANS_YR4_RT', FloatType()), StructField('NOLOAN_ENRL_2YR_TRANS_YR4_RT', FloatType()), StructField('NOLOAN_UNKN_ORIG_YR4_RT', FloatType()), StructField('NOLOAN_UNKN_4YR_TRANS_YR4_RT', FloatType()), StructField('NOLOAN_UNKN_2YR_TRANS_YR4_RT', FloatType()), StructField('FIRSTGEN_DEATH_YR4_RT', FloatType()), StructField('FIRSTGEN_COMP_ORIG_YR4_RT', FloatType()), StructField('FIRSTGEN_COMP_4YR_TRANS_YR4_RT', FloatType()), StructField('FIRSTGEN_COMP_2YR_TRANS_YR4_RT', FloatType()), StructField('FIRSTGEN_WDRAW_ORIG_YR4_RT', FloatType()), StructField('FIRSTGEN_WDRAW_4YR_TRANS_YR4_RT', FloatType()), StructField('FIRSTGEN_WDRAW_2YR_TRANS_YR4_RT', FloatType()), StructField('FIRSTGEN_ENRL_ORIG_YR4_RT', FloatType()), StructField('FIRSTGEN_ENRL_4YR_TRANS_YR4_RT', FloatType()), StructField('FIRSTGEN_ENRL_2YR_TRANS_YR4_RT', FloatType()), StructField('FIRSTGEN_UNKN_ORIG_YR4_RT', FloatType()), StructField('FIRSTGEN_UNKN_4YR_TRANS_YR4_RT', FloatType()), StructField('FIRSTGEN_UNKN_2YR_TRANS_YR4_RT', FloatType()), StructField('NOT1STGEN_DEATH_YR4_RT', FloatType()), StructField('NOT1STGEN_COMP_ORIG_YR4_RT', FloatType()), StructField('NOT1STGEN_COMP_4YR_TRANS_YR4_RT', FloatType()), StructField('NOT1STGEN_COMP_2YR_TRANS_YR4_RT', FloatType()), StructField('NOT1STGEN_WDRAW_ORIG_YR4_RT', FloatType()), StructField('NOT1STGEN_WDRAW_4YR_TRANS_YR4_RT', FloatType()), StructField('NOT1STGEN_WDRAW_2YR_TRANS_YR4_RT', FloatType()), StructField('NOT1STGEN_ENRL_ORIG_YR4_RT', FloatType()), StructField('NOT1STGEN_ENRL_4YR_TRANS_YR4_RT', FloatType()), StructField('NOT1STGEN_ENRL_2YR_TRANS_YR4_RT', FloatType()), StructField('NOT1STGEN_UNKN_ORIG_YR4_RT', FloatType()), StructField('NOT1STGEN_UNKN_4YR_TRANS_YR4_RT', FloatType()), StructField('NOT1STGEN_UNKN_2YR_TRANS_YR4_RT', FloatType()), StructField('DEATH_YR6_RT', FloatType()), StructField('COMP_ORIG_YR6_RT', 
FloatType()), StructField('COMP_4YR_TRANS_YR6_RT', FloatType()), StructField('COMP_2YR_TRANS_YR6_RT', FloatType()), StructField('WDRAW_ORIG_YR6_RT', FloatType()), StructField('WDRAW_4YR_TRANS_YR6_RT', FloatType()), StructField('WDRAW_2YR_TRANS_YR6_RT', FloatType()), StructField('ENRL_ORIG_YR6_RT', FloatType()), StructField('ENRL_4YR_TRANS_YR6_RT', FloatType()), StructField('ENRL_2YR_TRANS_YR6_RT', FloatType()), StructField('UNKN_ORIG_YR6_RT', FloatType()), StructField('UNKN_4YR_TRANS_YR6_RT', FloatType()), StructField('UNKN_2YR_TRANS_YR6_RT', FloatType()), StructField('LO_INC_DEATH_YR6_RT', FloatType()), StructField('LO_INC_COMP_ORIG_YR6_RT', FloatType()), StructField('LO_INC_COMP_4YR_TRANS_YR6_RT', FloatType()), StructField('LO_INC_COMP_2YR_TRANS_YR6_RT', FloatType()), StructField('LO_INC_WDRAW_ORIG_YR6_RT', FloatType()), StructField('LO_INC_WDRAW_4YR_TRANS_YR6_RT', FloatType()), StructField('LO_INC_WDRAW_2YR_TRANS_YR6_RT', FloatType()), StructField('LO_INC_ENRL_ORIG_YR6_RT', FloatType()), StructField('LO_INC_ENRL_4YR_TRANS_YR6_RT', FloatType()), StructField('LO_INC_ENRL_2YR_TRANS_YR6_RT', FloatType()), StructField('LO_INC_UNKN_ORIG_YR6_RT', FloatType()), StructField('LO_INC_UNKN_4YR_TRANS_YR6_RT', FloatType()), StructField('LO_INC_UNKN_2YR_TRANS_YR6_RT', FloatType()), StructField('MD_INC_DEATH_YR6_RT', FloatType()), StructField('MD_INC_COMP_ORIG_YR6_RT', FloatType()), StructField('MD_INC_COMP_4YR_TRANS_YR6_RT', FloatType()), StructField('MD_INC_COMP_2YR_TRANS_YR6_RT', FloatType()), StructField('MD_INC_WDRAW_ORIG_YR6_RT', FloatType()), StructField('MD_INC_WDRAW_4YR_TRANS_YR6_RT', FloatType()), StructField('MD_INC_WDRAW_2YR_TRANS_YR6_RT', FloatType()), StructField('MD_INC_ENRL_ORIG_YR6_RT', FloatType()), StructField('MD_INC_ENRL_4YR_TRANS_YR6_RT', FloatType()), StructField('MD_INC_ENRL_2YR_TRANS_YR6_RT', FloatType()), StructField('MD_INC_UNKN_ORIG_YR6_RT', FloatType()), StructField('MD_INC_UNKN_4YR_TRANS_YR6_RT', FloatType()), 
StructField('MD_INC_UNKN_2YR_TRANS_YR6_RT', FloatType()), StructField('HI_INC_DEATH_YR6_RT', FloatType()), StructField('HI_INC_COMP_ORIG_YR6_RT', FloatType()), StructField('HI_INC_COMP_4YR_TRANS_YR6_RT', FloatType()), StructField('HI_INC_COMP_2YR_TRANS_YR6_RT', FloatType()), StructField('HI_INC_WDRAW_ORIG_YR6_RT', FloatType()), StructField('HI_INC_WDRAW_4YR_TRANS_YR6_RT', FloatType()), StructField('HI_INC_WDRAW_2YR_TRANS_YR6_RT', FloatType()), StructField('HI_INC_ENRL_ORIG_YR6_RT', FloatType()), StructField('HI_INC_ENRL_4YR_TRANS_YR6_RT', FloatType()), StructField('HI_INC_ENRL_2YR_TRANS_YR6_RT', FloatType()), StructField('HI_INC_UNKN_ORIG_YR6_RT', FloatType()), StructField('HI_INC_UNKN_4YR_TRANS_YR6_RT', FloatType()), StructField('HI_INC_UNKN_2YR_TRANS_YR6_RT', FloatType()), StructField('DEP_DEATH_YR6_RT', FloatType()), StructField('DEP_COMP_ORIG_YR6_RT', FloatType()), StructField('DEP_COMP_4YR_TRANS_YR6_RT', FloatType()), StructField('DEP_COMP_2YR_TRANS_YR6_RT', FloatType()), StructField('DEP_WDRAW_ORIG_YR6_RT', FloatType()), StructField('DEP_WDRAW_4YR_TRANS_YR6_RT', FloatType()), StructField('DEP_WDRAW_2YR_TRANS_YR6_RT', FloatType()), StructField('DEP_ENRL_ORIG_YR6_RT', FloatType()), StructField('DEP_ENRL_4YR_TRANS_YR6_RT', FloatType()), StructField('DEP_ENRL_2YR_TRANS_YR6_RT', FloatType()), StructField('DEP_UNKN_ORIG_YR6_RT', FloatType()), StructField('DEP_UNKN_4YR_TRANS_YR6_RT', FloatType()), StructField('DEP_UNKN_2YR_TRANS_YR6_RT', FloatType()), StructField('IND_DEATH_YR6_RT', FloatType()), StructField('IND_COMP_ORIG_YR6_RT', FloatType()), StructField('IND_COMP_4YR_TRANS_YR6_RT', FloatType()), StructField('IND_COMP_2YR_TRANS_YR6_RT', FloatType()), StructField('IND_WDRAW_ORIG_YR6_RT', FloatType()), StructField('IND_WDRAW_4YR_TRANS_YR6_RT', FloatType()), StructField('IND_WDRAW_2YR_TRANS_YR6_RT', FloatType()), StructField('IND_ENRL_ORIG_YR6_RT', FloatType()), StructField('IND_ENRL_4YR_TRANS_YR6_RT', FloatType()), StructField('IND_ENRL_2YR_TRANS_YR6_RT', 
FloatType()), StructField('IND_UNKN_ORIG_YR6_RT', FloatType()), StructField('IND_UNKN_4YR_TRANS_YR6_RT', FloatType()), StructField('IND_UNKN_2YR_TRANS_YR6_RT', FloatType()), StructField('FEMALE_DEATH_YR6_RT', FloatType()), StructField('FEMALE_COMP_ORIG_YR6_RT', FloatType()), StructField('FEMALE_COMP_4YR_TRANS_YR6_RT', FloatType()), StructField('FEMALE_COMP_2YR_TRANS_YR6_RT', FloatType()), StructField('FEMALE_WDRAW_ORIG_YR6_RT', FloatType()), StructField('FEMALE_WDRAW_4YR_TRANS_YR6_RT', FloatType()), StructField('FEMALE_WDRAW_2YR_TRANS_YR6_RT', FloatType()), StructField('FEMALE_ENRL_ORIG_YR6_RT', FloatType()), StructField('FEMALE_ENRL_4YR_TRANS_YR6_RT', FloatType()), StructField('FEMALE_ENRL_2YR_TRANS_YR6_RT', FloatType()), StructField('FEMALE_UNKN_ORIG_YR6_RT', FloatType()), StructField('FEMALE_UNKN_4YR_TRANS_YR6_RT', FloatType()), StructField('FEMALE_UNKN_2YR_TRANS_YR6_RT', FloatType()), StructField('MALE_DEATH_YR6_RT', FloatType()), StructField('MALE_COMP_ORIG_YR6_RT', FloatType()), StructField('MALE_COMP_4YR_TRANS_YR6_RT', FloatType()), StructField('MALE_COMP_2YR_TRANS_YR6_RT', FloatType()), StructField('MALE_WDRAW_ORIG_YR6_RT', FloatType()), StructField('MALE_WDRAW_4YR_TRANS_YR6_RT', FloatType()), StructField('MALE_WDRAW_2YR_TRANS_YR6_RT', FloatType()), StructField('MALE_ENRL_ORIG_YR6_RT', FloatType()), StructField('MALE_ENRL_4YR_TRANS_YR6_RT', FloatType()), StructField('MALE_ENRL_2YR_TRANS_YR6_RT', FloatType()), StructField('MALE_UNKN_ORIG_YR6_RT', FloatType()), StructField('MALE_UNKN_4YR_TRANS_YR6_RT', FloatType()), StructField('MALE_UNKN_2YR_TRANS_YR6_RT', FloatType()), StructField('PELL_DEATH_YR6_RT', FloatType()), StructField('PELL_COMP_ORIG_YR6_RT', FloatType()), StructField('PELL_COMP_4YR_TRANS_YR6_RT', FloatType()), StructField('PELL_COMP_2YR_TRANS_YR6_RT', FloatType()), StructField('PELL_WDRAW_ORIG_YR6_RT', FloatType()), StructField('PELL_WDRAW_4YR_TRANS_YR6_RT', FloatType()), StructField('PELL_WDRAW_2YR_TRANS_YR6_RT', FloatType()), 
StructField('PELL_ENRL_ORIG_YR6_RT', FloatType()), StructField('PELL_ENRL_4YR_TRANS_YR6_RT', FloatType()), StructField('PELL_ENRL_2YR_TRANS_YR6_RT', FloatType()), StructField('PELL_UNKN_ORIG_YR6_RT', FloatType()), StructField('PELL_UNKN_4YR_TRANS_YR6_RT', FloatType()), StructField('PELL_UNKN_2YR_TRANS_YR6_RT', FloatType()), StructField('NOPELL_DEATH_YR6_RT', FloatType()), StructField('NOPELL_COMP_ORIG_YR6_RT', FloatType()), StructField('NOPELL_COMP_4YR_TRANS_YR6_RT', FloatType()), StructField('NOPELL_COMP_2YR_TRANS_YR6_RT', FloatType()), StructField('NOPELL_WDRAW_ORIG_YR6_RT', FloatType()), StructField('NOPELL_WDRAW_4YR_TRANS_YR6_RT', FloatType()), StructField('NOPELL_WDRAW_2YR_TRANS_YR6_RT', FloatType()), StructField('NOPELL_ENRL_ORIG_YR6_RT', FloatType()), StructField('NOPELL_ENRL_4YR_TRANS_YR6_RT', FloatType()), StructField('NOPELL_ENRL_2YR_TRANS_YR6_RT', FloatType()), StructField('NOPELL_UNKN_ORIG_YR6_RT', FloatType()), StructField('NOPELL_UNKN_4YR_TRANS_YR6_RT', FloatType()), StructField('NOPELL_UNKN_2YR_TRANS_YR6_RT', FloatType()), StructField('LOAN_DEATH_YR6_RT', FloatType()), StructField('LOAN_COMP_ORIG_YR6_RT', FloatType()), StructField('LOAN_COMP_4YR_TRANS_YR6_RT', FloatType()), StructField('LOAN_COMP_2YR_TRANS_YR6_RT', FloatType()), StructField('LOAN_WDRAW_ORIG_YR6_RT', FloatType()), StructField('LOAN_WDRAW_4YR_TRANS_YR6_RT', FloatType()), StructField('LOAN_WDRAW_2YR_TRANS_YR6_RT', FloatType()), StructField('LOAN_ENRL_ORIG_YR6_RT', FloatType()), StructField('LOAN_ENRL_4YR_TRANS_YR6_RT', FloatType()), StructField('LOAN_ENRL_2YR_TRANS_YR6_RT', FloatType()), StructField('LOAN_UNKN_ORIG_YR6_RT', FloatType()), StructField('LOAN_UNKN_4YR_TRANS_YR6_RT', FloatType()), StructField('LOAN_UNKN_2YR_TRANS_YR6_RT', FloatType()), StructField('NOLOAN_DEATH_YR6_RT', FloatType()), StructField('NOLOAN_COMP_ORIG_YR6_RT', FloatType()), StructField('NOLOAN_COMP_4YR_TRANS_YR6_RT', FloatType()), StructField('NOLOAN_COMP_2YR_TRANS_YR6_RT', FloatType()), 
StructField('NOLOAN_WDRAW_ORIG_YR6_RT', FloatType()), StructField('NOLOAN_WDRAW_4YR_TRANS_YR6_RT', FloatType()), StructField('NOLOAN_WDRAW_2YR_TRANS_YR6_RT', FloatType()), StructField('NOLOAN_ENRL_ORIG_YR6_RT', FloatType()), StructField('NOLOAN_ENRL_4YR_TRANS_YR6_RT', FloatType()), StructField('NOLOAN_ENRL_2YR_TRANS_YR6_RT', FloatType()), StructField('NOLOAN_UNKN_ORIG_YR6_RT', FloatType()), StructField('NOLOAN_UNKN_4YR_TRANS_YR6_RT', FloatType()), StructField('NOLOAN_UNKN_2YR_TRANS_YR6_RT', FloatType()), StructField('FIRSTGEN_DEATH_YR6_RT', FloatType()), StructField('FIRSTGEN_COMP_ORIG_YR6_RT', FloatType()), StructField('FIRSTGEN_COMP_4YR_TRANS_YR6_RT', FloatType()), StructField('FIRSTGEN_COMP_2YR_TRANS_YR6_RT', FloatType()), StructField('FIRSTGEN_WDRAW_ORIG_YR6_RT', FloatType()), StructField('FIRSTGEN_WDRAW_4YR_TRANS_YR6_RT', FloatType()), StructField('FIRSTGEN_WDRAW_2YR_TRANS_YR6_RT', FloatType()), StructField('FIRSTGEN_ENRL_ORIG_YR6_RT', FloatType()), StructField('FIRSTGEN_ENRL_4YR_TRANS_YR6_RT', FloatType()), StructField('FIRSTGEN_ENRL_2YR_TRANS_YR6_RT', FloatType()), StructField('FIRSTGEN_UNKN_ORIG_YR6_RT', FloatType()), StructField('FIRSTGEN_UNKN_4YR_TRANS_YR6_RT', FloatType()), StructField('FIRSTGEN_UNKN_2YR_TRANS_YR6_RT', FloatType()), StructField('NOT1STGEN_DEATH_YR6_RT', FloatType()), StructField('NOT1STGEN_COMP_ORIG_YR6_RT', FloatType()), StructField('NOT1STGEN_COMP_4YR_TRANS_YR6_RT', FloatType()), StructField('NOT1STGEN_COMP_2YR_TRANS_YR6_RT', FloatType()), StructField('NOT1STGEN_WDRAW_ORIG_YR6_RT', FloatType()), StructField('NOT1STGEN_WDRAW_4YR_TRANS_YR6_RT', FloatType()), StructField('NOT1STGEN_WDRAW_2YR_TRANS_YR6_RT', FloatType()), StructField('NOT1STGEN_ENRL_ORIG_YR6_RT', FloatType()), StructField('NOT1STGEN_ENRL_4YR_TRANS_YR6_RT', FloatType()), StructField('NOT1STGEN_ENRL_2YR_TRANS_YR6_RT', FloatType()), StructField('NOT1STGEN_UNKN_ORIG_YR6_RT', FloatType()), StructField('NOT1STGEN_UNKN_4YR_TRANS_YR6_RT', FloatType()), 
StructField('NOT1STGEN_UNKN_2YR_TRANS_YR6_RT', FloatType()), StructField('DEATH_YR8_RT', FloatType()), StructField('COMP_ORIG_YR8_RT', FloatType()), StructField('COMP_4YR_TRANS_YR8_RT', FloatType()), StructField('COMP_2YR_TRANS_YR8_RT', FloatType()), StructField('WDRAW_ORIG_YR8_RT', FloatType()), StructField('WDRAW_4YR_TRANS_YR8_RT', FloatType()), StructField('WDRAW_2YR_TRANS_YR8_RT', FloatType()), StructField('ENRL_ORIG_YR8_RT', FloatType()), StructField('ENRL_4YR_TRANS_YR8_RT', FloatType()), StructField('ENRL_2YR_TRANS_YR8_RT', FloatType()), StructField('UNKN_ORIG_YR8_RT', FloatType()), StructField('UNKN_4YR_TRANS_YR8_RT', FloatType()), StructField('UNKN_2YR_TRANS_YR8_RT', FloatType()), StructField('LO_INC_DEATH_YR8_RT', FloatType()), StructField('LO_INC_COMP_ORIG_YR8_RT', FloatType()), StructField('LO_INC_COMP_4YR_TRANS_YR8_RT', FloatType()), StructField('LO_INC_COMP_2YR_TRANS_YR8_RT', FloatType()), StructField('LO_INC_WDRAW_ORIG_YR8_RT', FloatType()), StructField('LO_INC_WDRAW_4YR_TRANS_YR8_RT', FloatType()), StructField('LO_INC_WDRAW_2YR_TRANS_YR8_RT', FloatType()), StructField('LO_INC_ENRL_ORIG_YR8_RT', FloatType()), StructField('LO_INC_ENRL_4YR_TRANS_YR8_RT', FloatType()), StructField('LO_INC_ENRL_2YR_TRANS_YR8_RT', FloatType()), StructField('LO_INC_UNKN_ORIG_YR8_RT', FloatType()), StructField('LO_INC_UNKN_4YR_TRANS_YR8_RT', FloatType()), StructField('LO_INC_UNKN_2YR_TRANS_YR8_RT', FloatType()), StructField('MD_INC_DEATH_YR8_RT', FloatType()), StructField('MD_INC_COMP_ORIG_YR8_RT', FloatType()), StructField('MD_INC_COMP_4YR_TRANS_YR8_RT', FloatType()), StructField('MD_INC_COMP_2YR_TRANS_YR8_RT', FloatType()), StructField('MD_INC_WDRAW_ORIG_YR8_RT', FloatType()), StructField('MD_INC_WDRAW_4YR_TRANS_YR8_RT', FloatType()), StructField('MD_INC_WDRAW_2YR_TRANS_YR8_RT', FloatType()), StructField('MD_INC_ENRL_ORIG_YR8_RT', FloatType()), StructField('MD_INC_ENRL_4YR_TRANS_YR8_RT', FloatType()), StructField('MD_INC_ENRL_2YR_TRANS_YR8_RT', FloatType()), 
StructField('MD_INC_UNKN_ORIG_YR8_RT', FloatType()), StructField('MD_INC_UNKN_4YR_TRANS_YR8_RT', FloatType()), StructField('MD_INC_UNKN_2YR_TRANS_YR8_RT', FloatType()), StructField('HI_INC_DEATH_YR8_RT', FloatType()), StructField('HI_INC_COMP_ORIG_YR8_RT', FloatType()), StructField('HI_INC_COMP_4YR_TRANS_YR8_RT', FloatType()), StructField('HI_INC_COMP_2YR_TRANS_YR8_RT', FloatType()), StructField('HI_INC_WDRAW_ORIG_YR8_RT', FloatType()), StructField('HI_INC_WDRAW_4YR_TRANS_YR8_RT', FloatType()), StructField('HI_INC_WDRAW_2YR_TRANS_YR8_RT', FloatType()), StructField('HI_INC_ENRL_ORIG_YR8_RT', FloatType()), StructField('HI_INC_ENRL_4YR_TRANS_YR8_RT', FloatType()), StructField('HI_INC_ENRL_2YR_TRANS_YR8_RT', FloatType()), StructField('HI_INC_UNKN_ORIG_YR8_RT', FloatType()), StructField('HI_INC_UNKN_4YR_TRANS_YR8_RT', FloatType()), StructField('HI_INC_UNKN_2YR_TRANS_YR8_RT', FloatType()), StructField('DEP_DEATH_YR8_RT', FloatType()), StructField('DEP_COMP_ORIG_YR8_RT', FloatType()), StructField('DEP_COMP_4YR_TRANS_YR8_RT', FloatType()), StructField('DEP_COMP_2YR_TRANS_YR8_RT', FloatType()), StructField('DEP_WDRAW_ORIG_YR8_RT', FloatType()), StructField('DEP_WDRAW_4YR_TRANS_YR8_RT', FloatType()), StructField('DEP_WDRAW_2YR_TRANS_YR8_RT', FloatType()), StructField('DEP_ENRL_ORIG_YR8_RT', FloatType()), StructField('DEP_ENRL_4YR_TRANS_YR8_RT', FloatType()), StructField('DEP_ENRL_2YR_TRANS_YR8_RT', FloatType()), StructField('DEP_UNKN_ORIG_YR8_RT', FloatType()), StructField('DEP_UNKN_4YR_TRANS_YR8_RT', FloatType()), StructField('DEP_UNKN_2YR_TRANS_YR8_RT', FloatType()), StructField('IND_DEATH_YR8_RT', FloatType()), StructField('IND_COMP_ORIG_YR8_RT', FloatType()), StructField('IND_COMP_4YR_TRANS_YR8_RT', FloatType()), StructField('IND_COMP_2YR_TRANS_YR8_RT', FloatType()), StructField('IND_WDRAW_ORIG_YR8_RT', FloatType()), StructField('IND_WDRAW_4YR_TRANS_YR8_RT', FloatType()), StructField('IND_WDRAW_2YR_TRANS_YR8_RT', FloatType()), StructField('IND_ENRL_ORIG_YR8_RT', 
FloatType()), StructField('IND_ENRL_4YR_TRANS_YR8_RT', FloatType()), StructField('IND_ENRL_2YR_TRANS_YR8_RT', FloatType()), StructField('IND_UNKN_ORIG_YR8_RT', FloatType()), StructField('IND_UNKN_4YR_TRANS_YR8_RT', FloatType()), StructField('IND_UNKN_2YR_TRANS_YR8_RT', FloatType()), StructField('FEMALE_DEATH_YR8_RT', FloatType()), StructField('FEMALE_COMP_ORIG_YR8_RT', FloatType()), StructField('FEMALE_COMP_4YR_TRANS_YR8_RT', FloatType()), StructField('FEMALE_COMP_2YR_TRANS_YR8_RT', FloatType()), StructField('FEMALE_WDRAW_ORIG_YR8_RT', FloatType()), StructField('FEMALE_WDRAW_4YR_TRANS_YR8_RT', FloatType()), StructField('FEMALE_WDRAW_2YR_TRANS_YR8_RT', FloatType()), StructField('FEMALE_ENRL_ORIG_YR8_RT', FloatType()), StructField('FEMALE_ENRL_4YR_TRANS_YR8_RT', FloatType()), StructField('FEMALE_ENRL_2YR_TRANS_YR8_RT', FloatType()), StructField('FEMALE_UNKN_ORIG_YR8_RT', FloatType()), StructField('FEMALE_UNKN_4YR_TRANS_YR8_RT', FloatType()), StructField('FEMALE_UNKN_2YR_TRANS_YR8_RT', FloatType()), StructField('MALE_DEATH_YR8_RT', FloatType()), StructField('MALE_COMP_ORIG_YR8_RT', FloatType()), StructField('MALE_COMP_4YR_TRANS_YR8_RT', FloatType()), StructField('MALE_COMP_2YR_TRANS_YR8_RT', FloatType()), StructField('MALE_WDRAW_ORIG_YR8_RT', FloatType()), StructField('MALE_WDRAW_4YR_TRANS_YR8_RT', FloatType()), StructField('MALE_WDRAW_2YR_TRANS_YR8_RT', FloatType()), StructField('MALE_ENRL_ORIG_YR8_RT', FloatType()), StructField('MALE_ENRL_4YR_TRANS_YR8_RT', FloatType()), StructField('MALE_ENRL_2YR_TRANS_YR8_RT', FloatType()), StructField('MALE_UNKN_ORIG_YR8_RT', FloatType()), StructField('MALE_UNKN_4YR_TRANS_YR8_RT', FloatType()), StructField('MALE_UNKN_2YR_TRANS_YR8_RT', FloatType()), StructField('PELL_DEATH_YR8_RT', FloatType()), StructField('PELL_COMP_ORIG_YR8_RT', FloatType()), StructField('PELL_COMP_4YR_TRANS_YR8_RT', FloatType()), StructField('PELL_COMP_2YR_TRANS_YR8_RT', FloatType()), StructField('PELL_WDRAW_ORIG_YR8_RT', FloatType()), 
StructField('PELL_WDRAW_4YR_TRANS_YR8_RT', FloatType()), StructField('PELL_WDRAW_2YR_TRANS_YR8_RT', FloatType()), StructField('PELL_ENRL_ORIG_YR8_RT', FloatType()), StructField('PELL_ENRL_4YR_TRANS_YR8_RT', FloatType()), StructField('PELL_ENRL_2YR_TRANS_YR8_RT', FloatType()), StructField('PELL_UNKN_ORIG_YR8_RT', FloatType()), StructField('PELL_UNKN_4YR_TRANS_YR8_RT', FloatType()), StructField('PELL_UNKN_2YR_TRANS_YR8_RT', FloatType()), StructField('NOPELL_DEATH_YR8_RT', FloatType()), StructField('NOPELL_COMP_ORIG_YR8_RT', FloatType()), StructField('NOPELL_COMP_4YR_TRANS_YR8_RT', FloatType()), StructField('NOPELL_COMP_2YR_TRANS_YR8_RT', FloatType()), StructField('NOPELL_WDRAW_ORIG_YR8_RT', FloatType()), StructField('NOPELL_WDRAW_4YR_TRANS_YR8_RT', FloatType()), StructField('NOPELL_WDRAW_2YR_TRANS_YR8_RT', FloatType()), StructField('NOPELL_ENRL_ORIG_YR8_RT', FloatType()), StructField('NOPELL_ENRL_4YR_TRANS_YR8_RT', FloatType()), StructField('NOPELL_ENRL_2YR_TRANS_YR8_RT', FloatType()), StructField('NOPELL_UNKN_ORIG_YR8_RT', FloatType()), StructField('NOPELL_UNKN_4YR_TRANS_YR8_RT', FloatType()), StructField('NOPELL_UNKN_2YR_TRANS_YR8_RT', FloatType()), StructField('LOAN_DEATH_YR8_RT', FloatType()), StructField('LOAN_COMP_ORIG_YR8_RT', FloatType()), StructField('LOAN_COMP_4YR_TRANS_YR8_RT', FloatType()), StructField('LOAN_COMP_2YR_TRANS_YR8_RT', FloatType()), StructField('LOAN_WDRAW_ORIG_YR8_RT', FloatType()), StructField('LOAN_WDRAW_4YR_TRANS_YR8_RT', FloatType()), StructField('LOAN_WDRAW_2YR_TRANS_YR8_RT', FloatType()), StructField('LOAN_ENRL_ORIG_YR8_RT', FloatType()), StructField('LOAN_ENRL_4YR_TRANS_YR8_RT', FloatType()), StructField('LOAN_ENRL_2YR_TRANS_YR8_RT', FloatType()), StructField('LOAN_UNKN_ORIG_YR8_RT', FloatType()), StructField('LOAN_UNKN_4YR_TRANS_YR8_RT', FloatType()), StructField('LOAN_UNKN_2YR_TRANS_YR8_RT', FloatType()), StructField('NOLOAN_DEATH_YR8_RT', FloatType()), StructField('NOLOAN_COMP_ORIG_YR8_RT', FloatType()), 
StructField('NOLOAN_COMP_4YR_TRANS_YR8_RT', FloatType()), StructField('NOLOAN_COMP_2YR_TRANS_YR8_RT', FloatType()), StructField('NOLOAN_WDRAW_ORIG_YR8_RT', FloatType()), StructField('NOLOAN_WDRAW_4YR_TRANS_YR8_RT', FloatType()), StructField('NOLOAN_WDRAW_2YR_TRANS_YR8_RT', FloatType()), StructField('NOLOAN_ENRL_ORIG_YR8_RT', FloatType()), StructField('NOLOAN_ENRL_4YR_TRANS_YR8_RT', FloatType()), StructField('NOLOAN_ENRL_2YR_TRANS_YR8_RT', FloatType()), StructField('NOLOAN_UNKN_ORIG_YR8_RT', FloatType()), StructField('NOLOAN_UNKN_4YR_TRANS_YR8_RT', FloatType()), StructField('NOLOAN_UNKN_2YR_TRANS_YR8_RT', FloatType()), StructField('FIRSTGEN_DEATH_YR8_RT', FloatType()), StructField('FIRSTGEN_COMP_ORIG_YR8_RT', FloatType()), StructField('FIRSTGEN_COMP_4YR_TRANS_YR8_RT', FloatType()), StructField('FIRSTGEN_COMP_2YR_TRANS_YR8_RT', FloatType()), StructField('FIRSTGEN_WDRAW_ORIG_YR8_RT', FloatType()), StructField('FIRSTGEN_WDRAW_4YR_TRANS_YR8_RT', FloatType()), StructField('FIRSTGEN_WDRAW_2YR_TRANS_YR8_RT', FloatType()), StructField('FIRSTGEN_ENRL_ORIG_YR8_RT', FloatType()), StructField('FIRSTGEN_ENRL_4YR_TRANS_YR8_RT', FloatType()), StructField('FIRSTGEN_ENRL_2YR_TRANS_YR8_RT', FloatType()), StructField('FIRSTGEN_UNKN_ORIG_YR8_RT', FloatType()), StructField('FIRSTGEN_UNKN_4YR_TRANS_YR8_RT', FloatType()), StructField('FIRSTGEN_UNKN_2YR_TRANS_YR8_RT', FloatType()), StructField('NOT1STGEN_DEATH_YR8_RT', FloatType()), StructField('NOT1STGEN_COMP_ORIG_YR8_RT', FloatType()), StructField('NOT1STGEN_COMP_4YR_TRANS_YR8_RT', FloatType()), StructField('NOT1STGEN_COMP_2YR_TRANS_YR8_RT', FloatType()), StructField('NOT1STGEN_WDRAW_ORIG_YR8_RT', FloatType()), StructField('NOT1STGEN_WDRAW_4YR_TRANS_YR8_RT', FloatType()), StructField('NOT1STGEN_WDRAW_2YR_TRANS_YR8_RT', FloatType()), StructField('NOT1STGEN_ENRL_ORIG_YR8_RT', FloatType()), StructField('NOT1STGEN_ENRL_4YR_TRANS_YR8_RT', FloatType()), StructField('NOT1STGEN_ENRL_2YR_TRANS_YR8_RT', FloatType()), 
StructField('NOT1STGEN_UNKN_ORIG_YR8_RT', FloatType()), StructField('NOT1STGEN_UNKN_4YR_TRANS_YR8_RT', FloatType()), StructField('NOT1STGEN_UNKN_2YR_TRANS_YR8_RT', FloatType()), StructField('RPY_1YR_RT', FloatType()), StructField('COMPL_RPY_1YR_RT', FloatType()), StructField('NONCOM_RPY_1YR_RT', FloatType()), StructField('LO_INC_RPY_1YR_RT', FloatType()), StructField('MD_INC_RPY_1YR_RT', FloatType()), StructField('HI_INC_RPY_1YR_RT', FloatType()), StructField('DEP_RPY_1YR_RT', FloatType()), StructField('IND_RPY_1YR_RT', FloatType()), StructField('PELL_RPY_1YR_RT', FloatType()), StructField('NOPELL_RPY_1YR_RT', FloatType()), StructField('FEMALE_RPY_1YR_RT', FloatType()), StructField('MALE_RPY_1YR_RT', FloatType()), StructField('FIRSTGEN_RPY_1YR_RT', FloatType()), StructField('NOTFIRSTGEN_RPY_1YR_RT', FloatType()), StructField('RPY_3YR_RT', FloatType()), StructField('COMPL_RPY_3YR_RT', FloatType()), StructField('NONCOM_RPY_3YR_RT', FloatType()), StructField('LO_INC_RPY_3YR_RT', FloatType()), StructField('MD_INC_RPY_3YR_RT', FloatType()), StructField('HI_INC_RPY_3YR_RT', FloatType()), StructField('DEP_RPY_3YR_RT', FloatType()), StructField('IND_RPY_3YR_RT', FloatType()), StructField('PELL_RPY_3YR_RT', FloatType()), StructField('NOPELL_RPY_3YR_RT', FloatType()), StructField('FEMALE_RPY_3YR_RT', FloatType()), StructField('MALE_RPY_3YR_RT', FloatType()), StructField('FIRSTGEN_RPY_3YR_RT', FloatType()), StructField('NOTFIRSTGEN_RPY_3YR_RT', FloatType()), StructField('RPY_5YR_RT', FloatType()), StructField('COMPL_RPY_5YR_RT', FloatType()), StructField('NONCOM_RPY_5YR_RT', FloatType()), StructField('LO_INC_RPY_5YR_RT', FloatType()), StructField('MD_INC_RPY_5YR_RT', FloatType()), StructField('HI_INC_RPY_5YR_RT', FloatType()), StructField('DEP_RPY_5YR_RT', FloatType()), StructField('IND_RPY_5YR_RT', FloatType()), StructField('PELL_RPY_5YR_RT', FloatType()), StructField('NOPELL_RPY_5YR_RT', FloatType()), StructField('FEMALE_RPY_5YR_RT', FloatType()), 
StructField('MALE_RPY_5YR_RT', FloatType()), StructField('FIRSTGEN_RPY_5YR_RT', FloatType()), StructField('NOTFIRSTGEN_RPY_5YR_RT', FloatType()), StructField('RPY_7YR_RT', FloatType()), StructField('COMPL_RPY_7YR_RT', FloatType()), StructField('NONCOM_RPY_7YR_RT', FloatType()), StructField('LO_INC_RPY_7YR_RT', FloatType()), StructField('MD_INC_RPY_7YR_RT', FloatType()), StructField('HI_INC_RPY_7YR_RT', FloatType()), StructField('DEP_RPY_7YR_RT', FloatType()), StructField('IND_RPY_7YR_RT', FloatType()), StructField('PELL_RPY_7YR_RT', FloatType()), StructField('NOPELL_RPY_7YR_RT', FloatType()), StructField('FEMALE_RPY_7YR_RT', FloatType()), StructField('MALE_RPY_7YR_RT', FloatType()), StructField('FIRSTGEN_RPY_7YR_RT', FloatType()), StructField('NOTFIRSTGEN_RPY_7YR_RT', FloatType()), StructField('INC_PCT_LO', FloatType()), StructField('DEP_STAT_PCT_IND', FloatType()), StructField('DEP_INC_PCT_LO', FloatType()), StructField('IND_INC_PCT_LO', FloatType()), StructField('PAR_ED_PCT_1STGEN', FloatType()), StructField('INC_PCT_M1', FloatType()), StructField('INC_PCT_M2', FloatType()), StructField('INC_PCT_H1', FloatType()), StructField('INC_PCT_H2', FloatType()), StructField('DEP_INC_PCT_M1', FloatType()), StructField('DEP_INC_PCT_M2', FloatType()), StructField('DEP_INC_PCT_H1', FloatType()), StructField('DEP_INC_PCT_H2', FloatType()), StructField('IND_INC_PCT_M1', FloatType()), StructField('IND_INC_PCT_M2', FloatType()), StructField('IND_INC_PCT_H1', FloatType()), StructField('IND_INC_PCT_H2', FloatType()), StructField('PAR_ED_PCT_MS', FloatType()), StructField('PAR_ED_PCT_HS', FloatType()), StructField('PAR_ED_PCT_PS', FloatType()), StructField('APPL_SCH_PCT_GE2', FloatType()), StructField('APPL_SCH_PCT_GE3', FloatType()), StructField('APPL_SCH_PCT_GE4', FloatType()), StructField('APPL_SCH_PCT_GE5', FloatType()), StructField('DEP_INC_AVG', IntegerType()), StructField('IND_INC_AVG', IntegerType()), StructField('OVERALL_YR2_N', IntegerType()), StructField('LO_INC_YR2_N', 
IntegerType()), StructField('MD_INC_YR2_N', IntegerType()), StructField('HI_INC_YR2_N', IntegerType()), StructField('DEP_YR2_N', IntegerType()), StructField('IND_YR2_N', IntegerType()), StructField('FEMALE_YR2_N', IntegerType()), StructField('MALE_YR2_N', IntegerType()), StructField('PELL_YR2_N', IntegerType()), StructField('NOPELL_YR2_N', IntegerType()), StructField('LOAN_YR2_N', IntegerType()), StructField('NOLOAN_YR2_N', IntegerType()), StructField('FIRSTGEN_YR2_N', IntegerType()), StructField('NOT1STGEN_YR2_N', IntegerType()), StructField('OVERALL_YR3_N', IntegerType()), StructField('LO_INC_YR3_N', IntegerType()), StructField('MD_INC_YR3_N', IntegerType()), StructField('HI_INC_YR3_N', IntegerType()), StructField('DEP_YR3_N', IntegerType()), StructField('IND_YR3_N', IntegerType()), StructField('FEMALE_YR3_N', IntegerType()), StructField('MALE_YR3_N', IntegerType()), StructField('PELL_YR3_N', IntegerType()), StructField('NOPELL_YR3_N', IntegerType()), StructField('LOAN_YR3_N', IntegerType()), StructField('NOLOAN_YR3_N', IntegerType()), StructField('FIRSTGEN_YR3_N', IntegerType()), StructField('NOT1STGEN_YR3_N', IntegerType()), StructField('OVERALL_YR4_N', IntegerType()), StructField('LO_INC_YR4_N', IntegerType()), StructField('MD_INC_YR4_N', IntegerType()), StructField('HI_INC_YR4_N', IntegerType()), StructField('DEP_YR4_N', IntegerType()), StructField('IND_YR4_N', IntegerType()), StructField('FEMALE_YR4_N', IntegerType()), StructField('MALE_YR4_N', IntegerType()), StructField('PELL_YR4_N', IntegerType()), StructField('NOPELL_YR4_N', IntegerType()), StructField('LOAN_YR4_N', IntegerType()), StructField('NOLOAN_YR4_N', IntegerType()), StructField('FIRSTGEN_YR4_N', IntegerType()), StructField('NOT1STGEN_YR4_N', IntegerType()), StructField('OVERALL_YR6_N', IntegerType()), StructField('LO_INC_YR6_N', IntegerType()), StructField('MD_INC_YR6_N', IntegerType()), StructField('HI_INC_YR6_N', IntegerType()), StructField('DEP_YR6_N', IntegerType()), StructField('IND_YR6_N', 
IntegerType()), StructField('FEMALE_YR6_N', IntegerType()), StructField('MALE_YR6_N', IntegerType()), StructField('PELL_YR6_N', IntegerType()), StructField('NOPELL_YR6_N', IntegerType()), StructField('LOAN_YR6_N', IntegerType()), StructField('NOLOAN_YR6_N', IntegerType()), StructField('FIRSTGEN_YR6_N', IntegerType()), StructField('NOT1STGEN_YR6_N', IntegerType()), StructField('OVERALL_YR8_N', IntegerType()), StructField('LO_INC_YR8_N', IntegerType()), StructField('MD_INC_YR8_N', IntegerType()), StructField('HI_INC_YR8_N', IntegerType()), StructField('DEP_YR8_N', IntegerType()), StructField('IND_YR8_N', IntegerType()), StructField('FEMALE_YR8_N', IntegerType()), StructField('MALE_YR8_N', IntegerType()), StructField('PELL_YR8_N', IntegerType()), StructField('NOPELL_YR8_N', IntegerType()), StructField('LOAN_YR8_N', IntegerType()), StructField('NOLOAN_YR8_N', IntegerType()), StructField('FIRSTGEN_YR8_N', IntegerType()), StructField('NOT1STGEN_YR8_N', IntegerType()), StructField('DEBT_MDN', FloatType()), StructField('GRAD_DEBT_MDN', FloatType()), StructField('WDRAW_DEBT_MDN', FloatType()), StructField('LO_INC_DEBT_MDN', FloatType()), StructField('MD_INC_DEBT_MDN', FloatType()), StructField('HI_INC_DEBT_MDN', FloatType()), StructField('DEP_DEBT_MDN', FloatType()), StructField('IND_DEBT_MDN', FloatType()), StructField('PELL_DEBT_MDN', FloatType()), StructField('NOPELL_DEBT_MDN', FloatType()), StructField('FEMALE_DEBT_MDN', FloatType()), StructField('MALE_DEBT_MDN', FloatType()), StructField('FIRSTGEN_DEBT_MDN', FloatType()), StructField('NOTFIRSTGEN_DEBT_MDN', FloatType()), StructField('DEBT_N', IntegerType()), StructField('GRAD_DEBT_N', IntegerType()), StructField('WDRAW_DEBT_N', IntegerType()), StructField('LO_INC_DEBT_N', IntegerType()), StructField('MD_INC_DEBT_N', IntegerType()), StructField('HI_INC_DEBT_N', IntegerType()), StructField('DEP_DEBT_N', IntegerType()), StructField('IND_DEBT_N', IntegerType()), StructField('PELL_DEBT_N', IntegerType()), 
StructField('NOPELL_DEBT_N', IntegerType()), StructField('FEMALE_DEBT_N', IntegerType()), StructField('MALE_DEBT_N', IntegerType()), StructField('FIRSTGEN_DEBT_N', IntegerType()), StructField('NOTFIRSTGEN_DEBT_N', IntegerType()), StructField('GRAD_DEBT_MDN10YR', FloatType()), StructField('CUML_DEBT_N', IntegerType()), StructField('CUML_DEBT_P90', IntegerType()), StructField('CUML_DEBT_P75', IntegerType()), StructField('CUML_DEBT_P25', IntegerType()), StructField('CUML_DEBT_P10', IntegerType()), StructField('INC_N', IntegerType()), StructField('DEP_INC_N', IntegerType()), StructField('IND_INC_N', IntegerType()), StructField('DEP_STAT_N', IntegerType()), StructField('PAR_ED_N', IntegerType()), StructField('APPL_SCH_N', IntegerType()), StructField('REPAY_DT_MDN', IntegerType()), StructField('SEPAR_DT_MDN', IntegerType()), StructField('REPAY_DT_N', IntegerType()), StructField('SEPAR_DT_N', IntegerType()), StructField('RPY_1YR_N', IntegerType()), StructField('COMPL_RPY_1YR_N', IntegerType()), StructField('NONCOM_RPY_1YR_N', IntegerType()), StructField('LO_INC_RPY_1YR_N', IntegerType()), StructField('MD_INC_RPY_1YR_N', IntegerType()), StructField('HI_INC_RPY_1YR_N', IntegerType()), StructField('DEP_RPY_1YR_N', IntegerType()), StructField('IND_RPY_1YR_N', IntegerType()), StructField('PELL_RPY_1YR_N', IntegerType()), StructField('NOPELL_RPY_1YR_N', IntegerType()), StructField('FEMALE_RPY_1YR_N', IntegerType()), StructField('MALE_RPY_1YR_N', IntegerType()), StructField('FIRSTGEN_RPY_1YR_N', IntegerType()), StructField('NOTFIRSTGEN_RPY_1YR_N', IntegerType()), StructField('RPY_3YR_N', IntegerType()), StructField('COMPL_RPY_3YR_N', IntegerType()), StructField('NONCOM_RPY_3YR_N', IntegerType()), StructField('LO_INC_RPY_3YR_N', IntegerType()), StructField('MD_INC_RPY_3YR_N', IntegerType()), StructField('HI_INC_RPY_3YR_N', IntegerType()), StructField('DEP_RPY_3YR_N', IntegerType()), StructField('IND_RPY_3YR_N', IntegerType()), StructField('PELL_RPY_3YR_N', IntegerType()), 
StructField('NOPELL_RPY_3YR_N', IntegerType()), StructField('FEMALE_RPY_3YR_N', IntegerType()), StructField('MALE_RPY_3YR_N', IntegerType()), StructField('FIRSTGEN_RPY_3YR_N', IntegerType()), StructField('NOTFIRSTGEN_RPY_3YR_N', IntegerType()), StructField('RPY_5YR_N', IntegerType()), StructField('COMPL_RPY_5YR_N', IntegerType()), StructField('NONCOM_RPY_5YR_N', IntegerType()), StructField('LO_INC_RPY_5YR_N', IntegerType()), StructField('MD_INC_RPY_5YR_N', IntegerType()), StructField('HI_INC_RPY_5YR_N', IntegerType()), StructField('DEP_RPY_5YR_N', IntegerType()), StructField('IND_RPY_5YR_N', IntegerType()), StructField('PELL_RPY_5YR_N', IntegerType()), StructField('NOPELL_RPY_5YR_N', IntegerType()), StructField('FEMALE_RPY_5YR_N', IntegerType()), StructField('MALE_RPY_5YR_N', IntegerType()), StructField('FIRSTGEN_RPY_5YR_N', IntegerType()), StructField('NOTFIRSTGEN_RPY_5YR_N', IntegerType()), StructField('RPY_7YR_N', IntegerType()), StructField('COMPL_RPY_7YR_N', IntegerType()), StructField('NONCOM_RPY_7YR_N', IntegerType()), StructField('LO_INC_RPY_7YR_N', IntegerType()), StructField('MD_INC_RPY_7YR_N', IntegerType()), StructField('HI_INC_RPY_7YR_N', IntegerType()), StructField('DEP_RPY_7YR_N', IntegerType()), StructField('IND_RPY_7YR_N', IntegerType()), StructField('PELL_RPY_7YR_N', IntegerType()), StructField('NOPELL_RPY_7YR_N', IntegerType()), StructField('FEMALE_RPY_7YR_N', IntegerType()), StructField('MALE_RPY_7YR_N', IntegerType()), StructField('FIRSTGEN_RPY_7YR_N', IntegerType()), StructField('NOTFIRSTGEN_RPY_7YR_N', IntegerType()), StructField('HCM2', IntegerType()), StructField('count_ed', IntegerType()), StructField('loan_ever', FloatType()), StructField('pell_ever', FloatType()), StructField('age_entry', IntegerType()), StructField('age_entry_sq', IntegerType()), StructField('agege24', FloatType()), StructField('female', FloatType()), StructField('married', FloatType()), StructField('dependent', FloatType()), StructField('veteran', FloatType()), 
StructField('first_gen', FloatType()), StructField('faminc', IntegerType()), StructField('md_faminc', IntegerType()), StructField('faminc_ind', IntegerType()), StructField('lnfaminc', IntegerType()), StructField('lnfaminc_ind', IntegerType()), StructField('pct_white', FloatType()), StructField('pct_black', FloatType()), StructField('pct_asian', FloatType()), StructField('pct_hispanic', FloatType()), StructField('pct_ba', FloatType()), StructField('pct_grad_prof', FloatType()), StructField('pct_born_us', FloatType()), StructField('median_hh_inc', IntegerType()), StructField('poverty_rate', FloatType()), StructField('unemp_rate', FloatType()), StructField('ln_median_hh_inc', IntegerType()), StructField('fsend_count', IntegerType()), StructField('fsend_1', FloatType()), StructField('fsend_2', FloatType()), StructField('fsend_3', FloatType()), StructField('fsend_4', FloatType()), StructField('fsend_5', FloatType()), StructField('count_nwne_p10', IntegerType()), StructField('count_wne_p10', IntegerType()), StructField('mn_earn_wne_p10', IntegerType()), StructField('md_earn_wne_p10', IntegerType()), StructField('pct10_earn_wne_p10', IntegerType()), StructField('pct25_earn_wne_p10', IntegerType()), StructField('pct75_earn_wne_p10', IntegerType()), StructField('pct90_earn_wne_p10', IntegerType()), StructField('sd_earn_wne_p10', IntegerType()), StructField('count_wne_inc1_p10', IntegerType()), StructField('count_wne_inc2_p10', IntegerType()), StructField('count_wne_inc3_p10', IntegerType()), StructField('count_wne_indep0_inc1_p10', IntegerType()), StructField('count_wne_indep0_p10', IntegerType()), StructField('count_wne_indep1_p10', IntegerType()), StructField('count_wne_male0_p10', IntegerType()), StructField('count_wne_male1_p10', IntegerType()), StructField('gt_25k_p10', FloatType()), StructField('mn_earn_wne_inc1_p10', IntegerType()), StructField('mn_earn_wne_inc2_p10', IntegerType()), StructField('mn_earn_wne_inc3_p10', IntegerType()), 
StructField('mn_earn_wne_indep0_inc1_p10', IntegerType()), StructField('mn_earn_wne_indep0_p10', IntegerType()), StructField('mn_earn_wne_indep1_p10', IntegerType()), StructField('mn_earn_wne_male0_p10', IntegerType()), StructField('mn_earn_wne_male1_p10', IntegerType()), StructField('count_nwne_p6', IntegerType()), StructField('count_wne_p6', IntegerType()), StructField('mn_earn_wne_p6', IntegerType()), StructField('md_earn_wne_p6', IntegerType()), StructField('pct10_earn_wne_p6', IntegerType()), StructField('pct25_earn_wne_p6', IntegerType()), StructField('pct75_earn_wne_p6', IntegerType()), StructField('pct90_earn_wne_p6', IntegerType()), StructField('sd_earn_wne_p6', IntegerType()), StructField('count_wne_inc1_p6', IntegerType()), StructField('count_wne_inc2_p6', IntegerType()), StructField('count_wne_inc3_p6', IntegerType()), StructField('count_wne_indep0_inc1_p6', IntegerType()), StructField('count_wne_indep0_p6', IntegerType()), StructField('count_wne_indep1_p6', IntegerType()), StructField('count_wne_male0_p6', IntegerType()), StructField('count_wne_male1_p6', IntegerType()), StructField('gt_25k_p6', FloatType()), StructField('mn_earn_wne_inc1_p6', FloatType()), StructField('mn_earn_wne_inc2_p6', FloatType()), StructField('mn_earn_wne_inc3_p6', FloatType()), StructField('mn_earn_wne_indep0_inc1_p6', FloatType()), StructField('mn_earn_wne_indep0_p6', FloatType()), StructField('mn_earn_wne_indep1_p6', FloatType()), StructField('mn_earn_wne_male0_p6', FloatType()), StructField('mn_earn_wne_male1_p6', FloatType()), StructField('count_nwne_p7', FloatType()), StructField('count_wne_p7', IntegerType()), StructField('mn_earn_wne_p7', FloatType()), StructField('sd_earn_wne_p7', FloatType()), StructField('gt_25k_p7', FloatType()), StructField('count_nwne_p8', IntegerType()), StructField('count_wne_p8', IntegerType()), StructField('mn_earn_wne_p8', FloatType()), StructField('md_earn_wne_p8', FloatType()), StructField('pct10_earn_wne_p8', IntegerType()), 
StructField('pct25_earn_wne_p8', IntegerType()), StructField('pct75_earn_wne_p8', IntegerType()), StructField('pct90_earn_wne_p8', IntegerType()), StructField('sd_earn_wne_p8', FloatType()), StructField('gt_25k_p8', FloatType()), StructField('count_nwne_p9', IntegerType()), StructField('count_wne_p9', IntegerType()), StructField('mn_earn_wne_p9', FloatType()), StructField('sd_earn_wne_p9', FloatType()), StructField('gt_25k_p9', FloatType()), StructField('DEBT_MDN_SUPP', FloatType()), StructField('GRAD_DEBT_MDN_SUPP', FloatType()), StructField('GRAD_DEBT_MDN10YR_SUPP', FloatType()), StructField('RPY_3YR_RT_SUPP', FloatType()), StructField('LO_INC_RPY_3YR_RT_SUPP', FloatType()), StructField('MD_INC_RPY_3YR_RT_SUPP', FloatType()), StructField('HI_INC_RPY_3YR_RT_SUPP', FloatType()), StructField('COMPL_RPY_3YR_RT_SUPP', FloatType()), StructField('NONCOM_RPY_3YR_RT_SUPP', FloatType()), StructField('DEP_RPY_3YR_RT_SUPP', FloatType()), StructField('IND_RPY_3YR_RT_SUPP', FloatType()), StructField('PELL_RPY_3YR_RT_SUPP', FloatType()), StructField('NOPELL_RPY_3YR_RT_SUPP', FloatType()), StructField('FEMALE_RPY_3YR_RT_SUPP', FloatType()), StructField('MALE_RPY_3YR_RT_SUPP', FloatType()), StructField('FIRSTGEN_RPY_3YR_RT_SUPP', FloatType()), StructField('NOTFIRSTGEN_RPY_3YR_RT_SUPP', FloatType()), StructField('C150_L4_POOLED_SUPP', FloatType()), StructField('C150_4_POOLED_SUPP', FloatType()), StructField('C200_L4_POOLED_SUPP', FloatType()), StructField('C200_4_POOLED_SUPP', FloatType())])
# read the CSV using the explicit schema built above; the first row of the file is the header
csvReader = spark.read.format('csv').options(header=True).schema(schem)
college = csvReader.load(fil)

# talk - report the record count and preview the first rows
cnt = college.count()
print('%d records'%cnt)
preview = college.limit(100).toPandas()
display(preview.head(20))

### Data Prep

In [None]:
''' handle missing values '''
# Summarizes nulls per column, drops columns that are more than half null, and fills the
# remaining numeric nulls with the column median. Rebinds `college`; `cnt` comes from the load cell.

# presumably important columns (for modeling); for now every column is flagged important
importantCols = college.columns

# check for missing values - a single aggregation instead of one Spark job per column
nullRow = college.select([count(when(col(colm).isNull(), 1)).alias(colm) for colm in college.columns]).first()
# (frequency, relative frequency); guard the division in case the file was empty
nullCounts = {colm:(ncnt, ncnt/cnt if cnt else 0.0) for (colm, ncnt) in nullRow.asDict().items()}
nullCountsDF = pd.DataFrame(nullCounts).T.reset_index(drop=False).sort_values(1, ascending=False)
nullCountsDF.columns = ['Column', 'Freq.', 'Rel. Freq.']
nullCountsDF = nullCountsDF.merge(pd.DataFrame([[colm.name, colm.dataType] for colm in college.schema], columns=['Column', 'Type']),
                                how='inner', on=['Column'])
nullCountsDF['Important'] = [c in importantCols for c in nullCountsDF['Column']]
nullCountsDF = nullCountsDF[nullCountsDF['Rel. Freq.'] > 0.0]

# talk
display(nullCountsDF)

# handle nulls - drop columns with too many
tooMany = nullCountsDF.loc[nullCountsDF['Rel. Freq.'] > 0.5, 'Column'].values
print('Dropping %d columns with more than 50%% null values'%len(tooMany))
college = college.drop(*tooMany)

# handle nulls - fill remaining numeric with the median from the column
# only columns that survived the drop can be filled; asking Spark for a dropped column raises
keptCols = set(college.columns)
numericTypes = [IntegerType(), FloatType()]
toFill = [colm for (colm, typ) in zip(nullCountsDF['Column'], nullCountsDF['Type'])
          if colm in keptCols and typ in numericTypes]
nulls = []
fills = {}
if toFill:
    # approxQuantile takes a list of columns and returns one list of quantiles per column
    medians = college.approxQuantile(toFill, [0.5], 0.01)
    for (colm, quants) in zip(toFill, medians):
        print('Processing %s'%colm)
        if not quants:
            # the quantile list is empty when the column has no usable values
            print('\tno non-null values to take a median from, skipping')
            continue
        med = quants[0]
        print('\tfilling nulls with %0.2f'%med)
        # remember where nulls were filled (taken from the frame before filling)
        nulls.append(college.select(col(colm).isNull()))
        fills[colm] = med
if fills:
    # one fill for all columns keeps the query plan small
    college = college.fillna(fills)
# what about the string columns? (still unhandled - their nulls are left in place)


# talk some more
print('%d records'%college.count())
display(college.limit(10).toPandas())

In [None]:
''' see some value counts '''
# collect the *names* of the string columns: iterating a schema yields StructField objects,
# which select()/groupBy() reject and which never compare equal to a plain column name
strCols = [c.name for c in college.schema if c.dataType == StringType()]
for colm in strCols:
    print(colm)
    # frequency of each distinct value in this column
    college.groupBy(colm).count().show()

In [None]:
''' index the string columns '''
# strCols may hold column names or StructField objects; normalize to plain names
strNames = [c if isinstance(c, str) else c.name for c in strCols]

# remove obviously useless string columns - filtering (rather than list.remove) does not
# fail when a URL column is absent, and lets this part of the cell be re-run
uselessCols = ['INSTURL', 'NPCURL']
strCols = [c for c in strNames if c not in uselessCols]

# do the indexing; only numeric nulls were filled earlier, so put null strings in their
# own extra bucket instead of letting the default handleInvalid='error' fail the transform
indxr = StringIndexer(inputCols=strCols, outputCols=[c+'_int' for c in strCols], handleInvalid='keep')
college = indxr.fit(college).transform(college)
# talk
display(college.limit(10).toPandas())

In [None]:
''' prepare the features '''
# identifier columns say nothing about an institution, so keep them out of the model
idCols = ['UNITID', 'OPEID', 'opeid6']
# every remaining numeric column is a candidate feature; DoubleType is included because
# that is what StringIndexer produces for indexed string columns
# NOTE(review): this is a broad starting list - trim it to the columns that matter for clustering
numericTypes = [IntegerType(), FloatType(), DoubleType()]
features = [f.name for f in college.schema if f.dataType in numericTypes and f.name not in idCols]

# create the features vector - no need to scale
assr = VectorAssembler(inputCols=features, outputCol='features')
# the assembled frame keeps the name `sales` because the cells below refer to it by that name
sales = assr.transform(college)

# talk
display(sales.limit(10).toPandas())
print('First row features = %s'%sales.select('features').take(1)[0])

In [None]:
# check for multicollinearity: Pearson correlation between every pair of assembled features
corr = Correlation.corr(sales, column='features', method='pearson')
# the result is a one-row, one-column frame holding the correlation matrix
corrMatrix = corr.collect()[0][0].toArray()
corrdf = pd.DataFrame(data=corrMatrix, index=features, columns=features)
display(corrdf)

## Modeling

In [None]:
# split for cross-val
trainPerc = 0.7
randSeed = 42
# UNITID is the first field of the schema the CSV is loaded with; it is used here as the row identifier
idCol = 'UNITID'
tran, test = sales.select(idCol, 'features').randomSplit([trainPerc, 1.0 - trainPerc], seed=randSeed)

# talk
print('Training Cases')
tran.select(idCol).show()
print('Testing Cases')
test.select(idCol).show()

In [None]:
''' evaluate different clustering cardinalities '''
# Fits one Gaussian mixture per k on the training split, recording the training log
# likelihood and the test silhouette score; leaves `models`, `kCost`, `kSil`, `bestK` behind.

# setup range that will be tried
kMax = 6 # make this 1 more than what you actually want
xs = list(range(2, kMax))
# both arrays are indexed by k; slots 0 and 1 are never fitted, so they hold the worst
# possible value of each metric and can never win the argmax below
kCost = np.full(kMax, -np.inf)
kSil = np.full(kMax, -1.0)

# iterate over k
models = [None]*kMax
silEval = ClusteringEvaluator()
for k in range(2, kMax):
    print('Trying k = %d'%k)
    # fit the model on the training set
    GMM = GaussianMixture(k=k, seed=randSeed, featuresCol='features')
    models[k] = GMM.fit(tran)
    # eval the model on the training set
    kCost[k] = models[k].summary.logLikelihood
    print('\tLog likelihood = %0.3f'%kCost[k])
    # eval the model on the testing set
    testPred = models[k].transform(test)
    kSil[k] = silEval.evaluate(testPred)
    print('\tSilhouette score = %0.3f'%kSil[k])

# show the scree plot
fig = plysub.make_subplots(rows=2, cols=1, print_grid=False, subplot_titles=('Train Log Likelihood vs. k', 'Test Silhouette vs. k'))
fig.add_trace(go.Scatter(x=xs, y=kCost[2:], mode='markers+lines'), 1, 1)
fig.add_trace(go.Scatter(x=xs, y=kSil[2:], mode='markers+lines'), 2, 1)
fig['layout']['title'] = '|GMM Results'
plyoff.plot(fig)

# find the max - a higher log likelihood means a better fit
bestK = int(np.argmax(kCost))
print('Best model has %d clusters, with a cost of %0.3f'%(bestK, kCost[bestK]))

In [None]:
''' Evaluate best model on test set '''
# get the best - chosen by hand after looking at the plots
bestK = int(input('Enter the "best" k'))
# only k values that were actually fitted have a model; without this check a negative k
# would silently index from the end of the list and an unfitted k would hit a None entry
if not (0 <= bestK < len(models)) or models[bestK] is None:
    raise ValueError('No fitted model for k = %d'%bestK)
bestModel = models[bestK]
cst = kCost[bestK]
print('Best GMM model has %d clusters, with a log likelihood of %0.3f'%(bestK, cst))

# predict
testPred = bestModel.transform(test)
# eval
evalSil = ClusteringEvaluator()
silhouette = evalSil.evaluate(testPred)
print('Silhouette score for GMM model with %d cluster = [-1, %0.3f, 1]'%(bestK, silhouette))
# get the centers - one row per mixture component, one column per feature
cents = pd.DataFrame(index=list(range(bestK)), data=[bestModel.gaussians[k].mean.toArray() for k in range(bestK)], columns=features)
display(cents)

In [None]:
''' predict mixtures on input data '''
# one output column per mixture component
mixCols = ['Mixture %d'%k for k in range(bestK)]
# predict and parse the mixture probabilities; UNITID is the row identifier in this data
preds = bestModel.transform(sales).select('UNITID', *features, 'prediction', vector_to_array('probability').alias('mixtures')).toPandas()
# label the mixture probabilities and drop the array column
preds = preds.drop(columns=['mixtures']).join(pd.DataFrame(data=preds['mixtures'].tolist(), columns=mixCols))
# get the most likely mixture and its probability
preds['Predicted Mixture'] = preds[mixCols].idxmax(axis=1)
preds['Predicted Mixture Prob.'] = preds[mixCols].max(axis=1)
# talk
display(preds.head())

In [None]:
# characterize the clusters (would want to really use a classification model for this...)
# every feature gets the same three summary statistics; the prediction column gives the cluster size
featureStats = ['min', 'mean', 'max']
aggs = {feat: featureStats for feat in features}
aggs['prediction'] = 'count'
byCluster = preds.groupby(by=['prediction'])
pgb = byCluster.agg(aggs)
display(pgb.head())

In [None]:
# shut down the SparkContext created at the top of the notebook
sc.stop()