In [1]:
# imports
import os
import logging
import getpass
import pyodbc
import numpy as np
import pandas as pd
import sklearn
import pickle
import joblib
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns



In [2]:
logging.info('numpy:' + np.__version__)
logging.info('pandas:' + pd.__version__)
logging.info('sklearn:' + sklearn.__version__)
logging.info('matplotlib:' + mpl.__version__)
logging.info('seaborn:' + sns.__version__)


 # Setup

In [3]:
from datetime import date
today = str(date.today())


In [4]:
# ## Set up logging
DirLogs = '..\\logs\\'
os.makedirs(DirLogs, exist_ok=True)
_ = [logging.root.removeHandler(handler) for handler in logging.root.handlers[:]]
#del _
logging.basicConfig(filename='../logs.//clf_impute_' + today + '.log', 
                    filemode='w', 
                    format='%(asctime)s.%(msecs)03d %(levelname)8s %(message)s', 
                    datefmt='%Y-%m-%d %H:%M:%S',
                    level=logging.DEBUG)
                    

 ## Set I/O Folders

In [5]:
#InDir = '.\\inputs\\'
#os.makedirs(InDir, exist_ok=True)

OutDir = '..\\src\\'
os.makedirs(OutDir, exist_ok=True)



 ## Load Data

In [6]:
# server = 'localhost\sqlexpress' # for a named instance
# server = 'myserver,port' # to specify an alternate port
server = 'db7.erpint.pmi.org'
database = 'AnalyticDataStore'

# login = input('Login type (''Windows'', ''Password''):')
login='Windows'

if login=='Windows':
    cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};SERVER=' +
                        server+';DATABASE='+database+';Trusted_Connection=yes;')
else:
    username = input('Username:')
    password = getpass.getpass('Password:')
    cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};SERVER=' +
                          server+';DATABASE='+database+';UID='+username+';PWD=' + password)
#cursor = cnxn.cursor()

logging.info('Start loading UDS')

selectData = '''
SELECT
	 uds.PMI_ID
	,PersonId
	,CASE
		WHEN PersonID IS NULL
		THEN uds.PMI_ID
		ELSE STR(PersonID)
		END as UniqueID
	,CASE 
		WHEN TRY_CAST(uds.YearsInPM AS INT) IS NULL
		THEN
			CASE 
				WHEN uds.YearsInPM = '0-1' THEN 1
				WHEN uds.YearsInPM = '2-4' THEN 2
				WHEN uds.YearsInPM = '5-9' THEN 3
				WHEN uds.YearsInPM = '10-19' THEN 4
				WHEN uds.YearsInPM = '20 or more' THEN 5
				WHEN uds.YearsInPm = 'N/A' THEN NULL
				ELSE NULL
				END
		ELSE
			CASE 
				WHEN uds.YearsInPM = 0 THEN 1
				WHEN uds.YearsInPM = 1 THEN 1
				WHEN uds.YearsInPM between 2 and 4 THEN 2
				WHEN uds.YearsInPM between 5 and 9 THEN 3
				WHEN uds.YearsInPM between 10 and 19 THEN 4
				WHEN uds.YearsInPM > =  20 THEN 5
				ELSE NULL
				END 
		END AS YearsInPM
	,YearsFullTimeExperience
	,Age
	,RegisteredUserCreateDate
	,ViewOfProjectManagement
	,CASE
		WHEN uds.JobTitle = 'Analyst' THEN 1
		WHEN uds.JobTitle = 'Consultant' THEN 2
		WHEN uds.JobTitle = 'Executive' THEN 3
		WHEN uds.JobTitle = 'Management' THEN 4
		WHEN uds.JobTitle = 'Other' THEN 5
		WHEN uds.JobTitle = 'Program' THEN 6
		WHEN uds.JobTitle = 'Project Management' THEN 7
		WHEN uds.JobTitle = 'Senior Project Management' THEN 8
		ELSE NULL
		END AS JobTitle
	,OnLeadershipTeamPMO
	,PMOLeadershipInfluence
	,AssignedSegmentName
	,seg.Segment_Number
FROM [AnalyticDataStore].[dbo].[DM_UniversalDataSet] uds
LEFT JOIN (
	SELECT
		PMI_ID
		,Segment_Number
		,Segment_Assignment_Effective_Date
	FROM (
		SELECT
			PMI_ID
			,Segment_Number
			,Segment_Assignment_Effective_Date
			,ROW_NUMBER() OVER(PARTITION BY PMI_ID ORDER BY Segment_Assignment_Effective_Date DESC) as rn
		FROM [AnalyticDataStore].[dbo].[Segment_Assignment]
		) t
	WHERE rn=1
      ) seg
ON uds.PMI_ID = seg.PMI_ID
'''

try:
    uds = pd.read_sql(selectData, 
                      cnxn,
                      parse_dates=['RegisteredUserCreateDate']
                     )
except Exception as e:
    logging.exception(e)
    raise SystemExit('Error occured. See the log for details.')

del selectData

# startTime= dtm.now()
# timeElapsed=dtm.now()-startTime
# print('(hh:mm:ss.ms): {}'.format(timeElapsed))
# del timeElapsed, startTime
uds = uds.drop_duplicates(['UniqueID']).reset_index(drop=True)
logging.info('Data loaded successfully.  Dimensions:' + str(uds.shape))


In [7]:
try:
    uds.to_pickle(OutDir + 'CustomerLifecycleFramework_' + today + '.pkl')
except Exception as e:
    logging.exception(e)
    raise SystemExit('Error occured while pickling UDS. See the log for details.')
 

In [8]:
# uds = pd.read_pickle(OutDir + 'CustomerLifecycleFramework_' + today + '.pkl')


In [9]:
"""
# dict key:labels for reference
pmyears_dict = {
#     nan:'missing',
    1:'0-1',
    2:'2-4',
    3:'5-9',
    4:'10-19',
    5:'20 or more',
}
jobtitle_dict = {
#     nan:'missing',
    1:'Analyst',
    2:'Consultant',
    3:'Executive',
    4:'Management',
    5:'Other',
    6:'Program',
    7:'Project Management',
    8:'Senior Project Management'
}
pmview_dict = {
#     nan:'missing',
    1:'Profession',
    2:'Skill',
    3:'Role',
    4:'Other'
}
pmoteam_dict = {
#     nan:'missing',
    1:'No',
    2:'Yes'
}
pmoinfluence_dict = {
#     nan:'missing',
    1:'Primary Decisionmaker',
    2:'Involved NonPrimary',
    3:'Somewhat Involved',
    4:'Not Very Invovled',
    5:'Not Involved'
}
"""


"\n# dict key:labels for reference\npmyears_dict = {\n#     nan:'missing',\n    1:'0-1',\n    2:'2-4',\n    3:'5-9',\n    4:'10-19',\n    5:'20 or more',\n}\njobtitle_dict = {\n#     nan:'missing',\n    1:'Analyst',\n    2:'Consultant',\n    3:'Executive',\n    4:'Management',\n    5:'Other',\n    6:'Program',\n    7:'Project Management',\n    8:'Senior Project Management'\n}\npmview_dict = {\n#     nan:'missing',\n    1:'Profession',\n    2:'Skill',\n    3:'Role',\n    4:'Other'\n}\npmoteam_dict = {\n#     nan:'missing',\n    1:'No',\n    2:'Yes'\n}\npmoinfluence_dict = {\n#     nan:'missing',\n    1:'Primary Decisionmaker',\n    2:'Involved NonPrimary',\n    3:'Somewhat Involved',\n    4:'Not Very Invovled',\n    5:'Not Involved'\n}\n"

 # Diagnostics

In [10]:
# quick diagnostic - how much of each variable can we ultimately have filled?
try:
    total = uds.shape[0]
    fte = uds[(uds['YearsFullTimeExperience'].notna()) | (uds['Age'].notna())].shape[0]
    pme = uds[(uds['YearsInPM'].notna())].shape[0]
    title = uds[(uds['JobTitle'].notna())].shape[0]
    pmview = uds[(uds['ViewOfProjectManagement'].notna())].shape[0]
    onPMO = uds[(uds['OnLeadershipTeamPMO'].notna())].shape[0]
    PMOinfl = uds[(uds['PMOLeadershipInfluence'].notna())].shape[0]

    logging.info('Total records: ' + str(total))
    logging.info('With FullTimeExperience | Age: ' + str(fte) + ', ' + str(round(fte/total,4)*100) + '%')
    logging.info('With PMExperience: ' + str(pme) + ', ' + str(round(pme/total,4)*100) + '%')
    logging.info('With JobTitle: ' + str(title) + ', ' + str(round(title/total,4)*100) + '%')
    logging.info('With PMView: ' + str(pmview) + ', ' + str(round(pmview/total,4)*100) + '%')
    logging.info('With onPMO: ' + str(onPMO) + ', ' + str(round(onPMO/total,4)*100) + '%')
    logging.info('With PMOinfl: ' + str(PMOinfl) + ', ' + str(round(PMOinfl/total,4)*100) + '%')

except Exception as e:
    logging.exception(e)
    raise SystemExit('Error occurred in diagnostic calculations.  See log for details.')


del total, fte, pme, title, pmview, onPMO, PMOinfl



In [11]:
def pctmissing(values):
    import pandas as pd
    return values.isna().sum() / values.size


In [12]:
df.groupby(['ModelType']).agg(
    {'YearsInPM':pctmissing
     ,'YearsFullTimeExperience':pctmissing
     ,'Age':pctmissing
     ,'RegisteredUserCreateDate':pctmissing
     ,'ViewOfProjectManagement':pctmissing
     ,'JobTitle':pctmissing
    }
)


NameError: name 'df' is not defined

In [13]:
df.groupby(['CustomerType']).agg(
    {'YearsInPM':pctmissing
     ,'YearsFullTimeExperience':pctmissing
     ,'Age':pctmissing
     ,'RegisteredUserCreateDate':pctmissing
     ,'ViewOfProjectManagement':pctmissing
     ,'JobTitle':pctmissing
    }
)


NameError: name 'df' is not defined

In [14]:
df['RegisteredYear'] = pd.to_datetime(df['RegisteredUserCreateDate']).dt.year

df.groupby(['CustomerType','RegisteredYear']).agg(
    {'YearsInPM':pctmissing
     ,'YearsFullTimeExperience':pctmissing
     ,'Age':pctmissing
     ,'ViewOfProjectManagement':pctmissing
     ,'JobTitle':pctmissing
    }
)



NameError: name 'df' is not defined

In [15]:
try:
    tmp = uds.copy()[[
        'YearsFullTimeExperience',
        'Age',
        'YearsInPM',
        'JobTitle',
        'ViewOfProjectManagement'
        ]]

    corr = tmp.corr()
    logging.info(str(corr))

    del tmp, corr

except Exception as e:
    logging.exception(e)
    raise SystemExit('Error occurred in correlation diagnostic.  See log for details.')

# ax = sns.heatmap(
#     corr, 
#     vmin=-1, vmax=1, center=0,
#     cmap=sns.diverging_palette(20, 220, n=200),
#     square=True
# )
# ax.set_xticklabels(
#     ax.get_xticklabels(),
#     rotation=45,
#     horizontalalignment='right'
# );


 # Create Models for Imputation

In [16]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_validate, cross_val_predict
from sklearn.metrics import r2_score, mean_squared_error, median_absolute_error, max_error

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate, cross_val_predict
from sklearn.metrics import multilabel_confusion_matrix, classification_report


 ## YearsFullTimeExperience

In [17]:
# imputation flag
# try:
#     # base case - no/impute needed 
#     uds['FTE_impute'] = -1 
#     uds.loc[(uds['YearsFullTimeExperience'].notna()), 'FTE_impute'] = 0

#     # flag from worst to best model
#     uds.loc[(uds['YearsFullTimeExperience'].isna()) 
#             & (uds['YearsInPM'].notna()), 'FTE_impute'] = 4
#     uds.loc[(uds['YearsFullTimeExperience'].isna()) 
#             & (uds['Age'].notna()), 'FTE_impute'] = 3
#     uds.loc[(uds['YearsFullTimeExperience'].isna()) 
#             & (uds['Age'].notna()) 
#             & (uds['YearsInPM'].notna()), 'FTE_impute'] = 2
#     uds.loc[(uds['YearsFullTimeExperience'].isna()) 
#             & (uds['Age'].notna()) 
#             & (uds['JobTitle'].notna()), 'FTE_impute'] = 1

# except Exception as e:
#     logging.exception(e)
#     raise SystemExit('Error occurred re: FTE Impute Flag.  See log for details.')


In [18]:
# FTE Imputation Models
try:
    tmp = uds[['YearsFullTimeExperience','Age','JobTitle','YearsInPM']]

    # train FTE models
    # work experience based on age & jobtitle
    train = tmp[tmp['YearsFullTimeExperience'].notna() 
                & tmp['Age'].notna() 
                & tmp['JobTitle'].notna()]

    X = pd.get_dummies(train[['Age','JobTitle']], columns=['JobTitle']).to_numpy()
    y = train['YearsFullTimeExperience'].to_numpy().reshape(-1,1)
except Exception as e:
    logging.exception(e)
    raise SystemExit('Error occurred in FTE training set creation.  See log for details.')

try:
    FTE1 = LinearRegression(n_jobs = -1)
    FTE1.fit(X, y)

    logging.info('FTE1: YearsFTE based on Age & JobTitle.  Metrics:')
    logging.info('CV R2:' + str(cross_validate(FTE1, X, y, cv=5, n_jobs=-1)['test_score']))
    pred = cross_val_predict(FTE1, X, y, cv=5, n_jobs=-1)
    
    R21 = r2_score(y, pred)
    RMSE1 = np.sqrt(mean_squared_error(y, pred))
    MedAE1 = median_absolute_error(y, pred)
    MaxE1 = max_error(y, pred)
    
    logging.info('R2: ' + str(R21))
    logging.info('RMSE: ' + str(RMSE1))
    logging.info('MedianAE: ' + str(MedAE1))
    logging.info('MAX Error: ' + str(MaxE1))

except Exception as e:
    logging.exception(e)
    raise SystemExit('Error occurred re: FTE Impute 1.  See log for details.')

try:
    # work experience based on age & pm experience
    train = tmp[tmp['YearsFullTimeExperience'].notna() 
                & tmp['Age'].notna() 
                & tmp['YearsInPM'].notna()]

    X = train[['Age','YearsInPM']].to_numpy()
    y = train['YearsFullTimeExperience'].to_numpy().reshape(-1,1)

    FTE2 = LinearRegression(n_jobs = -1)
    FTE2.fit(X, y)

    logging.info('FTE2: YearsFTE based on Age & YearsInPM.  Metrics:')
    logging.info('CV R2:' + str(cross_validate(FTE2, X, y, cv=5, n_jobs=-1)['test_score']))
    pred = cross_val_predict(FTE2, X, y, cv=5, n_jobs=-1)

    R22 = r2_score(y, pred)
    RMSE2 = np.sqrt(mean_squared_error(y, pred))
    MedAE2 = median_absolute_error(y, pred)
    MaxE2= max_error(y, pred)
    
    logging.info('R2: ' + str(R22))
    logging.info('RMSE: ' + str(RMSE2))
    logging.info('MedianAE: ' + str(MedAE2))
    logging.info('MAX Error: ' + str(MaxE2))

except Exception as e:
    logging.exception(e)
    raise SystemExit('Error occurred re: FTE Impute 2.  See log for details.')

try:
    # work experience based on age
    train = tmp[tmp['YearsFullTimeExperience'].notna() & tmp['Age'].notna()]

    X = train['Age'].to_numpy().reshape(-1,1)
    y = train['YearsFullTimeExperience'].to_numpy().reshape(-1,1)

    FTE3 = LinearRegression(n_jobs = -1)
    FTE3.fit(X, y)

    logging.info('FTE3: YearsFTE based on Age.  Metrics:')
    logging.info('CV R2:' + str(cross_validate(FTE3, X, y, cv=5, n_jobs=-1)['test_score']))
    pred = cross_val_predict(FTE3, X, y, cv=5, n_jobs=-1)

    R23 = r2_score(y, pred)
    RMSE3 = np.sqrt(mean_squared_error(y, pred))
    MedAE3 = median_absolute_error(y, pred)
    MaxE3 = max_error(y, pred)
    
    logging.info('R2: ' + str(R23))
    logging.info('RMSE: ' + str(RMSE3))
    logging.info('MedianAE: ' + str(MedAE3))
    logging.info('MAX Error: ' + str(MaxE3))

except Exception as e:
    logging.exception(e)
    raise SystemExit('Error occurred re: FTE Impute 3.  See log for details.')

try:
    # work experience based on pm experience
    train = tmp[tmp['YearsFullTimeExperience'].notna() & tmp['YearsInPM'].notna()]

    X = train['YearsInPM'].to_numpy().reshape(-1,1)
    y = train['YearsFullTimeExperience'].to_numpy().reshape(-1,1)

    FTE4 = LinearRegression(n_jobs = -1)
    FTE4.fit(X, y)

    logging.info('FTE4: YearsFTE based on YearsInPM.  Metrics:')
    logging.info('CV R2:' + str(cross_validate(FTE4, X, y, cv=5, n_jobs=-1)['test_score']))
    pred = cross_val_predict(FTE4, X, y, cv=5, n_jobs=-1)

    R24 = r2_score(y, pred)
    RMSE4 = np.sqrt(mean_squared_error(y, pred))
    MedAE4 = median_absolute_error(y, pred)
    MaxE4 = max_error(y, pred)
    
    logging.info('R2: ' + str(R24))
    logging.info('RMSE: ' + str(RMSE4))
    logging.info('MedianAE: ' + str(MedAE4))
    logging.info('MAX Error: ' + str(MaxE4))

except Exception as e:
    raise SystemExit('Error occurred re: FTE Impute 4.  See log for details.')

try:
    if RMSE1 > RMSE2:
        raise Exception('ERROR: FTE1 has worse RMSE performance than FTE2.  This breaks model prioritization assumptions!')    
    if RMSE1 > RMSE3:
        raise Exception('ERROR: FTE1 has worse RMSE performance than FTE3.  This breaks model prioritization assumptions!')
    if RMSE1 > RMSE4:
        raise Exception('ERROR: FTE1 has worse RMSE performance than FTE4.  This breaks model prioritization assumptions!')    
    if RMSE2 > RMSE3:
        raise Exception('ERROR: FTE2 has worse RMSE performance than FTE3.  This breaks model prioritization assumptions!')    
    if RMSE2 > RMSE4:
        raise Exception('ERROR: FTE2 has worse RMSE performance than FTE4.  This breaks model prioritization assumptions!')
    if RMSE3 > RMSE4:
        raise Exception('ERROR: FTE3 has worse RMSE performance than FTE4.  This breaks model prioritization assumptions!')

    del R21, R22, R23, R24
    del RMSE1, RMSE2, RMSE3, RMSE4
    del MedAE1, MedAE2, MedAE3, MedAE4
    del MaxE1, MaxE2, MaxE3, MaxE4
    del tmp, train, X, y, pred

except Exception as e:
    logging.exception(e)
    raise SystemExit('Error occurred in FullTimeExperience checking.  See log for details.')

try:
    with open(OutDir + 'FTE1.joblib', 'wb') as file:
        pickle.dump(FTE1, file)
    with open(OutDir + 'FTE2.joblib', 'wb') as file:
        pickle.dump(FTE2, file)
    with open(OutDir + 'FTE3.joblib', 'wb') as file:
        pickle.dump(FTE3, file)
    with open(OutDir + 'FTE4.joblib', 'wb') as file:
        pickle.dump(FTE4, file)

    del FTE1, FTE2, FTE3, FTE4
except Exception as e:
    logging.exception(e)
    raise SystemExit('Error occurred saving FullTimeExperience models.  See log for details.')


 ## YearsInPM

In [19]:
# imputation flag

# base case - no/impute needed 
# try:
#     uds['PME_impute'] = -1
#     uds.loc[(uds['YearsInPM'].notna()), 'PME_impute'] = 0

#     uds.loc[(uds['YearsInPM'].isna()) 
#             & (uds['YearsFullTimeExperience'].notna()) 
#             & (uds['JobTitle'].notna()), 'PME_impute'] = 1
# except Exception as e:
#     logging.exception(e)
#     raise SystemExit('Error occurred re: PME Impute Flag.  See log for details.')


In [20]:
# YearsInPM thresholds @5, @10
try:
    tmp = uds.copy()[['YearsFullTimeExperience','JobTitle','YearsInPM']]
    tmp['PMExp_5'] = 0
    tmp.loc[(tmp['YearsInPM'] <= 3), 'PMExp_5'] = 1
    tmp['PMExp_10'] = 0
    tmp.loc[(tmp['YearsInPM'] >= 4), 'PMExp_10'] = 1
except Exception as e:
    logging.exception(e)
    raise SystemExit('Error occurred in PME training set creation.  See log for details.')

try:
    train = tmp[tmp['YearsInPM'].notna() 
                & tmp['YearsFullTimeExperience'].notna()
                & tmp['JobTitle'].notna()]

    X = pd.get_dummies(train[['YearsFullTimeExperience','JobTitle']], columns=['JobTitle']).to_numpy()
    y5 = train['PMExp_5'].to_numpy()#.reshape(-1,1)
    y10 = train['PMExp_10'].to_numpy()#.reshape(-1,1)

    lr5 = LogisticRegression(penalty='none', solver='lbfgs', multi_class='auto', class_weight='balanced', max_iter=1000, n_jobs = -1)
    lr5.fit(X, y5)

    logging.info('YearsInPM threshold < 5; >=5')
    logging.info('CV Accuracy:' + str(cross_validate(lr5, X, y5, cv=5, n_jobs=-1)['test_score']))
    pred5 = cross_val_predict(lr5, X, y5, cv=5, n_jobs=-1)
    #logging.info('confusion: ', multilabel_confusion_matrix(y5, pred5))
    logging.info('report: ' + str(classification_report(y5, pred5)))
    logging.info('')

    lr10 = LogisticRegression(penalty='none', solver='lbfgs', multi_class='auto', class_weight='balanced', max_iter=1000, n_jobs = -1)
    lr10.fit(X, y10)

    logging.info('YearsInPM threshold < 10; >=10')
    logging.info('CV Accuracy:' + str(cross_validate(lr10, X, y10, cv=5, n_jobs=-1)['test_score']))
    pred10 = cross_val_predict(lr10, X, y10, cv=5, n_jobs=-1)
    #logging.info('confusion: ', multilabel_confusion_matrix(y10, pred10))
    logging.info('report: ' + str(classification_report(y10, pred10)))
except Exception as e:
    raise SystemExit('Error occurred re: PME Impute.  See log for details.')

try: 
    with open(OutDir + 'PME5.joblib', 'wb') as file:
        pickle.dump(lr5, file)
    with open(OutDir + 'PME10.joblib', 'wb') as file:
        pickle.dump(lr10, file)

    del tmp, train, X, y5, y10, pred5, pred10
    del lr5, lr10
except Exception as e:
    logging.exception(e)
    raise SystemExit('Error occurred saving YearsInPM models.  See log for details.')


 ## ViewOfProjectManagement

In [21]:
# imputation flag

# base case - no/impute needed 
# try:
#     uds['View_impute'] = -1
#     uds.loc[(uds['ViewOfProjectManagement'].notna()), 'View_impute'] = 0

#     uds.loc[(uds['ViewOfProjectManagement'].isna()) 
#             & (uds['YearsFullTimeExperience'].notna()) 
#             & (uds['JobTitle'].notna()), 'View_impute'] = 1
# except Exception as e:
#     logging.exception(e)
#     raise SystemExit('Error occurred re: View Impute Flag.  See log for details.')


In [22]:
try:
    tmp = uds.copy()[['YearsFullTimeExperience','JobTitle','ViewOfProjectManagement']]
    tmp['View'] = 0 # == ViewOfProjectManagement = 'Profession'
    tmp.loc[(tmp['ViewOfProjectManagement'] > 1),'View'] = 1 # == ViewOfProjectManagement = 'Skill','Role','Other'
except Exception as e:
    logging.exception(e)
    raise SystemExit('Error occurred in View training set creation.  See log for details.')

try:
    train = tmp[tmp['ViewOfProjectManagement'].notna() 
                & tmp['YearsFullTimeExperience'].notna()
                & tmp['JobTitle'].notna()]

    X = pd.get_dummies(train[['YearsFullTimeExperience','JobTitle']], columns=['JobTitle']).to_numpy()
    y = train['View'].to_numpy()#.reshape(-1,1)

    lr = LogisticRegression(penalty='none', solver='lbfgs', multi_class='auto', class_weight='balanced', max_iter=1000, n_jobs = -1)
    lr.fit(X, y)

    logging.info('CV Accuracy:' + str(cross_validate(lr, X, y, cv=5, n_jobs=-1)['test_score']))
    pred = cross_val_predict(lr, X, y, cv=5, n_jobs=-1)
    #logging.info('confusion: ', multilabel_confusion_matrix(y, pred))
    logging.info('report: ' + str(classification_report(y, pred)))
except Exception as e:
    raise SystemExit('Error occurred re: PME Impute.  See log for details.')

try:   
    with open(OutDir + 'View.joblib', 'wb') as file:
        pickle.dump(lr, file)

    del tmp, train, X, y, pred
    del lr
except Exception as e:
    logging.exception(e)
    raise SystemExit('Error occurred saving ViewOfPM models.  See log for details.')


In [23]:
logging.shutdown()
