# SAP Standard Example

(tweaked for MTA projects and HDI containers)


In [3]:
## Import and Setup for using HDI Schema
import os
from getpass import getpass

from hana_ml import dataframe
from hana_ml.algorithms import classification

# Connection settings. Every value can be overridden with an environment
# variable; the non-secret defaults match the demo system. The password is
# deliberately NOT stored in the notebook: it is taken from HANA_PASSWORD or,
# when that is unset/empty, requested interactively.
dbSchema = os.environ.get('HANA_SCHEMA', 'HANA_ML_DEMO_HDI_DB_1')
host = os.environ.get('HANA_HOST', 'hxehost')
port = os.environ.get('HANA_PORT', '39015')
user = os.environ.get('HANA_USER', 'MLUSER')
password = os.environ.get('HANA_PASSWORD') or getpass('HANA password for %s: ' % user)

# Feature columns shared by the training and the prediction step.
feature_columns = ['V1', 'V2', 'V3']

# Define DataFrames: a handle on the training table inside the HDI schema.
connection_context = dataframe.ConnectionContext(host, int(port), user, password)
df = connection_context.table('examples.DATA_TBL_FIT', schema=dbSchema)  # 'PAL_LOGISTICR_DATA_TBL'

# Train: V3 is declared categorical for the classifier.
lr = classification.LogisticRegression(connection_context, solver='newton',
                                        thread_ratio=0.1, max_iter=1000,
                                        categorical_variable=['V3'], pmml_export=1,
                                        stat_inf=True, tol=0.000001)
# NOTE(review): the fourth positional argument (False) is passed through
# unchanged - confirm its meaning against the installed hana_ml version.
lr.fit(df, feature_columns, 'CATEGORY', False)

# Predict on the separate prediction table and pull the result to the client.
pred_df = connection_context.table('examples.DATA_TBL_PREDICT', schema=dbSchema)  # 'PAL_FLOGISTICR_PREDICTDATA_TBL'
result = lr.predict(pred_df, feature_columns)
result.collect()

Unnamed: 0,ID,CLASS,SCORE,COMPLEMENT_SCORE,LOG_SCORE,LOG_COMPLEMENT_SCORE,DECISION_FUNC
0,0,1,0.9503656,0.049634,-0.050909,-3.003071,-2.952163
1,1,1,0.8485314,0.151469,-0.164248,-1.887377,-1.723129
2,2,1,0.9555893,0.044411,-0.045427,-3.114276,-3.068849
3,3,0,0.03702131,0.962979,-3.296262,-0.037724,3.258538
4,4,0,0.02229288,0.977707,-3.803488,-0.02254513,3.780943
5,5,0,0.2504115,0.749589,-1.38465,-0.2882309,1.096419
6,6,0,0.04946187,0.950538,-3.006553,-0.05072701,2.955826
7,7,0,0.009922804,0.990077,-4.61292,-0.009972363,4.602947
8,8,0,0.2853014,0.714699,-1.254209,-0.3358944,0.918315
9,9,0,0.2689367,0.731063,-1.313279,-0.3132552,1.000024


# SCIKIT Documentation Example

In [63]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

# Iris feature matrix (X) and class labels (y).
X, y = load_iris(return_X_y=True)

# Configure the estimator first, then fit it as a separate step.
# NOTE(review): recent scikit-learn releases deprecate `multi_class` -
# confirm against the installed version before changing it.
clf = LogisticRegression(
    random_state=0,
    solver='lbfgs',
    max_iter=1000,
    multi_class='multinomial',
)
clf.fit(X, y)

# Only the last expression of a cell is displayed, so the two prediction
# calls below run but their results are not shown.
clf.predict(X[:2])
clf.predict_proba(X[:2])
clf.score(X, y)

0.9733333333333334

# SCIKIT Example with HANA Dataset

In [100]:
## Import and Setup for using HDI Schema
import os
from getpass import getpass

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np
from hana_ml import dataframe

labelencoder = LabelEncoder()

# Connection settings. Every value can be overridden with an environment
# variable; the password is never stored in the notebook - it comes from
# HANA_PASSWORD or, when that is unset/empty, from an interactive prompt.
dbSchema = os.environ.get('HANA_SCHEMA', 'HANA_ML_DEMO_HDI_DB_1')
host = os.environ.get('HANA_HOST', 'hxehost')
port = os.environ.get('HANA_PORT', '39015')
user = os.environ.get('HANA_USER', 'MLUSER')
password = os.environ.get('HANA_PASSWORD') or getpass('HANA password for %s: ' % user)

# Feature columns shared by the training and the prediction step.
feature_columns = ['V1', 'V2', 'V3']

# Define DataFrames
connection_context = dataframe.ConnectionContext(host, int(port), user, password)
df = connection_context.table('examples.DATA_TBL_FIT', schema=dbSchema)  # 'PAL_LOGISTICR_DATA_TBL'

# Prepare the feature columns as a numpy ndarray and the dependent variable
# column as a 1-D array. Both come from ONE collected result set so every
# feature row stays paired with its own label; two separate queries without
# an ORDER BY give no guarantee of returning rows in the same order.
fit_pdf = df[feature_columns + ['CATEGORY']].collect()
dfX = fit_pdf[feature_columns].values
dfy = fit_pdf['CATEGORY'].values

# Encode the alphabetic V1 column as integers; the encoder learns its
# label -> integer mapping from the training data here.
dfX[:, 0] = labelencoder.fit_transform(dfX[:, 0])

# Train
# NOTE(review): recent scikit-learn releases deprecate `multi_class` -
# confirm against the installed version before changing it.
clf = LogisticRegression(random_state=0, solver='newton-cg', max_iter=1000,
                          multi_class='multinomial').fit(dfX, dfy)


### Prepare Prediction dataset
pred_df = connection_context.table('examples.DATA_TBL_PREDICT', schema=dbSchema)  # 'PAL_FLOGISTICR_PREDICTDATA_TBL'
pred_dfX = pred_df[feature_columns].collect().values
# Keep an un-encoded copy for display before V1 is overwritten.
pred_dfXoriginal = pred_dfX.copy()
# Reuse the mapping learned on the training data (transform, NOT fit_transform):
# re-fitting on the prediction set could assign different integers to the same
# category. transform() raises ValueError for a label never seen in training.
pred_dfX[:, 0] = labelencoder.transform(pred_dfX[:, 0])


## Predict
predictedCategory = clf.predict(pred_dfX)
predictedCategoryProba = clf.predict_proba(pred_dfX)

## Show output: original (un-encoded) features with a 1-based row index,
## followed by the predicted class and the per-class probabilities.
resultDF = pd.DataFrame(
    pred_dfXoriginal,
    columns=feature_columns,
    index=pd.RangeIndex(1, len(pred_dfXoriginal) + 1),
)
resultDF['predictedCATEGORY'] = predictedCategory
# predict_proba columns follow the order of clf.classes_.
resultDF['predictedProbaCAT0'] = predictedCategoryProba[:, 0]
resultDF['predictedProbaCAT1'] = predictedCategoryProba[:, 1]

resultDF

# Optional: clf.score(dfX, dfy) reports the accuracy on the training data.

Unnamed: 0,V1,V2,V3,predictedCATEGORY,predictedProbaCAT0,predictedProbaCAT1
1,B,2.62,0,1,0.206817,0.793183
2,B,2.875,0,1,0.329367,0.670633
3,A,2.32,1,1,0.142869,0.857131
4,A,3.215,2,0,0.720803,0.279197
5,B,3.44,3,0,0.904241,0.095759
6,B,3.46,0,0,0.677329,0.322671
7,A,3.57,1,0,0.787868,0.212132
8,B,3.19,2,0,0.751515,0.248485
9,A,3.15,3,0,0.786651,0.213349
10,B,3.44,0,0,0.666382,0.333618
