# Mathematics

## Vector

In [3]:
#a plain Python list is the simplest way to hold the components of a vector
x = list(range(1, 6))
#leave x as the last expression so the notebook shows it as the cell output
x

[1, 2, 3, 4, 5]

In [4]:
#numpy provides the array type used for the vectors and matrices below
import numpy as np
#rebuild the same vector, this time as a numpy array
x = np.arange(1, 6)
#print the values, then the type of x: ndarray stands for N-dimensional array
print(x, type(x), sep='\n')

[1 2 3 4 5]
<class 'numpy.ndarray'>


## Matrix

In [6]:
#build the 3x3 matrix m, one inner list per row
m = np.array([
    [1, 5, 2],
    [4, 7, 4],
    [2, 0, 9],
])
#print the matrix
print(m)

[[1 5 2]
 [4 7 4]
 [2 0 9]]


In [7]:
#print the shape of m as a (rows, columns) tuple
print(np.shape(m))

(3, 3)


In [10]:
#print the transpose of m under a heading; the \n in the label starts a new line
print('Matrix Transpose:\n', m.T)

Matrix Transpose:
 [[1 4 2]
 [5 7 0]
 [2 4 9]]


In [11]:
#print the determinant of m, computed with numpy's linalg module
#(the result is a float, so expect a small floating point error in the last digits)
print ('Matrix Determinant:', np.linalg.det(m))

Matrix Determinant: -105.00000000000006


In [12]:
#compute the inverse of m and keep it in m_inv; the next cell multiplies m by it
m_inv = np.linalg.inv(m)
#print the inverse under a heading
print ('Matrix inverse:\n', m_inv)

Matrix inverse:
 [[-0.6         0.42857143 -0.05714286]
 [ 0.26666667 -0.04761905 -0.03809524]
 [ 0.13333333 -0.0952381   0.12380952]]


In [13]:
#identity matrix (result of matrix x matrix_inverse)
iden_m = np.dot(m, m_inv)
#round away the floating point error; adding 0.0 turns any -0. into 0.
#np.abs is deliberately not used: it would also hide a wrongly negative entry
iden_m = np.round(iden_m, 0) + 0.0

#view product of matrix and its inverse
print('Product of matrix and its inverse:\n', iden_m)

Product of matrix and its inverse:
 [[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


## Eigen Decomposition

In [15]:
#eigen decomposition of the matrix
m = np.array([[1, 5, 2], [4, 7, 4], [2, 0, 9]])

#np.linalg.eig returns the eigen values and the matching eigen vectors of m
eigen_vals, eigen_vecs = np.linalg.eig(m)

#print the eigen values, followed by a blank line
print('Eigen Values:', eigen_vals, '\n')
#print the eigen vectors under their own heading
print('Eigen Vectors:\n', eigen_vecs)

Eigen Values: [-1.32455532 11.32455532  7.        ] 

Eigen Vectors:
 [[-0.91761521  0.46120352 -0.46829291]
 [ 0.35550789  0.79362022 -0.74926865]
 [ 0.17775394  0.39681011  0.46829291]]


## Singular Value Decomposition

In [21]:
#SVD
m = np.array([[1, 5, 2], [4, 7, 4], [2, 0, 9]])

#decompose m into the three parts of the SVD equation: U, S and VT
U, S, VT = np.linalg.svd(m)

#heading line for the outputs that follow
print('Getting SVD outputs:-')
#each component is printed under its own label and followed by a blank line
print('U:\n', U, '\n')
print('S:\n', S, '\n')
print('VT:\n', VT, '\n')

Getting SVD outputs:-
U:
 [[ 0.3831556  -0.39279153  0.83600634]
 [ 0.68811254 -0.48239977 -0.54202545]
 [ 0.61619228  0.78294653  0.0854506 ]] 

S:
 [12.10668383  6.91783499  1.25370079] 

VT:
 [[ 0.36079164  0.55610321  0.74871798]
 [-0.10935467 -0.7720271   0.62611158]
 [-0.92621323  0.30777163  0.21772844]] 



# Statistics

## Descriptive Statistics

In [30]:
#import scipy
import scipy as sp
from scipy import stats

#draw the sample from a seeded generator so every run of the notebook gets
#the same 15 integers (values 1 to 19; the upper bound 20 is excluded)
nums = np.random.RandomState(42).randint(1, 20, size=15)
#view the data
print('Data: ', nums)

Data:  [14  4 11  2 10 12 12  8  4 15 17  2 11 18 11]


In [31]:
#get descriptive stats
#mean/median/std/var come from numpy: the scipy top-level aliases (sp.mean etc.)
#are deprecated and no longer available in newer SciPy releases
print('Mean:', np.mean(nums))
#view median value of the data
print('Median:', np.median(nums))
#view mode value of the data
print('Mode:', stats.mode(nums))
#view standard deviation value of the data (numpy default ddof=0, as before)
print('Standard Deviation:', np.std(nums))
#view variance value of the data (numpy default ddof=0, as before)
print('Variance:', np.var(nums))
#view skew value of the data
print('Skew:', stats.skew(nums))
#view Kurtosis value of the data
print('Kurtosis:', stats.kurtosis(nums))

Mean: 10.066666666666666
Median: 11.0
Mode: ModeResult(mode=array([11]), count=array([3]))
Standard Deviation: 4.959390643572611
Variance: 24.59555555555556
Skew: -0.2594934293583228
Kurtosis: -0.9799945508849008


# Data Retrieval

In [1]:
import pandas as pd
#turn off pandas' chained-assignment warning (pandas' default is 'warn')
pd.set_option('mode.chained_assignment', None)

#read the student records from student_records.csv into a DataFrame
df = pd.read_csv('student_records.csv')
df

Unnamed: 0,Name,OverallGrade,Obedient,ResearchScore,ProjectScore,Recommend
0,Henry,A,Y,90,85,Yes
1,John,C,N,85,51,Yes
2,David,F,N,10,17,No
3,Holmes,B,Y,75,71,No
4,Marvin,E,N,20,30,No
5,Simon,A,Y,92,79,Yes
6,Robert,B,Y,60,59,No
7,Trent,C,Y,75,33,No


# Data Preparation

## Feature Extraction and Engineering

In [2]:
#get features and corresponding outcomes
feature_names = ['OverallGrade', 'Obedient', 'ResearchScore', 'ProjectScore']
#take an independent copy: a later cell assigns scaled values into
#training_features, which must not be a slice still tied to df
training_features = df[feature_names].copy()

#outcome_name is a list, so outcome_labels is a one-column DataFrame
outcome_name = ['Recommend']
outcome_labels = df[outcome_name]

In [3]:
#display the feature columns selected from df
training_features

Unnamed: 0,OverallGrade,Obedient,ResearchScore,ProjectScore
0,A,Y,90,85
1,C,N,85,51
2,F,N,10,17
3,B,Y,75,71
4,E,N,20,30
5,A,Y,92,79
6,B,Y,60,59
7,C,Y,75,33


In [4]:
#display the outcome labels (the Recommend column selected from df)
outcome_labels

Unnamed: 0,Recommend
0,Yes
1,Yes
2,No
3,No
4,No
5,Yes
6,No
7,No


In [5]:
#split the feature columns by type: the two score columns are numeric,
#the grade and obedience columns are categorical
numeric_feature_names = [
    'ResearchScore',
    'ProjectScore',
]
categoricial_feature_names = [
    'OverallGrade',
    'Obedient',
]

In [6]:
from sklearn.preprocessing import StandardScaler
ss = StandardScaler()

#fit the scaler on the raw scores taken from df, not on training_features:
#re-running this cell then never refits the scaler on already-scaled values
#(the scores are cast to float up front so no implicit int->float conversion is needed)
ss.fit(df[numeric_feature_names].astype(float))

#rebuild the feature frame as an independent copy of the df columns, then
#replace the numeric columns with their scaled values
training_features = df[feature_names].copy()
training_features[numeric_feature_names] = ss.transform(df[numeric_feature_names].astype(float))

#view updated featureset
training_features

  return self.partial_fit(X, y)
  


Unnamed: 0,OverallGrade,Obedient,ResearchScore,ProjectScore
0,A,Y,0.899583,1.37665
1,C,N,0.730648,-0.091777
2,F,N,-1.80339,-1.560203
3,B,Y,0.392776,0.772004
4,E,N,-1.465519,-0.998746
5,A,Y,0.967158,1.117516
6,B,Y,-0.114032,0.253735
7,C,Y,0.392776,-0.869179


In [7]:
#one-hot encode the categorical columns: each category value gets its own indicator column
training_features = pd.get_dummies(data=training_features,
                                   columns=categoricial_feature_names)
#view newly engineered features
training_features

Unnamed: 0,ResearchScore,ProjectScore,OverallGrade_A,OverallGrade_B,OverallGrade_C,OverallGrade_E,OverallGrade_F,Obedient_N,Obedient_Y
0,0.899583,1.37665,1,0,0,0,0,0,1
1,0.730648,-0.091777,0,0,1,0,0,1,0
2,-1.80339,-1.560203,0,0,0,0,1,1,0
3,0.392776,0.772004,0,1,0,0,0,0,1
4,-1.465519,-0.998746,0,0,0,1,0,1,0
5,0.967158,1.117516,1,0,0,0,0,0,1
6,-0.114032,0.253735,0,1,0,0,0,0,1
7,0.392776,-0.869179,0,0,1,0,0,0,1


# Modeling

In [8]:
from sklearn.linear_model import LogisticRegression
import numpy as np

#train a logistic regression with default settings on the engineered features,
#using the Recommend column as the target
lr = LogisticRegression()
model = lr.fit(X=training_features, y=np.array(outcome_labels['Recommend']))
#view model parameters
model



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

## Model Evaluation

In [9]:
#metrics used to evaluate model performance
from sklearn.metrics import accuracy_score, classification_report

#simple evaluation on training data: the model is scored on the same rows it was fitted on
actual_labels = np.array(outcome_labels['Recommend'])
pred_labels = model.predict(training_features)

#accuracy as a percentage, then the per-class precision/recall report
print('Accuracy:', float(accuracy_score(actual_labels, pred_labels))*100, '%')
print('Classification Stats:')
print(classification_report(actual_labels, pred_labels))

Accuracy: 100.0 %
Classification Stats:
              precision    recall  f1-score   support

          No       1.00      1.00      1.00         5
         Yes       1.00      1.00      1.00         3

   micro avg       1.00      1.00      1.00         8
   macro avg       1.00      1.00      1.00         8
weighted avg       1.00      1.00      1.00         8



## Model Deployment

In [10]:
#joblib is a standalone package; sklearn.externals.joblib is kept only as a
#fallback for old scikit-learn versions that still bundle it
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
import os
#save models to be deployed on your server
#exist_ok=True makes the directory creation safe to re-run
os.makedirs('Model', exist_ok=True)
os.makedirs('Scaler', exist_ok=True)

#persist the fitted model and the fitted scaler; both are needed at prediction time
joblib.dump(model, r'Model/model.pickle')
joblib.dump(ss, r'Scaler/scaler.pickle')

['Scaler/scaler.pickle']

# Prediction in Action

In [11]:
#load model and scaler objects from the files written by the deployment cell
#(joblib is the name imported in that cell)
#NOTE(review): joblib.load unpickles the file contents, so only load files you trust
model = joblib.load(r'Model/model.pickle')
scaler = joblib.load(r'Scaler/scaler.pickle')

In [12]:
#data retrieval
#two new student records; columns= fixes the column order of the resulting frame
new_data = pd.DataFrame(
    [
        {'Name': 'Nathan', 'OverallGrade': 'F', 'Obedient': 'N',
         'ResearchScore': 30, 'ProjectScore': 20},
        {'Name': 'Thomas', 'OverallGrade': 'A', 'Obedient': 'Y',
         'ResearchScore': 78, 'ProjectScore': 80},
    ],
    columns=['Name', 'OverallGrade', 'Obedient', 'ResearchScore', 'ProjectScore'],
)
new_data

Unnamed: 0,Name,OverallGrade,Obedient,ResearchScore,ProjectScore
0,Nathan,F,N,30,20
1,Thomas,A,Y,78,80


In [19]:
#data preparation
#copy the selected columns so the scaled values are written into an
#independent frame instead of a slice of new_data
prediction_features = new_data[feature_names].copy()

#scaling (scores are cast to float first so no implicit int->float conversion is needed)
prediction_features[numeric_feature_names] = scaler.transform(
    prediction_features[numeric_feature_names].astype(float))

#engineering categorical variables
#NOTE(review): get_dummies only creates columns for the categories present in
#new_data, so this frame has fewer columns than the training features and
#must be aligned with them before calling model.predict
prediction_features = pd.get_dummies(prediction_features, columns=categoricial_feature_names)

#view feature set
prediction_features

  """


Unnamed: 0,ResearchScore,ProjectScore,OverallGrade_A,OverallGrade_F,Obedient_N,Obedient_Y
0,-1.127647,-1.430636,0,1,1,0
1,0.494137,1.160705,1,0,0,1
