In [None]:
''' Standardization is a rescaling technique that centers the distribution of each column on
    a mean of 0 and rescales it so that its standard deviation is 1.
    
    Together, the mean and the standard deviation can be used to summarize a normal distribution,
    also called the Gaussian distribution or bell curve.
    
    It requires that the mean and standard deviation of the values for each column be known
    prior to scaling.
    
    mean = sum(values) / count(values)
    variance = sum((values[i] - mean)**2 for every i) / (count(values) - 1)
    standard_deviation = sqrt(variance)
    standardized_value[i] = (values[i] - mean) / standard_deviation
'''

In [37]:
# Building load csv file function
from csv import reader
def load_csv(filename):
    """Read a CSV file and return its non-empty rows.

    Parameters
    ----------
    filename : str
        Path of the CSV file to read.

    Returns
    -------
    list of list of str
        One list per non-empty row, in file order. Fields are returned as the
        strings produced by ``csv.reader``; no type conversion is done here.

    Raises
    ------
    IOError / OSError
        If the file cannot be opened.
    """
    # The context manager closes the handle as soon as reading finishes (or
    # fails); previously the file was left open until garbage collection.
    with open(filename,'r') as open_file:
        # csv.reader yields an empty list for a blank line; those are skipped.
        return [row for row in reader(open_file) if row]

In [38]:
# Building convert string value to float value
def convert_str_to_float(dataset,column):
    """Convert one column of every row to ``float``, modifying the rows in place.

    dataset -- iterable of mutable rows (lists)
    column  -- index of the field to convert in each row

    Returns None. ``float()`` raises ValueError on a field it cannot parse.
    """
    for record in dataset:
        raw_value = record[column]
        record[column] = float(raw_value)

In [39]:
# Building mean function 
def data_mean(dataset):
    """Return the arithmetic mean of every column as a list of floats.

    dataset -- list of equally long rows holding numeric values

    The number of columns is taken from the first row.
    """
    row_count = float(len(dataset))
    averages = []
    for col in range(len(dataset[0])):
        # Add up this column across all rows, then divide by the row count.
        column_total = sum(record[col] for record in dataset)
        averages.append(column_total / row_count)
    return averages

In [44]:
# Building Standard Deviation
from math import sqrt
def std_deviation(dataset,mean):
    """Return the sample standard deviation of every column.

    dataset -- list of equally long rows holding numeric values
    mean    -- per-column means, indexed like the columns of ``dataset``

    The squared deviations of each column are summed first; each sum is then
    divided by (number of rows - 1) and square-rooted.
    """
    squared_sums = []
    for col in range(len(dataset[0])):
        center = mean[col]
        squared_sums.append(sum((record[col] - center) ** 2 for record in dataset))
    # Sample (n - 1) divisor rather than the population (n) divisor.
    degrees_of_freedom = float(len(dataset)) - 1
    return [sqrt(total / degrees_of_freedom) for total in squared_sums]

In [45]:
# Standardization scale
def standardization_scale(dataset,mean,std_dev):
    """Standardize every value of ``dataset`` in place: (value - mean) / std_dev.

    dataset -- iterable of mutable rows (lists) holding numeric values
    mean    -- per-column means, indexed like the columns of each row
    std_dev -- per-column standard deviations, indexed the same way

    A column whose standard deviation is 0 (all values identical) has no
    spread to rescale. Such a column is only centred (divided by 1) instead
    of raising ZeroDivisionError, so a constant column becomes all zeros.

    Returns None; the rows of ``dataset`` are overwritten.
    """
    for row in dataset:
        for i in range(len(row)):
            # Fall back to a unit scale for zero-variance columns.
            scale = std_dev[i] if std_dev[i] != 0 else 1.0
            row[i] = (row[i] - mean[i]) / scale

In [46]:
# Load file
# Driver cell: load the CSV, convert it to floats, compute per-column statistics,
# then standardize the data in place. dataset[0] is printed after each stage so
# the effect on the first row can be compared.
# load_csv returns every non-empty row; fields are still strings at this point.
filename='pima-indians-diabetes.csv'
dataset=load_csv(filename)
print(dataset[0])
# Convert one column at a time; each call rewrites that column in every row.
for column in range(len(dataset[0])):
    convert_str_to_float(dataset,column)
print(dataset[0])
# Per-column mean over all rows.
mean=data_mean(dataset)
print("mean:",mean)
# Per-column sample standard deviation (n - 1 divisor), computed from the means above.
std_dev=std_deviation(dataset,mean)
print("standard deviation:",std_dev)
# Overwrites every value in dataset with (value - mean) / std_dev; the float
# values printed above are no longer available afterwards.
# NOTE(review): every column is rescaled, including the last one, which looks
# like a 0/1 class label — confirm whether it should be excluded.
standardization_scale(dataset,mean,std_dev)
print(dataset[0])

['6', '148', '72', '35', '0', '33.6', '0.627', '50', '1']
[6.0, 148.0, 72.0, 35.0, 0.0, 33.6, 0.627, 50.0, 1.0]
('mean:', [3.8450520833333335, 120.89453125, 69.10546875, 20.536458333333332, 79.79947916666667, 31.992578124999977, 0.4718763020833327, 33.240885416666664, 0.3489583333333333])
('standard deviation:', [3.3695780626988623, 31.97261819513622, 19.355807170644777, 15.952217567727677, 115.24400235133837, 7.8841603203754405, 0.33132859501277484, 11.76023154067868, 0.4769513772427971])
[0.6395304921176576, 0.8477713205896718, 0.14954329852954296, 0.9066790623472505, -0.692439324724129, 0.2038799072674717, 0.468186870229798, 1.4250667195933604, 1.3650063669598067]
