In [1]:
# imports: scikit-learn preprocessing transformers, pandas CSV reader, numpy print options
from sklearn.preprocessing import Binarizer
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from pandas import read_csv
from numpy import set_printoptions

In [2]:
# Load the diabetes CSV into a DataFrame; `names` supplies the column labels.
filename = 'pima-indians-diabetes.data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(filename, names=names)
# Plain NumPy view of the data for the scikit-learn cells below.
# to_numpy() is the accessor pandas recommends over the legacy `.values` attribute.
array = dataframe.to_numpy()

In [3]:
# Show the loaded frame followed by the number of rows in each 'class' group.
print(
    dataframe,
    dataframe.groupby('class').size(),
    sep='\n',
)

     preg  plas  pres  skin  test  mass   pedi  age  class
0       6   148    72    35     0  33.6  0.627   50      1
1       1    85    66    29     0  26.6  0.351   31      0
2       8   183    64     0     0  23.3  0.672   32      1
3       1    89    66    23    94  28.1  0.167   21      0
4       0   137    40    35   168  43.1  2.288   33      1
..    ...   ...   ...   ...   ...   ...    ...  ...    ...
763    10   101    76    48   180  32.9  0.171   63      0
764     2   122    70    27     0  36.8  0.340   27      0
765     5   121    72    23   112  26.2  0.245   30      0
766     1   126    60     0     0  30.1  0.349   47      1
767     1    93    70    31     0  30.4  0.315   23      0

[768 rows x 9 columns]
class
0    500
1    268
dtype: int64


In [4]:
# Rows per 'class' group within three filtered subsets of the data.
print(
    # age above 40
    dataframe.loc[dataframe['age'] > 40].groupby('class').size(),
    # more than 3 in the 'preg' column
    dataframe.loc[dataframe['preg'] > 3].groupby('class').size(),
    # both: 'preg' above 3 and age below 30
    dataframe.loc[(dataframe['preg'] > 3) & (dataframe['age'] < 30)].groupby('class').size(),
    sep='\n',
)

class
0     92
1    102
dtype: int64
class
0    189
1    155
dtype: int64
class
0    48
1    18
dtype: int64


In [5]:
# separate array into input and output components
X = array[:,0:8]
Y = array[:,8]
binarizer = Binarizer(threshold=50.0).fit(X)
binaryX = binarizer.transform(X)
# summarize transformed data
set_printoptions(precision=3)
print(binaryX[0:5,:])

[[0. 1. 1. 0. 0. 0. 0. 0.]
 [0. 1. 1. 0. 0. 0. 0. 0.]
 [0. 1. 1. 0. 0. 0. 0. 0.]
 [0. 1. 1. 0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 1. 0. 0. 0.]]


In [6]:
# Inputs are the first eight columns, the output is the ninth.
X, Y = array[:, :8], array[:, 8]

# Normalizer with its default settings, fitted on and applied to the inputs.
scaler = Normalizer()
normalizedX = scaler.fit(X).transform(X)

# Show the first five transformed rows with three-decimal printing.
set_printoptions(precision=3)
print(normalizedX[:5])

[[0.034 0.828 0.403 0.196 0.    0.188 0.004 0.28 ]
 [0.008 0.716 0.556 0.244 0.    0.224 0.003 0.261]
 [0.04  0.924 0.323 0.    0.    0.118 0.003 0.162]
 [0.007 0.588 0.436 0.152 0.622 0.186 0.001 0.139]
 [0.    0.596 0.174 0.152 0.731 0.188 0.01  0.144]]


In [7]:
# Inputs are the first eight columns, the output is the ninth.
X, Y = array[:, :8], array[:, 8]

# Rescale every input column into the range [10, 20].
scaler = MinMaxScaler(feature_range=(10, 20)).fit(X)
rescaledX = scaler.transform(X)

# Show the first five transformed rows with three-decimal printing.
set_printoptions(precision=3)
print(rescaledX[:5])

[[13.529 17.437 15.902 13.535 10.    15.007 12.344 14.833]
 [10.588 14.271 15.41  12.929 10.    13.964 11.166 11.667]
 [14.706 19.196 15.246 10.    10.    13.472 12.536 11.833]
 [10.588 14.472 15.41  12.323 11.111 14.188 10.38  10.   ]
 [10.    16.884 13.279 13.535 11.986 16.423 19.436 12.   ]]


In [8]:
# Inputs are the first eight columns, the output is the ninth.
X, Y = array[:, :8], array[:, 8]

# Standardize the input columns with StandardScaler's default settings.
scaler = StandardScaler()
rescaledX = scaler.fit_transform(X)

# Show rows 30 through 49 of the transformed data with three-decimal printing.
set_printoptions(precision=3)
print(rescaledX[30:50])

[[ 0.343 -0.372  0.305  0.343 -0.693  0.509  0.224  2.277]
 [-0.251  1.161  0.356  0.97   1.434 -0.05   1.145 -0.446]
 [-0.251 -1.03  -0.574 -0.598 -0.224 -0.913 -0.619 -0.956]
 [ 0.64  -0.904  1.184 -1.288 -0.693 -1.535 -0.857 -0.446]
 [ 1.828  0.035  0.46   0.656 -0.693 -0.558  0.121  1.001]
 [ 0.046 -0.56  -0.471  0.782  0.974 -1.014  1.492 -0.02 ]
 [ 2.125  0.535  0.356 -1.288 -0.693  0.153 -0.157  0.15 ]
 [ 1.531 -0.591  0.356  1.033 -0.693  0.115  0.583  1.086]
 [-0.548 -0.967 -0.057  1.346 -0.693  0.788  0.094 -0.531]
 [ 0.046 -0.31   0.15   1.66   1.104  0.648  2.773  1.937]
 [-0.251  1.85  -0.264  0.28  -0.085  0.255 -0.607 -0.616]
 [ 0.937  0.379  0.77  -1.288 -0.693  1.042  0.677  0.32 ]
 [ 0.937 -0.466  1.184 -0.159 -0.693 -1.179 -0.715  1.256]
 [ 1.531  1.568  2.114  0.217  1.391  1.702  0.752  1.766]
 [ 0.937  1.193 -0.264 -1.288 -0.693 -0.583 -0.537  0.575]
 [-1.142  1.85  -0.161  1.158 -0.693  1.27   4.292 -0.701]
 [-0.845  0.786 -0.678 -1.288 -0.693 -0.291  0.278 -0.36

In [11]:
# Skewness of every column of the loaded frame (the 'class' label column is included).
dataframe.skew()

preg     0.901674
plas     0.173754
pres    -1.843608
skin     0.109372
test     2.272251
mass    -0.428982
pedi     1.919911
age      1.129597
class    0.635017
dtype: float64

In [12]:
# Kurtosis of every column of the loaded frame (the 'class' label column is included).
dataframe.kurt()

preg     0.159220
plas     0.640780
pres     5.180157
skin    -0.520072
test     7.214260
mass     3.290443
pedi     5.594954
age      0.643159
class   -1.600930
dtype: float64

In [51]:
# Inputs are the first eight columns, the output is the ninth.
X, Y = array[:, :8], array[:, 8]

# Same min-max rescaling as before, this time into the range [30, 50].
scaler = MinMaxScaler(feature_range=(30, 50)).fit(X)
rescaledX = scaler.transform(X)

# Show the first five transformed rows with three-decimal printing.
set_printoptions(precision=3)
print(rescaledX[:5])

[[37.059 44.874 41.803 37.071 30.    40.015 34.688 39.667]
 [31.176 38.543 40.82  35.859 30.    37.928 32.331 33.333]
 [39.412 48.392 40.492 30.    30.    36.945 35.073 33.667]
 [31.176 38.945 40.82  34.646 32.222 38.376 30.76  30.   ]
 [30.    43.769 36.557 37.071 33.972 42.846 48.873 34.   ]]


In [10]:
import pandas as pd
import numpy as np


def _report_row_kurtosis(rows):
    """Build a DataFrame from ``rows`` and print it with its row-wise kurtosis.

    Parameters
    ----------
    rows : sequence of sequences
        One inner sequence per DataFrame row.

    Returns
    -------
    tuple
        ``(frame, row_kurtosis)``: the constructed DataFrame and the Series
        returned by ``frame.kurt(axis=1)``.
    """
    frame = pd.DataFrame(data=rows)
    row_kurtosis = frame.kurt(axis=1)
    print("Data:")
    print(frame)
    print("Kurtosis:")
    print(row_kurtosis)
    return frame, row_kurtosis


# First example: two complete rows of eight values each.
dataMatrix = [
    (65, 75, 74, 73, 95, 76, 62, 100),
    (101, 102, 103, 107, 157, 160, 191, 192),
]
dataFrame, kurt = _report_row_kurtosis(dataMatrix)

# Second example: two rows of eleven values; the second row ends with a
# missing entry (None), which the printed frame shows as NaN.
dataMatrix = [
    (70, 90, 90, 100, 120, 120, 100, 121, 125, 115, 112),
    (58.22, 39.33, -30.44, 36.77, 20.80, -73.95, -39.99, 91.03, -138.01, -20, None),
]
dataFrame, kurt = _report_row_kurtosis(dataMatrix)

Data:
     0    1    2    3    4    5    6    7
0   65   75   74   73   95   76   62  100
1  101  102  103  107  157  160  191  192
Kurtosis:
0   -0.246357
1   -2.044655
dtype: float64
Data:
       0      1      2       3      4       5       6       7       8    9  \
0  70.00  90.00  90.00  100.00  120.0  120.00  100.00  121.00  125.00  115   
1  58.22  39.33 -30.44   36.77   20.8  -73.95  -39.99   91.03 -138.01  -20   

      10  
0  112.0  
1    NaN  
Kurtosis:
0    0.057451
1    0.067184
dtype: float64
