In [19]:
from daal.data_management import AOSNumericTable
from daal.data_management import SOANumericTable
from daal.data_management import BlockDescriptor_Intc
from daal.data_management import BlockDescriptor
from daal.data_management import BlockDescriptor_Float64
from daal.data_management import readOnly
from daal.data_management import readWrite
from daal.data_management import data_feature_utils
from daal.data_management import HomogenNumericTable
from daal.data_management import NumericTableIface
from daal.data_management import MergedNumericTable
from daal.data_management import FileDataSource
from daal.data_management import StringDataSource
from daal.data_management import DataSourceIface
from daal.data_management import packed_mask


from daal.algorithms import kmeans 
from daal.algorithms.kmeans import (
    Batch_Float64LloydDense, init, data as d, inputCentroids, assignments, centroids, goalFunction
)


from daal.algorithms.linear_regression import training
from daal.algorithms.linear_regression import prediction

import daal.algorithms.normalization.zscore as zscore

from daal import step1Local
from daal import step2Master


import numpy as np

import io

import math

from daal.algorithms import covariance 



In [48]:
def printNumericTable(data_table, message='', num_printed_rows=0, num_printed_cols=0,
                      interval=10):
    """Print the values of a numeric table, optionally limited to a corner of it.

    Args:
        data_table: Table object providing getNumberOfRows(), getNumberOfColumns(),
            getDataLayout() and the block-of-rows accessors used below.
        message: Header line printed before the values.
        num_printed_rows: Maximum number of rows to print; 0 prints every row.
        num_printed_cols: Maximum number of columns to print; 0 prints every column.
        interval: Field width used for each printed value.
    """
    num_rows = data_table.getNumberOfRows()
    num_cols = data_table.getNumberOfColumns()
    layout = data_table.getDataLayout()

    # A limit of 0 means "no limit"; any other value is clamped to the table size.
    if num_printed_rows != 0:
        num_printed_rows = min(num_rows, num_printed_rows)
    else:
        num_printed_rows = num_rows

    if num_printed_cols != 0:
        num_printed_cols = min(num_cols, num_printed_cols)
    else:
        num_printed_cols = num_cols

    if isFull(layout) or layout == NumericTableIface.csrArray:
        block = BlockDescriptor()
        data_table.getBlockOfRows(0, num_rows, readOnly, block)
        try:
            printArray(block.getArray(), num_printed_cols, num_printed_rows,
                       num_cols, message, interval)
        finally:
            # Hand the block back even when printing fails part-way through.
            data_table.releaseBlockOfRows(block)
    else:
        # NOTE(review): isLower, isUpper, printLowerArray and printUpperArray are
        # not defined in the visible part of this notebook; this branch needs them
        # to be provided elsewhere before it can run.
        packed_table = data_table.getBlockOfRowsAsDouble(0, num_rows)
        if isLower(layout):
            printLowerArray(packed_table, num_printed_rows, message, interval)
        elif isUpper(layout):
            printUpperArray(packed_table, num_printed_cols, num_printed_rows,
                            num_cols, message, interval)
        
def isFull(layout):
    """Return True when `layout` shares no bits with `packed_mask`, else False."""
    return not (packed_mask & int(layout))


def printArray(array, num_printed_cols, num_printed_rows, num_cols, message,
               interval=10, flt64=True):
    """Print the top-left corner of a row-major array as left-aligned columns.

    Args:
        array: Array whose flattened form holds `num_cols` values per row.
        num_printed_cols: Number of leading columns to print from each row.
        num_printed_rows: Number of leading rows to print.
        num_cols: Row stride of the flattened array.
        message: Header line printed first.
        interval: Field width of each value.
        flt64: When true values are shown with 3 decimals, otherwise with none.
    """
    print(message)

    values = array.flatten()
    precision = '0'
    if flt64:
        precision = '3'
    cell_format = "{:<{width}.{dec}f}"

    for row in range(num_printed_rows):
        row_start = row * num_cols
        cells = [
            cell_format.format(values[row_start + col], width=interval, dec=precision)
            for col in range(num_printed_cols)
        ]
        print(''.join(cells))

    # Blank line separating this table from whatever is printed next.
    print()

    
def getNPArray(data_table):
    """Return all rows of a numeric table as a NumPy array owned by the caller.

    Args:
        data_table: Table object providing getNumberOfRows(), getBlockOfRows()
            and releaseBlockOfRows().

    Returns:
        A copy of the array exposed by the block descriptor for rows
        0..getNumberOfRows().
    """
    num_rows = data_table.getNumberOfRows()
    block = BlockDescriptor()
    data_table.getBlockOfRows(0, num_rows, readOnly, block)
    try:
        # Copy while the block is still held: the array handed out by the
        # descriptor may share memory with it (assumption - confirm against the
        # DAAL bindings), and `block` goes out of scope when this function returns.
        np_array = np.array(block.getArray(), copy=True)
    finally:
        # Always give the block back, even if reading it failed.
        data_table.releaseBlockOfRows(block)
    return np_array


In [47]:
# Load the comma-separated wine data; dtype=None lets genfromtxt infer a type
# for each column instead of forcing everything to float.
# NOTE(review): later cells iterate the fields of each row, so this presumably
# yields a structured (record) array - confirm against the file.
my_data = np.genfromtxt('./wine.csv', delimiter=',', dtype=None)

In [4]:
def toNPArray(dataset):
    """Convert a sequence of records into a 2-D NumPy array, one row per record.

    Args:
        dataset: Indexable, iterable collection of equally long records (for
            example a structured array or a list of tuples). Must be non-empty,
            because the column count is taken from the first record.

    Returns:
        A 2-D array with one row per record and one column per field.
    """
    num_columns = len(dataset[0])
    rows = [np.asarray(list(record)) for record in dataset]

    # Stack everything in one call instead of re-copying the accumulated array
    # for every record. The empty float block is kept as the first piece so the
    # result dtype is promoted exactly as when rows were appended one at a time.
    return np.vstack([np.empty([0, num_columns])] + rows)

# Turn the records loaded into `my_data` into a plain 2-D array.
converted_data = toNPArray(my_data)

# Show column 1 as a quick check of the conversion.
print(converted_data[:,1])

[ 7.495   8.0393  7.6858  6.9845  6.7772  8.0757  6.5188  8.4937  7.388
  6.7127  7.3094  6.2518  7.7443  6.8398  6.2435  6.3459  7.5883  7.1934
  6.2049  6.6367  6.2941  7.292   7.1211  6.2587  7.186 ]


In [11]:
def getDependentVariable(array, column_number):
    """Split a data set into explanatory variables and one dependent variable.

    Args:
        array: Collection of records accepted by `toNPArray`.
        column_number: Index of the column holding the dependent variable.

    Returns:
        A tuple `(other_variables_array, dependent_variable_array)` of 1-D
        structured arrays with one record per input row. The first has float64
        fields named f0, f1, ... for every column except `column_number`; the
        second has a single float64 field named "y".
    """
    converted_array = toNPArray(array)
    dependent_variable = converted_array[:, column_number]

    # Remove the column from the array built from *this* call's input rather
    # than from a notebook-level global, and delete by index so a negative
    # column_number removes the same column that was selected above.
    other_variables = np.delete(converted_array, column_number, axis=1)

    feature_dtype = np.dtype(
        [("f{}".format(i), "<f8") for i in range(other_variables.shape[1])]
    )

    # Build the record arrays directly; one tuple per row maps onto one record.
    other_variables_array = np.array(
        [tuple(row) for row in other_variables], dtype=feature_dtype
    )
    dependent_variable_array = np.array(
        [(value,) for value in dependent_variable], dtype=[("y", "<f8")]
    )

    return (other_variables_array, dependent_variable_array)



# Use column 1 of `my_data` as the dependent variable and keep the remaining
# columns as explanatory variables.
(variables,dependent) = getDependentVariable(my_data,1)

# Dump both structured arrays for inspection.
print(variables)
print(dependent)



[(1952.0, 600.0, 17.1167, 160.0, 31.0, 43183.569)
 (1953.0, 690.0, 16.7333, 80.0, 30.0, 43495.03)
 (1955.0, 502.0, 17.15, 130.0, 28.0, 44217.857)
 (1957.0, 420.0, 16.1333, 110.0, 26.0, 45152.252)
 (1958.0, 582.0, 16.4167, 187.0, 25.0, 45653.805)
 (1959.0, 485.0, 17.4833, 187.0, 24.0, 46128.638)
 (1960.0, 763.0, 16.4167, 290.0, 23.0, 46583.995)
 (1961.0, 830.0, 17.3333, 38.0, 22.0, 47128.005)
 (1962.0, 697.0, 16.3, 52.0, 21.0, 48088.673)
 (1963.0, 608.0, 15.7167, 155.0, 20.0, 48798.99)
 (1964.0, 402.0, 17.2667, 96.0, 19.0, 49356.943)
 (1965.0, 602.0, 15.3667, 267.0, 18.0, 49801.821)
 (1966.0, 819.0, 16.5333, 86.0, 17.0, 50254.966)
 (1967.0, 714.0, 16.2333, 118.0, 16.0, 50650.406)
 (1968.0, 610.0, 16.2, 292.0, 15.0, 51034.413)
 (1969.0, 575.0, 16.55, 244.0, 14.0, 51470.276)
 (1970.0, 622.0, 16.6667, 89.0, 13.0, 51918.389)
 (1971.0, 551.0, 16.7667, 112.0, 12.0, 52431.647)
 (1972.0, 536.0, 14.9833, 158.0, 11.0, 52894.183)
 (1973.0, 376.0, 17.0667, 123.0, 10.0, 53332.805)
 (1974.0, 574.0, 1

In [13]:
# Wrap the two structured arrays in array-of-structures numeric tables and
# print each one.
daal_variables = AOSNumericTable(variables)
daal_dependent = AOSNumericTable(dependent)
printNumericTable(daal_variables)
printNumericTable(daal_dependent)
#data_table = AOSNumericTable(my_data)


# Combine both tables into a single merged table (explanatory variables added
# first, dependent variable second) and print the result.
dataTable = MergedNumericTable()
dataTable.addNumericTable(daal_variables)
dataTable.addNumericTable(daal_dependent)

printNumericTable(dataTable)




1952.000  600.000   17.117    160.000   31.000    43183.569 
1953.000  690.000   16.733    80.000    30.000    43495.030 
1955.000  502.000   17.150    130.000   28.000    44217.857 
1957.000  420.000   16.133    110.000   26.000    45152.252 
1958.000  582.000   16.417    187.000   25.000    45653.805 
1959.000  485.000   17.483    187.000   24.000    46128.638 
1960.000  763.000   16.417    290.000   23.000    46583.995 
1961.000  830.000   17.333    38.000    22.000    47128.005 
1962.000  697.000   16.300    52.000    21.000    48088.673 
1963.000  608.000   15.717    155.000   20.000    48798.990 
1964.000  402.000   17.267    96.000    19.000    49356.943 
1965.000  602.000   15.367    267.000   18.000    49801.821 
1966.000  819.000   16.533    86.000    17.000    50254.966 
1967.000  714.000   16.233    118.000   16.000    50650.406 
1968.000  610.000   16.200    292.000   15.000    51034.413 
1969.000  575.000   16.550    244.000   14.000    51470.276 
1970.000  622.000   16.

In [8]:
# Small hand-written 10 x 11 matrix used to try out HomogenNumericTable.
data = np.array([(0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
                 (1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2),
                 (2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3),
                 (3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4),
                 (4.0, 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5),
                 (5.0, 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.7, 5.8, 5.9, 1),
                 (6.0, 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9, 2),
                 (7.0, 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 3),
                 (8.0, 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8, 8.9, 4),
                 (9.0, 9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 9.7, 9.8, 9.9, 5),])
print(data)
dataTable = HomogenNumericTable(data)
printNumericTable(dataTable)

# Re-read the wine data without dtype=None, then remove column 1 to obtain the
# explanatory variables.
my_other_data = np.genfromtxt('./wine.csv', delimiter=',')
print(my_other_data)
variables = np.delete(my_other_data, np.s_[1:2:1], 1)
variables_table = HomogenNumericTable(variables)

# Column 1 becomes the dependent variable, reshaped into an (n, 1) column by
# wrapping every value in its own one-element list.
dependent = [[item] for item in my_other_data[:,1]]
print(np.array(dependent))
dependent_variable_table = HomogenNumericTable(np.array(dependent))
printNumericTable(variables_table)


# Merged view of both tables; it is only printed here, the training below is
# fed the two separate tables.
full_table = MergedNumericTable()
full_table.addNumericTable(variables_table)
full_table.addNumericTable(dependent_variable_table)

printNumericTable(full_table)


# Train a linear regression model on the explanatory variables and the
# dependent column.
algorithm = training.Batch_Float64NormEqDense()

algorithm.input.set(training.data, variables_table)
algorithm.input.set(training.dependentVariables, dependent_variable_table)

# Run the training and print the beta table of the resulting model.
trainingResult = algorithm.compute()
printNumericTable(trainingResult.get(training.model).getBeta(),
                  "Linear Regression coefficients:")






[[ 0.   0.1  0.2  0.3  0.4  0.5  0.6  0.7  0.8  0.9  1. ]
 [ 1.   1.1  1.2  1.3  1.4  1.5  1.6  1.7  1.8  1.9  2. ]
 [ 2.   2.1  2.2  2.3  2.4  2.5  2.6  2.7  2.8  2.9  3. ]
 [ 3.   3.1  3.2  3.3  3.4  3.5  3.6  3.7  3.8  3.9  4. ]
 [ 4.   4.1  4.2  4.3  4.4  4.5  4.6  4.7  4.8  4.9  5. ]
 [ 5.   5.1  5.2  5.3  5.4  5.5  5.6  5.7  5.8  5.9  1. ]
 [ 6.   6.1  6.2  6.3  6.4  6.5  6.6  6.7  6.8  6.9  2. ]
 [ 7.   7.1  7.2  7.3  7.4  7.5  7.6  7.7  7.8  7.9  3. ]
 [ 8.   8.1  8.2  8.3  8.4  8.5  8.6  8.7  8.8  8.9  4. ]
 [ 9.   9.1  9.2  9.3  9.4  9.5  9.6  9.7  9.8  9.9  5. ]]

0.000     0.100     0.200     0.300     0.400     0.500     0.600     0.700     0.800     0.900     1.000     
1.000     1.100     1.200     1.300     1.400     1.500     1.600     1.700     1.800     1.900     2.000     
2.000     2.100     2.200     2.300     2.400     2.500     2.600     2.700     2.800     2.900     3.000     
3.000     3.100     3.200     3.300     3.400     3.500     3.600     3.700     3.800

In [49]:
# Number of clusters to look for and the iteration count given to the
# clustering algorithm.
n_clusters = 3
n_iterations = 10

clustering_data = np.genfromtxt('./iris.csv', delimiter=',')

# Keep the first four columns as features; everything from column 4 onward is
# dropped.
variables = np.delete(clustering_data, np.s_[4::1], 1)

print(variables.shape)

data_table = HomogenNumericTable(variables)

# Pick the starting centroids. The initialization stage is addressed through
# the `init` module's own identifiers rather than the clustering-stage ones.
initAlg = init.Batch_Float64RandomDense(n_clusters)

initAlg.input.set(init.data, data_table)

res = initAlg.compute()
init_centroids = res.get(init.centroids)

# Create an algorithm object for the K-Means algorithm
algorithm = Batch_Float64LloydDense(n_clusters, n_iterations)

algorithm.input.set(d, data_table)
algorithm.input.set(inputCentroids, init_centroids)

res = algorithm.compute()

# Show the assignments once as a raw NumPy array and once as a formatted table.
print(getNPArray(res.get(kmeans.assignments)))

printNumericTable(res.get(kmeans.assignments), "Cluster assignments:")
printNumericTable(res.get(kmeans.centroids), "First 3 dimensions of centroids:", 20, 3)
printNumericTable(res.get(kmeans.goalFunction), "Goal function value:")



(150, 4)
[[ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 1.]
 [ 2.]
 [ 0.]
 [ 2.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 2.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 2.]
 [ 0.]
 [ 2.]
 [ 2.]
 [ 2.]
 [ 2.]
 [ 0.]
 [ 2.]
 [ 2.]
 [ 2.]
 [ 2.]
 [ 2.]
 [ 2.]
 [ 0.]
 [ 0.]
 [ 2.]
 [ 2.]
 [ 2.]
 [ 2.]
 [ 0.]
 [ 2.]
 [ 0.]
 [ 2.]
 [ 0.]
 [ 2.]
 [ 2.]
 [ 0.]
 [ 0.]
 [ 2.]
 [ 2.]
 [ 2.]
 [ 2.]
 [ 2.]
 [ 0.]
 [ 2.]
 [ 2.]
 [ 2.]
 [ 2.]
 [ 0.]
 [ 2.]
 [ 2.]
 [ 2