In [44]:
import pandas as pd

In [45]:
raisin = pd.read_csv('Raisin_Dataset.csv', header=0)

In [46]:
def computeMean(array):
    numRows = len(array)
    numCols = len(array[0])
    
    colSums = [0] * numCols
    
    for row in array:
        for i in range(numCols):
            colSums[i] += row[i]
    
    means = [sum / numRows for sum in colSums]
    
    return means

array = [[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]]

datasetMean = computeMean(array)
print("Mean values:", datasetMean)

Mean values: [4.0, 5.0, 6.0]


In [47]:
def computeSampleCovariance(x, y):
    xLength = len(x)
    
    xMean = sum(x) / xLength
    yMean = sum(y) / xLength
    
    covariance = 0
    for i in range(xLength):
        covariance += (x[i] - xMean) * (y[i] - yMean)
    
    covariance /= (xLength - 1)
    
    return covariance

x = [1, 2, 3, 4, 5]
y = [5, 4, 3, 2, 1]

covariance = computeSampleCovariance(x, y)
print("Sample covariance:", covariance)

Sample covariance: -2.5


In [48]:
def computeCorrelation(x, y):
    xLength = len(x)
    
    xMean = sum(x) / xLength
    yMean = sum(y) / xLength
    
    covariance = 0
    for i in range(xLength):
        covariance += (x[i] - xMean) * (y[i] - yMean)
    covariance /= (xLength - 1)
    
    xStdDev = (sum((xi - xMean) ** 2 for xi in x) / (xLength - 1)) ** 0.5
    yStdDev = (sum((yi - yMean) ** 2 for yi in y) / (xLength - 1)) ** 0.5
    
    correlation = covariance / (xStdDev * yStdDev)
    
    return correlation

x = [1, 2, 3, 4, 5]
y = [5, 4, 3, 2, 1]

correlation = computeCorrelation(x, y)
print("Correlation:", correlation)

Correlation: -0.9999999999999998


In [49]:
def rangeNormalize(data):
    minValues = [min(col) for col in zip(*data)]
    maxValues = [max(col) for col in zip(*data)]
    
    normalizedData = []
    for row in data:
        normalizedRow = []
        for val, minVal, maxVal in zip(row, minValues, maxValues):
            normalizedVal = (val - minVal) / (maxVal - minVal)
            normalizedRow.append(normalizedVal)
        normalizedData.append(normalizedRow)
    
    return normalizedData

data = [[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]]

normalizedData = rangeNormalize(data)
print("Normalized data:")
for row in normalizedData:
    print(row)

Normalized data:
[0.0, 0.0, 0.0]
[0.5, 0.5, 0.5]
[1.0, 1.0, 1.0]


In [50]:
def standardNormalize(data):
    numRows = len(data)
    numCols = len(data[0])
    
    means = [sum(col) / numRows for col in zip(*data)]
    stdDevs = [(sum((val - means[i]) ** 2 for val in col) / numRows) ** 0.5 for i, col in enumerate(zip(*data))]
    
    normalizedData = []
    for row in data:
        normalizedRow = []
        for val, mean, stdDev in zip(row, means, stdDevs):
            normalizedVal = (val - mean) / stdDev if stdDev != 0 else val
            normalizedRow.append(normalizedVal)
        normalizedData.append(normalizedRow)
    
    return normalizedData

data = [[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]]

normalizedData = standardNormalize(data)
print("Normalized data:")
for row in normalizedData:
    print(row)


Normalized data:
[-1.2247448713915892, -1.2247448713915892, -1.2247448713915892]
[0.0, 0.0, 0.0]
[1.2247448713915892, 1.2247448713915892, 1.2247448713915892]


In [51]:
def computeCovarianceMatrix(array):
    numRows = len(array)
    numCols = len(array[0])
    
    means = [sum(col) / numRows for col in zip(*data)]
    
    covarianceMatrix = [[0] * numCols for _ in range(numCols)]
    
    for i in range(numCols):
        for j in range(numCols):
            covariance = 0
            for k in range(numRows):
                covariance += (array[k][i] - means[i]) * (array[k][j] - means[j])
            covariance /= (numRows - 1)
            covarianceMatrix[i][j] = covariance
    
    return covarianceMatrix

array = [[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]]

covarianceMatrix = computeCovarianceMatrix(array)
print("Covariance matrix:")
for row in covarianceMatrix:
    print(row)

Covariance matrix:
[9.0, 9.0, 9.0]
[9.0, 9.0, 9.0]
[9.0, 9.0, 9.0]


In [65]:
def labelEncode(array):
    uniqueCategories = set()
    
    # create set of unique values present in a column
    for item in array:
        uniqueCategories.add(item)
    
    # enumerate values in the set, assigning each value to an integer equivalent
    categoryToLabel = {category: i for i, category in enumerate(uniqueCategories)}
    
    # replace each column value in the dataset with its integer equivalent
    print(len(array))
    for i in range(len(array)):
        array[i] = categoryToLabel[item]

In [66]:
labelEncode(raisin['Class'])
raisin

900


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  array[i] = categoryToLabel[item]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  array[i] = categoryToLabel[item]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  array[i] = categoryToLabel[item]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  array[i] = categoryToLabel[item]
A value is trying to be set on a cop

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  array[i] = categoryToLabel[item]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  array[i] = categoryToLabel[item]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  array[i] = categoryToLabel[item]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  array[i] = categoryToLabel[item]
A value is trying to be set on a cop

Unnamed: 0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,Extent,Perimeter,Class
0,87524,442.246011,253.291155,0.819738,90546,0.758651,1184.040,0
1,75166,406.690687,243.032436,0.801805,78789,0.684130,1121.786,0
2,90856,442.267048,266.328318,0.798354,93717,0.637613,1208.575,0
3,45928,286.540559,208.760042,0.684989,47336,0.699599,844.162,0
4,79408,352.190770,290.827533,0.564011,81463,0.792772,1073.251,0
...,...,...,...,...,...,...,...,...
895,83248,430.077308,247.838695,0.817263,85839,0.668793,1129.072,0
896,87350,440.735698,259.293149,0.808629,90899,0.636476,1214.252,0
897,99657,431.706981,298.837323,0.721684,106264,0.741099,1292.828,0
898,93523,476.344094,254.176054,0.845739,97653,0.658798,1258.548,0
