In [1]:
import csv
import numpy as np

# Step 1: Load the CSV file
with open('data.csv', mode='r') as file:
    reader = csv.reader(file)
    data = list(reader)

# Convert strings to floats, and filter out empty strings or invalid values
def floatit(column_index, data):
    result = []
    for row in data:
        try:
            value = float(row[column_index])
            result.append(value)
        except ValueError:
            # Skip rows with invalid or missing values
            pass
    return result

column_1 = floatit(0, data)
column_2 = floatit(1, data)
column_3 = floatit(2, data)



# Step 2: Define functions for standard deviation

# Method 1: NumPy std
def numpy_std(data):
    return np.std(data)


# Method 2: Manual calculation
def std_loops(x):
    xsum = 0
    xsquares = 0
    a = 0
    for i in x:
        xsum +=i
        xsquares += i ** 2
        a += 1
    xmean = xsum / a
    xmeansquares = xsquares / a
    variance = xmeansquares - (xmean ** 2)
    deviation = variance ** 0.5
    return deviation


# Method 3: Custom calculation using built-in functions
def std_builtin(x):
    xsum = sum(i for i in x)
    xsquares = sum(i ** 2 for i in x)
    xmean = xsum / len(x)
    xmeansquares = xsquares / len(x)
    variance = xmeansquares - xmean ** 2
    deviation = variance ** 0.5
    return deviation

# Step 3: Time the execution
# Assuming you're using Jupyter Notebook
%timeit numpy_std(column_1)
%timeit std_loops(column_1)
%timeit std_builtin(column_1)
print("----")
%timeit numpy_std(column_2)
%timeit std_loops(column_2)
%timeit std_builtin(column_2)
print("----")
%timeit numpy_std(column_3)
%timeit std_loops(column_3)
%timeit std_builtin(column_3)


20.8 μs ± 222 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
40.4 μs ± 3.2 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
43.6 μs ± 1.32 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
----
58.4 μs ± 1.21 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
391 μs ± 16 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
536 μs ± 213 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
----
664 μs ± 287 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
4.32 ms ± 970 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
4.31 ms ± 406 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
