In [1]:
#Rescaling
from typing import Tuple
from scratch.linear_algebra import vector_mean
from scratch.statistics import standard_deviation

def scale(data:List[vector]) -> Tuple[Vector,Vector]:
    """returns the mean and standad deviation for each position"""
    dim=len(data[0])
    means=vector_mean(data)
    stdevs=[standard_deviation([vector[i] for vector in data])
           for i in range(dim)]
    return means, stdevs

vectors=[[-3,-1,1],[-1,0,1],[1,1,1]]
means,stdevs=scale(vectors)
assert means==[-1,0,1]
assert stdevs==[2,1,0]

ModuleNotFoundError: No module named 'scratch'

In [2]:
# correction
import numpy as np

def scale(data): #data is the input variable that the function receives when called.
    """
    Returns column-wise (feature-wise) mean and standard deviation.
    Equivalent to 'vector_mean' and 'standard_deviation' from scratch.
    """
    data = np.array(data, dtype=float)  # convert list of lists to numpy array
                                        # dtype=float Ensures that every element inside becomes a float (decimal) number — even if it was an integer before.
    means = np.mean(data, axis=0)       # mean for each column
                                        #axis=0 → Column-wise operation
                                        #axis=1 → Row-wise operation
                                        
    stdevs = np.std(data, axis=0)       # std dev for each column
    return means, stdevs


# Example data
vectors = [[-3, -1, 1],
           [-1,  0, 1],
           [ 1,  1, 1]]

means, stdevs = scale(vectors)

print("Means:", means)
print("Standard Deviations:", stdevs)

Means: [-1.  0.  1.]
Standard Deviations: [1.63299316 0.81649658 0.        ]


In [3]:
# create new dataset using above
def rescale(data:List[Vector]) -> List[Vector]:
    dim2=len(data[0])
    means,stdevs=scale(data)

    rescaled=[v[:] for v in data]
    for v in rescaled:
        for i in range(dim):
            if stdevs[i]>0:
                v[i]=(v[i]-means[i])/stdevs[i]
    return rescaled

mean,stdevs=scale(rescale(vectors))
assert means==[0,0,1]
assert means==[1,1,0]

NameError: name 'List' is not defined

In [4]:
#correction 
# ✅ Step 1: Import the required modules
from typing import List  # for type hinting
Vector = list  # or define properly as Vector = List[float]

# ✅ Step 2: Define your scale function (used later)
def scale(data: List[Vector]):
                                #In Python, you can hint the expected types of variables and function parameters using type annotations.
    # Suppose this returns mean and standard deviation for each column
    dim = len(data[0])
    means = [sum([v[i] for v in data]) / len(data) for i in range(dim)]
    stdevs = [
        (sum([(v[i] - means[i]) ** 2 for v in data]) / len(data)) ** 0.5
        for i in range(dim)
    ]
    return means, stdevs


# ✅ Step 3: Define rescale function
def rescale(data: List[Vector]) -> List[Vector]:
    dim = len(data[0])  # number of columns (features)
    means, stdevs = scale(data)

    # Make a copy so we don’t modify original data
    rescaled = [v[:] for v in data]

    for v in rescaled:
        for i in range(dim):
            if stdevs[i] > 0:  # avoid division by zero
                v[i] = (v[i] - means[i]) / stdevs[i] # normailise formula

    return rescaled

#v[:] means a shallow copy of the list v (so we don’t modify the original data directly).
#[v[:] for v in data] means “make a new list of copies of each vector.”
#dim = number of features (columns).


# ✅ Step 4: Example data
vectors = [
    [1.0, 2.0, 3.0],
    [2.0, 3.0, 4.0],
    [3.0, 4.0, 5.0],
]

# ✅ Step 5: Apply rescale
rescaled_data = rescale(vectors)
print(rescaled_data)

# ✅ Step 6: Check mean and standard deviation of rescaled data
means, stdevs = scale(rescaled_data)
print("Means:", means)
print("Standard Deviations:", stdevs)

[[-1.224744871391589, -1.224744871391589, -1.224744871391589], [0.0, 0.0, 0.0], [1.224744871391589, 1.224744871391589, 1.224744871391589]]
Means: [0.0, 0.0, 0.0]
Standard Deviations: [1.0, 1.0, 1.0]


In [6]:
# An aside:tqdm (its a python libarary)
pip install tqdm

SyntaxError: invalid syntax (2219868393.py, line 2)

In [7]:
!pip install tqdm



In [8]:
import tqdm
for i in tqdm.tqdm(range(100)):
    _ = [random.random() for _ in range(1000000)]

  0%|                                                                                          | 0/100 [00:00<?, ?it/s]


NameError: name 'random' is not defined

In [10]:
# correction
import tqdm #import the tqdm module (which provides progress bars).
import random

for i in tqdm.tqdm(range(100)):
    _ = [random.random() for _ in range(1000000)]

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:08<00:00, 12.49it/s]


In [15]:
# set the description of progress bar while its running
from typing import List
def primes_up_to(n:int) -> List[int]:
    primes=[2]
    with tqdm.trange(3,n) as t:
        for i in t:
            i_is_prime=not any(i%p==0 for p in primes)
            if i_is_prime:
                primes.append(i)
            t.set_description(f"{len(primes)} primes")
    return primes

my_primes=primes_up_to(100_000)

429 primes:   3%|█▊                                                             | 2837/99997 [00:01<00:51, 1883.71it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

1229 primes:  10%|██████                                                        | 9811/99997 [00:04<00:45, 1966.34it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

1903 primes:  16%|█████████▉                                                   | 16325/99997 [00:08<00:42, 1948.81it/s]IOPub m