## 1. Data Generation

In [1]:
import numpy as np
np.set_printoptions(precision=2)

# Data parameters: number of students (rows) and classes (columns), plus a
# randomly chosen target mean and standard deviation for every class.
n_student, n_class = 500, 5
means = np.random.randint(40, 60, size=n_class)
stds = np.random.uniform(5, 10, size=n_class)

# Sample the score matrix; `means` and `stds` broadcast across the rows, so
# column j is drawn from Normal(means[j], stds[j]).
scores = np.random.normal(means, stds, (n_student, n_class))

# Show the parameters that were chosen above ...
print(f'mean setting: {means}')
print(f'std setting: {stds}\n')

# ... and the per-class statistics actually observed in the sample.
print(f'means of data: {scores.mean(axis=0)}')
print(f'stds of data: {scores.std(axis=0)}')

mean setting: [56 58 50 49 58]
std setting: [5.16 6.97 6.51 7.18 9.6 ]

means of data: [56.04 58.51 50.11 49.35 58.06]
stds of data: [5.03 6.66 6.64 7.05 9.83]


In [2]:
def get_scores(n_student=500, n_class=5, seed=None):
    """Generate a random matrix of normally distributed exam scores.

    Every class (column) gets its own mean, an integer drawn uniformly from
    [40, 60), and its own standard deviation, drawn uniformly from [5, 10).
    Scores for that class are then sampled from the matching normal
    distribution.

    Args:
        n_student: Number of students, i.e. rows of the result.
        n_class: Number of classes, i.e. columns of the result.
        seed: Optional seed. When given, a private ``np.random.RandomState``
            is used so the result is reproducible and the global NumPy random
            state is left untouched. When ``None`` (default) the global
            ``np.random`` state is used.

    Returns:
        Float ndarray of shape ``(n_student, n_class)``.
    """
    # `np.random` (module) and `RandomState` expose the same randint/uniform/
    # normal API, so either can serve as the source of randomness.
    rng = np.random if seed is None else np.random.RandomState(seed)

    means = rng.randint(40, 60, (n_class, ))
    stds = rng.uniform(5, 10, (n_class, ))

    # loc/scale of shape (n_class,) broadcast over the n_student rows.
    scores = rng.normal(loc=means,
                        scale=stds,
                        size=(n_student, n_class))
    return scores

## 2. Min-Max Normalization

In [3]:
import numpy as np
from termcolor import colored
np.set_printoptions(precision=2)

# Draw a fresh score matrix for this section.
scores = get_scores()
print(f'scores.shape: {scores.shape}\n')

# Column-wise (per-class) minimum and maximum of the raw scores.
m, M = scores.min(axis=0), scores.max(axis=0)
print(colored('Before m-M normalization', 'green'))
print(f'Min: {m}, shape: {m.shape}')
print(f'Max : {M}, shape: {M.shape}\n')

# Min-max normalization: shift each column by its minimum and divide by its
# range; `m` and `M` broadcast across the rows of `scores`.
nm_normal = (scores - m) / (M - m)

# Recompute the extremes on the normalized data; `m` and `M` are reused and
# from here on describe `nm_normal`, not `scores`.
m, M = nm_normal.min(axis=0), nm_normal.max(axis=0)
print(colored('After m-M normalization', 'green'))
print(f'Min: {m}')
print(f'Max: {M}')

scores.shape: (500, 5)

[32mBefore m-M normalization[0m
Min: [24.31 36.35 27.18 19.48 28.77], shape: (5,)
Max : [68.86 85.52 68.78 59.57 79.35], shape: (5,)

[32mAfter m-M normalization[0m
Min: [0. 0. 0. 0. 0.]
Max: [1. 1. 1. 1. 1.]


## 3. Standard Deviation

In [4]:
import numpy as np
from termcolor import colored
np.set_printoptions(precision=2)

# get scores
scores = get_scores()
print(f'scores.shape: {scores.shape}\n')

# Variance and std computed by hand with element-wise operations, using the
# identity Var[X] = E[X^2] - (E[X])^2 along axis 0 (one value per class).
# NOTE: the results are deliberately NOT stored in a variable called `vars`;
# that name would shadow Python's builtin vars() for the rest of the session.
vars_manual = np.mean(scores**2, axis=0) - np.mean(scores, axis=0)**2
stds_manual = vars_manual**0.5
print(colored('with element-wise operations', 'green'))
print(f'vars: {vars_manual}')
print(f'stds: {stds_manual}\n')

# The same quantities from the NumPy APIs (population statistics, ddof=0,
# which is what the identity above computes as well).
variances = np.var(scores, axis=0)
stds = np.std(scores, axis=0)
print(colored('with APIs', 'green'))
print(f'vars: {variances}')
print(f'stds: {stds}')

# Sanity check: both routes must agree up to floating-point round-off.
assert np.allclose(vars_manual, variances)
assert np.allclose(stds_manual, stds)

scores.shape: (500, 5)

[32mwith element-wise operations[0m
vars: [41.23 46.07 76.75 37.17 99.2 ]
stds: [6.42 6.79 8.76 6.1  9.96]

[32mwith APIs[0m
vars: [41.23 46.07 76.75 37.17 99.2 ]
stds: [6.42 6.79 8.76 6.1  9.96]


## 4. Standardization

In [5]:
import numpy as np
from termcolor import colored
np.set_printoptions(precision=2)

# Draw a fresh score matrix for this section.
scores = get_scores()
print(f'scores.shape: {scores.shape}\n')

# Per-class mean and standard deviation of the raw scores.
means, stds = scores.mean(axis=0), scores.std(axis=0)
print(colored('Before Standardization', 'green'))
print(f'means: {means}, shape: {means.shape}')
print(f'stds: {stds}, shape: {stds.shape}\n')

# Standardization (z-score): centre every column on its mean and scale it by
# its standard deviation; `means` and `stds` broadcast across the rows.
standard = (scores - means) / stds

# Recompute the statistics on the standardized data; `means` and `stds` are
# reused and from here on describe `standard`, not `scores`.
means, stds = standard.mean(axis=0), standard.std(axis=0)
print(colored('After Standardization', 'green'))
# The means are only zero up to round-off, so they are rounded before printing.
print(f'means: {means.round(2)}')
print(f'stds: {stds}')

scores.shape: (500, 5)

[32mBefore Standardization[0m
means: [44.67 51.18 43.13 50.41 52.5 ], shape: (5,)
stds: [9.31 6.89 7.79 7.87 8.29], shape: (5,)

[32mAfter Standardization[0m
means: [ 0. -0. -0.  0. -0.]
stds: [1. 1. 1. 1. 1.]


## 5. Scores to Grades

In [6]:
import numpy as np
from termcolor import colored

# generate data: n_student random integer scores in [0, 100)
n_student = 10
scores = np.random.randint(0, 100, (n_student))
print(f'scores: {scores}, shape: {scores.shape}')

# set cutoffs
cutoffs = np.array([20, 40, 60, 80])
print(f'cutoffs: {cutoffs}, shape: {cutoffs.shape}\n')

# Scores => Grades(0~20:5, 20~40:4, 40~60:3, 60~80:2, 80~100:1)
# Reshape to a column (n_student, 1) and a row (1, n_cutoffs) so that the
# comparison below broadcasts to an (n_student, n_cutoffs) boolean table.
scores, cutoffs = scores.reshape((-1, 1)), cutoffs.reshape((1, -1))
print(colored('Reshaped data', 'green'))
print(f'scores: {scores}, shape: {scores.shape}')
print(f'cutoffs: {cutoffs}, shape: {cutoffs.shape}\n')

# The worst grade equals the number of bands (cutoffs + 1); every cutoff a
# score strictly exceeds improves the grade by one. Because the comparison is
# strict, a score exactly equal to a cutoff stays in the lower band
# (e.g. a score of 20 gets grade 5).
# Summing the boolean table counts the True entries directly, so no cast is
# needed (the former `.astype(np.int)` fails on NumPy >= 1.24, where the
# `np.int` alias was removed).
n_grades = cutoffs.size + 1
grades = n_grades - (scores > cutoffs).sum(axis=1)
print(f'grades:\n{grades}, shape: {grades.shape}')

scores: [48 84 92 28  2 14 21 17  3 20], shape: (10,)
cutoffs: [20 40 60 80], shape: (4,)

[32mReshaped data[0m
scores: [[48]
 [84]
 [92]
 [28]
 [ 2]
 [14]
 [21]
 [17]
 [ 3]
 [20]], shape: (10, 1)
cutoffs: [[20 40 60 80]], shape: (1, 4)

grades:
[3 1 1 4 5 5 4 5 5 5], shape: (10,)
