In [1]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
data = load_iris()
X = data.data[:10]
scaler = StandardScaler()
scaler.fit(X)
print("Mean :", scaler.mean_)
print("Variance :", scaler.var_)
X_std = scaler.transform(X)

Mean : [4.86 3.31 1.45 0.22]
Variance : [0.0764 0.0849 0.0105 0.0056]


# Problem 1

In the previous assignment, I have used a lot of Python class in all the library. In <code>panda</code>, <code>Series</code> and <code>Dataframe</code> are two of the classes used to create panda objects. In <code>matplotlib</code>, there is class named <code>BoundaryNorm</code> in <code>colors</code> module which is generate a colormap index based on discrete intervals. In <code>scikit-learn</code>, <code>model_selection</code> module has a class named <code>KFold</code> to create a K-Folds cross-validator.

# Problem 2

In my solution to all the before assignments, I have used a lot of built-in class of several modules and libraries. Below are few of methods and variables I have used so far:
 - In a <code>Dataframe</code> object:
     - <code>head()</code>: view n-first row
     - <code>drop()</code>: drop specified labels from rows or columns
     - <code>corr()</code>: compute pairwise correlation of columns, excluding null values.
     - <code>count()</code>: count non-null cells for each column or row.
     - <code>describe()</code>: generate descriptive statistics.
 - In a <code>Series</code> object, there are some instance variables:
     - <code>index</code>: the index (axis labels) of the <code>Series</code>.
     - <code>dtype</code>: Return the dtype object of the underlying data.
     - <code>shape</code>: Return a tuple of the shape of the underlying data.
     - <code>name</code>: Return the name of the </code>Series</code>.
     - <code>size</code>: Return the number of elements in the underlying data.

In [2]:
X_std = scaler.transform(X)

# Problem 3

In [3]:
class ScratchStandardScaler():
    """
    標準化のためのクラス
    Attributes
    ----------
    mean_ : 次の形のndarray, shape(n_features,)
        平均
    var_ : 次の形のndarray, shape(n_features,)
        分散
    """
    def fit(self, X):
        """
        標準化のために平均と標準偏差を計算する。
        Parameters
        ----------
        X : 次の形のndarray, shape (n_samples, n_features)
            訓練データ
        """
        self.mean_ = np.mean(X, axis=0, dtype=np.float32)
        self.var_ = np.var(X,axis=0,dtype=np.float32)
    def transform(self, X):
        """
        fitで求めた値を使い標準化を行う。
        Parameters
        ----------
        X : 次の形のndarray, shape (n_samples, n_features)
            特徴量
        Returns
        ----------
        X_scaled : 次の形のndarray, shape (n_samples, n_features)
            標準化された特緒量
        """
        X_scaled = (X - self.mean_) / np.sqrt(self.var_)
        return X_scaled

In [4]:
# Driver code
import numpy as np
from sklearn.datasets import load_iris
data = load_iris()
X = data.data[:10]
scratch_scaler = ScratchStandardScaler()
scratch_scaler.fit(X)
print("Mean : {}".format(scratch_scaler.mean_))
print("Variance : {}".format(scratch_scaler.var_))
X_std = scratch_scaler.transform(X)
print(X_std)

Mean : [4.86      3.31      1.4499999 0.22     ]
Variance : [0.0764     0.08490001 0.0105     0.0056    ]
[[ 0.86828909  0.65207849 -0.48794936 -0.26726123]
 [ 0.14471445 -1.06391702 -0.48794936 -0.26726123]
 [-0.5788602  -0.37751882 -1.46384947 -0.26726123]
 [-0.94064752 -0.72071792  0.48795075 -0.26726123]
 [ 0.50650177  0.99527759 -0.48794936 -0.26726123]
 [ 1.95365106  2.02487489  2.43975097  2.40535121]
 [-0.94064752  0.30887939 -0.48794936  1.06904499]
 [ 0.50650177  0.30887939  0.48795075 -0.26726123]
 [-1.66422217 -1.40711612 -0.48794936 -0.26726123]
 [ 0.14471445 -0.72071792  0.48795075 -1.60356745]]


# Problem 4

In [15]:
class ExampleClass():
    """
    説明用の簡単なクラス
    Parameters
    ----------
    value : float or int
        初期値
    Attributes
    ----------
    value : float or int
        計算結果
    """
    def __init__(self, value):
        if isinstance(value,int) or isinstance(value,float):
            self.value = value
            print("Value has been set: {}".format(self.value))
        else:
            raise ValueError('Input should be a numeric value !')
    def add(self, value2):
        """
        受け取った引数をself.valueに加える
        """
        self.value += value2
    
    def sub(self, value2):
        """
        Subtract a value from self.value
        """
        self.value -= value2
        
    def mul(self, value2):
        """
        Multiply self.value with value2
        """
        self.value *= value2
        
    def div(self, value2):
        """
        Divide self.value with value2
        """
        self.value /= value2
example = ExampleClass(1.233)
print("value : {}".format(example.value))
example.add(3)
print("value after adding 3: {}".format(example.value))
example.sub(4)
print("value after subtracting 4: {}".format(example.value))
example.mul(2)
print("value after multiplying 2: {}".format(example.value))
example.div(5)
print("value after dividing 5: {}".format(example.value))

Value has been set: 1.233
value : 1.233
value after adding 3: 4.2330000000000005
value after subtracting 4: 0.23300000000000054
value after multiplying 2: 0.4660000000000011
value after dividing 5: 0.09320000000000021


In [16]:
example1 = ExampleClass('s')

ValueError: Input should be a numeric value !