# Badanie metod skalowania

Skalowanie funkcji przystosowania pozwala uniknąć niekorzystnych zjawisk występujących w algorytmach genetycznych.
W tym zeszycie postaram się zbadać wpływ różnych metod skalowania na wartośći funkcji ewaluacyjnej

In [2]:
import numpy as np
import pandas as pd
from typing import Tuple, List

## Skalowania

Implementacja wykorzystywanych metod skalowania

### Skalowanie liniowe

In [13]:
def calculate_linear_scaling_parameters(data: pd.Series, mulitiplication_parameter: float) -> Tuple[float, float]:
    eps = 0.00000000000001
    smallest_evaluation = data.min()
    biggest_evaluation = data.max()
    mean = data.mean()
    
    is_standard = smallest_evaluation > (mulitiplication_parameter * mean - biggest_evaluation) / (mulitiplication_parameter - 1.0)
    if is_standard:
        divisor = biggest_evaluation - mean + eps
        a = ((mulitiplication_parameter - 1.0) * mean) / divisor
        b = mean * (biggest_evaluation - mulitiplication_parameter * mean) / divisor
        return a, b
    else:
        divisor = mean - smallest_evaluation + eps
        a = mean / divisor
        b = -smallest_evaluation * mean / divisor
        return a, b

In [14]:
def linear_scaling(data: pd.Series, mulitiplication_parameter: float) -> pd.Series:
    a, b = calculate_linear_scaling_parameters(data, mulitiplication_parameter)
    return data.apply(lambda x: a*x + b)

In [16]:
test_data = pd.DataFrame({'eval': [17.2, 30.1, 9.7, 11.9, 21.2, 15.2]})
linear_scaling(test_data['eval'], 1.5)

0    17.305279
1    26.325000
2    12.061255
3    13.599502
4    20.102092
5    15.906873
Name: eval, dtype: float64

### Skalowanie  σ-odcięcia

In [19]:
def sigma_cut_scaling(data: pd.Series, c_parameter: int = 1) -> pd.Series:
    tmp = data - (data.mean() - c_parameter * data.std())
    return tmp.apply(lambda x: x if x >= 0 else 0)

In [20]:
test_data = pd.DataFrame({'eval': [17.2, 30.1, 9.7, 11.9, 21.2, 15.2]})
sigma_cut_scaling(test_data['eval'])

0     6.998673
1    19.898673
2     0.000000
3     1.698673
4    10.998673
5     4.998673
Name: eval, dtype: float64

### Skalowanie logarytmiczne

In [41]:
def logarithmic_scaling(data: pd.Series, b: float = 1) -> pd.Series:
    min_b = np.log10(data).max()
    assert(b >= min_b)
    return b - np.log10(data)

In [42]:
test_data = pd.DataFrame({'eval': [17.2, 30.1, 9.7, 11.9, 21.2, 15.2]})
logarithmic_scaling(test_data['eval'], 1.5)

0    0.264472
1    0.021434
2    0.513228
3    0.424453
4    0.173664
5    0.318156
Name: eval, dtype: float64

## Funkcja De Jonga (paraboloida)

In [69]:
f = lambda x, y, z: x**2 + y**2 + z**2
domain_from = -5.12
domain_to = 5.12

evaluate = lambda x: 80 - x

In [60]:
columns = ['x', 'y', 'z']
small_sample = pd.DataFrame(np.random.uniform(domain_from, domain_to, (20, 3)), columns=columns)
big_sample = pd.DataFrame(np.random.uniform(domain_from, domain_to, (100, 3)), columns=columns)

In [61]:
small_sampe.columns

Index(['x', 'y', 'z', 'f(x, y, z)'], dtype='object')

In [73]:
np.random.seed(27)
small_sample['f(x, y, z)'] = small_sample.apply(lambda r: f(r.x, r.y, r.z), axis=1)
small_sample['eval'] = small_sample['f(x, y, z)'].apply(evaluate)

In [86]:
small_sample

Unnamed: 0,x,y,z,"f(x, y, z)",eval
0,-0.760613,3.221338,2.410468,16.765904,63.234096
1,3.768353,-1.194181,4.909636,39.731075,40.268925
2,4.02631,-2.972517,2.476315,31.179165,48.820835
3,1.670588,3.960847,3.66605,31.919095,48.080905
4,2.552445,3.790282,-3.20762,31.17004,48.82996
5,-1.786197,-1.301121,3.007621,13.9292,66.0708
6,-3.573143,-3.379787,-4.288828,42.584358,37.415642
7,-1.995005,2.900888,-3.451841,24.310396,55.689604
8,-4.396633,2.058969,-3.266766,34.241496,45.758504
9,1.012913,-0.8677,0.13898,1.798211,78.201789


## Siodło Rosenbrocka

In [14]:
f = lambda x, y: 100 * (x**2 - y)**2 + (1 - x)**2
domain_from = -2.048
domain_to = 2.048

f(1, 1)

0

In [16]:
evaluate = lambda x: 3910 - x

In [None]:
np.random.seed(291)
columns = ['x', 'y']
small_sample = pd.DataFrame(np.random.uniform(domain_from, domain_to, (20, 2)), columns=columns)
big_sample = pd.DataFrame(np.random.uniform(domain_from, domain_to, (100, 2)), columns=columns)