# Badanie metod skalowania

Skalowanie funkcji przystosowania pozwala uniknąć niekorzystnych zjawisk występujących w algorytmach genetycznych.
W tym zeszycie postaram się zbadać wpływ różnych metod skalowania na wartości funkcji ewaluacyjnej.

In [231]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from typing import Tuple, List

## Skalowania

Implementacja wykorzystywanych metod skalowania

### Skalowanie liniowe

In [13]:
def calculate_linear_scaling_parameters(data: pd.Series, mulitiplication_parameter: float) -> Tuple[float, float]:
    """Compute the coefficients (a, b) of the linear scaling a * x + b.

    Two cases are distinguished by comparing the smallest value with
    (C * mean - max) / (C - 1), where C is the multiplication parameter:

    * smallest value above that threshold: the line keeps the mean in place
      and sends the maximum to C * mean (up to the tiny epsilon below);
    * otherwise: the line keeps the mean in place and sends the minimum to 0.

    Args:
        data: Evaluation values to be scaled.
        mulitiplication_parameter: The multiplier C; must differ from 1,
            because the threshold divides by C - 1.

    Returns:
        The pair (a, b).
    """
    tiny = 0.00000000000001  # keeps the divisors non-zero when all values are equal
    lowest, highest, average = data.min(), data.max(), data.mean()

    threshold = (mulitiplication_parameter * average - highest) / (mulitiplication_parameter - 1.0)
    if not (lowest > threshold):
        # Fallback case: anchor the minimum at zero.
        spread = average - lowest + tiny
        return average / spread, -lowest * average / spread

    # Standard case: anchor the maximum at C * mean.
    spread = highest - average + tiny
    slope = ((mulitiplication_parameter - 1.0) * average) / spread
    intercept = average * (highest - mulitiplication_parameter * average) / spread
    return slope, intercept

In [61]:
def linear_scaling(data: pd.Series, mulitiplication_parameter: float) -> pd.Series:
    """Apply linear scaling a * x + b to every evaluation value.

    Args:
        data: Evaluation values to be scaled.
        mulitiplication_parameter: The scaling multiplier; must lie in [1.2, 2].

    Returns:
        A series of scaled values with the same index as ``data``.

    Raises:
        AssertionError: If the multiplier is outside [1.2, 2].
    """
    # Validate the argument itself rather than a similarly named notebook
    # global, so the function also works on a freshly started kernel.
    assert 1.2 <= mulitiplication_parameter <= 2
    a, b = calculate_linear_scaling_parameters(data, mulitiplication_parameter)
    return a * data + b

In [16]:
# Quick check of linear scaling on a small hand-made sample, multiplier 1.5.
test_data = pd.DataFrame({'eval': [17.2, 30.1, 9.7, 11.9, 21.2, 15.2]})
linear_scaling(test_data['eval'], 1.5)

0    17.305279
1    26.325000
2    12.061255
3    13.599502
4    20.102092
5    15.906873
Name: eval, dtype: float64

### Skalowanie σ-odcięcia

In [19]:
def sigma_cut_scaling(data: pd.Series, c_parameter: int = 1) -> pd.Series:
    """Sigma-cut scaling: shift by (mean - c * std) and truncate negatives at zero.

    Args:
        data: Evaluation values to be scaled.
        c_parameter: Multiple of the standard deviation subtracted from the mean
            to obtain the cut-off.

    Returns:
        A series where every value is ``x - (mean - c * std)`` if that is
        non-negative, and 0 otherwise.
    """
    def _floor_at_zero(value):
        return value if value >= 0 else 0

    cutoff = data.mean() - c_parameter * data.std()
    shifted = data - cutoff
    return shifted.apply(_floor_at_zero)

In [20]:
# Quick check of sigma-cut scaling on the hand-made sample (default c_parameter).
test_data = pd.DataFrame({'eval': [17.2, 30.1, 9.7, 11.9, 21.2, 15.2]})
sigma_cut_scaling(test_data['eval'])

0     6.998673
1    19.898673
2     0.000000
3     1.698673
4    10.998673
5     4.998673
Name: eval, dtype: float64

### Skalowanie logarytmiczne

In [41]:
def logarithmic_scaling(data: pd.Series, b: float = 1) -> pd.Series:
    """Logarithmic scaling: ``b - log10(x)`` for every evaluation value.

    Args:
        data: Evaluation values to be scaled.
        b: Offset; must be at least the largest ``log10`` of the data so that
            no scaled value is negative.

    Returns:
        A series of scaled values.

    Raises:
        AssertionError: If ``b`` is smaller than the largest ``log10(x)``.
    """
    logs = np.log10(data)
    assert b >= logs.max()
    return b - logs

In [42]:
# Quick check of logarithmic scaling on the hand-made sample with b = 1.5.
test_data = pd.DataFrame({'eval': [17.2, 30.1, 9.7, 11.9, 21.2, 15.2]})
logarithmic_scaling(test_data['eval'], 1.5)

0    0.264472
1    0.021434
2    0.513228
3    0.424453
4    0.173664
5    0.318156
Name: eval, dtype: float64

### Skalowanie rankingowe liniowe

In [190]:
def rank_linear_scaling(data: pd.Series, initial_value: float, step: float) -> pd.Series:
    """Linear rank scaling: the best value gets ``initial_value``, each next rank ``step`` less.

    Values are ranked in descending order; the element of rank ``j`` (0-based)
    receives ``initial_value - j * step``.

    Args:
        data: Evaluation values to be scaled.
        initial_value: Scaled value assigned to the largest element.
        step: Amount subtracted per rank.

    Returns:
        A series of scaled values carrying the same index as ``data``.

    Raises:
        AssertionError: If ``step`` exceeds ``initial_value / (len(data) - 1)``,
            i.e. the lowest rank would receive a negative value.
    """
    c = initial_value
    r = step
    n = len(data)
    if n == 0:
        return pd.Series(index=data.index, dtype=float)
    # A single element has no lower ranks, so there is nothing to check
    # (and dividing by n - 1 would raise ZeroDivisionError).
    assert n == 1 or r <= c / (n - 1)
    # Rank by position instead of by label so that any index (non-default,
    # non-integer, shuffled) is handled and preserved in the result.
    positions = data.reset_index(drop=True).sort_values(ascending=False).index
    value_by_position = {position: c - rank * r for rank, position in enumerate(positions)}
    return pd.Series([value_by_position[i] for i in range(n)], index=data.index)

In [191]:
# Quick check of linear rank scaling on the hand-made sample: start at 100, step 5.
test_data = pd.DataFrame({'eval': [17.2, 30.1, 9.7, 11.9, 21.2, 15.2]})
rank_linear_scaling(test_data['eval'], 100, 5)

0     90
1    100
2     75
3     80
4     95
5     85
dtype: int64

## Funkcja De Jonga (paraboloida)

In [201]:
def f(x, y, z):
    """De Jong's paraboloid: the sum of the squares of the three coordinates."""
    return x**2 + y**2 + z**2


# Every coordinate is drawn from the interval [domain_from, domain_to].
domain_from, domain_to = -5.12, 5.12


def evaluate(x):
    """Turn an objective value into an evaluation value by subtracting it from 80.

    On the domain above f is at most 3 * 5.12**2 (about 78.64), so the result
    stays positive and smaller objective values give larger evaluations.
    """
    return 80 - x

In [202]:
# Fixed seed so that the samples drawn below are reproducible.
np.random.seed(27)
columns = ['x', 'y', 'z']
# Two uniform samples over [domain_from, domain_to] in each coordinate:
# 20 points and 100 points. The small one is drawn first, which matters for the RNG state.
small_sample = pd.DataFrame(np.random.uniform(domain_from, domain_to, (20, 3)), columns=columns)
big_sample = pd.DataFrame(np.random.uniform(domain_from, domain_to, (100, 3)), columns=columns)

### Analiza małego zbioru

In [203]:
# Objective value for every row, then the evaluation value derived from it.
small_sample['f(x, y, z)'] = small_sample.apply(lambda r: f(r.x, r.y, r.z), axis=1)
small_sample['eval'] = small_sample['f(x, y, z)'].apply(evaluate)

Skalowanie metodą liniową i σ-odcięcia

In [204]:
# Linear scaling of the evaluation values with multiplier 1.5.
multiplication_parameter = 1.5
small_sample['skalowanie liniowe'] = linear_scaling(small_sample['eval'], multiplication_parameter)

In [205]:
# Sigma-cut scaling with the function's default c_parameter.
small_sample['skalowanie σ-odcinające'] = sigma_cut_scaling(small_sample['eval'])

In [206]:
# Show the sample together with the objective, evaluation and scaled columns.
small_sample

Unnamed: 0,x,y,z,"f(x, y, z)",eval,skalowanie liniowe,skalowanie σ-odcinające
0,-0.760613,3.221338,2.410468,16.765904,63.234096,63.083761,23.133462
1,3.768353,-1.194181,4.909636,39.731075,40.268925,40.42325,0.168291
2,4.02631,-2.972517,2.476315,31.179165,48.820835,48.861709,8.720201
3,1.670588,3.960847,3.66605,31.919095,48.080905,48.131595,7.980271
4,2.552445,3.790282,-3.20762,31.17004,48.82996,48.870713,8.729326
5,-1.786197,-1.301121,3.007621,13.9292,66.0708,65.882832,25.970166
6,-3.573143,-3.379787,-4.288828,42.584358,37.415642,37.60782,0.0
7,-1.995005,2.900888,-3.451841,24.310396,55.689604,55.639356,15.588971
8,-4.396633,2.058969,-3.266766,34.241496,45.758504,45.840004,5.657871
9,1.012913,-0.8677,0.13898,1.798211,78.201789,77.85289,38.101155


In [207]:
# `df` is an alias (same object, not a copy) used by the summary and plotting cells.
df = small_sample
# Summary of 'eval' and the scaled columns; iloc[2:] drops the first two describe() rows.
df.iloc[:, 4:].describe().iloc[2:]

Unnamed: 0,eval,skalowanie liniowe,skalowanie σ-odcinające
std,11.801293,11.644734,10.313839
min,25.872711,26.21802,0.0
25%,44.597092,44.693999,4.496458
50%,52.87321,52.860325,12.772577
75%,60.184665,60.074785,20.084031
max,78.201789,77.85289,38.101155


In [208]:
# One violin (with an embedded box plot) per column from 'eval' onwards.
fig = go.Figure(layout=dict(title="Wykres skrzypcowy skalowania małej grupy"))
for column in df.iloc[:, 4:]:
    fig.add_trace(go.Violin(y=df[column], name=column, box_visible=True))

# NOTE(review): other plotting cells in this notebook also export to "fig.svg",
# so this file is overwritten when they run.
fig.write_image("fig.svg")
fig.show()

In [209]:
# One histogram per column from 'eval' onwards, shown and exported as fig<i>.svg.
# NOTE(review): the big-sample histogram cell writes the same fig<i>.svg names
# and will overwrite these exports.
for i, column in enumerate(df.columns[4:]):
    fig = px.histogram(df, x=column, title=column)
    fig.show()
    fig.write_image(f"fig{i}.svg")

### Analiza dużego zbioru

In [210]:
# Objective value for every row, then the evaluation value derived from it.
big_sample['f(x, y, z)'] = big_sample.apply(lambda r: f(r.x, r.y, r.z), axis=1)
big_sample['eval'] = big_sample['f(x, y, z)'].apply(evaluate)

Skalowanie metodą liniową i σ-odcięcia

In [211]:
# Linear scaling of the evaluation values with multiplier 1.5.
multiplication_parameter = 1.5
big_sample['skalowanie liniowe'] = linear_scaling(big_sample['eval'], multiplication_parameter)

In [212]:
# Sigma-cut scaling with the function's default c_parameter.
big_sample['skalowanie σ-odcinające'] = sigma_cut_scaling(big_sample['eval'])

In [213]:
# Show the sample together with the objective, evaluation and scaled columns.
big_sample

Unnamed: 0,x,y,z,"f(x, y, z)",eval,skalowanie liniowe,skalowanie σ-odcinające
0,0.853710,1.524735,4.208345,20.763808,59.236192,58.557750,22.516371
1,-3.874223,1.800410,2.608654,25.056158,54.943842,54.640292,18.224021
2,4.203882,-2.645116,4.870098,48.387119,31.612881,33.347045,0.000000
3,-2.009808,4.102147,-4.048281,37.255517,42.744483,43.506419,6.024661
4,1.772747,3.784119,0.624922,17.852716,62.147284,61.214588,25.427462
...,...,...,...,...,...,...,...
95,4.348954,-0.340486,-1.250281,20.592539,59.407461,58.714061,22.687640
96,-2.522757,1.627496,2.307300,14.336678,65.663322,64.423537,28.943500
97,-4.735369,4.409538,0.026306,41.868433,38.131567,39.296393,1.411746
98,-0.696990,0.707975,1.879944,4.521213,75.478787,73.381725,38.758966


In [214]:
# Re-point the `df` alias (same object, not a copy) at the big sample.
df = big_sample
# Summary of 'eval' and the scaled columns; iloc[2:] drops the first two describe() rows.
df.iloc[:, 4:].describe().iloc[2:]

Unnamed: 0,eval,skalowanie liniowe,skalowanie σ-odcinające
std,14.748499,13.460373,12.928802
min,13.541434,16.853949,0.0
25%,39.922234,40.930663,3.202412
50%,51.575258,51.565918,14.855437
75%,62.517746,61.552695,25.797924
max,79.665181,77.202482,42.94536


In [215]:
# One violin (with an embedded box plot) per column from 'eval' onwards.
fig = go.Figure(layout=dict(title="Wykres skrzypcowy skalowania dużej grupy"))
for column in df.iloc[:, 4:]:
    fig.add_trace(go.Violin(y=df[column], name=column, box_visible=True))

# NOTE(review): "fig.svg" is also the export target of the small-sample violin
# cell, so that earlier export is overwritten here.
fig.write_image("fig.svg")
fig.show()

In [130]:
# One histogram per column from 'eval' onwards, shown and exported as fig<i>.svg.
# NOTE(review): the small-sample histogram cell uses the same fig<i>.svg names,
# so its exports are overwritten here.
for i, column in enumerate(df.columns[4:]):
    fig = px.histogram(df, x=column, title=column)
    fig.show()
    fig.write_image(f"fig{i}.svg")

## Siodło Rosenbrocka

In [216]:
def f(x, y):
    """Rosenbrock's saddle; it evaluates to 0 at the point (1, 1)."""
    return 100 * (x**2 - y)**2 + (1 - x)**2


# Both coordinates are drawn from the interval [domain_from, domain_to].
domain_from, domain_to = -2.048, 2.048

# Sanity check: displays the value at (1, 1).
f(1, 1)

0

In [217]:
def evaluate(x):
    """Turn an objective value into an evaluation value by subtracting it from 3910.

    NOTE(review): 3910 is presumably an upper bound of the objective on the
    sampled square (about 3905.93 at x = y = -2.048), keeping evaluations positive.
    """
    return 3910 - x

In [218]:
# Fixed seed so that the samples drawn below are reproducible.
np.random.seed(2291)
columns = ['x', 'y']
# Two uniform samples over [domain_from, domain_to] in each coordinate:
# 20 points and 100 points. The small one is drawn first, which matters for the RNG state.
small_sample = pd.DataFrame(np.random.uniform(domain_from, domain_to, (20, 2)), columns=columns)
big_sample = pd.DataFrame(np.random.uniform(domain_from, domain_to, (100, 2)), columns=columns)

### Analiza małego zbioru

In [219]:
# Objective value for every row, then the evaluation value derived from it.
small_sample['f(x, y)'] = small_sample.apply(lambda r: f(r.x, r.y), axis=1)
small_sample['eval'] = small_sample['f(x, y)'].apply(evaluate)

Skalowanie metodą rankingową liniową i logarytmiczną

In [220]:
# Largest log10 of the evaluation values: the smallest `b` that
# logarithmic_scaling accepts for this sample.
np.log10(small_sample['eval']).max()

3.591741623160353

In [221]:
# Linear rank scaling: the best individual gets c, each next rank r less.
c = 100
r = 5
small_sample['skalowanie rankingowe liniowe'] = rank_linear_scaling(small_sample['eval'], c, r)
# b must be at least the maximum log10 of the evaluations computed in the previous cell.
b = 3.7
small_sample['skalowanie logarytmiczne'] = logarithmic_scaling(small_sample['eval'], b)

In [222]:
# Show the sample together with the objective, evaluation and scaled columns.
small_sample

Unnamed: 0,x,y,"f(x, y)",eval,skalowanie rankingowe liniowe,skalowanie logarytmiczne
0,0.634713,-1.90651,533.452622,3376.547378,25,0.171527
1,-0.073416,1.870197,348.902599,3561.097401,35,0.148416
2,0.13251,-1.074342,119.977267,3790.022733,75,0.121358
3,1.717462,0.412097,644.446334,3265.553666,20,0.186043
4,0.511012,-0.869054,127.971576,3782.028424,70,0.122275
5,-0.286525,1.434662,184.598369,3725.401631,45,0.128827
6,-1.882055,0.389668,1002.109127,2907.890873,15,0.236422
7,1.564549,-1.656557,1684.90532,2225.09468,5,0.352652
8,-1.76162,1.909416,150.163583,3759.836417,60,0.124831
9,-0.47285,-1.888448,448.238224,3461.761776,30,0.160703


In [223]:
# `df` is an alias (same object, not a copy) used by the summary and plotting cells.
df = small_sample
# Summary of 'eval' and the scaled columns; iloc[2:] drops the first two describe() rows.
df.iloc[:, 3:].describe().iloc[2:]

Unnamed: 0,eval,skalowanie rankingowe liniowe,skalowanie logarytmiczne
std,486.934142,29.580399,0.069263
min,2225.09468,5.0,0.108258
25%,3440.458177,28.75,0.119961
50%,3734.232598,52.5,0.127799
75%,3802.297264,76.25,0.163409
max,3906.084402,100.0,0.352652


In [268]:
# One violin per column from 'eval' onwards, each in its own subplot column
# (subplot columns are 1-based, hence enumerate(..., 1)).
fig = make_subplots(rows=1, cols=3)
for i, column in enumerate(df.iloc[:, 3:], 1):
    fig.add_trace(go.Violin(y=df[column], name=column, box_visible=True),
                 row=1, col=i)

fig.update_layout(title_text = 'Wykresy skrzypcowe skalowania małej grupy', showlegend=False)
# NOTE(review): other plotting cells in this notebook also export to "fig.svg",
# so this file is overwritten when they run.
fig.write_image("fig.svg")
fig.show()

In [269]:
# One histogram per column from 'eval' onwards, stacked in separate subplot rows
# (subplot rows are 1-based, hence enumerate(..., 1)).
fig = make_subplots(rows=3, cols=1, subplot_titles=df.columns[3:])
for i, column in enumerate(df.columns[3:], 1):
    fig.add_trace(go.Histogram(x=df[column], name=column), row=i, col=1)

fig.update_layout(title_text = 'Histogramy skalowania małej grupy', height=800, showlegend=False)
fig.show()
# NOTE(review): this overwrites the "fig.svg" written by the violin-plot cell above.
fig.write_image("fig.svg")