In [21]:
import numpy as np
import pandas as pd

In [22]:
data = pd.read_csv('dataset/train.csv', sep=';')
data_test = pd.read_csv('dataset/test.csv', sep=';')

### Intensidade e Simetria (datasets reduzidos)

In [23]:
#label
df_reduzido = pd.DataFrame()

df_reduzido['label'] = data['label']

df_reduzido

Unnamed: 0,label
0,0
1,0
2,0
3,0
4,0
...,...
2181,5
2182,5
2183,5
2184,5


intensidade é $$\frac{\sum_{x=0}^{783} pixelx}{255}$$



In [24]:
#axis = 0 é a soma das colunas; axis = 1 é a soma das linhas
def intensidade(data):
    return data.sum(axis=1) / 255

In [25]:
df_reduzido['intensidade'] = intensidade(data)

df_reduzido['intensidade']

0       145.435294
1       118.576471
2       127.600000
3       138.047059
4       146.215686
           ...    
2181    105.705882
2182    113.070588
2183    110.745098
2184     99.325490
2185    101.192157
Name: intensidade, Length: 2186, dtype: float64

simetria é

1. Simetria Vertical
$$\frac{\sum_{i=0}^{27} \sum_{j=0}^{13} || pixel_{i, j} - pixel_{i, 27-j} ||}{255}$$

2. Simetria Horizontal
$$\frac{\sum_{i=0}^{13} \sum_{j=0}^{27} || pixel_{i, j} - pixel_{27-i, j} ||}{255}$$

3. Simetria Total
$$S_{v} + S_{h}$$


In [26]:
#simetria vertical
def simetria_vertical(data):

    s_v = []
    for n in range(len(data)):
        aux = 0

        for i in range(28):

            for j in range(14):

                pixel_i = i * 28 + j
                pixel_j = i * 28 + (27 - j)
                aux += abs(data.iloc[n, pixel_i+1] - data.iloc[n, pixel_j+1])


        s_v.append(aux / 255)
    
    return s_v


#simetria horizontal
def simetria_horizontal(data):
    s_h = []
    for n in range(len(data)):
        aux = 0

        for i in range(14):

            for j in range(28):

                pixel_i = i * 28 + j
                pixel_j = (27 - i) * 28 + j
                aux += abs(data.iloc[n, pixel_i+1] - data.iloc[n, pixel_j+1])

        s_h.append(aux / 255)

    return s_h

In [27]:
#simetria total
def simetria(data):
    
    vert = simetria_vertical(data)
    hori = simetria_horizontal(data)
    total = [sv + sh for sv, sh in zip(vert, hori)]

    return total

In [28]:
df_reduzido['simetria'] = simetria(data)

In [29]:
df_reduzido

Unnamed: 0,label,intensidade,simetria
0,0,145.435294,148.572549
1,0,118.576471,137.113725
2,0,127.600000,134.047059
3,0,138.047059,151.003922
4,0,146.215686,122.501961
...,...,...,...
2181,5,105.705882,133.890196
2182,5,113.070588,116.698039
2183,5,110.745098,133.019608
2184,5,99.325490,132.133333


#### Teste

In [30]:
#label
df_test_red = pd.DataFrame()

df_test_red['label'] = data_test['label']

df_test_red

Unnamed: 0,label
0,0
1,0
2,0
3,0
4,0
...,...
1110,5
1111,5
1112,5
1113,5


In [31]:
#intensidade
df_test_red['intensidade'] = intensidade(data_test)

In [32]:
#simetria
df_test_red['simetria'] = simetria(data_test)

In [33]:
df_test_red

Unnamed: 0,label,intensidade,simetria
0,0,147.933333,130.470588
1,0,131.054902,122.721569
2,0,134.890196,151.725490
3,0,139.976471,133.921569
4,0,129.580392,130.274510
...,...,...,...
1110,5,96.141176,139.521569
1111,5,98.458824,130.729412
1112,5,95.160784,122.227451
1113,5,106.901961,128.007843


### Novos Datasets

In [34]:
#df_reduzido.to_csv('dataset/train_red.csv', sep=';', index=False)
#df_test_red.to_csv('dataset/test_red.csv', sep=';', index=False)