# Preprocessing

In [2]:
import numpy as np
import pandas as pd
from keras.layers import Dense, Activation, Dropout
from keras.models import Sequential
from sklearn.model_selection import StratifiedKFold
import keras.backend as K

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def readFile(filepath, X): # reading first 1000 elements from a file
    with open(filepath) as file:
        array = [line.strip() for line in file]
    full_size = int(array[0].split(' ')[0])
    features = int(array[0].split(' ')[1])
    classes = int(array[0].split(' ')[2])
    sample_size = 0
    for i in range(min(full_size*2, 2000)):
        s = array[i].split(' ')
        if len(s) > 10:
            sample_size += 1
            X.append(s)
    return sample_size, features, classes

In [3]:
def readFullFile(filepath, X):
    with open(filepath) as file:
        array = [line.strip() for line in file]
    full_size = int(array[0].split(' ')[0])
    features = int(array[0].split(' ')[1])
    classes = int(array[0].split(' ')[2])
    sample_size = 0
    for i in range(full_size*2):
        s = array[i].split(' ')
        if len(s) > 10:
            sample_size += 1
            X.append(s)
    return sample_size, features, classes

In [4]:
X_0 = [] # Zhirinovsky
X_1 = [] # Zyuganov
X_4 = [] # Navalny
X_5 = [] # Putin
X_6 = [] # Khodorkovsky
X_7 = [] # Shoigu
X_8 = [] # Don't know
X_9 = [] # Against all

In [5]:
size_0, features, _ = readFile('trainFull/train0.data', X_0)
size_0

1000

In [6]:
size_1, _, _ = readFile('trainFull/train1.data', X_1)
size_1

931

In [7]:
size_4, _, _ = readFile('trainFull/train4.data', X_4)
size_4

1000

In [8]:
size_5, _, _ = readFile('trainFull/train5.data', X_5)
size_5

1000

In [9]:
size_6, _, _ = readFile('trainFull/train6.data', X_6)
size_6

810

In [10]:
size_7, _, _ = readFile('trainFull/train7.data', X_7)
size_7

1000

In [11]:
size_8, _, _ = readFile('trainFull/train8.data', X_8)
size_8

1000

In [12]:
size_9, _, _ = readFile('trainFull/train9.data', X_9)
size_9

1000

In [13]:
X_0 = np.array(X_0).astype(float)
X_1 = np.array(X_1).astype(float)
X_4 = np.array(X_4).astype(float)
X_5 = np.array(X_5).astype(float)

In [14]:
X_6 = np.array(X_6).astype(float)
X_7 = np.array(X_7).astype(float)
X_8 = np.array(X_8).astype(float)
X_9 = np.array(X_9).astype(float)

In [15]:
X_0 = (X_0+1)/2
X_1 = (X_1+1)/2
X_4 = (X_4+1)/2
X_5 = (X_5+1)/2

In [16]:
X_6 = (X_6+1)/2
X_7 = (X_7+1)/2
X_8 = (X_8+1)/2
X_9 = (X_9+1)/2

In [18]:
y_0 = np.zeros(1000)
y_1 = np.ones(1000)

In [55]:
X_4_full = [] # Navalny full
X_5_full = [] # Putin full

In [56]:
size_4, _, _ = readFullFile('trainFull/train4.data', X_4_full)
size_4

1998

In [57]:
size_5, _, _ = readFullFile('trainFull/train5.data', X_5_full)
size_5

5000

In [58]:
X_5_full = np.array(X_5_full).astype(float)
X_4_full = np.array(X_4_full).astype(float)

In [59]:
X_4_full = (X_4_full+1)/2
X_5_full = (X_5_full+1)/2

# Fitting and evaluating

In [21]:
features = 7282

## Navalny+Khodorkovsky vs Putin+Shoigu (79%)

In [3]:
kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=42) # cross-validation

In [107]:
X = np.vstack((X_4, X_6, X_5, X_7))
y = np.append(np.zeros(X_4.shape[0]+X_6.shape[0]), np.ones(X_5.shape[0]+X_7.shape[0]))
X.shape

(4410, 7282)

In [110]:
batch_size = 100
epochs = 60
dropout_size = 0.5
num_train = X.shape[0]

cvscores = []

for train, test in kfold.split(X, y):
    model = Sequential()
    model.add(Dense(250, input_dim=features, activation='relu'))
    Dropout(dropout_size)
    model.add(Dense(100, activation='relu'))
    Dropout(dropout_size/2)
    model.add(Dense(40, activation='relu'))
    Dropout(dropout_size/3)
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])

    model.fit(X[train], y[train], batch_size=batch_size, epochs=epochs, shuffle=True,
              validation_data=(X[test], y[test]))
    scores = model.evaluate(X[test], y[test], verbose=0)
    cvscores.append(scores[1] * 100)

Train on 2940 samples, validate on 1470 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
Train on 2940 samples, validate on 1470 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/6

Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


In [111]:
cvscores

[79.183673420730898, 80.476190427533624, 78.57142857953805]

## Navalny+Khodorkovsky vs Putin+Shoigu+Zyuganov+Zhirinovsky (78%)

In [24]:
X = np.vstack((X_4, X_6, X_5[0:600,:], X_7[0:600,:], X_0[0:600,:], X_1[0:600,:]))
y = np.append(np.zeros(X_4.shape[0]+X_6.shape[0]), np.ones(2400))
X.shape

(4410, 7282)

In [25]:
y.shape

(4410,)

In [38]:
batch_size = 100
epochs = 50
dropout_size = 0.5
num_train = X.shape[0]

cvscores = []

for train, test in kfold.split(X, y):
    model = Sequential()
    model.add(Dense(350, input_dim=features, activation='relu'))
    Dropout(dropout_size)
    model.add(Dense(200, activation='relu'))
    Dropout(dropout_size/2)
    model.add(Dense(50, activation='relu'))
    Dropout(dropout_size/3)
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])

    model.fit(X[train], y[train], batch_size=batch_size, epochs=epochs, shuffle=True,
              validation_data=(X[test], y[test]))
    scores = model.evaluate(X[test], y[test], verbose=0)
    cvscores.append(scores[1] * 100)

Train on 2940 samples, validate on 1470 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Train on 2940 samples, validate on 1470 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/5

Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [30]:
cvscores

[76.938775526423029, 76.598639407125461, 77.41496599450403]

Format: \[loss, accuracy\]

In [58]:
scores4 = model.evaluate(X_4_full, np.zeros(X_4_full.shape[0]))
scores4 # Navalny



[0.4879228075643679, 0.74474474486407338]

In [57]:
scores5 = model.evaluate(X_5_full, np.ones(X_5_full.shape[0]))
scores5 # Putin



[0.27149285383224486, 0.93979999999999997]

In [59]:
scores0 = model.evaluate(X_0_full, np.ones(X_0_full.shape[0]))
scores0 # Zhirinovsky



[0.35467217540292528, 0.87891849529780564]

In [60]:
scores1 = model.evaluate(X_1_full, np.ones(X_1_full.shape[0]))
scores1 # Zyuganov



[0.36663189610400337, 0.87540279276004795]

In [61]:
scores6 = model.evaluate(X_6_full, np.zeros(X_6_full.shape[0]))
scores6 # Khodorkovsky



[0.5458451319623876, 0.71975308612540911]

In [62]:
scores7 = model.evaluate(X_7_full, np.ones(X_7_full.shape[0]))
scores7 # Shoigu



[0.36071023920245093, 0.86727341685188419]

## Navalny vs Putin vs Zhirinovsky vs Zyuganov (56%)

In [69]:
X = np.vstack((X_4, X_5, X_0, X_1))
y_4 = np.array([[1, 0, 0, 0]] * X_4.shape[0])
y_5 = np.array([[0, 1, 0, 0]] * X_5.shape[0])
y_0 = np.array([[0, 0, 1, 0]] * X_0.shape[0])
y_1 = np.array([[0, 0, 0, 1]] * X_1.shape[0])
X.shape

(5731, 7282)

In [70]:
y = np.vstack((y_4, y_5, y_0, y_1))
y.shape

(5731, 4)

In [71]:
batch_size = 100
epochs = 70
dropout_size = 0.5

cvscores = []

for train, test in kfold.split(X, np.zeros(X.shape[0])):
    model = Sequential()
    model.add(Dense(250, input_dim=features, activation='relu'))
    Dropout(dropout_size)
    model.add(Dense(100, activation='relu'))
    Dropout(dropout_size/2)
    model.add(Dense(40, activation='relu'))
    Dropout(dropout_size/3)
    model.add(Dense(4, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='sgd', 
                  metrics=['categorical_accuracy', 'accuracy'])

    model.fit(X[train], y[train], batch_size=batch_size, epochs=epochs, shuffle=True,
              validation_data=(X[test], y[test]))
    scores = model.evaluate(X[test], y[test], verbose=0)
    cvscores.append(scores)
res.append(cvscores)

Train on 3820 samples, validate on 1911 samples
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70
Train on 3821 samples, validate on 1910 samples
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7

Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70
Train on 3821 samples, validate on 1910 samples
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
E

Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70


In [137]:
cvscores

[55.592322967882424, 56.026490068198832, 57.615894041708756]

## 7 classes

In [29]:
X = np.vstack((X_0, X_1, X_4, X_5, X_6, X_7, X_8, X_9))
y_0 = np.array([[1, 0, 0, 0, 0, 0, 0]] * X_0.shape[0])
y_1 = np.array([[0, 1, 0, 0, 0, 0, 0]] * X_1.shape[0])
y_4 = np.array([[0, 0, 1, 0, 0, 0, 0]] * X_4.shape[0])
y_5 = np.array([[0, 0, 0, 1, 0, 0, 0]] * X_5.shape[0])
y_6 = np.array([[0, 0, 0, 0, 1, 0, 0]] * X_6.shape[0])
y_7 = np.array([[0, 0, 0, 0, 0, 1, 0]] * X_7.shape[0])
y_8 = np.array([[0, 0, 0, 0, 0, 0, 1]] * (X_8.shape[0] + X_9.shape[0]))
X.shape

(8941, 7282)

In [30]:
y = np.vstack((y_0, y_1, y_4, y_5, y_6, y_7, y_8))
y.shape

(8941, 7)

In [34]:
batch_size = 100
epochs = 35
dropout_size = 0.5

cvscores = []

for train, test in kfold.split(X, np.zeros(X.shape[0])):
    model = Sequential()
    model.add(Dense(250, input_dim=features, activation='relu'))
    Dropout(dropout_size)
    model.add(Dense(100, activation='relu'))
    Dropout(dropout_size/2)
    model.add(Dense(40, activation='relu'))
    Dropout(dropout_size/3)
    model.add(Dense(7, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

    model.fit(X[train], y[train], batch_size=batch_size, epochs=epochs, shuffle=True,
              validation_data=(X[test], y[test]))
    scores = model.evaluate(X[test], y[test], verbose=0)
    cvscores.append(scores[1] * 100)
res.append(cvscores)

Train on 5960 samples, validate on 2981 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35
Train on 5961 samples, validate on 2980 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35
Train on 5961 samples, validate on 2980 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
E

In [None]:
[0.3415, 0.3386, 0.3389]

In [62]:
cvscores

[[0.54713927256308925,
  0.79382522224069951,
  0.81771215206974157,
  0.73835688118283382],
 [0.55007647416451999,
  0.79633507847161822,
  0.82604867805049054,
  0.74816753929822233],
 [0.57767656100816367,
  0.77225130894733351,
  0.80378332451688062,
  0.72460732988974186]]

## 3 metaclasses

In [35]:
X = np.vstack((X_0, X_1, X_5, X_7, X_4, X_6, X_8, X_9))
y_0 = np.array([[1, 0, 0]] * (X_0.shape[0] + X_1.shape[0] + X_5.shape[0] + X_7.shape[0]))
y_1 = np.array([[0, 1, 0]] * (X_4.shape[0] + X_6.shape[0]))
y_4 = np.array([[0, 0, 1]] * (X_8.shape[0] + X_9.shape[0]))

X.shape

(8941, 7282)

In [36]:
y = np.vstack((y_0, y_1, y_4))
y.shape

(8941, 3)

In [37]:
batch_size = 100
epochs = 70
dropout_size = 0.5

cvscores = []

for train, test in kfold.split(X, np.zeros(X.shape[0])):
    model = Sequential()
    model.add(Dense(250, input_dim=features, activation='relu'))
    Dropout(dropout_size)
    model.add(Dense(100, activation='relu'))
    Dropout(dropout_size/2)
    model.add(Dense(40, activation='relu'))
    Dropout(dropout_size/3)
    model.add(Dense(3, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

    model.fit(X[train], y[train], batch_size=batch_size, epochs=epochs, shuffle=True,
              validation_data=(X[test], y[test]))
    scores = model.evaluate(X[test], y[test], verbose=0)
    cvscores.append(scores)
res.append(cvscores)

Train on 5960 samples, validate on 2981 samples
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70
Train on 5961 samples, validate on 2980 samples
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7

Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70
Train on 5961 samples, validate on 2980 samples
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
E

In [None]:
[0.6159, 0.5977, 0.5993]

## Putin vs others

In [30]:
X = np.vstack((X_0, X_1, X_4, X_6, X_7, X_5_full))
X.shape

(9741, 7282)

In [26]:
y_0.shape

(4741,)

In [27]:
y_1.shape

(5000,)

In [28]:
y = np.hstack((y_0, y_1))
y.shape

(9741,)

In [32]:
batch_size = 100
epochs = 35
dropout_size = 0.4
num_train = X.shape[0]

cvscores = []

for train, test in kfold.split(X, y):
    model = Sequential()
    model.add(Dense(300, input_dim=features, activation='relu'))
    Dropout(dropout_size)
    model.add(Dense(150, activation='relu'))
    Dropout(dropout_size/2)
    model.add(Dense(60, activation='relu'))
    Dropout(dropout_size/3)
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])

    model.fit(X[train], y[train], batch_size=batch_size, epochs=epochs, shuffle=True,
              validation_data=(X[test], y[test]))
    scores = model.evaluate(X[test], y[test], verbose=0)
    cvscores.append(scores[1] * 100)

Train on 6493 samples, validate on 3248 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35
Train on 6494 samples, validate on 3247 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35
Train on 6495 samples, validate on 3246 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
E

## All binaries

In [22]:
cols = ['Candidate 0', 'Candidate 1', 'Accuracy', 'TPR (recall)', 'TNR', 'PPV (precision)', 'NPV', 'F1']
df = pd.DataFrame(columns=cols)

Unnamed: 0,Candidate 0,Candidate 1,Accuracy,TPR (recall),TNR,PPV (precision),NPV,F1


In [23]:
dfmean = pd.DataFrame(columns=cols)

Unnamed: 0,Candidate 0,Candidate 1,Accuracy,TPR (recall),TNR,PPV (precision),NPV,F1


In [9]:
keys = ['Жириновский', 'Зюганов', 'Навальный', 'Путин', 'Ходорковский', 'Шойгу', 'Не определились', 'Против всех']

In [33]:
batch_size = 100
epochs = 80
dropout_size = 0.5

for i in range(4, 8):
    for j in range(i+1, 8):
        X = np.vstack((ftr[i], ftr[j]))
        y0 = np.zeros(ftr[i].shape[0])
        y1 = np.ones(ftr[j].shape[0])
        y = np.hstack((y0, y1))
        num_train = X.shape[0]

        Acc = []
        Tpr = []
        Tnr = []
        Ppv = []
        Npv = []
        F1 = []
        no = 0

        for train, test in kfold.split(X, y):
            model = Sequential()

            model.add(Dense(250, input_dim=features, activation='relu'))
            Dropout(dropout_size)
            model.add(Dense(100, activation='relu'))
            Dropout(dropout_size/2)
            model.add(Dense(40, activation='relu'))
            Dropout(dropout_size/3)
            model.add(Dense(1, activation='sigmoid'))

            model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])
            model.fit(X[train], y[train], batch_size=batch_size, epochs=epochs, shuffle=True,
                      validation_data=(X[test], y[test]))
            
            model.save(('models/model' + '{}{}_{}' + '.h5').format(i, j, no))
            no += 1
            
            y_pred = model.predict(X[test]).round().reshape(-1)
            tp = (y_pred * y[test]).sum() # network - Putin, really - Putin
            tn = ((1-y_pred) * (1-y[test])).sum() # network - Navalny, really - Navalny
            fp = (y_pred * (1-y[test])).sum() # network - Putin, really - Navalny
            fn = ((1-y_pred) * y[test]).sum() # network - Navalny, really - Putin
            
            acc = (tp + tn) / (tp + tn + fp + fn)
            tpr = tp / (tp + fn)
            tnr = tn / (tn + fp)
            ppv = tp / (tp + fp)
            npv = tn / (tn + fn)
            f1 = (2 * tpr * ppv) / (tpr + ppv)
            
            Acc.append(int(acc * 1000) / 10.0)
            Tpr.append(int(tpr * 1000) / 10.0)
            Tnr.append(int(tnr * 1000) / 10.0)
            Ppv.append(int(ppv * 1000) / 10.0)
            Npv.append(int(npv * 1000) / 10.0)
            F1.append(int(f1 * 1000) / 1000.0)
        
        Acc = np.array(Acc).astype(float)
        Tpr = np.array(Tpr).astype(float)
        Tnr = np.array(Tnr).astype(float)
        Ppv = np.array(Ppv).astype(float)
        Npv = np.array(Npv).astype(float)
        F1 = np.array(F1).astype(float)
        
        for k in range(3):
            batch = pd.Series([keys[i], keys[j], Acc[k], Tpr[k], Tnr[k], Ppv[k], Npv[k], F1[k]], index=cols)
            df = df.append(batch, ignore_index=True)
        
        Acc = int(Acc.mean() * 10) / 10.0
        Tpr = int(Tpr.mean() * 10) / 10.0
        Tnr = int(Tnr.mean() * 10) / 10.0
        Ppv = int(Ppv.mean() * 10) / 10.0
        Npv = int(Npv.mean() * 10) / 10.0
        F1 = int(F1.mean() * 1000) / 1000.0
        
        batch = pd.Series([keys[i], keys[j], Acc, Tpr, Tnr, Ppv, Npv, F1], index=cols)
        dfmean = dfmean.append(batch, ignore_index=True)

Train on 1206 samples, validate on 604 samples
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
Tr

Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
Train on 1207 samples, validate on 603 samples
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Ep

Epoch 78/80
Epoch 79/80
Epoch 80/80
Train on 1206 samples, validate on 604 samples
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Ep

Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
Train on 1207 samples, validate on 603 samples
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Ep

Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
Train on 1206 samples, validate on 604 samples
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Ep

Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
Train on 1207 samples, validate on 603 samples
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Ep

Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
Train on 1332 samples, validate on 668 samples
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Ep

Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
Train on 1334 samples, validate on 666 samples
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Ep

Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
Train on 1332 samples, validate on 668 samples
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Ep

Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
Train on 1334 samples, validate on 666 samples
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Ep

Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
Train on 1332 samples, validate on 668 samples
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Ep

Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
Train on 1334 samples, validate on 666 samples
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Ep

Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


# Results

In [34]:
df

Unnamed: 0,Candidate 0,Candidate 1,Accuracy,TPR (recall),TNR,PPV (precision),NPV,F1
0,Жириновский,Зюганов,61.8,42.4,79.9,66.3,59.8,0.517
1,Жириновский,Зюганов,62.3,59.0,65.4,61.4,63.1,0.601
2,Жириновский,Зюганов,64.3,52.5,75.3,66.5,63.0,0.587
3,Жириновский,Навальный,81.4,82.0,80.8,81.0,81.8,0.815
4,Жириновский,Навальный,79.4,79.2,79.5,79.5,79.3,0.793
5,Жириновский,Навальный,82.2,82.2,82.2,82.2,82.2,0.822
6,Жириновский,Путин,70.8,77.5,64.0,68.3,74.0,0.726
7,Жириновский,Путин,66.3,73.5,59.1,64.3,69.1,0.686
8,Жириновский,Путин,71.6,77.4,65.7,69.3,74.4,0.731
9,Жириновский,Ходорковский,75.0,64.4,83.5,75.9,74.4,0.697


In [35]:
dfmean

Unnamed: 0,Candidate 0,Candidate 1,Accuracy,TPR (recall),TNR,PPV (precision),NPV,F1
0,Жириновский,Зюганов,62.7,51.3,73.5,64.7,61.9,0.568
1,Жириновский,Навальный,81.0,81.1,80.8,80.8,81.1,0.81
2,Жириновский,Путин,69.5,76.1,62.9,67.3,72.5,0.714
3,Жириновский,Ходорковский,76.4,69.5,81.9,75.8,77.0,0.724
4,Жириновский,Шойгу,58.4,61.5,55.2,57.9,59.0,0.596
5,Жириновский,Не определились,59.5,65.6,53.3,58.4,60.8,0.618
6,Жириновский,Против всех,74.3,80.1,68.6,71.9,77.6,0.757
7,Зюганов,Навальный,81.7,83.8,79.4,81.3,82.1,0.826
8,Зюганов,Путин,68.5,80.8,55.2,66.4,73.4,0.726
9,Зюганов,Ходорковский,74.7,66.7,81.7,76.0,73.8,0.71


In [36]:
df.to_csv('df.csv', index=False)
dfmean.to_csv('dfmean.csv', index=False)

In [37]:
dfmean_sorted = dfmean.sort_values(by='F1', ascending=False)
dfmean_sorted

Unnamed: 0,Candidate 0,Candidate 1,Accuracy,TPR (recall),TNR,PPV (precision),NPV,F1
13,Навальный,Путин,83.5,90.6,76.4,79.4,89.2,0.846
15,Навальный,Шойгу,84.1,82.1,86.1,85.6,82.8,0.838
7,Зюганов,Навальный,81.7,83.8,79.4,81.3,82.1,0.826
1,Жириновский,Навальный,81.0,81.1,80.8,80.8,81.1,0.81
22,Ходорковский,Шойгу,75.8,80.9,69.5,76.6,74.8,0.787
16,Навальный,Не определились,78.8,77.8,79.8,79.4,78.3,0.786
26,Шойгу,Против всех,77.1,83.1,71.1,74.2,80.9,0.784
12,Зюганов,Против всех,74.7,82.3,66.5,72.5,77.8,0.771
23,Ходорковский,Не определились,72.0,81.8,59.9,71.6,72.8,0.763
6,Жириновский,Против всех,74.3,80.1,68.6,71.9,77.6,0.757


In [38]:
dfmean_sorted.to_csv('dfmean_sorted.csv', index=False)

In [17]:
keys_eng = ['Zhirinovsky', 'Zyuganov', 'Navalny', 'Putin', 'Khodorkovsky', 'Shoigu', 'Undecided', 'Against all']

In [11]:
df_eng = df.replace(to_replace=keys, value=keys_eng)
df_eng

Unnamed: 0,Candidate 0,Candidate 1,Accuracy,TPR (recall),TNR,PPV (precision),NPV,F1
0,Zhirinovsky,Zyuganov,61.8,42.4,79.9,66.3,59.8,0.517
1,Zhirinovsky,Zyuganov,62.3,59.0,65.4,61.4,63.1,0.601
2,Zhirinovsky,Zyuganov,64.3,52.5,75.3,66.5,63.0,0.587
3,Zhirinovsky,Navalny,81.4,82.0,80.8,81.0,81.8,0.815
4,Zhirinovsky,Navalny,79.4,79.2,79.5,79.5,79.3,0.793
5,Zhirinovsky,Navalny,82.2,82.2,82.2,82.2,82.2,0.822
6,Zhirinovsky,Putin,70.8,77.5,64.0,68.3,74.0,0.726
7,Zhirinovsky,Putin,66.3,73.5,59.1,64.3,69.1,0.686
8,Zhirinovsky,Putin,71.6,77.4,65.7,69.3,74.4,0.731
9,Zhirinovsky,Khodorkovsky,75.0,64.4,83.5,75.9,74.4,0.697


In [12]:
dfmean_eng = dfmean.replace(to_replace=keys, value=keys_eng)
dfmean_eng

Unnamed: 0,Candidate 0,Candidate 1,Accuracy,TPR (recall),TNR,PPV (precision),NPV,F1
0,Zhirinovsky,Zyuganov,62.7,51.3,73.5,64.7,61.9,0.568
1,Zhirinovsky,Navalny,81.0,81.1,80.8,80.8,81.1,0.81
2,Zhirinovsky,Putin,69.5,76.1,62.9,67.3,72.5,0.714
3,Zhirinovsky,Khodorkovsky,76.4,69.5,81.9,75.8,77.0,0.724
4,Zhirinovsky,Shoigu,58.4,61.5,55.2,57.9,59.0,0.596
5,Zhirinovsky,Don't know,59.5,65.6,53.3,58.4,60.8,0.618
6,Zhirinovsky,Against all,74.3,80.1,68.6,71.9,77.6,0.757
7,Zyuganov,Navalny,81.7,83.8,79.4,81.3,82.1,0.826
8,Zyuganov,Putin,68.5,80.8,55.2,66.4,73.4,0.726
9,Zyuganov,Khodorkovsky,74.7,66.7,81.7,76.0,73.8,0.71


In [13]:
dfmean_sorted_eng = dfmean_sorted.replace(to_replace=keys, value=keys_eng)
dfmean_sorted_eng

Unnamed: 0,Candidate 0,Candidate 1,Accuracy,TPR (recall),TNR,PPV (precision),NPV,F1
0,Navalny,Putin,83.5,90.6,76.4,79.4,89.2,0.846
1,Navalny,Shoigu,84.1,82.1,86.1,85.6,82.8,0.838
2,Zyuganov,Navalny,81.7,83.8,79.4,81.3,82.1,0.826
3,Zhirinovsky,Navalny,81.0,81.1,80.8,80.8,81.1,0.81
4,Khodorkovsky,Shoigu,75.8,80.9,69.5,76.6,74.8,0.787
5,Navalny,Don't know,78.8,77.8,79.8,79.4,78.3,0.786
6,Shoigu,Against all,77.1,83.1,71.1,74.2,80.9,0.784
7,Zyuganov,Against all,74.7,82.3,66.5,72.5,77.8,0.771
8,Khodorkovsky,Don't know,72.0,81.8,59.9,71.6,72.8,0.763
9,Zhirinovsky,Against all,74.3,80.1,68.6,71.9,77.6,0.757


In [16]:
dfmean_sorted_eng.to_csv('Results_mean_sorted.csv', index=False)
dfmean_eng.to_csv('Results_mean.csv', index=False)
df_eng.to_csv('Results.csv', index=False)