## ラベルとデータセットの構築

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
from tqdm import tqdm

In [6]:
tqdm.pandas()

In [3]:
with open('../data/fer_with_landmarks.csv', 'r') as f:
    datas = pd.read_csv(f)
datas = datas.dropna(subset=['landmarks'])
datas

Unnamed: 0.1,Unnamed: 0,landmarks,pixels,neutral,happiness,surprise,sadness,anger,disgust,fear,contempt,unknown
0,0,"{""chin"": [[-1, 15], [-2, 22], [-2, 28], [-1, 3...",70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,4,0,0,1,3,2,0,0,0
1,1,"{""chin"": [[5, 25], [6, 29], [7, 33], [9, 37], ...",151 150 147 155 148 133 111 140 170 174 182 15...,6,0,1,1,0,0,0,0,2
2,2,"{""chin"": [[7, 13], [5, 17], [4, 22], [3, 27], ...",231 212 156 164 174 138 161 173 182 200 106 38...,5,0,0,3,1,0,0,0,1
4,4,"{""chin"": [[2, 20], [3, 25], [4, 30], [5, 35], ...",4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,9,0,0,1,0,0,0,0,0
5,5,"{""chin"": [[3, 23], [4, 28], [4, 33], [5, 38], ...",55 55 55 55 55 54 60 68 54 85 151 163 170 179 ...,6,0,0,1,0,0,1,1,1
7,7,"{""chin"": [[10, 22], [10, 25], [10, 28], [11, 3...",77 78 79 79 78 75 60 55 47 48 58 73 77 79 57 5...,0,10,0,0,0,0,0,0,0
8,8,"{""chin"": [[3, 20], [4, 25], [6, 29], [8, 34], ...",85 84 90 121 101 102 133 153 153 169 177 189 1...,0,10,0,0,0,0,0,0,0
9,9,"{""chin"": [[4, 20], [4, 26], [5, 31], [7, 37], ...",255 254 255 254 254 179 122 107 95 124 149 150...,0,0,6,0,0,0,4,0,0
10,10,"{""chin"": [[3, 18], [3, 24], [3, 30], [4, 36], ...",30 24 21 23 25 25 49 67 84 103 120 125 130 139...,2,0,0,0,8,0,0,0,0
11,11,"{""chin"": [[4, 22], [5, 27], [7, 32], [9, 37], ...",39 75 78 58 58 45 49 48 103 156 81 45 41 38 49...,10,0,0,0,0,0,0,0,0


In [5]:
def get_features(landmarks_str):
    landmarks = json.loads(landmarks_str)
    features = []
    for parts_name in ['left_eyebrow', 'right_eyebrow', 'left_eye', 'right_eye', 'top_lip', 'bottom_lip']:
        features.append(landmarks[parts_name])
    return json.dumps(features)

In [7]:
datas['features'] = datas['landmarks'].progress_map(get_features)
datas

100%|██████████| 24796/24796 [00:01<00:00, 16264.16it/s]


Unnamed: 0.1,Unnamed: 0,landmarks,pixels,neutral,happiness,surprise,sadness,anger,disgust,fear,contempt,unknown,features
0,0,"{""chin"": [[-1, 15], [-2, 22], [-2, 28], [-1, 3...",70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,4,0,0,1,3,2,0,0,0,"[[[9, 14], [13, 12], [17, 12], [21, 14], [25, ..."
1,1,"{""chin"": [[5, 25], [6, 29], [7, 33], [9, 37], ...",151 150 147 155 148 133 111 140 170 174 182 15...,6,0,1,1,0,0,0,0,2,"[[[6, 18], [8, 16], [11, 15], [15, 15], [19, 1..."
2,2,"{""chin"": [[7, 13], [5, 17], [4, 22], [3, 27], ...",231 212 156 164 174 138 161 173 182 200 106 38...,5,0,0,3,1,0,0,0,1,"[[[9, 10], [12, 9], [16, 11], [19, 12], [21, 1..."
4,4,"{""chin"": [[2, 20], [3, 25], [4, 30], [5, 35], ...",4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,9,0,0,1,0,0,0,0,0,"[[[6, 14], [9, 12], [12, 11], [16, 11], [20, 1..."
5,5,"{""chin"": [[3, 23], [4, 28], [4, 33], [5, 38], ...",55 55 55 55 55 54 60 68 54 85 151 163 170 179 ...,6,0,0,1,0,0,1,1,1,"[[[7, 18], [9, 15], [12, 13], [17, 13], [20, 1..."
7,7,"{""chin"": [[10, 22], [10, 25], [10, 28], [11, 3...",77 78 79 79 78 75 60 55 47 48 58 73 77 79 57 5...,0,10,0,0,0,0,0,0,0,"[[[10, 19], [11, 18], [13, 17], [15, 17], [17,..."
8,8,"{""chin"": [[3, 20], [4, 25], [6, 29], [8, 34], ...",85 84 90 121 101 102 133 153 153 169 177 189 1...,0,10,0,0,0,0,0,0,0,"[[[5, 17], [7, 15], [10, 14], [13, 15], [17, 1..."
9,9,"{""chin"": [[4, 20], [4, 26], [5, 31], [7, 37], ...",255 254 255 254 254 179 122 107 95 124 149 150...,0,0,6,0,0,0,4,0,0,"[[[6, 13], [9, 10], [12, 9], [15, 10], [19, 12..."
10,10,"{""chin"": [[3, 18], [3, 24], [3, 30], [4, 36], ...",30 24 21 23 25 25 49 67 84 103 120 125 130 139...,2,0,0,0,8,0,0,0,0,"[[[5, 16], [9, 13], [14, 13], [18, 14], [23, 1..."
11,11,"{""chin"": [[4, 22], [5, 27], [7, 32], [9, 37], ...",39 75 78 58 58 45 49 48 103 156 81 45 41 38 49...,10,0,0,0,0,0,0,0,0,"[[[6, 17], [8, 14], [11, 13], [15, 13], [19, 1..."


In [8]:
features = datas['features']

In [23]:
tmp = np.array([np.array(feature) for feature in json.loads(features[900])])
for t in tmp:
    print(t.shape)

(5, 2)
(5, 2)
(6, 2)
(6, 2)
(12, 2)
(12, 2)


In [24]:
def points_min_max_norm(points):
    return (points - points.min(axis=0)) / (points.max(axis=0) - points.min(axis=0))

In [34]:
def feature_normalize(features_str):
    features = json.loads(features_str)
    features = [np.array(feature) for feature in features]
    features_norm = list(map(points_min_max_norm, features))
    features_norm = list(map(lambda x: x.tolist(), features_norm))
    return json.dumps(features_norm)

In [36]:
datas['features_norm'] = datas['features'].progress_map(feature_normalize)

  from ipykernel import kernelapp as app
100%|██████████| 24796/24796 [00:05<00:00, 4793.02it/s]


In [37]:
datas

Unnamed: 0.1,Unnamed: 0,landmarks,pixels,neutral,happiness,surprise,sadness,anger,disgust,fear,contempt,unknown,features,features_norm
0,0,"{""chin"": [[-1, 15], [-2, 22], [-2, 28], [-1, 3...",70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,4,0,0,1,3,2,0,0,0,"[[[9, 14], [13, 12], [17, 12], [21, 14], [25, ...","[[[0.0, 0.6666666666666666], [0.25, 0.0], [0.5..."
1,1,"{""chin"": [[5, 25], [6, 29], [7, 33], [9, 37], ...",151 150 147 155 148 133 111 140 170 174 182 15...,6,0,1,1,0,0,0,0,2,"[[[6, 18], [8, 16], [11, 15], [15, 15], [19, 1...","[[[0.0, 1.0], [0.15384615384615385, 0.33333333..."
2,2,"{""chin"": [[7, 13], [5, 17], [4, 22], [3, 27], ...",231 212 156 164 174 138 161 173 182 200 106 38...,5,0,0,3,1,0,0,0,1,"[[[9, 10], [12, 9], [16, 11], [19, 12], [21, 1...","[[[0.0, 0.16666666666666666], [0.25, 0.0], [0...."
4,4,"{""chin"": [[2, 20], [3, 25], [4, 30], [5, 35], ...",4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,9,0,0,1,0,0,0,0,0,"[[[6, 14], [9, 12], [12, 11], [16, 11], [20, 1...","[[[0.0, 1.0], [0.21428571428571427, 0.33333333..."
5,5,"{""chin"": [[3, 23], [4, 28], [4, 33], [5, 38], ...",55 55 55 55 55 54 60 68 54 85 151 163 170 179 ...,6,0,0,1,0,0,1,1,1,"[[[7, 18], [9, 15], [12, 13], [17, 13], [20, 1...","[[[0.0, 1.0], [0.15384615384615385, 0.4], [0.3..."
7,7,"{""chin"": [[10, 22], [10, 25], [10, 28], [11, 3...",77 78 79 79 78 75 60 55 47 48 58 73 77 79 57 5...,0,10,0,0,0,0,0,0,0,"[[[10, 19], [11, 18], [13, 17], [15, 17], [17,...","[[[0.0, 1.0], [0.14285714285714285, 0.5], [0.4..."
8,8,"{""chin"": [[3, 20], [4, 25], [6, 29], [8, 34], ...",85 84 90 121 101 102 133 153 153 169 177 189 1...,0,10,0,0,0,0,0,0,0,"[[[5, 17], [7, 15], [10, 14], [13, 15], [17, 1...","[[[0.0, 1.0], [0.16666666666666666, 0.33333333..."
9,9,"{""chin"": [[4, 20], [4, 26], [5, 31], [7, 37], ...",255 254 255 254 254 179 122 107 95 124 149 150...,0,0,6,0,0,0,4,0,0,"[[[6, 13], [9, 10], [12, 9], [15, 10], [19, 12...","[[[0.0, 1.0], [0.23076923076923078, 0.25], [0...."
10,10,"{""chin"": [[3, 18], [3, 24], [3, 30], [4, 36], ...",30 24 21 23 25 25 49 67 84 103 120 125 130 139...,2,0,0,0,8,0,0,0,0,"[[[5, 16], [9, 13], [14, 13], [18, 14], [23, 1...","[[[0.0, 1.0], [0.2222222222222222, 0.0], [0.5,..."
11,11,"{""chin"": [[4, 22], [5, 27], [7, 32], [9, 37], ...",39 75 78 58 58 45 49 48 103 156 81 45 41 38 49...,10,0,0,0,0,0,0,0,0,"[[[6, 17], [8, 14], [11, 13], [15, 13], [19, 1...","[[[0.0, 1.0], [0.15384615384615385, 0.25], [0...."


In [39]:
data_neutral = datas[datas['neutral'] >= 6]
data_happiness = datas[datas['happiness'] >= 6]
data_anger = datas[datas['anger'] >= 5]
print(len(data_neutral))
print(len(data_happiness))
print(len(data_anger))

6723
6870
1670


In [41]:
data_neutral['label'] = 0
data_happiness['label'] = 1
data_anger['label'] = 2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [45]:
data_learn_dataset = pd.concat([data_neutral, data_happiness, data_anger])[['label', 'features_norm']]
data_learn_dataset

Unnamed: 0,label,features_norm
1,0,"[[[0.0, 1.0], [0.15384615384615385, 0.33333333..."
4,0,"[[[0.0, 1.0], [0.21428571428571427, 0.33333333..."
5,0,"[[[0.0, 1.0], [0.15384615384615385, 0.4], [0.3..."
11,0,"[[[0.0, 1.0], [0.15384615384615385, 0.25], [0...."
13,0,"[[[0.0, 1.0], [0.23529411764705882, 0.25], [0...."
19,0,"[[[0.0, 1.0], [0.16666666666666666, 0.0], [0.3..."
31,0,"[[[0.0, 1.0], [0.2727272727272727, 0.5], [0.45..."
33,0,"[[[0.0, 1.0], [0.25, 0.3333333333333333], [0.5..."
35,0,"[[[0.0, 0.5], [0.23076923076923078, 0.0], [0.4..."
36,0,"[[[0.0, 1.0], [0.21428571428571427, 0.25], [0...."


In [46]:
with open('../data/lean_data.csv', 'w') as f:
    data_learn_dataset.to_csv(f, index=None)