<a href="https://colab.research.google.com/github/YNNJN/colorPool_Data/blob/master/%EB%8D%B0%EC%9D%B4%ED%84%B0%EC%A0%84%EC%B2%98%EB%A6%AC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 0. 데이터 로드 & 상위 데이터 추출

- COMPLETE : 각 배색에 대한 조화도 평점을 기준으로, 내림차순 정렬하여 상위 3000개를 데이터 프레임으로 추출 완료 !
    - 상위 3000개 데이터의 조화도 평균 평점 : 4.35, 표준편차 : 0.26
- TODO : 데이터프레임에 포함된 색채 각각을 색공간 내에서 가장 가까운 색채 표본으로 근사

In [None]:
!git clone https://github.com/YNNJN/colorPool_Data.git

In [None]:
!ls -ltr

In [None]:
# connect to google dirve

from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

rgbData = pd.read_csv('gdrive/My Drive/colorPOOL/kulerData/kulerData_data.csv', names=['R1', 'R2', 'R3', 'R4', 'R5', 'G1', 'G2', 'G3', 'G4', 'G5', 'B1', 'B2', 'B3', 'B4', 'B5' ])
ratingData = pd.read_csv('gdrive/My Drive/colorPOOL/kulerData/kulerData_targets.csv', names=['rating'])

rgbRatingData = pd.concat([rgbData, ratingData], axis=1)

In [None]:
df = pd.DataFrame(rgbRatingData)
df_sorted = df.sort_values(by=['rating'], ascending=[False])
df_extracted = df_sorted[0:3000]
df_rgbExtracted = df_extracted.drop(df_extracted.columns[-1], axis=1)

df_rgbExtracted

# 1. 표본 색채 설정 & 색차 허용치 보정
- 표본 색채 설정 이유
    - Kuler에서 활용되는 색채들 중 서로 비슷한 색들을 근사하여 분석하기 위해, 인지적으로 균등한 표본 색채를 설정함
- 색차 허용치 보정 이유
    - 색차 표본간의 거리를 사람이 인식하는 색차와 균등하게 보정하기 위함

#### McDonald의 색차 계산식 이용
- 이유 : CIE L*a*b 색공간 내에서의 기하학적 거리 ≠ 사람의 인지적 색차
    - 방법 : 색차허용치 = 색공간 내 기하학적 거리/(1+0.022(채도값)) = 6.35
    - 의의 : 색채 표본간의 거리를 사람이 인식하는 색차와 균등하게 보정 완료
- 진행과정 :

## 1) 데이터 변환
- RGB → XYZ → CIE L*a*b

In [6]:
# 데이터프레임 구조 변환 3000*15 => 15000*3

df1=df_rgbExtracted.iloc[:,[0,5,10]].reset_index(drop=True).rename(columns={"R1":'R',"G1":'G',"B1":'B'})
df2=df_rgbExtracted.iloc[:,[1,6,11]].reset_index(drop=True).rename(columns={"R2":'R',"G2":'G',"B2":'B'})
df3=df_rgbExtracted.iloc[:,[2,7,12]].reset_index(drop=True).rename(columns={"R3":'R',"G3":'G',"B3":'B'})
df4=df_rgbExtracted.iloc[:,[3,8,13]].reset_index(drop=True).rename(columns={"R4":'R',"G4":'G',"B4":'B'})
df5=df_rgbExtracted.iloc[:,[4,9,14]].reset_index(drop=True).rename(columns={"R5":'R',"G5":'G',"B5":'B'})

df_all=pd.concat([df1,df2,df3,df4,df4], axis=0).reset_index(drop=True)
df_all

Unnamed: 0,R,G,B
0,0.43922,0.28627,0.172550
1,0.05098,0.05098,0.050980
2,0.93333,0.56863,0.058824
3,0.76863,0.00000,0.000000
4,1.00000,0.98431,0.839220
...,...,...,...
14995,0.34510,0.80000,0.623530
14996,0.15294,0.17255,0.188240
14997,0.47451,0.65098,0.090196
14998,0.94902,0.56863,0.239220


In [7]:
!pip install colormath

Collecting colormath
  Downloading https://files.pythonhosted.org/packages/ce/cf/70ea34103a76cc6fb1892289bda321cd0cc73b1a5500ee7fe9ef9f64acef/colormath-3.0.0.tar.gz
Building wheels for collected packages: colormath
  Building wheel for colormath (setup.py) ... [?25l[?25hdone
  Created wheel for colormath: filename=colormath-3.0.0-cp36-none-any.whl size=39392 sha256=adbac26c9def99c7a0e7e980d142e0c9687889a58db31087acf432d9d29ced58
  Stored in directory: /root/.cache/pip/wheels/20/49/7b/9635bccd6136cc713f623caa85ced89c0c63878f8b2a58fe99
Successfully built colormath
Installing collected packages: colormath
Successfully installed colormath-3.0.0


In [8]:
# RGB → XYZ

from colormath.color_objects import XYZColor, sRGBColor
from colormath.color_conversions import convert_color

x=[]
y=[]
z=[]

for i in range(len(df_all)):
    rgb = sRGBColor(df_all['R'][i], df_all['G'][i], df_all['B'][i])
    xyz = convert_color(rgb, XYZColor, target_illuminant='d50') 
    x.append(xyz.xyz_x)
    y.append(xyz.xyz_y)
    z.append(xyz.xyz_z)

In [9]:
xyz=pd.DataFrame({"X":x,"Y":y,"Z":z})
xyz.head()

Unnamed: 0,X,Y,Z
0,0.099912,0.085339,0.026717
1,0.003881,0.004025,0.003322
2,0.482525,0.3935,0.042819
3,0.240701,0.122816,0.00769
4,0.903736,0.954817,0.587924


In [10]:
!pip install -q colour-science

[K     |████████████████████████████████| 1.6MB 2.9MB/s 
[?25h

In [11]:
# XYZ → CIE Lab

import colour

l=[]
a=[]
b=[]

for i in range(len(xyz)):
    lab = colour.XYZ_to_Lab([xyz['X'][i], xyz['Y'][i], xyz['Z'][i]])

    l.append(lab[0])
    a.append(lab[1])
    b.append(lab[2])

In [12]:
lab = pd.DataFrame({"L":l,"a":a,"b":b})
lab

Unnamed: 0,L,a,b
0,35.070886,15.841340,29.939625
1,3.635469,0.226806,1.517953
2,69.004011,32.474404,78.551124
3,41.660251,67.802908,60.830670
4,98.225945,-0.683832,34.091692
...,...,...,...
14995,74.546928,-40.763407,24.887424
14996,17.627638,-0.797361,2.030083
14997,63.046363,-28.491513,66.729849
14998,69.708712,34.969050,66.456883


## 2) 컬러 데이터 양자화


In [13]:
# quantize

def quant(x):
    if x % 5 == 0:
        return int(x)
    else:
        return int(int((x + 5) / 5) * 5)

quant_l = []
quant_a = []
quant_b = []

for i in range(len(lab)):
    quant_l.append(quant(lab.L[i]))
    quant_a.append(quant(lab.a[i]))
    quant_b.append(quant(lab.b[i]))

In [14]:
quant_lab = pd.DataFrame({"L" : quant_l, "a" : quant_a, "b": quant_b})
quant_lab

Unnamed: 0,L,a,b
0,40,20,30
1,5,5,5
2,70,35,80
3,45,70,65
4,100,0,35
...,...,...,...
14995,75,-35,25
14996,20,0,5
14997,65,-20,70
14998,70,35,70


In [15]:
# 적정 dEh 값과 비교하기 위한 테스트 진행
# Cab = (a ** 2 + b ** 2) ** 1/2
# dEh = dE / (1 + 0.022 * Cab)

dE = []

for i in range(len(lab)):
    previous = [lab.L[i], lab.a[i], lab.b[i]]
    next = [quant_lab.L[i], quant_lab.a[i], quant_lab.b[i],]

    de = colour.delta_E(previous, next, method='CIE 2000')
    dE.append(de)

In [16]:
cab = []

for i in range(len(lab)):
    cal = (lab.a[i] ** 2 + lab.b[i] ** 2) ** 1/2
    cab.append(cal)

In [18]:
tmp = pd.DataFrame({"dE": dE, "Cab" : cab})
tmp

Unnamed: 0,dE,Cab
0,5.069017,573.664600
1,6.630312,1.177812
2,1.347346,3612.433003
3,3.300368,4148.802352
4,1.208111,581.355545
...,...,...
14995,2.321504,1140.519609
14996,3.280936,2.378510
14997,4.923121,2632.319556
14998,1.198079,2819.675865


In [19]:
deh = []
for i in range(len(tmp)):
    cal = tmp.dE[i] / (1 + 0.022 * tmp.Cab[i])
    deh.append(cal)

In [21]:
deh = pd.DataFrame(data=deh, columns=['dEh'])
deh

Unnamed: 0,dEh
0,0.372158
1,6.462847
2,0.016743
3,0.035767
4,0.087609
...,...
14995,0.088976
14996,3.117790
14997,0.083569
14998,0.019007


In [22]:
# 평균
deh.mean() # 0.914265

dEh    0.914265
dtype: float64

In [23]:
# 분산
deh.var() # 1.934336

dEh    1.934336
dtype: float64