In [None]:
# I used code provided in Karan Bhanot's Medium article below to extract color in this notebook
# (https://towardsdatascience.com/color-identification-in-images-machine-learning-application-b26e770c4c71)

### Imports

In [1]:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import numpy as np
import cv2
from collections import Counter
from skimage.color import rgb2lab, deltaE_cie76
import os
import time
from time import sleep
from tqdm import tqdm

%matplotlib inline

In [2]:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import argparse
import cv2
import numpy as np
import pandas as pd

### Collecting Images and RGB Values with CV2

In [3]:
def get_image(image_path):
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return image

In [4]:
IMAGE_DIRECTORY = 'images'

images = []

for file in os.listdir(IMAGE_DIRECTORY):
    if not file.startswith('.'):
        image_dict = {}
        image_dict['array'] = get_image(os.path.join(IMAGE_DIRECTORY, file))
        image_dict['name'] = file 
        images.append(image_dict)

In [5]:
images_df = pd.DataFrame(images)

In [6]:
len(images_df)

2078

In [7]:
images_df.head()

Unnamed: 0,array,name
0,"[[[238, 240, 219], [244, 246, 225], [250, 252,...",january 2019 .jpg
1,"[[[167, 150, 132], [178, 161, 143], [190, 173,...",19080102.jpg
2,"[[[206, 191, 160], [206, 191, 160], [205, 190,...",19080109.jpg
3,"[[[58, 93, 86], [55, 90, 83], [53, 86, 79], [5...",19080116.jpg
4,"[[[216, 205, 177], [218, 207, 179], [221, 210,...",19080123.jpg


In [8]:
def image_reshape(image_list): 
    for i in range(len(image_list)): 
        image_list[i] = image_list[i].reshape((image_list[i].shape[0] * image_list[i].shape[1], 3))
    return image_list

In [9]:
image_reshape(images_df['array'])

0       [[238, 240, 219], [244, 246, 225], [250, 252, ...
1       [[167, 150, 132], [178, 161, 143], [190, 173, ...
2       [[206, 191, 160], [206, 191, 160], [205, 190, ...
3       [[58, 93, 86], [55, 90, 83], [53, 86, 79], [52...
4       [[216, 205, 177], [218, 207, 179], [221, 210, ...
                              ...                        
2073    [[248, 251, 234], [233, 236, 219], [229, 233, ...
2074    [[216, 198, 184], [216, 198, 184], [216, 198, ...
2075    [[121, 110, 108], [121, 110, 108], [121, 110, ...
2076    [[163, 153, 152], [163, 153, 152], [169, 161, ...
2077    [[84, 79, 73], [84, 79, 73], [84, 79, 73], [84...
Name: array, Length: 2078, dtype: object

In [10]:
images_df['array'][2000].shape

(125580, 3)

In [11]:
images_df.shape

(2078, 2)

### Pulling Color from an Image with KMeans

In [12]:
my_list = list(range(100))

# Code adapted from Karan Bhanot's function in "Color Identification in Images" Medium post

# using 8 clusters since it returns the most unique colors for each image 

def get_colors(image_list):
    for x in range(len(image_list)):
        clf = KMeans(n_clusters = 8)
        labels = clf.fit_predict(image_list[x])
        counts = Counter(labels)
        center_colors = clf.cluster_centers_
        
        frequency = list(Counter(labels).values())
    
        total_pixels = image_list[x].shape[0]*image_list[x].shape[1]
        
    # get ordered colors by iterating through the keys
        ordered_colors = [center_colors[i] for i in counts.keys()]
        rgb_colors = [ordered_colors[i] for i in counts.keys()]
    
    # progress bar
        with tqdm(total=len(my_list)) as pbar:
            for i in my_list:
                pbar.update(1)

    return rgb_colors, frequency

In [13]:
out3 = [get_colors([e]) for e in images_df['array']]

100%|██████████| 100/100 [00:00<00:00, 69304.43it/s]
100%|██████████| 100/100 [00:00<00:00, 102877.21it/s]
100%|██████████| 100/100 [00:00<00:00, 169466.83it/s]
100%|██████████| 100/100 [00:00<00:00, 132104.06it/s]
100%|██████████| 100/100 [00:00<00:00, 142663.40it/s]
100%|██████████| 100/100 [00:00<00:00, 173318.35it/s]
100%|██████████| 100/100 [00:00<00:00, 213668.06it/s]
100%|██████████| 100/100 [00:00<00:00, 152243.34it/s]
100%|██████████| 100/100 [00:00<00:00, 168108.38it/s]
100%|██████████| 100/100 [00:00<00:00, 87636.94it/s]
100%|██████████| 100/100 [00:00<00:00, 156737.82it/s]
100%|██████████| 100/100 [00:00<00:00, 110609.28it/s]
100%|██████████| 100/100 [00:00<00:00, 187580.68it/s]
100%|██████████| 100/100 [00:00<00:00, 130704.39it/s]
100%|██████████| 100/100 [00:00<00:00, 154771.37it/s]
100%|██████████| 100/100 [00:00<00:00, 87199.67it/s]
100%|██████████| 100/100 [00:00<00:00, 8441.79it/s]
100%|██████████| 100/100 [00:00<00:00, 151747.61it/s]
100%|██████████| 100/100 [00:00<0

100%|██████████| 100/100 [00:00<00:00, 203212.40it/s]
100%|██████████| 100/100 [00:00<00:00, 123616.39it/s]
100%|██████████| 100/100 [00:00<00:00, 357266.10it/s]
100%|██████████| 100/100 [00:00<00:00, 298314.65it/s]
100%|██████████| 100/100 [00:00<00:00, 183799.47it/s]
100%|██████████| 100/100 [00:00<00:00, 373158.72it/s]
100%|██████████| 100/100 [00:00<00:00, 217321.45it/s]
100%|██████████| 100/100 [00:00<00:00, 214652.20it/s]
100%|██████████| 100/100 [00:00<00:00, 129174.75it/s]
100%|██████████| 100/100 [00:00<00:00, 115323.18it/s]
100%|██████████| 100/100 [00:00<00:00, 161257.36it/s]
100%|██████████| 100/100 [00:00<00:00, 206717.79it/s]
100%|██████████| 100/100 [00:00<00:00, 127563.99it/s]
100%|██████████| 100/100 [00:00<00:00, 349816.85it/s]
100%|██████████| 100/100 [00:00<00:00, 308858.91it/s]
100%|██████████| 100/100 [00:00<00:00, 183157.38it/s]
100%|██████████| 100/100 [00:00<00:00, 220289.08it/s]
100%|██████████| 100/100 [00:00<00:00, 129214.54it/s]
100%|██████████| 100/100 [00

100%|██████████| 100/100 [00:00<00:00, 119021.11it/s]
100%|██████████| 100/100 [00:00<00:00, 196178.86it/s]
100%|██████████| 100/100 [00:00<00:00, 123144.57it/s]
100%|██████████| 100/100 [00:00<00:00, 115609.26it/s]
100%|██████████| 100/100 [00:00<00:00, 60358.38it/s]
100%|██████████| 100/100 [00:00<00:00, 133321.81it/s]
100%|██████████| 100/100 [00:00<00:00, 100921.66it/s]
100%|██████████| 100/100 [00:00<00:00, 116573.21it/s]
100%|██████████| 100/100 [00:00<00:00, 339894.98it/s]
100%|██████████| 100/100 [00:00<00:00, 296626.87it/s]
100%|██████████| 100/100 [00:00<00:00, 173965.33it/s]
100%|██████████| 100/100 [00:00<00:00, 214982.27it/s]
100%|██████████| 100/100 [00:00<00:00, 82989.79it/s]
100%|██████████| 100/100 [00:00<00:00, 292285.99it/s]
100%|██████████| 100/100 [00:00<00:00, 30700.51it/s]
100%|██████████| 100/100 [00:00<00:00, 236432.02it/s]
100%|██████████| 100/100 [00:00<00:00, 147530.92it/s]
100%|██████████| 100/100 [00:00<00:00, 403298.46it/s]
100%|██████████| 100/100 [00:00

100%|██████████| 100/100 [00:00<00:00, 99015.68it/s]
100%|██████████| 100/100 [00:00<00:00, 229950.88it/s]
100%|██████████| 100/100 [00:00<00:00, 173318.35it/s]
100%|██████████| 100/100 [00:00<00:00, 184527.23it/s]
100%|██████████| 100/100 [00:00<00:00, 166176.86it/s]
100%|██████████| 100/100 [00:00<00:00, 147065.36it/s]
100%|██████████| 100/100 [00:00<00:00, 181414.53it/s]
100%|██████████| 100/100 [00:00<00:00, 286300.61it/s]
100%|██████████| 100/100 [00:00<00:00, 19223.17it/s]
100%|██████████| 100/100 [00:00<00:00, 151309.67it/s]
100%|██████████| 100/100 [00:00<00:00, 115196.48it/s]
100%|██████████| 100/100 [00:00<00:00, 109056.27it/s]
100%|██████████| 100/100 [00:00<00:00, 103435.36it/s]
100%|██████████| 100/100 [00:00<00:00, 139345.65it/s]
100%|██████████| 100/100 [00:00<00:00, 134089.00it/s]
100%|██████████| 100/100 [00:00<00:00, 245712.01it/s]
100%|██████████| 100/100 [00:00<00:00, 139484.67it/s]
100%|██████████| 100/100 [00:00<00:00, 387643.62it/s]
100%|██████████| 100/100 [00:0

100%|██████████| 100/100 [00:00<00:00, 106077.49it/s]
100%|██████████| 100/100 [00:00<00:00, 371835.46it/s]
100%|██████████| 100/100 [00:00<00:00, 133704.30it/s]
100%|██████████| 100/100 [00:00<00:00, 206615.96it/s]
100%|██████████| 100/100 [00:00<00:00, 142614.89it/s]
100%|██████████| 100/100 [00:00<00:00, 186579.36it/s]
100%|██████████| 100/100 [00:00<00:00, 137518.16it/s]
100%|██████████| 100/100 [00:00<00:00, 236432.02it/s]
100%|██████████| 100/100 [00:00<00:00, 171265.99it/s]
100%|██████████| 100/100 [00:00<00:00, 317509.77it/s]
100%|██████████| 100/100 [00:00<00:00, 188592.81it/s]
100%|██████████| 100/100 [00:00<00:00, 377185.61it/s]
100%|██████████| 100/100 [00:00<00:00, 176305.34it/s]
100%|██████████| 100/100 [00:00<00:00, 378547.29it/s]
100%|██████████| 100/100 [00:00<00:00, 274676.10it/s]
100%|██████████| 100/100 [00:00<00:00, 192752.94it/s]
100%|██████████| 100/100 [00:00<00:00, 158037.08it/s]
100%|██████████| 100/100 [00:00<00:00, 207844.60it/s]
100%|██████████| 100/100 [00

100%|██████████| 100/100 [00:00<00:00, 267323.39it/s]
100%|██████████| 100/100 [00:00<00:00, 354548.10it/s]
100%|██████████| 100/100 [00:00<00:00, 408006.23it/s]
100%|██████████| 100/100 [00:00<00:00, 358487.52it/s]
100%|██████████| 100/100 [00:00<00:00, 81888.01it/s]
100%|██████████| 100/100 [00:00<00:00, 147738.78it/s]
100%|██████████| 100/100 [00:00<00:00, 119666.31it/s]
100%|██████████| 100/100 [00:00<00:00, 33088.55it/s]
100%|██████████| 100/100 [00:00<00:00, 105067.74it/s]
100%|██████████| 100/100 [00:00<00:00, 358487.52it/s]
100%|██████████| 100/100 [00:00<00:00, 171546.18it/s]
100%|██████████| 100/100 [00:00<00:00, 137158.40it/s]
100%|██████████| 100/100 [00:00<00:00, 110115.62it/s]
100%|██████████| 100/100 [00:00<00:00, 174544.49it/s]
100%|██████████| 100/100 [00:00<00:00, 124830.48it/s]
100%|██████████| 100/100 [00:00<00:00, 257793.73it/s]
100%|██████████| 100/100 [00:00<00:00, 126258.40it/s]
100%|██████████| 100/100 [00:00<00:00, 54001.60it/s]
100%|██████████| 100/100 [00:00

100%|██████████| 100/100 [00:00<00:00, 219138.14it/s]
100%|██████████| 100/100 [00:00<00:00, 119054.90it/s]
100%|██████████| 100/100 [00:00<00:00, 308631.64it/s]
100%|██████████| 100/100 [00:00<00:00, 299593.14it/s]
100%|██████████| 100/100 [00:00<00:00, 126106.55it/s]
100%|██████████| 100/100 [00:00<00:00, 176157.24it/s]
100%|██████████| 100/100 [00:00<00:00, 399457.52it/s]
100%|██████████| 100/100 [00:00<00:00, 123762.29it/s]
100%|██████████| 100/100 [00:00<00:00, 236298.82it/s]
100%|██████████| 100/100 [00:00<00:00, 111491.33it/s]
100%|██████████| 100/100 [00:00<00:00, 352166.58it/s]
100%|██████████| 100/100 [00:00<00:00, 131236.05it/s]
100%|██████████| 100/100 [00:00<00:00, 94786.53it/s]
100%|██████████| 100/100 [00:00<00:00, 296626.87it/s]
100%|██████████| 100/100 [00:00<00:00, 392357.72it/s]
100%|██████████| 100/100 [00:00<00:00, 213233.55it/s]
100%|██████████| 100/100 [00:00<00:00, 349816.85it/s]
100%|██████████| 100/100 [00:00<00:00, 256532.35it/s]
100%|██████████| 100/100 [00:

In [14]:
len(out3)

2078

In [15]:
out3 # looking at output - confirming both rbg arrays and frequencies were collected 

[([array([130.36040468, 123.82819751, 118.64785595]),
   array([219.47058039, 218.99292862, 212.37811875]),
   array([109.11345776, 100.61777996,  94.01252456]),
   array([196.09892931, 195.23805648, 189.31143948]),
   array([156.16064505, 152.11596757, 146.35909411]),
   array([252.31422647, 252.88033317, 245.36761088]),
   array([176.65623047, 175.48281152, 170.20387522]),
   array([89.81883617, 80.88128548, 74.19028792])],
  [14990, 28454, 17599, 15971, 10775, 11353, 20354, 9864]),
 ([array([157.54506376, 146.45307351, 132.70275111]),
   array([120.19886549, 112.50335789, 102.73991002]),
   array([179.28076383, 164.03306693, 145.5309011 ]),
   array([206.49659783, 193.03576232, 176.38187357]),
   array([105.4711716 ,  98.69625881,  89.78859175]),
   array([90.50604078, 82.70953939, 72.87805185]),
   array([138.64376692, 128.76317465, 116.55524598]),
   array([192.98350525, 178.92697778, 161.38728586])],
  [14000, 16424, 21932, 25278, 15337, 12562, 14621, 7946]),
 ([array([122.286525

In [16]:
out3[0]

([array([130.36040468, 123.82819751, 118.64785595]),
  array([219.47058039, 218.99292862, 212.37811875]),
  array([109.11345776, 100.61777996,  94.01252456]),
  array([196.09892931, 195.23805648, 189.31143948]),
  array([156.16064505, 152.11596757, 146.35909411]),
  array([252.31422647, 252.88033317, 245.36761088]),
  array([176.65623047, 175.48281152, 170.20387522]),
  array([89.81883617, 80.88128548, 74.19028792])],
 [14990, 28454, 17599, 15971, 10775, 11353, 20354, 9864])

In [17]:
# Will Sutton, Data Science instructor at General Assembly, provided the code/loop below 

# this loop will flatten and and create a list of RGB values and frequencies for each image 
# this makes it easier to put the data for each image into it's own row 

tmp = []
for elem in out3:
    z = [(a,b) for a,b in zip(elem[0], elem[1])]
    z.sort(key=lambda x:x[1], reverse=True)
    tmp2 = []
    for subitem in z:
        newz = subitem[0].tolist()
        newz.append(subitem[1])
        tmp2.append(newz)
    tmp2 = np.array(tmp2).flatten().tolist()
    tmp.append(tmp2)

In [18]:
tmp  # confirming the loop output is correct 

[[219.47058038692722,
  218.99292861906915,
  212.37811874581902,
  28454.0,
  176.65623046764003,
  175.4828115233817,
  170.20387522018325,
  20354.0,
  109.11345776034221,
  100.61777996069537,
  94.01252455795463,
  17599.0,
  196.0989293093686,
  195.23805647735776,
  189.31143948406518,
  15971.0,
  130.36040467792114,
  123.82819751252052,
  118.64785594950703,
  14990.0,
  252.31422647082698,
  252.88033316932695,
  245.3676108807358,
  11353.0,
  156.1606450475823,
  152.11596757137235,
  146.35909411350048,
  10775.0,
  89.81883617195352,
  80.88128548255185,
  74.19028791562778,
  9864.0],
 [206.49659783210325,
  193.03576232297613,
  176.38187356596404,
  25278.0,
  179.28076383380863,
  164.0330669269709,
  145.53090110426842,
  21932.0,
  120.1988654886841,
  112.50335789267655,
  102.73991002151132,
  16424.0,
  105.47117160248604,
  98.69625880582595,
  89.7885917515921,
  15337.0,
  138.64376691609348,
  128.76317465371633,
  116.55524597994244,
  14621.0,
  157.545063

In [19]:
tmp[0]

[219.47058038692722,
 218.99292861906915,
 212.37811874581902,
 28454.0,
 176.65623046764003,
 175.4828115233817,
 170.20387522018325,
 20354.0,
 109.11345776034221,
 100.61777996069537,
 94.01252455795463,
 17599.0,
 196.0989293093686,
 195.23805647735776,
 189.31143948406518,
 15971.0,
 130.36040467792114,
 123.82819751252052,
 118.64785594950703,
 14990.0,
 252.31422647082698,
 252.88033316932695,
 245.3676108807358,
 11353.0,
 156.1606450475823,
 152.11596757137235,
 146.35909411350048,
 10775.0,
 89.81883617195352,
 80.88128548255185,
 74.19028791562778,
 9864.0]

### Creating DataFrame for RGB values and Frequencies

In [20]:
# Will Sutton, Data Science instructor at General Assembly, provided the code/loop below 

# this function will put each r,g,b, and frequency (f) value into it's own column 

k=8
def cc3(i):
    if (i+0) % 4 == 0:
        return 'r'
    elif (i+3) % 4 == 0:
        return 'g'
    elif (i+2) % 4 ==0:
        return 'b'
    elif (i+1) % 4 == 0:
        return 'f'
    
cols = [cc3(i) + str(int(i/4)) for i in range(k*4)]
cols

['r0',
 'g0',
 'b0',
 'f0',
 'r1',
 'g1',
 'b1',
 'f1',
 'r2',
 'g2',
 'b2',
 'f2',
 'r3',
 'g3',
 'b3',
 'f3',
 'r4',
 'g4',
 'b4',
 'f4',
 'r5',
 'g5',
 'b5',
 'f5',
 'r6',
 'g6',
 'b6',
 'f6',
 'r7',
 'g7',
 'b7',
 'f7']

In [21]:
df = pd.DataFrame(tmp, columns=cols)

In [22]:
df.head() # confirming the output for the column names is correct 

Unnamed: 0,r0,g0,b0,f0,r1,g1,b1,f1,r2,g2,...,b5,f5,r6,g6,b6,f6,r7,g7,b7,f7
0,219.47058,218.992929,212.378119,28454.0,176.65623,175.482812,170.203875,20354.0,109.113458,100.61778,...,245.367611,11353.0,156.160645,152.115968,146.359094,10775.0,89.818836,80.881285,74.190288,9864.0
1,206.496598,193.035762,176.381874,25278.0,179.280764,164.033067,145.530901,21932.0,120.198865,112.503358,...,132.702751,14000.0,90.506041,82.709539,72.878052,12562.0,192.983505,178.926978,161.387286,7946.0
2,213.239401,203.939856,175.851195,26391.0,122.286526,119.561368,105.70179,20687.0,198.432826,187.972332,...,88.985173,13387.0,228.067468,219.855147,193.5713,12394.0,83.772314,80.510716,67.194656,6812.0
3,44.41053,71.157669,61.167589,30717.0,223.275022,234.645936,224.750355,24177.0,200.461389,212.325185,...,104.050577,10787.0,58.619696,88.030446,78.186187,10527.0,184.428664,196.321197,186.983537,9828.0
4,103.161347,103.430537,96.155619,32433.0,200.08924,198.532863,183.145113,20101.0,239.208429,236.909643,...,118.174323,9586.0,151.909542,151.541018,140.562824,8931.0,77.621831,77.185074,69.36098,6981.0


In [23]:
df.shape

(2078, 32)

In [25]:
df.isnull().sum() 

r0    0
g0    0
b0    0
f0    0
r1    0
g1    0
b1    0
f1    0
r2    0
g2    0
b2    0
f2    0
r3    0
g3    0
b3    0
f3    0
r4    0
g4    0
b4    0
f4    0
r5    0
g5    0
b5    0
f5    0
r6    0
g6    0
b6    0
f6    0
r7    0
g7    0
b7    0
f7    0
dtype: int64

In [24]:
df.to_csv('images_final.csv', index=False) # saving the new dataframe to a csv

In [27]:
df_out3 = pd.DataFrame(out3)  # saving the initial out3 ouput as a csv for safe keeping 

In [28]:
df_out3.head()

Unnamed: 0,0,1
0,"[[130.36040467792114, 123.82819751252052, 118....","[14990, 28454, 17599, 15971, 10775, 11353, 203..."
1,"[[157.5450637627164, 146.45307350623258, 132.7...","[14000, 16424, 21932, 25278, 15337, 12562, 146..."
2,"[[122.2865258870062, 119.56136777788446, 105.7...","[20687, 26391, 12394, 13387, 18658, 14170, 168..."
3,"[[200.46138892338664, 212.32518509326104, 202....","[16509, 10527, 9828, 10787, 12458, 30717, 2417..."
4,"[[151.90954213372774, 151.541018348121, 140.56...","[8931, 32433, 20101, 9586, 17931, 14323, 16974..."


In [29]:
df_out3.to_csv('df_out3', index=False)

### Creating DataFrame for 1950-present

In [None]:
# creating a dataframe with a smaller subset of the data so I can start testing out models/themes
# I will eventually want to apply my work on this data to the large df dataset 

In [31]:
data = pd.read_csv('./all_images_combined_2.csv')

In [34]:
data.head()

Unnamed: 0,date,r0,g0,b0,r1,g1,b1,r2,g2,b2,...,b5,r6,g6,b6,r7,g7,b7,month,year,name
0,2019-01-01,89,80,73,108,100,93,129,122,117,...,189,219,219,212,252,252,245,1,2019,january 2019 .jpg
1,1908-01-02,91,83,73,107,100,91,123,115,105,...,146,193,179,162,206,193,176,1,1908,19080102.jpg
2,1908-01-09,83,80,66,101,99,88,121,119,105,...,158,212,203,175,227,219,193,1,1908,19080109.jpg
3,1908-01-16,45,73,63,61,89,79,97,116,105,...,187,200,212,202,223,234,224,1,1908,19080116.jpg
4,1908-01-23,77,77,69,103,103,96,126,126,118,...,183,227,222,201,239,236,218,1,1908,19080123.jpg


In [39]:
data['date'].head()

0    2019-01-01
1    1908-01-02
2    1908-01-09
3    1908-01-16
4    1908-01-23
Name: date, dtype: object

In [41]:
img_info = data[['date', 'month', 'year', 'name']]

In [42]:
img_info.head()

Unnamed: 0,date,month,year,name
0,2019-01-01,1,2019,january 2019 .jpg
1,1908-01-02,1,1908,19080102.jpg
2,1908-01-09,1,1908,19080109.jpg
3,1908-01-16,1,1908,19080116.jpg
4,1908-01-23,1,1908,19080123.jpg


In [45]:
df_all = pd.concat([img_info, df], axis=1)

In [46]:
df_all.head()

Unnamed: 0,date,month,year,name,r0,g0,b0,f0,r1,g1,...,b5,f5,r6,g6,b6,f6,r7,g7,b7,f7
0,2019-01-01,1,2019,january 2019 .jpg,219.47058,218.992929,212.378119,28454.0,176.65623,175.482812,...,245.367611,11353.0,156.160645,152.115968,146.359094,10775.0,89.818836,80.881285,74.190288,9864.0
1,1908-01-02,1,1908,19080102.jpg,206.496598,193.035762,176.381874,25278.0,179.280764,164.033067,...,132.702751,14000.0,90.506041,82.709539,72.878052,12562.0,192.983505,178.926978,161.387286,7946.0
2,1908-01-09,1,1908,19080109.jpg,213.239401,203.939856,175.851195,26391.0,122.286526,119.561368,...,88.985173,13387.0,228.067468,219.855147,193.5713,12394.0,83.772314,80.510716,67.194656,6812.0
3,1908-01-16,1,1908,19080116.jpg,44.41053,71.157669,61.167589,30717.0,223.275022,234.645936,...,104.050577,10787.0,58.619696,88.030446,78.186187,10527.0,184.428664,196.321197,186.983537,9828.0
4,1908-01-23,1,1908,19080123.jpg,103.161347,103.430537,96.155619,32433.0,200.08924,198.532863,...,118.174323,9586.0,151.909542,151.541018,140.562824,8931.0,77.621831,77.185074,69.36098,6981.0


In [47]:
df_final = pd.concat([img_info, df], axis=1)

In [49]:
df_final.head()

Unnamed: 0,date,month,year,name,r0,g0,b0,f0,r1,g1,...,b5,f5,r6,g6,b6,f6,r7,g7,b7,f7
0,2019-01-01,1,2019,january 2019 .jpg,219.47058,218.992929,212.378119,28454.0,176.65623,175.482812,...,245.367611,11353.0,156.160645,152.115968,146.359094,10775.0,89.818836,80.881285,74.190288,9864.0
1,1908-01-02,1,1908,19080102.jpg,206.496598,193.035762,176.381874,25278.0,179.280764,164.033067,...,132.702751,14000.0,90.506041,82.709539,72.878052,12562.0,192.983505,178.926978,161.387286,7946.0
2,1908-01-09,1,1908,19080109.jpg,213.239401,203.939856,175.851195,26391.0,122.286526,119.561368,...,88.985173,13387.0,228.067468,219.855147,193.5713,12394.0,83.772314,80.510716,67.194656,6812.0
3,1908-01-16,1,1908,19080116.jpg,44.41053,71.157669,61.167589,30717.0,223.275022,234.645936,...,104.050577,10787.0,58.619696,88.030446,78.186187,10527.0,184.428664,196.321197,186.983537,9828.0
4,1908-01-23,1,1908,19080123.jpg,103.161347,103.430537,96.155619,32433.0,200.08924,198.532863,...,118.174323,9586.0,151.909542,151.541018,140.562824,8931.0,77.621831,77.185074,69.36098,6981.0


In [51]:
df_final.drop(df.index[0], inplace=True)

In [52]:
df_final.head()

Unnamed: 0,date,month,year,name,r0,g0,b0,f0,r1,g1,...,b5,f5,r6,g6,b6,f6,r7,g7,b7,f7
1,1908-01-02,1,1908,19080102.jpg,206.496598,193.035762,176.381874,25278.0,179.280764,164.033067,...,132.702751,14000.0,90.506041,82.709539,72.878052,12562.0,192.983505,178.926978,161.387286,7946.0
2,1908-01-09,1,1908,19080109.jpg,213.239401,203.939856,175.851195,26391.0,122.286526,119.561368,...,88.985173,13387.0,228.067468,219.855147,193.5713,12394.0,83.772314,80.510716,67.194656,6812.0
3,1908-01-16,1,1908,19080116.jpg,44.41053,71.157669,61.167589,30717.0,223.275022,234.645936,...,104.050577,10787.0,58.619696,88.030446,78.186187,10527.0,184.428664,196.321197,186.983537,9828.0
4,1908-01-23,1,1908,19080123.jpg,103.161347,103.430537,96.155619,32433.0,200.08924,198.532863,...,118.174323,9586.0,151.909542,151.541018,140.562824,8931.0,77.621831,77.185074,69.36098,6981.0
5,1908-01-30,1,1908,19080130.jpg,219.37173,214.733313,190.633343,41602.0,137.356591,134.308191,...,206.513215,8093.0,192.100053,188.77372,166.756517,7241.0,251.423465,250.216192,224.55543,5725.0


In [53]:
df_final.shape

(2077, 36)

In [61]:
df_final['year'].value_counts()

1908    53
1909    51
1910    28
1929    26
1930    26
        ..
2016    12
2017    12
1973    12
1975    12
2019     7
Name: year, Length: 112, dtype: int64

In [67]:
df_1950 = df_final.drop(df_final[df_final['year'] < 1950 ].index)

In [68]:
df_1950.head()

Unnamed: 0,date,month,year,name,r0,g0,b0,f0,r1,g1,...,b5,f5,r6,g6,b6,f6,r7,g7,b7,f7
1059,1950-01-01,1,1950,19500101.jpg,131.541414,131.055556,120.750505,56603.0,211.670159,211.204689,...,150.231408,3956.0,200.125817,199.735859,191.718527,3812.0,164.49558,100.605108,94.752456,2036.0
1060,1950-02-01,2,1950,19500201.jpg,164.679724,153.260183,126.485867,48448.0,225.865464,219.980391,...,161.55667,4218.0,233.019585,227.780454,215.797791,3486.0,144.474756,71.609868,54.158348,1089.0
1061,1950-02-15,2,1950,19500215.jpg,102.84535,83.28602,52.787148,47713.0,156.747323,141.456103,...,97.857501,8399.0,204.126569,188.173915,153.594262,4733.0,188.914591,173.856003,139.809697,4670.0
1062,1950-03-01,3,1950,19500301.jpg,13.191093,12.109306,18.190105,46334.0,197.867815,184.078241,...,32.34933,3673.0,218.561515,204.202892,189.265602,3082.0,85.108513,70.892369,62.384208,2270.0
1063,1950-03-15,3,1950,19500315.jpg,127.160136,113.416471,108.501408,31224.0,170.853608,151.210379,...,150.363928,8525.0,209.500128,200.652069,191.812448,5916.0,86.422988,77.405029,76.355166,4727.0


In [69]:
df_1950.shape

(1019, 36)

In [70]:
df_1950.dtypes

date      object
month      int64
year       int64
name      object
r0       float64
g0       float64
b0       float64
f0       float64
r1       float64
g1       float64
b1       float64
f1       float64
r2       float64
g2       float64
b2       float64
f2       float64
r3       float64
g3       float64
b3       float64
f3       float64
r4       float64
g4       float64
b4       float64
f4       float64
r5       float64
g5       float64
b5       float64
f5       float64
r6       float64
g6       float64
b6       float64
f6       float64
r7       float64
g7       float64
b7       float64
f7       float64
dtype: object

In [77]:
df_1950[df_1950['year'] == 1951]

Unnamed: 0,date,month,year,name,r0,g0,b0,f0,r1,g1,...,b5,f5,r6,g6,b6,f6,r7,g7,b7,f7
1079,1951-01-01,1,1951,19510101.jpg,119.527829,117.834251,109.62263,52604.0,204.473499,198.948225,...,28.732083,1795.0,216.917045,212.296011,200.301173,1635.0,173.48301,116.032767,79.678398,822.0
1080,1951-02-01,2,1951,19510201.jpg,215.652166,203.127474,182.205792,57598.0,149.445924,127.882337,...,52.634092,3211.0,192.465894,171.052124,138.54601,3109.0,170.572674,149.403984,108.807345,1802.0
1081,1951-02-15,2,1951,19510215.jpg,218.76908,205.642654,182.533223,41090.0,45.318837,40.410946,...,191.731297,11309.0,78.553819,68.646645,60.330765,7876.0,169.79638,149.687618,122.12571,4093.0
1082,1951-03-01,3,1951,19510301.jpg,102.34145,120.888626,142.281592,44883.0,222.087225,198.650982,...,199.554243,6746.0,151.452676,134.679871,115.576843,5266.0,92.41207,92.260528,93.113731,4108.0
1083,1951-03-15,3,1951,19510315.jpg,174.392446,171.688635,160.057986,34484.0,89.636311,103.459661,...,127.680482,9032.0,192.083918,188.344686,174.74521,8566.0,205.495795,201.62861,188.655985,7354.0
1084,1951-04-01,4,1951,19510401.jpg,25.278815,27.019307,33.139387,49772.0,178.621606,145.729378,...,100.430376,8012.0,130.761006,100.794296,82.528844,6816.0,193.539855,160.291447,130.95704,2825.0
1085,1951-04-15,4,1951,19510415.jpg,186.053756,156.743927,94.15857,71192.0,159.638376,123.579838,...,100.819158,6026.0,31.68359,31.652485,18.771183,3403.0,211.277917,190.139713,138.173284,2798.0
1086,1951-05-01,5,1951,19510501.jpg,212.701814,185.135528,157.674968,44424.0,142.74533,121.771196,...,76.453225,6270.0,174.398962,148.865218,123.960296,4202.0,240.608506,227.852697,202.866174,3013.0
1087,1951-05-15,5,1951,19510515.jpg,222.480959,211.036326,201.41653,50102.0,96.176673,73.784645,...,146.333525,10542.0,46.709349,38.999073,39.110699,9993.0,130.930058,107.336676,89.234495,7555.0
1088,1951-06-01,6,1951,19510601.jpg,212.509594,197.935794,188.016486,36266.0,168.488869,85.549977,...,138.479632,11087.0,186.774125,106.261799,84.249225,7599.0,191.043364,146.259045,112.977915,6093.0


In [371]:
data_1950.head()

Unnamed: 0,date,month,year,name,r0,g0,b0,f0,r1,g1,...,b7,f7,color0,color1,color2,color3,color4,color5,color6,color7
0,1950-01-01,1,1950,19500101.jpg,131,131,120,56603,211,211,...,94,2036,[131 131 120],[211 211 201],[45 45 45],[188 187 175],[100 83 78],[166 164 150],[200 199 191],[164 100 94]
1,1950-02-01,2,1950,19500201.jpg,164,153,126,48448,225,219,...,54,1089,[164 153 126],[225 219 207],[132 122 92],[217 210 195],[46 50 81],[195 183 161],[233 227 215],[144 71 54]
2,1950-02-15,2,1950,19500215.jpg,102,83,52,47713,156,141,...,139,4670,[102 83 52],[156 141 104],[62 56 42],[183 132 78],[141 108 64],[202 154 97],[204 188 153],[188 173 139]
3,1950-03-01,3,1950,19500301.jpg,13,12,18,46334,197,184,...,62,2270,[13 12 18],[197 184 169],[210 196 180],[167 142 128],[131 109 95],[34 31 32],[218 204 189],[85 70 62]
4,1950-03-15,3,1950,19500315.jpg,127,113,108,31224,170,151,...,76,4727,[127 113 108],[170 151 138],[198 190 180],[140 136 138],[37 38 43],[188 165 150],[209 200 191],[86 77 76]


In [117]:
df_1950[df_1950.columns[0:]].head()

Unnamed: 0,date,month,year,name,r0,g0,b0,f0,r1,g1,...,b5,f5,r6,g6,b6,f6,r7,g7,b7,f7
1059,1950-01-01,1,1950,19500101.jpg,131.541414,131.055556,120.750505,56603.0,211.670159,211.204689,...,150.231408,3956.0,200.125817,199.735859,191.718527,3812.0,164.49558,100.605108,94.752456,2036.0
1060,1950-02-01,2,1950,19500201.jpg,164.679724,153.260183,126.485867,48448.0,225.865464,219.980391,...,161.55667,4218.0,233.019585,227.780454,215.797791,3486.0,144.474756,71.609868,54.158348,1089.0
1061,1950-02-15,2,1950,19500215.jpg,102.84535,83.28602,52.787148,47713.0,156.747323,141.456103,...,97.857501,8399.0,204.126569,188.173915,153.594262,4733.0,188.914591,173.856003,139.809697,4670.0
1062,1950-03-01,3,1950,19500301.jpg,13.191093,12.109306,18.190105,46334.0,197.867815,184.078241,...,32.34933,3673.0,218.561515,204.202892,189.265602,3082.0,85.108513,70.892369,62.384208,2270.0
1063,1950-03-15,3,1950,19500315.jpg,127.160136,113.416471,108.501408,31224.0,170.853608,151.210379,...,150.363928,8525.0,209.500128,200.652069,191.812448,5916.0,86.422988,77.405029,76.355166,4727.0


In [163]:
df_1950[df_1950.columns[4:]] = df_1950[df_1950.columns[4:]].astype(int)

In [164]:
df_1950.head()

Unnamed: 0,date,month,year,name,r0,g0,b0,f0,r1,g1,...,b5,f5,r6,g6,b6,f6,r7,g7,b7,f7
1059,1950-01-01,1,1950,19500101.jpg,131,131,120,56603,211,211,...,150,3956,200,199,191,3812,164,100,94,2036
1060,1950-02-01,2,1950,19500201.jpg,164,153,126,48448,225,219,...,161,4218,233,227,215,3486,144,71,54,1089
1061,1950-02-15,2,1950,19500215.jpg,102,83,52,47713,156,141,...,97,8399,204,188,153,4733,188,173,139,4670
1062,1950-03-01,3,1950,19500301.jpg,13,12,18,46334,197,184,...,32,3673,218,204,189,3082,85,70,62,2270
1063,1950-03-15,3,1950,19500315.jpg,127,113,108,31224,170,151,...,150,8525,209,200,191,5916,86,77,76,4727


### Function to Collect Color Distance

In [331]:
df_1950.head()

Unnamed: 0,date,month,year,name,r0,g0,b0,f0,r1,g1,...,b7,f7,color0,color1,color2,color3,color4,color5,color6,color7
1059,1950-01-01,1,1950,19500101.jpg,131,131,120,56603,211,211,...,94,2036,"[131, 131, 120]","[211, 211, 201]","[45, 45, 45]","[188, 187, 175]","[100, 83, 78]","[166, 164, 150]","[200, 199, 191]","[164, 100, 94]"
1060,1950-02-01,2,1950,19500201.jpg,164,153,126,48448,225,219,...,54,1089,"[164, 153, 126]","[225, 219, 207]","[132, 122, 92]","[217, 210, 195]","[46, 50, 81]","[195, 183, 161]","[233, 227, 215]","[144, 71, 54]"
1061,1950-02-15,2,1950,19500215.jpg,102,83,52,47713,156,141,...,139,4670,"[102, 83, 52]","[156, 141, 104]","[62, 56, 42]","[183, 132, 78]","[141, 108, 64]","[202, 154, 97]","[204, 188, 153]","[188, 173, 139]"
1062,1950-03-01,3,1950,19500301.jpg,13,12,18,46334,197,184,...,62,2270,"[13, 12, 18]","[197, 184, 169]","[210, 196, 180]","[167, 142, 128]","[131, 109, 95]","[34, 31, 32]","[218, 204, 189]","[85, 70, 62]"
1063,1950-03-15,3,1950,19500315.jpg,127,113,108,31224,170,151,...,76,4727,"[127, 113, 108]","[170, 151, 138]","[198, 190, 180]","[140, 136, 138]","[37, 38, 43]","[188, 165, 150]","[209, 200, 191]","[86, 77, 76]"


In [377]:
# use this one to get color from one array 

def dist1(array, colorlist): 
    r,g,b = array
    min_distance = 1_000_000_000
    for k, v in colorlist.items(): 
        _r,_g,_b = v
        distance = (((r - _r)**2) + ((g - _g)**2) + ((b - _b)**2))**.5 
        if distance < min_distance: 
            min_distance = distance 
            closest_color = k   
        
    return closest_color

In [373]:
def dist2(array, colorlist): 
    r,g,b = array
    min_distance = 1_000_000_000
    for k, v in colorlist.items(): 
        _r,_g,_b = v
        distance = (((r - _r)**2) + ((g - _g)**2) + ((b - _b)**2))**.5 
        if distance < min_distance: 
            min_distance = distance 
            closest_color = k 
        
        
    return min_distance

In [375]:
data_1950.head()

Unnamed: 0,date,month,year,name,r0,g0,b0,f0,r1,g1,...,b7,f7,color0,color1,color2,color3,color4,color5,color6,color7
0,1950-01-01,1,1950,19500101.jpg,131,131,120,56603,211,211,...,94,2036,[131 131 120],[211 211 201],[45 45 45],[188 187 175],[100 83 78],[166 164 150],[200 199 191],[164 100 94]
1,1950-02-01,2,1950,19500201.jpg,164,153,126,48448,225,219,...,54,1089,[164 153 126],[225 219 207],[132 122 92],[217 210 195],[46 50 81],[195 183 161],[233 227 215],[144 71 54]
2,1950-02-15,2,1950,19500215.jpg,102,83,52,47713,156,141,...,139,4670,[102 83 52],[156 141 104],[62 56 42],[183 132 78],[141 108 64],[202 154 97],[204 188 153],[188 173 139]
3,1950-03-01,3,1950,19500301.jpg,13,12,18,46334,197,184,...,62,2270,[13 12 18],[197 184 169],[210 196 180],[167 142 128],[131 109 95],[34 31 32],[218 204 189],[85 70 62]
4,1950-03-15,3,1950,19500315.jpg,127,113,108,31224,170,151,...,76,4727,[127 113 108],[170 151 138],[198 190 180],[140 136 138],[37 38 43],[188 165 150],[209 200 191],[86 77 76]


In [381]:
df_1950['xkcd0'] = df_1950['color0'].apply(lambda x: dist1(x, colors_dict)) 
df_1950['xkcd1'] = df_1950['color1'].apply(lambda x: dist1(x, colors_dict)) 
df_1950['xkcd2'] = df_1950['color2'].apply(lambda x: dist1(x, colors_dict)) 
df_1950['xkcd3'] = df_1950['color3'].apply(lambda x: dist1(x, colors_dict)) 
df_1950['xkcd4'] = df_1950['color4'].apply(lambda x: dist1(x, colors_dict)) 
df_1950['xkcd5'] = df_1950['color5'].apply(lambda x: dist1(x, colors_dict)) 
df_1950['xkcd6'] = df_1950['color6'].apply(lambda x: dist1(x, colors_dict)) 
df_1950['xkcd7'] = df_1950['color7'].apply(lambda x: dist1(x, colors_dict)) 

In [382]:
df_1950['distance0'] = df_1950['color0'].apply(lambda x: dist2(x, colors_dict)) 
df_1950['distance1'] = df_1950['color1'].apply(lambda x: dist2(x, colors_dict)) 
df_1950['distance2'] = df_1950['color2'].apply(lambda x: dist2(x, colors_dict)) 
df_1950['distance3'] = df_1950['color3'].apply(lambda x: dist2(x, colors_dict)) 
df_1950['distance4'] = df_1950['color4'].apply(lambda x: dist2(x, colors_dict)) 
df_1950['distance5'] = df_1950['color5'].apply(lambda x: dist2(x, colors_dict)) 
df_1950['distance6'] = df_1950['color6'].apply(lambda x: dist2(x, colors_dict)) 
df_1950['distance7'] = df_1950['color7'].apply(lambda x: dist2(x, colors_dict)) 

In [379]:
df_1950['xkcd0'] = df_1950['color0'].apply(lambda x: dist1(x, colors_dict)) 

In [383]:
df_1950.head()

Unnamed: 0,date,month,year,name,r0,g0,b0,f0,r1,g1,...,xkcd6,xkcd7,distance0,distance1,distance2,distance3,distance4,distance5,distance6,distance7
1059,1950-01-01,1,1950,19500101.jpg,131,131,120,56603,211,211,...,LIGHT PINK,DARK PINK,98.534258,49.234135,43.289722,85.305334,91.618775,90.60905,64.412732,53.99074
1060,1950-02-01,2,1950,19500201.jpg,164,153,126,48448,225,219,...,LIGHT PINK,DARK PINK,97.061836,35.44009,92.276758,47.212287,48.435524,86.469648,29.529646,79.536155
1061,1950-02-15,2,1950,19500215.jpg,102,83,52,47713,156,141,...,LIGHT GREEN,LIGHT GREEN,91.815031,89.409172,54.598535,75.696763,86.844689,89.565618,86.815897,86.463865
1062,1950-03-01,3,1950,19500301.jpg,13,12,18,46334,197,184,...,LIGHT PINK,DARK PURPLE,45.880279,83.09633,63.584589,87.555697,85.229103,43.428102,50.497525,71.554175
1063,1950-03-15,3,1950,19500315.jpg,127,113,108,31224,170,151,...,LIGHT PINK,DARK PURPLE,89.894382,97.190535,73.885046,99.854895,40.509258,86.884981,56.753854,79.536155


In [384]:
df_1950.shape

(1019, 60)

In [385]:
df_1950.to_csv('images_1950_final.csv', index=False) 

In [387]:
data_1950_2 = pd.read_csv('./images_1950_final.csv')

In [389]:
data_1950_2.shape

(1019, 60)

In [353]:
dist([131, 131, 120], colors_dict)

('DARK PINK', 98.53425800197614)

In [376]:
df_1950.head()

Unnamed: 0,date,month,year,name,r0,g0,b0,f0,r1,g1,...,color6,color7,xkcd0,xkcd1,xkcd2,xkcd3,xkcd4,xkcd5,xkcd6,xkcd7
1059,1950-01-01,1,1950,19500101.jpg,131,131,120,56603,211,211,...,"[200, 199, 191]","[164, 100, 94]",<bound method Series.apply of 1059 (DARK ...,"(LIGHT PINK, 49.23413450036468)","(DARK PURPLE, 43.289721643826724)","(LIGHT PINK, 85.30533394811839)","(DARK PURPLE, 91.61877536837086)","(LIGHT GREEN, 90.60905032059435)","(LIGHT PINK, 64.41273166075166)",<bound method Series.apply of 1059 (DARK ...
1060,1950-02-01,2,1950,19500201.jpg,164,153,126,48448,225,219,...,"[233, 227, 215]","[144, 71, 54]",<bound method Series.apply of 1059 (DARK ...,"(LIGHT PINK, 35.4400902933387)","(DARK PINK, 92.27675763701279)","(LIGHT PINK, 47.21228653645151)","(DARK PURPLE, 48.43552415324934)","(PINK, 86.46964785403026)","(LIGHT PINK, 29.5296461204668)",<bound method Series.apply of 1059 (DARK ...
1061,1950-02-15,2,1950,19500215.jpg,102,83,52,47713,156,141,...,"[204, 188, 153]","[188, 173, 139]",<bound method Series.apply of 1059 (DARK ...,"(DARK PINK, 89.40917178902845)","(DARK PURPLE, 54.59853477887479)","(DARK PINK, 75.69676347110224)","(DARK PINK, 86.84468895678077)","(DARK PINK, 89.56561840349231)","(LIGHT GREEN, 86.81589716175259)",<bound method Series.apply of 1059 (DARK ...
1062,1950-03-01,3,1950,19500301.jpg,13,12,18,46334,197,184,...,"[218, 204, 189]","[85, 70, 62]",<bound method Series.apply of 1059 (DARK ...,"(LIGHT PINK, 83.09632964216891)","(LIGHT PINK, 63.58458932791813)","(DARK PINK, 87.55569655939013)","(DARK PINK, 85.22910301065006)","(DARK PURPLE, 43.42810150121693)","(LIGHT PINK, 50.49752469181039)",<bound method Series.apply of 1059 (DARK ...
1063,1950-03-15,3,1950,19500315.jpg,127,113,108,31224,170,151,...,"[209, 200, 191]","[86, 77, 76]",<bound method Series.apply of 1059 (DARK ...,"(DARK PINK, 97.19053451854249)","(LIGHT PINK, 73.8850458482635)","(DARK PINK, 99.8548947222919)","(DARK PURPLE, 40.50925820105819)","(PINK, 86.88498144098322)","(LIGHT PINK, 56.753854494650845)",<bound method Series.apply of 1059 (DARK ...


In [367]:
df_1950[['xkcd0', 'distance']] = df_1950.xkcd0.apply
(lambda x: pd.Series(str(x).replace('(', '').replace(')', '').split(",")))

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  """Entry point for launching an IPython kernel.


<function __main__.<lambda>(x)>

In [368]:
df_1950.head()

Unnamed: 0,date,month,year,name,r0,g0,b0,f0,r1,g1,...,color6,color7,xkcd0,xkcd1,xkcd2,xkcd3,xkcd4,xkcd5,xkcd6,xkcd7
1059,1950-01-01,1,1950,19500101.jpg,131,131,120,56603,211,211,...,"[200, 199, 191]","[164, 100, 94]",<bound method Series.apply of 1059 (DARK ...,"(LIGHT PINK, 49.23413450036468)","(DARK PURPLE, 43.289721643826724)","(LIGHT PINK, 85.30533394811839)","(DARK PURPLE, 91.61877536837086)","(LIGHT GREEN, 90.60905032059435)","(LIGHT PINK, 64.41273166075166)",<bound method Series.apply of 1059 (DARK ...
1060,1950-02-01,2,1950,19500201.jpg,164,153,126,48448,225,219,...,"[233, 227, 215]","[144, 71, 54]",<bound method Series.apply of 1059 (DARK ...,"(LIGHT PINK, 35.4400902933387)","(DARK PINK, 92.27675763701279)","(LIGHT PINK, 47.21228653645151)","(DARK PURPLE, 48.43552415324934)","(PINK, 86.46964785403026)","(LIGHT PINK, 29.5296461204668)",<bound method Series.apply of 1059 (DARK ...
1061,1950-02-15,2,1950,19500215.jpg,102,83,52,47713,156,141,...,"[204, 188, 153]","[188, 173, 139]",<bound method Series.apply of 1059 (DARK ...,"(DARK PINK, 89.40917178902845)","(DARK PURPLE, 54.59853477887479)","(DARK PINK, 75.69676347110224)","(DARK PINK, 86.84468895678077)","(DARK PINK, 89.56561840349231)","(LIGHT GREEN, 86.81589716175259)",<bound method Series.apply of 1059 (DARK ...
1062,1950-03-01,3,1950,19500301.jpg,13,12,18,46334,197,184,...,"[218, 204, 189]","[85, 70, 62]",<bound method Series.apply of 1059 (DARK ...,"(LIGHT PINK, 83.09632964216891)","(LIGHT PINK, 63.58458932791813)","(DARK PINK, 87.55569655939013)","(DARK PINK, 85.22910301065006)","(DARK PURPLE, 43.42810150121693)","(LIGHT PINK, 50.49752469181039)",<bound method Series.apply of 1059 (DARK ...
1063,1950-03-15,3,1950,19500315.jpg,127,113,108,31224,170,151,...,"[209, 200, 191]","[86, 77, 76]",<bound method Series.apply of 1059 (DARK ...,"(DARK PINK, 97.19053451854249)","(LIGHT PINK, 73.8850458482635)","(DARK PINK, 99.8548947222919)","(DARK PURPLE, 40.50925820105819)","(PINK, 86.88498144098322)","(LIGHT PINK, 56.753854494650845)",<bound method Series.apply of 1059 (DARK ...


### New Columns for Arrays

In [257]:
dist1([0, 225, 0], colors_dict)

('GREEN', 59.312730505347666)

In [195]:
df_1950.as_matrix(columns=df_1950.columns[4:7])

  """Entry point for launching an IPython kernel.


array([[131, 131, 120],
       [164, 153, 126],
       [102,  83,  52],
       ...,
       [160, 143, 134],
       [155, 147, 143],
       [ 79,  82,  41]])

In [194]:
dist1(df_1950.as_matrix(columns=df_1950.columns[4:7])[4], colors_dict)

  """Entry point for launching an IPython kernel.


('DARK PINK', 89.89438247187641)

In [217]:
df_1950[df_1950.columns[32:35]].head()

Unnamed: 0,r7,g7,b7
1059,164,100,94
1060,144,71,54
1061,188,173,139
1062,85,70,62
1063,86,77,76


In [218]:
df_1950['color0'] = [e for e in df_1950.as_matrix(columns=df_1950.columns[4:7])]
df_1950['color1'] = [e for e in df_1950.as_matrix(columns=df_1950.columns[8:11])]
df_1950['color2'] = [e for e in df_1950.as_matrix(columns=df_1950.columns[12:15])]
df_1950['color3'] = [e for e in df_1950.as_matrix(columns=df_1950.columns[16:19])]
df_1950['color4'] = [e for e in df_1950.as_matrix(columns=df_1950.columns[20:23])]
df_1950['color5'] = [e for e in df_1950.as_matrix(columns=df_1950.columns[24:27])]
df_1950['color6'] = [e for e in df_1950.as_matrix(columns=df_1950.columns[28:31])]
df_1950['color7'] = [e for e in df_1950.as_matrix(columns=df_1950.columns[32:35])]

  """Entry point for launching an IPython kernel.
  
  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.
  """
  
  import sys
  


In [219]:
df_1950.head()

Unnamed: 0,date,month,year,name,r0,g0,b0,f0,r1,g1,...,b7,f7,color0,color1,color2,color3,color4,color5,color6,color7
1059,1950-01-01,1,1950,19500101.jpg,131,131,120,56603,211,211,...,94,2036,"[131, 131, 120]","[211, 211, 201]","[45, 45, 45]","[188, 187, 175]","[100, 83, 78]","[166, 164, 150]","[200, 199, 191]","[164, 100, 94]"
1060,1950-02-01,2,1950,19500201.jpg,164,153,126,48448,225,219,...,54,1089,"[164, 153, 126]","[225, 219, 207]","[132, 122, 92]","[217, 210, 195]","[46, 50, 81]","[195, 183, 161]","[233, 227, 215]","[144, 71, 54]"
1061,1950-02-15,2,1950,19500215.jpg,102,83,52,47713,156,141,...,139,4670,"[102, 83, 52]","[156, 141, 104]","[62, 56, 42]","[183, 132, 78]","[141, 108, 64]","[202, 154, 97]","[204, 188, 153]","[188, 173, 139]"
1062,1950-03-01,3,1950,19500301.jpg,13,12,18,46334,197,184,...,62,2270,"[13, 12, 18]","[197, 184, 169]","[210, 196, 180]","[167, 142, 128]","[131, 109, 95]","[34, 31, 32]","[218, 204, 189]","[85, 70, 62]"
1063,1950-03-15,3,1950,19500315.jpg,127,113,108,31224,170,151,...,76,4727,"[127, 113, 108]","[170, 151, 138]","[198, 190, 180]","[140, 136, 138]","[37, 38, 43]","[188, 165, 150]","[209, 200, 191]","[86, 77, 76]"


In [220]:
df_1950.to_csv('images_1950.csv', index=False) 

In [294]:
column_list = []
def get_colors_array(array, colors_list): 
    for i in range(len(array)): 
        x = dist1(array.iloc[i], colors_list)
        columns_list = []
        columns_list = columns_list.append(x)
        return columns_list

In [295]:
df_1950['color0'].iloc[1]

array([164, 153, 126])

In [82]:
colors_dict = {'LIGHT GREEN': (150, 249, 123),
 'GREEN': (21, 176, 26),
 'DARK GREEN': (3, 53, 0),
 'LIGHT BLUE': (149, 208, 252),
 'BLUE': (34, 66, 199),
 'DARK BLUE': (0, 3, 91),
 'LIGHT YELLOW': (255, 254, 122),
 'YELLOW': (255, 255, 20),
 'DARK YELLOW': (213, 182, 10),
 'ORANGE': (249, 115, 6),
 'LIGHT RED': (255, 71, 76),
 'RED': (229, 0, 0),
 'DARK RED': (132, 0, 0),
 'LIGHT PURPLE': (191, 119, 246),
 'PURPLE': (126, 30, 156),
 'DARK PURPLE': (53, 6, 62),
 'LIGHT PINK': (255, 209, 223),
 'PINK': (255, 129, 192),
 'DARK PINK': (203, 65, 107)}

In [83]:
len(colors_dict)

19