# Installs

In [1]:
# pip install pillow
# pip install altair vega_datasets

# Import Libraries

In [2]:
from PIL import Image, ImageColor
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import altair as alt

# Process Photos Function

In [3]:
def process_photo(id1):
    """Shrink one raw photo and save a palettised copy of it.

    Reads ``Unprocessed/IMG_<id1>.JPG``, resizes it to 300x300 pixels,
    converts it to palette mode with an adaptive palette limited to 10
    colours, and writes the result to ``Processed/IMG_<id1>_processed.png``.

    Parameters
    ----------
    id1 : str or int
        Photo number as it appears in the file names. It is passed through
        ``str()`` so integer ids work as well as strings.

    Returns
    -------
    None
        The function is called for its side effect (the saved PNG).
    """
    photo_id = str(id1)

    # Open unprocessed photo; the context manager releases the file handle
    # once the resized copy has been created.
    with Image.open("Unprocessed/IMG_" + photo_id + ".JPG") as original:
        im = original.resize((300, 300))

    # Classify image in 10 most dominant colors
    result = im.convert('P', palette=Image.ADAPTIVE, colors=10)

    # Save the result photo
    result.save('Processed/IMG_' + photo_id + '_processed.png')

# Apply Process Photos Function

In [4]:
# Photos are numbered 1 through 30; process_photo is given the number as a string.
for photo_number in range(1, 31):
    process_photo(str(photo_number))

# Define rgb_to_hex Function

In [5]:
def rgb_to_hex(rgb):
  """Format an RGB triple as a six-digit upper-case hex string (no leading '#').

  Parameters
  ----------
  rgb : sequence of int
      Indexable holding the red, green and blue channel values at
      positions 0, 1 and 2.

  Returns
  -------
  str
      Two hex digits per channel, e.g. ``(0, 15, 255)`` -> ``'000FFF'``.
  """
  # Each channel is zero-padded to two digits; without the padding any value
  # below 16 would shorten the string and yield a different (wrong) colour.
  # int() keeps the format spec working for numpy integer scalars as well.
  return '{:02X}{:02X}{:02X}'.format(int(rgb[0]), int(rgb[1]), int(rgb[2]))

# Define get_date_taken Function

In [6]:

def get_date_taken(path):
    """Return the value stored under EXIF tag 36867 of the image at ``path``.

    Parameters
    ----------
    path : str
        Location of the image file to inspect.

    Returns
    -------
    The tag's value as stored in the file; for the photos in this notebook
    it is a string such as ``'2022:02:08 16:42:20'``.

    Raises
    ------
    KeyError
        If the EXIF data has no entry for tag 36867.
    TypeError
        If ``_getexif()`` returns ``None`` — presumably the case when the
        file carries no EXIF data at all (verify against the Pillow version
        in use).
    """
    # 36867 (0x9003) is the EXIF "DateTimeOriginal" tag.
    # The context manager closes the file once the tag has been read.
    with Image.open(path) as photo:
        return photo._getexif()[36867]

# Encapsulating Function

In [7]:
def calculate_df(id2):
  """Summarise the colours of one processed photo as a DataFrame.

  Opens ``Processed/IMG_<id2>_processed.png``, counts how many pixels use
  each distinct RGB colour, and pairs that with the capture timestamp read
  from ``Unprocessed/IMG_<id2>.JPG``.

  Parameters
  ----------
  id2 : int or str
      Photo number as it appears in the file names.

  Returns
  -------
  pandas.DataFrame
      One row per distinct colour found in the image, with columns
      ``id`` (the value of ``id2``), ``datetime`` (result of
      ``get_date_taken``), ``colorhex`` (``'#'`` + ``rgb_to_hex`` of the
      colour), ``numpixel`` (pixel count) and ``percentage`` (share of all
      pixels, 0-100).
  """
  # Open photo, then convert it to RGB and make it into a Numpy array.
  # The context manager closes the file once the pixel data has been copied.
  path_im2 = 'Processed/IMG_' + str(id2) + '_processed.png'
  with Image.open(path_im2) as im2:
    na = np.array(im2.resize((300, 300)).convert('RGB'))

  # Get used colours and counts of each (one row of `colours` per distinct RGB triple)
  colours, counts = np.unique(na.reshape(-1, 3), axis=0, return_counts=True)

  # Calculate my hex
  myhex = ['#' + str(rgb_to_hex(colour)) for colour in colours]

  # Total pixel count
  totalpixel = counts.sum()

  # Calculate date time
  path_im1 = 'Unprocessed/IMG_' + str(id2) + '.JPG'
  datetimetaken = get_date_taken(path_im1)

  # Build the frame straight from the data so its length follows the number
  # of colours actually present; a fixed 10-row index would raise a length
  # mismatch for an image with fewer than 10 distinct colours.
  # The scalar id and timestamp are broadcast to every row.
  df = pd.DataFrame({
      'id': id2,
      'datetime': datetimetaken,
      'colorhex': myhex,
      'numpixel': counts,
      'percentage': (counts / totalpixel) * 100,
  })

  # Return df
  return df

# Apply Encapsulating Function

In [8]:
# One colour-summary frame per photo, ten photos per day.
# Each line unpacks ten consecutive calculate_df results into df<N> names.

# Day 1
(df1, df2, df3, df4, df5,
 df6, df7, df8, df9, df10) = map(calculate_df, range(1, 11))

# Day 2
(df11, df12, df13, df14, df15,
 df16, df17, df18, df19, df20) = map(calculate_df, range(11, 21))

# Day 3
(df21, df22, df23, df24, df25,
 df26, df27, df28, df29, df30) = map(calculate_df, range(21, 31))

# Concatenate Data Frames

In [9]:
# Gather the per-photo frames in id order, one row of names per day
frames = [
    df1, df2, df3, df4, df5, df6, df7, df8, df9, df10,
    df11, df12, df13, df14, df15, df16, df17, df18, df19, df20,
    df21, df22, df23, df24, df25, df26, df27, df28, df29, df30,
]

# Stack them into a single long table; every frame keeps its own row labels
result = pd.concat(frames)
print(result)

    id             datetime colorhex  numpixel  percentage
0    1  2022:02:08 16:42:20  #B9BCBC      7035    7.816667
1    1  2022:02:08 16:42:20  #BFC3C6     10975   12.194444
2    1  2022:02:08 16:42:20  #CBCCC9     13707   15.230000
3    1  2022:02:08 16:42:20  #CCC2B4     12656   14.062222
4    1  2022:02:08 16:42:20  #CFC7BD      7510    8.344444
..  ..                  ...      ...       ...         ...
5   30  2022:02:10 17:40:43  #9BA0A4     11951   13.278889
6   30  2022:02:10 17:40:43  #9FA3A7     11406   12.673333
7   30  2022:02:10 17:40:43  #A5AAAE      6976    7.751111
8   30  2022:02:10 17:40:43  #A8AEB3      9488   10.542222
9   30  2022:02:10 17:40:43  #AEB1B5      6069    6.743333

[300 rows x 5 columns]


# Convert and Export CSV

In [10]:
# Render the combined table as CSV text (row labels included as the first column)
result_project = result.to_csv()

# newline='' stops Python from translating the line endings that pandas has
# already put in the text; without it, platforms whose line separator is
# '\r\n' end up with '\r\r\n' and a blank line after every row.
# The encoding is fixed so the file does not depend on the locale default.
with open('result_project.csv', 'w', newline='', encoding='utf-8') as f:
    f.write(result_project)

# Import CSV

In [11]:
# Read the exported table back from disk; the chart cell plots this frame.
# Note: this rebinds df1, which until now held the frame for photo 1.
# The CSV's unnamed first column (the saved row labels) comes back as "Unnamed: 0".
df1 = pd.read_csv('result_project.csv')
print(df1)

     Unnamed: 0  id             datetime colorhex  numpixel  percentage
0             0   1  2022:02:08 16:42:20  #B9BCBC      7035    7.816667
1             1   1  2022:02:08 16:42:20  #BFC3C6     10975   12.194444
2             2   1  2022:02:08 16:42:20  #CBCCC9     13707   15.230000
3             3   1  2022:02:08 16:42:20  #CCC2B4     12656   14.062222
4             4   1  2022:02:08 16:42:20  #CFC7BD      7510    8.344444
..          ...  ..                  ...      ...       ...         ...
295           5  30  2022:02:10 17:40:43  #9BA0A4     11951   13.278889
296           6  30  2022:02:10 17:40:43  #9FA3A7     11406   12.673333
297           7  30  2022:02:10 17:40:43  #A5AAAE      6976    7.751111
298           8  30  2022:02:10 17:40:43  #A8AEB3      9488   10.542222
299           9  30  2022:02:10 17:40:43  #AEB1B5      6069    6.743333

[300 rows x 6 columns]


# Create Chart

In [12]:
# One stacked bar per capture timestamp; each segment's height is a colour's
# share of that photo and its fill is the colour itself.
chart = alt.Chart(df1).mark_bar().encode(
    x=alt.X('datetime:O', axis=None),
    y=alt.Y('percentage:Q', axis=None),
    # scale=None makes Altair use every colorhex string as the literal fill
    # colour. Feeding the whole column in as both scale domain and range
    # breaks once a hex value repeats: the domain is de-duplicated but the
    # range is not, so later colours map to the wrong entry.
    color=alt.Color('colorhex:N', scale=None, legend=None),
).properties(width=1200, height=600)
chart