# User Preprocessing
- The aim here is to build a user database from our user pool summary statistics

### Imports

In [3]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

#### Display options
# Draw matplotlib figures inline in the notebook output, using the
# 'retina' figure format of the inline backend.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# Jupyter config
# Tab completion: switch off the Jedi-based completer and enable IPython's
# greedy completer instead.
%config Completer.use_jedi = False
%config IPCompleter.greedy=True
# Do not truncate columns when a DataFrame is displayed (None = no limit).
pd.set_option('display.max_columns', None)

np.random.seed(42) # fixed seed for NumPy's global RNG so random draws repeat between runs.

### Import External Files

In [4]:
# Read the data file produced by the tests into `df`, then print a summary
# of its columns, non-null counts and dtypes.
df = pd.read_csv(filepath_or_buffer='./CSV/colour_data_notebooks.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5821 entries, 0 to 5820
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   user             5821 non-null   object
 1   correct          5821 non-null   int64 
 2   recorded_result  5821 non-null   object
 3   mask_image       5821 non-null   object
 4   cb_type1         5821 non-null   int64 
 5   cb_type2         5821 non-null   int64 
 6   ncb              5821 non-null   int64 
 7   datetime         5821 non-null   object
 8   random_spread    5821 non-null   int64 
 9   pallet_used      5821 non-null   object
 10  pallet_values    5821 non-null   object
 11  ishihara_list    5821 non-null   object
 12  COLORS_ON        5821 non-null   object
 13  COLORS_OFF       5821 non-null   object
dtypes: int64(5), object(9)
memory usage: 636.8+ KB


### This data comes directly from the PostgreSQL database.

In [5]:
# df['user'] = [i[:5] for i in df['user']] # shorten unnecessarily long usernames.

# Preview the first and last 25 rows in a single table.
# `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0;
# `pd.concat` builds the same frame and keeps the original row labels.
pd.concat([df.head(25), df.tail(25)])

  df.head(25).append(df.tail(25))


Unnamed: 0,user,correct,recorded_result,mask_image,cb_type1,cb_type2,ncb,datetime,random_spread,pallet_used,pallet_values,ishihara_list,COLORS_ON,COLORS_OFF
0,unknown_1,1,5,5,0,0,1,16/6/20 11:22,10,bear,"[#e26f39,#f6c458,#e09c3e,#e69e40,#ed7b3d,#f5be...","[(220,105,53),(239,192,86),(227,162,61),(222,1...","[(220,105,53),(239,192,86),(227,162,61),(222,1...","[(71,138,38),(92,73,38),(153,165,49),(221,221,..."
1,unknown_1,1,C,C,0,0,1,16/6/20 11:20,10,bear,"[#e26f39,#f6c458,#e09c3e,#e69e40,#ed7b3d,#f5be...","[(232,116,65),(247,198,95),(221,152,56),(220,1...","[(232,116,65),(247,198,95),(221,152,56),(220,1...","[(65,149,38),(99,72,41),(168,165,50),(223,211,..."
2,unknown_1,1,D,D,0,0,1,16/6/20 11:30,10,bear,"[#e26f39,#f6c458,#e09c3e,#e69e40,#ed7b3d,#f5be...","[(226,114,54),(253,186,87),(233,148,70),(236,1...","[(226,114,54),(253,186,87),(233,148,70),(236,1...","[(75,142,40),(96,64,34),(162,152,41),(224,219,..."
3,unknown_1,1,N,N,0,0,1,16/6/20 11:21,10,bear,"[#e26f39,#f6c458,#e09c3e,#e69e40,#ed7b3d,#f5be...","[(231,116,53),(253,203,81),(215,149,67),(224,1...","[(231,116,53),(253,203,81),(215,149,67),(224,1...","[(70,149,26),(96,73,48),(163,158,57),(219,214,..."
4,unknown_1,0,N,E,0,0,1,16/6/20 11:22,10,bear,"[#e26f39,#f6c458,#e09c3e,#e69e40,#ed7b3d,#f5be...","[(223,102,47),(244,188,85),(217,147,55),(222,1...","[(223,102,47),(244,188,85),(217,147,55),(222,1...","[(63,147,29),(103,82,46),(155,153,53),(214,215..."
5,male_ncb_2,1,2,2,0,0,1,pre_timestamp,25,bear,"[#e26f39, #f6c458, #e09c3e, #e69e40, #ed7b3d, ...","[(244, 130, 52), (250, 188, 96), (201, 135, 61...","[(244, 130, 52), (250, 188, 96), (201, 135, 61...","[(73, 168, 50), (75, 79, 60), (137, 182, 27), ..."
6,male_ncb_2,1,3,3,0,0,1,pre_timestamp,25,bear,"[#e26f39, #f6c458, #e09c3e, #e69e40, #ed7b3d, ...","[(213, 88, 48), (226, 180, 98), (235, 169, 44)...","[(213, 88, 48), (226, 180, 98), (235, 169, 44)...","[(48, 170, 47), (99, 61, 52), (160, 163, 68), ..."
7,male_ncb_2,1,3,3,0,0,1,pre_timestamp,25,bear,"[#e26f39, #f6c458, #e09c3e, #e69e40, #ed7b3d, ...","[(232, 130, 52), (240, 179, 100), (225, 176, 6...","[(232, 130, 52), (240, 179, 100), (225, 176, 6...","[(74, 131, 36), (95, 74, 16), (174, 180, 62), ..."
8,male_ncb_2,1,3,3,0,0,1,pre_timestamp,25,bear,"[#e26f39, #f6c458, #e09c3e, #e69e40, #ed7b3d, ...","[(240, 113, 77), (248, 178, 63), (220, 152, 68...","[(240, 113, 77), (248, 178, 63), (220, 152, 68...","[(55, 143, 40), (105, 93, 21), (135, 163, 29),..."
9,male_ncb_2,1,A,A,0,0,1,pre_timestamp,25,bear,"[#e26f39, #f6c458, #e09c3e, #e69e40, #ed7b3d, ...","[(239, 108, 59), (236, 173, 74), (199, 165, 52...","[(239, 108, 59), (236, 173, 74), (199, 165, 52...","[(75, 122, 18), (74, 82, 52), (147, 146, 37), ..."
