In [1]:
import pandas as pd
import os,glob
import numpy as np
import warnings
warnings.filterwarnings("ignore")
import sys
sys.path.append(os.path.expanduser("~") + "/collproc/")
from raw import *
from coll import *
sys.path.append(os.path.expanduser("~"))
from ss2csv.ss2csv import file2table,cleancols

In [2]:
# Root folder of the measurement exports, located under the current user's
# home directory.
DIR = "".join((os.path.expanduser("~"), "/Dropbox/lml/genome_measurements/"))

In [3]:
# Gather the measurement files found under DIR.
# NOTE(review): dirwalk is a project helper (star-imported above); presumably
# `require` keeps paths containing 'lor' and `omit` skips paths containing any
# of the listed names -- confirm against its definition.
allcolor = dirwalk(
    DIR,
    require='lor',
    omit=['Tests', 'flags', 'Issues'],
)

In [4]:
# Parse every file in `allcolor` into its own table, tag each row with the
# file it came from, and concatenate all tables in a single call.
# Concatenating inside the loop would re-copy the accumulated frame for every
# file, which is quadratic in the number of files.
frames = []
for f in allcolor:
    # file2table / cleancols are project helpers from ss2csv; the result is
    # used here as a DataFrame.
    tmp = cleancols(file2table(f))
    tmp['fname'] = f
    frames.append(tmp)

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame when no files were found.
df = pd.concat(frames) if frames else pd.DataFrame()

In [5]:
# Replace the index left over from concatenating the per-file tables with a
# fresh 0..n-1 range. The per-row `.loc[i]` lookups and the column-wise concat
# later in the notebook both rely on these labels.
df = df.reset_index(drop=True)

In [6]:
# SAMPLE_ID2 and SAMPLE_ID3 are occasionally populated in other datasets, but
# not in this one, so remove both columns from df.
for extra_id_col in ('SAMPLE_ID2', 'SAMPLE_ID3'):
    del df[extra_id_col]

# Color conversion

For some reason, some measurements are occasionally recorded as LUV or LCH instead of LAB. These rows can easily be converted to LAB:

In [7]:
from colormath.color_objects import LabColor,LuvColor,LCHabColor,sRGBColor
from colormath.color_conversions import convert_color

In [8]:
def luv2lab(i):
    """Convert the LUV measurement stored in row ``i`` of the global ``df`` to LAB.

    Parameters
    ----------
    i : index label
        Row label looked up with ``.loc`` in the ``LUV_L``, ``LUV_U`` and
        ``LUV_V`` columns of ``df``.

    Returns
    -------
    tuple
        The result of ``LabColor.get_value_tuple()`` for the converted colour;
        callers index it as (L, a, b).
    """
    # Read the three LUV coordinates for this row, one column at a time.
    lightness, u_coord, v_coord = (
        df[column].loc[i] for column in ('LUV_L', 'LUV_U', 'LUV_V')
    )
    # The source colour is declared for the 2-degree observer and D65 illuminant.
    source = LuvColor(lightness, u_coord, v_coord, observer='2', illuminant='d65')
    return convert_color(source, LabColor).get_value_tuple()

In [9]:
def lch2lab(i):
    """Convert the LCH measurement stored in row ``i`` of the global ``df`` to LAB.

    Lightness is read from ``LAB_L``; chroma and hue are read from the
    ``LAB_C`` and ``LAB_H`` columns.

    Parameters
    ----------
    i : index label
        Row label looked up with ``.loc`` in ``df``.

    Returns
    -------
    tuple
        The result of ``LabColor.get_value_tuple()`` for the converted colour;
        callers index it as (L, a, b).
    """
    # Read lightness, chroma and hue for this row, one column at a time.
    lightness, chroma, hue = (
        df[column].loc[i] for column in ('LAB_L', 'LAB_C', 'LAB_H')
    )
    # The source colour is declared for the 2-degree observer and D65 illuminant.
    source = LCHabColor(lightness, chroma, hue, observer='2', illuminant='d65')
    return convert_color(source, LabColor).get_value_tuple()

In [10]:
def lab2rgb(i):
    """Return the sRGB hex string for the LAB measurement in row ``i`` of ``df``.

    Parameters
    ----------
    i : index label
        Row label looked up with ``.loc`` in the ``LAB_L``, ``LAB_A`` and
        ``LAB_B`` columns of the global ``df``.

    Returns
    -------
    The value of ``sRGBColor.get_rgb_hex()`` for the converted colour.
    """
    # Read the three LAB coordinates for this row, one column at a time.
    lightness, a_coord, b_coord = (
        df[column].loc[i] for column in ('LAB_L', 'LAB_A', 'LAB_B')
    )
    # The source colour is declared for the 2-degree observer and D65 illuminant.
    source = LabColor(lightness, a_coord, b_coord, observer='2', illuminant='d65')
    return convert_color(source, sRGBColor).get_rgb_hex()

In [11]:
# Row labels that carry an LUV measurement (LUV_L is filled in) ...
luvidxs = df.index[df['LUV_L'].notna()]
# ... and row labels that carry an LCH measurement (LAB_C is filled in).
lchidxs = df.index[df['LAB_C'].notna()]

In [12]:
# Overwrite the LAB columns of every LUV-measured row with the converted values.
# Write through `df.loc[row, column]` in one indexing step. The chained form
# `df.LAB_L.loc[row] = ...` assigns into an intermediate Series, which under
# pandas Copy-on-Write never reaches `df` -- and the resulting warning is
# silenced by the notebook-wide `warnings.filterwarnings("ignore")`.
for luvidx in luvidxs:
    lab = luv2lab(luvidx)  # tuple indexed as (L, a, b)
    df.loc[luvidx, 'LAB_L'] = lab[0]
    df.loc[luvidx, 'LAB_A'] = lab[1]
    df.loc[luvidx, 'LAB_B'] = lab[2]

In [13]:
# Overwrite the LAB columns of every LCH-measured row with the converted values.
# Write through `df.loc[row, column]` in one indexing step. The chained form
# `df.LAB_L.loc[row] = ...` assigns into an intermediate Series, which under
# pandas Copy-on-Write never reaches `df` -- and the resulting warning is
# silenced by the notebook-wide `warnings.filterwarnings("ignore")`.
for lchidx in lchidxs:
    lab = lch2lab(lchidx)  # tuple indexed as (L, a, b)
    df.loc[lchidx, 'LAB_L'] = lab[0]
    df.loc[lchidx, 'LAB_A'] = lab[1]
    df.loc[lchidx, 'LAB_B'] = lab[2]

In [14]:
# Remove the LUV and LCH input columns from df; only LAB_L / LAB_A / LAB_B
# are used from here on.
for source_col in ('LUV_L', 'LUV_U', 'LUV_V', 'LAB_C', 'LAB_H'):
    del df[source_col]

In [15]:
# One sRGB hex string per row, computed from that row's LAB values.
df['rgbhex'] = list(map(lab2rgb, df.index))

# Parse Sample ID

In [16]:
# Strip the double quotes that wrap each sample id.
df['SAMPLE_ID1'] = list(map(lambda raw_id: raw_id.strip('"'), df['SAMPLE_ID1']))

In [17]:
# `mmode` is the last space-separated token of the sample id.
df['mmode'] = [sample_id.rsplit(" ", 1)[-1] for sample_id in df['SAMPLE_ID1']]

In [18]:
#df = df.loc[df.mmode.isin(['M0','M2'])]

In [19]:
#df = df.reset_index(drop=True)

In [20]:
# `ss` is everything before the last space of the sample id (i.e. the id
# without its trailing token), with surrounding whitespace trimmed.
df['ss'] = [sample_id.rpartition(" ")[0].strip() for sample_id in df['SAMPLE_ID1']]

In [21]:
# SAMPLE_ID1 has been split into `mmode` and `ss`; remove the original column
# from df in place.
df.drop(columns=['SAMPLE_ID1'], inplace=True)

In [22]:
# Lower-case every `ss` value before it is parsed.
df['ss'] = df['ss'].apply(str.lower)

In [23]:
# Run the project helper parse_sample_id on every `ss` value and expand the
# results into their own columns.
parsed_ids = pd.DataFrame(list(df.ss.apply(parse_sample_id)))
# The new frame gets a fresh 0..n-1 index and axis=1 concat aligns on index
# labels, so df must still carry the 0..n-1 index from the earlier reset_index.
df = pd.concat([df, parsed_ids], axis=1)

In [24]:
# Hand the assembled table to the project helper `pkl` with the target name
# 'raw.pkl'.
# NOTE(review): presumably this pickles df to ./raw.pkl -- confirm against the
# helper's definition.
pkl(df,'raw.pkl')