# 16a. Green band classifier tool U1474
The color analysis outputs, for each depth, the percentage of pixels that are green. The question is: what does this percentage actually mean, and is it meaningful on its own? The current plan is to set a threshold on the percent greenness; a value above that threshold will count as a binary positive confirmation of a green layer.

## Setup
### Import Packages

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib.ticker import AutoMinorLocator
# %matplotlib inline
from scipy import signal,interpolate,stats,linalg
# from IPython.display import clear_output
import time
import numpy as np
import seaborn as sns
from PIL import Image
import os
import cv2 as cv
import pickle
import random

### File Paths

In [2]:
# Two root prefixes: the Green_Bands repository and the core-photo volume.
# Every path below is one of these roots plus a sub-folder, with a trailing '/'.
_repo_root='/Users/danielbabin/GitHub/Green_Bands/'
_photo_root='/Volumes/SanDisk.Data/IODPU1474CorePhotos/'

# Repository folders
data_path=_repo_root+'Data/'
table_path=_repo_root+'Tables/'
figure_path=_repo_root+'Figures/Main/'
sup_figure_path=_repo_root+'Figures/Supplementary/'
checkpoints_path=data_path+'Checkpoints/'

# Core-photo folders
saturated_cores=_photo_root+'Saturated/'
filtered_cores=_photo_root+'Filtered/Green/'
raw_cores=_photo_root+'Raw/'

### Import Splice info

In [3]:
# Splice construction table, first 57 rows only. 'CoreLabel' is built with
# assign() so the new column lands on a fresh frame instead of being written
# in place to the frame returned by .iloc (chained assignment, which can raise
# SettingWithCopyWarning).
splice=(pd.read_csv(data_path+'SpliceTables/spliceConstructionE361.csv')
        .iloc[0:57]
        .assign(CoreLabel=lambda df: df['Site']+df['Hole']+'_'+df['Core'].map(str)))

# Section summary table. 'SectionLabel' concatenates Site, Hole, '_', Core,
# Type and Sect (e.g. 'U1474A_1H1') and is used as the row index below.
section_list=pd.read_csv(data_path+'SectionSummaries/sectionSummaryU1474.csv')
section_list['SectionLabel']=(section_list['Site']+section_list['Hole']+'_'+
                              section_list['Core'].map(str)+section_list['Type']+section_list['Sect'].map(str))
# Shorten the two CCSF depth column names.
section_list=section_list.rename(columns={
    'Top depth CCSF-361-U1474-ABDEF-20160220 (m)':'Top Depth CCSF (m)',
    'Bottom depth CCSF-361-U1474-ABDEF-20160220 (m)':'Bottom Depth CCSF (m)'})
section_list['Length (m)']=section_list['Bottom Depth CCSF (m)']-section_list['Top Depth CCSF (m)']
# drop=False keeps 'SectionLabel' available as a regular column as well as the index.
section_list=section_list.set_index('SectionLabel',drop=False)

In [4]:
# Labels of every section whose top lies above 205 m CCSF and whose 'Sect'
# value is not 'CC', renumbered from 0 so positions can be used as counters.
sections=section_list.loc[
    (section_list['Sect']!='CC')&(section_list['Top Depth CCSF (m)']<205),
    'SectionLabel'
].reset_index(drop=True)

### Import Green

In [9]:
# Sub-folder of data_path that holds one green-intensity CSV per hole.
green_intensity_extension='Green_Data/Green_Intensity_Splices/U1474/'

In [10]:
# One green-intensity table per hole, keyed by hole letter and indexed by
# CCSF depth. Only holes A, B, C, D and F are loaded here.
greenIntensity={}
for hole in 'ABCDF':
    hole_csv=data_path+green_intensity_extension+hole+'.csv'
    greenIntensity[hole]=pd.read_csv(hole_csv,index_col='CCSF Depth (m)')

### Load core photos

In [11]:
# Core-photo metadata, one row per photographed section half.
cpmd=pd.read_csv(data_path+'U1474/corePhotosMetadataU1474.csv')
cpmd=cpmd.assign(**{
    # Same <Site><Hole>_<Core><Type><Sect> label used by the section summary.
    'SectionLabel':cpmd['Site']+cpmd['Hole']+'_'+cpmd['Core'].map(str)+cpmd['Type']+cpmd['Sect'].map(str),
    # File name with its last four characters replaced by '.jpeg'.
    'Cropped image filename (JPEG)':cpmd['Cropped image filename'].str[:-4]+'.jpeg',
})
# Remove the rows for section U1474F_6H2 whose 'A/W' value is 'W'.
iddrop=cpmd.loc[(cpmd['SectionLabel']=='U1474F_6H2')&(cpmd['A/W']=='W')].index
cpmd=cpmd.drop(iddrop).set_index('SectionLabel')
# Keep only the photos flagged for display.
cpmd=cpmd.loc[cpmd['Display status (T/F)']=='T']

In [12]:
def path_finder(section):
    """Return the raw line-scan image folder for the hole `section` belongs to.

    The hole letter is the character at index 5 of the section label
    (the 'A' in 'U1474A_1H1'). Letters 'A' through 'E' select their own
    folder; any other character falls back to the hole F folder.
    """
    hole_letter=section[5]
    if hole_letter not in ('A','B','C','D','E'):
        hole_letter='F'
    return '/Volumes/SanDisk.Data/IODPU1474CorePhotos/Raw/361-U1474'+hole_letter+'-LSIMG/'

In [13]:
# Green-peak labeling checkpoint, indexed by top depth. Its row count bounds
# the classification loop further down.
greens=pd.read_csv(checkpoints_path+'green.peak.labeling.U1474.csv',index_col='Top Depth CCSF (m)')

## Classify
### Define Threshold-Picking Function

In [15]:
# Silence every UserWarning raised from here on.
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

In [14]:
def check(n):
    """Show green-band candidate `n` and store the label typed by the user.

    The labeling checkpoint CSV is re-read on every call, row `n` (by
    position) is plotted as three stacked panels -- raw photo crop, green-
    filtered photo crop and the 'Green%' curve -- over the candidate interval
    padded by 0.2 m on each side, and the user is asked to classify it.
    The answer is written to the 'Label' column and the CSV is saved
    immediately, so an interrupted session loses nothing.

    Parameters
    ----------
    n : int
        Positional row number in 'green.peak.labeling.U1474.csv'.

    Notes
    -----
    Reads the module-level `checkpoints_path`, `greenIntensity`,
    `section_list`, `cpmd`, `filtered_cores` and `path_finder`.
    The prompt repeats until one of the listed options (0-5) is entered.
    """
    labeling_file=checkpoints_path+'green.peak.labeling.U1474.csv'
    greens=pd.read_csv(labeling_file,index_col='Top Depth CCSF (m)')
    sample=greens.iloc[n]
    top_depth=sample.name
    bottom_depth=sample['Bottom Depth CCSF (m)']
    top_offset=sample['Top Offset (cm)']/100      # cm -> m
    bottom_offset=sample['Bottom Offset (cm)']/100
    sect=sample['SectionLabel']
    hole=sect[5]
    buffer=.2                                     # padding (m) shown around the candidate

    # Boolean indexing already returns a new frame, so no explicit copy is needed.
    green=greenIntensity[hole]
    greendata=green[green['SectionLabel']==sect]
    greendata=greendata.loc[top_depth-buffer:bottom_depth+buffer]

    fig=plt.figure(figsize=(14,7))
    gs = gridspec.GridSpec(8, 1, figure=fig,hspace=.02,left=.05,right=.95,top=.95)

    c1=fig.add_subplot(gs[:3,:])
    c2=fig.add_subplot(gs[3:6,:])
    ax1=fig.add_subplot(gs[6:8,:])

    c1.set_title('Hole '+sect[5]+' Core '+sect[7:],
                  fontweight='bold',fontsize=20,pad=0,loc='left')
    # Progress counter: position of this candidate out of all rows in the checkpoint.
    c1.annotate('Number '+str(n+1)+'/'+str(len(greens)),xy=(1,1),xycoords='axes fraction',
                fontweight='bold',fontsize=20,horizontalalignment='right',verticalalignment='bottom')

    sect_length=(section_list.loc[sect,'Bottom Depth CCSF (m)']-
                  section_list.loc[sect,'Top Depth CCSF (m)'])

    raw_path=path_finder(sect)
    photo_name=cpmd.loc[sect,'Cropped image filename']
    # Open both photos in a context manager so the file handles are released;
    # rotate() and crop() return independent images that outlive the files.
    with Image.open(raw_path+photo_name[:-4]+'-LSIMG.jpg') as raw_img, \
         Image.open(filtered_cores+photo_name) as filt_img:
        # PIL's size is (width, height) of the stored image. The raw photo is
        # rotated by 90 degrees below, so its stored width becomes the plotted
        # height `h` and its stored height the plotted width `w`.
        h=raw_img.size[0]
        w=raw_img.size[1]
        aspect=(w/h)

        pxl_m=w/sect_length                       # pixels per metre along the section
        extentlist=[top_depth-buffer,
                    bottom_depth+buffer,
                    0,sect_length/aspect]
        sample_top_pxl=int(top_offset*pxl_m)
        sample_bottom_pxl=int(bottom_offset*pxl_m)
        pxl_offset=int(buffer*pxl_m)
        crop_box=(sample_top_pxl-pxl_offset,0,sample_bottom_pxl+pxl_offset,h)

        raw=raw_img.rotate(90,expand=True).crop(crop_box)
        # NOTE(review): the filtered photo is cropped with the raw photo's
        # dimensions and without rotation -- presumably it is stored already
        # rotated at the same resolution; confirm.
        filt=filt_img.crop(crop_box)

    c1.imshow(raw,extent=extentlist)
    c1.axis('off')

    c2.imshow(filt,extent=extentlist)
    c2.axis('off')

    greendata['Green%'].plot(kind='area',ax=ax1,color='tab:green')

    ax1.set_ylabel('% Pixels')
    for a in (c1,c2,ax1):
        a.set_xlim(top_depth-buffer,bottom_depth+buffer)

    ## Annotate
    ax1.set_xlabel('Depth (m CCSF)')
    box=dict(boxstyle="round",ec=(0,0,0),fc=(1,1,1),clip_on=True)
    ax1.axvline(top_depth,color='k')
    ax1.axvline(bottom_depth,color='k')
    ax1.annotate(n,xy=(bottom_depth+.015,greendata['Green%'].max()*.9),ha='left',size=16,bbox=box)

    ## Question
    print('Is ',n,'\n',
          '(1) a diffuse, mottled green band?\t',
          '(2) a diffuse, continuous green band?\n',
          '(3) a crusty, mottled green band?\t',
          '(4) a crusty, continuous green band?\n',
          '(5) flag algorithm problem\t\t',
          '(0) not a green layer')
    plt.show(block=False)

    # Re-prompt on typos instead of aborting the whole labeling session.
    valid_labels={0,1,2,3,4,5}
    gTF=None
    while gTF not in valid_labels:
        try:
            gTF=int(input())
        except ValueError:
            gTF=None
        if gTF not in valid_labels:
            print('Invalid input')
    plt.close(fig)

    greens.loc[top_depth,'Label']=gTF
    greens.to_csv(labeling_file)

## Check

In [16]:
# Optional interactive labeling pass: walk every candidate from the chosen
# row number to the end of the checkpoint table.
print('Do you need to classify green bands?: (y/n)')
classify=input()
if classify=='y':
    print('Enter start n: ')
    start_n=int(input())
    for i in range(start_n,greens.shape[0]):
        check(i)

## Fix Algorithm Problems

In [39]:
def makeGraph(n):
    """Build the three-panel review figure for row `n` of the depth-fix table.

    Re-reads 'green.peak.labeling.depths.fixed.csv', takes row `n` by
    position and draws the raw photo crop, the green-filtered photo crop and
    the 'Green%' curve over the candidate interval padded by 0.2 m on each
    side. The figure is created but not shown; the caller shows and closes it.

    Parameters
    ----------
    n : int
        Positional row number in the depth-fix CSV.

    Returns
    -------
    tuple
        `(index, fig, c1, c2, ax1)`: the row's index label in the CSV, the
        figure, the raw-photo axes, the filtered-photo axes and the
        'Green%' axes.

    Notes
    -----
    Reads the module-level `checkpoints_path`, `greenIntensity`,
    `section_list`, `cpmd`, `filtered_cores` and `path_finder`.
    """
    greens_depth_fix=pd.read_csv(checkpoints_path+'green.peak.labeling.depths.fixed.csv')
    sample=greens_depth_fix.iloc[n]
    index=sample.name
    top_depth=sample['Top Depth CCSF (m)']
    bottom_depth=sample['Bottom Depth CCSF (m)']
    top_offset=sample['Top Offset (cm)']/100      # cm -> m
    bottom_offset=sample['Bottom Offset (cm)']/100
    sect=sample['SectionLabel']
    hole=sect[5]
    buffer=.2                                     # padding (m) shown around the candidate

    # Boolean indexing already returns a new frame, so no explicit copy is needed.
    green=greenIntensity[hole]
    greendata=green[green['SectionLabel']==sect]
    greendata=greendata.loc[top_depth-buffer:bottom_depth+buffer]

    fig=plt.figure(figsize=(14,7))
    gs = gridspec.GridSpec(8, 1, figure=fig,hspace=.02,left=.05,right=.95,top=.95)

    c1=fig.add_subplot(gs[:3,:])
    c2=fig.add_subplot(gs[3:6,:])
    ax1=fig.add_subplot(gs[6:8,:])

    c1.set_title('Hole '+sect[5]+' Core '+sect[7:],
                  fontweight='bold',fontsize=20,pad=0,loc='left')
    # Progress counter: position of this row out of all rows in the depth-fix table.
    c1.annotate('Number '+str(n+1)+'/'+str(len(greens_depth_fix)),xy=(1,1),xycoords='axes fraction',
                fontweight='bold',fontsize=20,horizontalalignment='right',verticalalignment='bottom')

    sect_length=(section_list.loc[sect,'Bottom Depth CCSF (m)']-
                  section_list.loc[sect,'Top Depth CCSF (m)'])

    raw_path=path_finder(sect)
    photo_name=cpmd.loc[sect,'Cropped image filename']
    # Open both photos in a context manager so the file handles are released;
    # rotate() and crop() return independent images that outlive the files.
    with Image.open(raw_path+photo_name[:-4]+'-LSIMG.jpg') as raw_img, \
         Image.open(filtered_cores+photo_name) as filt_img:
        # PIL's size is (width, height) of the stored image. The raw photo is
        # rotated by 90 degrees below, so its stored width becomes the plotted
        # height `h` and its stored height the plotted width `w`.
        h=raw_img.size[0]
        w=raw_img.size[1]
        aspect=(w/h)

        pxl_m=w/sect_length                       # pixels per metre along the section
        extentlist=[top_depth-buffer,
                    bottom_depth+buffer,
                    0,sect_length/aspect]
        sample_top_pxl=int(top_offset*pxl_m)
        sample_bottom_pxl=int(bottom_offset*pxl_m)
        pxl_offset=int(buffer*pxl_m)
        crop_box=(sample_top_pxl-pxl_offset,0,sample_bottom_pxl+pxl_offset,h)

        raw=raw_img.rotate(90,expand=True).crop(crop_box)
        # NOTE(review): the filtered photo is cropped with the raw photo's
        # dimensions and without rotation -- presumably it is stored already
        # rotated at the same resolution; confirm.
        filt=filt_img.crop(crop_box)

    c1.imshow(raw,extent=extentlist)
    c1.axis('off')

    c2.imshow(filt,extent=extentlist)
    c2.axis('off')

    greendata['Green%'].plot(kind='area',ax=ax1,color='tab:green')

    ax1.set_ylabel('% Pixels')
    for a in (c1,c2,ax1):
        a.set_xlim(top_depth-buffer,bottom_depth+buffer)

    ## Annotate
    ax1.set_xlabel('Depth (m CCSF)')
    box=dict(boxstyle="round",ec=(0,0,0),fc=(1,1,1),clip_on=True)
    ax1.axvline(top_depth,color='k')
    ax1.axvline(bottom_depth,color='k')
    ax1.annotate(n,xy=(bottom_depth+.015,greendata['Green%'].max()*.9),ha='left',size=16,bbox=box)

    # Finer tick marks make it easier to read off corrected depths.
    ax1.xaxis.set_minor_locator(AutoMinorLocator(10))
    return index,fig,c1,c2,ax1

In [44]:
def fix(n):
    """Interactively correct the depths and label of row `n` in the depth-fix table.

    The user types a new top and bottom depth, sees them drawn in red on a
    fresh figure, and confirms with 'y' or rejects with 'n' (which restarts
    the depth entry). Once confirmed, the user picks a label (0-4) and the
    label and both depths are written to the module-level `greens_depth_fix`
    frame, which is then saved to 'green.peak.labeling.depths.fixed.csv'.

    Parameters
    ----------
    n : int
        Positional row number passed through to `makeGraph`.

    Raises
    ------
    ValueError
        If a depth entry cannot be parsed as a float.

    Notes
    -----
    Relies on the module-level `greens_depth_fix` frame having been loaded
    before the call; it is modified in place.
    """
    confirmed=False
    while not confirmed:
        # Step 1: show the candidate and collect the corrected depths.
        index,fig,c1,c2,ax1=makeGraph(n)
        print('What should be the correct top depth for ',n,'?')
        plt.show(block=False)
        new_top=float(input())
        print('What should be the correct bottom depth for ',n,'?')
        new_bottom=float(input())
        plt.close(fig)

        # Step 2: preview the new limits until the user answers 'y' or 'n'.
        # The answer is compared against itself here, not against the
        # unrelated global `start`.
        correct=None
        while correct not in ('y','n'):
            index,fig,c1,c2,ax1=makeGraph(n)
            ax1.axvline(new_top,color='r')
            ax1.axvline(new_bottom,color='r')
            plt.show(block=False)
            print('Does this look correct?')
            correct=input()
            # Close each preview so rejected answers do not pile up open figures.
            plt.close(fig)
            if correct not in ('y','n'):
                print('Invalid input')
        confirmed=(correct=='y')

    # Step 3: show the accepted limits once more and ask for the label.
    index,fig,c1,c2,ax1=makeGraph(n)
    ax1.axvline(new_top,color='r')
    ax1.axvline(new_bottom,color='r')
    plt.show(block=False)
    print('Is ',n,'\n',
          '(1) a diffuse, mottled green layer?\t',
          '(2) a diffuse, continuous green layer?\n',
          '(3) a crusty, mottled green layer?\t',
          '(4) a crusty, continuous green layer?\n',
          '(0) not a green layer')
    # Re-prompt on typos instead of aborting after the depths were already chosen.
    valid_labels={0,1,2,3,4}
    gTF=None
    while gTF not in valid_labels:
        try:
            gTF=int(input())
        except ValueError:
            gTF=None
        if gTF not in valid_labels:
            print('Invalid input')
    plt.close(fig)

    greens_depth_fix.loc[index,'Label']=gTF
    greens_depth_fix.loc[index,'Top Depth CCSF (m)']=new_top
    greens_depth_fix.loc[index,'Bottom Depth CCSF (m)']=new_bottom
    greens_depth_fix.to_csv(checkpoints_path+'green.peak.labeling.depths.fixed.csv',index=False)

In [50]:
# Optional interactive pass over the rows that were flagged with label 5.
print('Do you need to fix depths?: (y/n)')
depths=input()
if depths=='y':
    fixed_file=checkpoints_path+'green.peak.labeling.depths.fixed.csv'
    # Ask until the answer is 'y' (re-seed the working file from
    # 'greenLayerLabeling.csv') or 'n' (resume from the existing working file).
    while True:
        print('Do you want to start from scratch?: (y/n)')
        start=input()
        if start=='y':
            greens_depth_fix=pd.read_csv(checkpoints_path+'greenLayerLabeling.csv')
            greens_depth_fix.to_csv(fixed_file,index=False)
            break
        if start=='n':
            greens_depth_fix=pd.read_csv(fixed_file)
            break
        print('Invalid input')

    # 'N (new)' values of the flagged rows are what gets handed to fix().
    flagged=greens_depth_fix['Label']==5
    fixes=greens_depth_fix.loc[flagged,'N (new)'].to_list()

    print('Enter start n: ')
    start_n=int(input())
    for l in fixes[start_n:]:
        fix(l)

In [46]:
# greens_depth_fix=pd.read_csv(checkpoints_path+'green.peak.labeling.depths.fixed.csv')
# greens_depth_fix.to_csv(table_path+'Supplementary Table 6A - U1474 green bands.csv',index=False)