J. I. Camacho Garcia

This notebook shows an example of how TPS files containing morphometric landmark coordinates were read and processed to extract eye size data.
These data are saved as a table containing the sample photo ID, body length, eye diameter, and body-size-corrected eye diameter.
The curated table linking eye size data to all individuals used in this study is available in Supplementary Data 1.

In [None]:
import pandas as pd
import numpy as np
import math

In [None]:
# Modified from: https://gist.github.com/jinyung/1b8fe5735fbfdf07378197cc4c9acc3a

def readtps(input):
    """
    Read a .TPS landmark file and return its fields as a dictionary.

    Every record is expected to start with a line beginning with "LM" whose
    value (after '=') is the number of landmarks; that many coordinate lines
    must follow immediately. Lines beginning with "IMAGE", "ID" and "SCALE"
    are collected, in file order, as strings.

    Args:
        input (str): path to the .TPS file
    Returns:
        dict with the keys:
            'lm' (int list): number of landmarks from each 'LM=' field
            'im' (str list): values of the 'IMAGE=' fields
            'id' (str list): values of the 'ID=' fields
            'scale' (str list): values of the 'SCALE=' fields (not converted
                to float)
            'coords': a 3D numpy array indexed as
                [landmark][axis][individual] if the file holds at least one
                record and all records have the same number of landmarks;
                otherwise a list of 2D arrays, one per individual (an empty
                list if the file holds no records)
    Raises:
        IndexError: if the file ends before all announced coordinate lines
            of a record have been read
        ValueError: if a landmark count or a coordinate is not numeric
    """

    # The context manager closes the file even if reading raises
    with open(input, 'r') as tps_file:
        tps = tps_file.read().splitlines()

    # lists to take the fields "LM=", "IMAGE=", "ID=", "SCALE=" and the coords
    lm, im, ID, SCALE, coords_array = [], [], [], [], []

    for i, ln in enumerate(tps):

        # Each individual starts with "LM="
        if ln.startswith("LM"):
            # number of landmarks of this individual
            lm_num = int(ln.split('=', 1)[1])
            lm.append(lm_num)

            # the next lm_num lines hold this individual's coordinates
            coord_lines = tps[i + 1:i + 1 + lm_num]
            if len(coord_lines) < lm_num:
                raise IndexError(
                    "TPS record starting at line %d announces %d landmarks "
                    "but only %d lines follow" % (i + 1, lm_num, len(coord_lines))
                )

            # split() without a separator tolerates tabs, repeated spaces and
            # trailing whitespace, which split(' ') turns into empty fields
            coords_mat = np.array([row.split() for row in coord_lines],
                                  dtype=float)
            coords_array.append(coords_mat)

        # maxsplit=1 keeps values that themselves contain '=' intact;
        # strip() drops stray whitespace around the value
        elif ln.startswith("IMAGE"):
            im.append(ln.split('=', 1)[1].strip())

        elif ln.startswith("ID"):
            ID.append(ln.split('=', 1)[1].strip())

        elif ln.startswith("SCALE"):
            SCALE.append(ln.split('=', 1)[1].strip())

    # Stack into a 3D array only when there is something to stack and every
    # individual has the same number of landmarks (np.dstack([]) would raise)
    if lm and all(x == lm[0] for x in lm):
        coords_array = np.dstack(coords_array)

    # return results in dictionary form
    return {'lm': lm, 'im': im, 'id': ID, 'scale': SCALE, 'coords': coords_array}

In [None]:
# Parse the landmark file into a dictionary of fields.
# According to the study, this file holds coordinates for 18 homologous landmark points.
tps = readtps(
    "../data/01_morphological_data_landmark_coordinates_Malawi_radiation.tps"
)

In [None]:
# Show which fields readtps returned
tps.keys()

In [None]:
# How to read the coordinates data.
# Index order is tps['coords'][landmark][axis: x=0, y=1][individual]; all indices are zero-based.
# A cell only displays the value of its last expression, so the examples are
# gathered into one tuple to make all of them visible.
(
    tps['coords'][10][0][0],   # Landmark 11, x coordinate, first individual (index 0)
    tps['coords'][10][0][1],   # Landmark 11, x coordinate, second individual (index 1)
    tps['coords'][9][0][1],    # Landmark 10, x coordinate, second individual (index 1)
)

In [None]:
# Landmark coordinates of the caudal fin base and the upper jaw.
# The distance between these two points is used as the standard length of the fish.
# The caudal fin base is taken as the midpoint between landmarks 10 (top) and 11 (bottom);
# the upper jaw is landmark 1. See Supplementary Figure 7 for reference.
# Each landmark slice has the x values in row 0 and the y values in row 1,
# so unpacking the first two rows yields (x, y) for all individuals at once.
x_cfin, y_cfin = (tps['coords'][9][:2] + tps['coords'][10][:2]) / 2
x_ujaw, y_ujaw = tps['coords'][0][:2]

In [None]:
# Standard length: Euclidean distance between the upper jaw and the caudal fin base,
# computed for every individual and returned as a plain list of floats
SL = np.sqrt((x_ujaw - x_cfin)**2 + (y_ujaw - y_cfin)**2).tolist()

In [None]:
# Eye landmarks: landmarks 4 and 5 are the two points that cross the eye horizontally.
# The eye width is later calculated as the linear distance between them.
# Row 0 of each landmark slice holds the x values, row 1 the y values.
xeye_lm4, yeye_lm4 = tps['coords'][3][:2]
xeye_lm5, yeye_lm5 = tps['coords'][4][:2]

In [None]:
# Eye diameter: Euclidean distance between landmarks 4 and 5,
# computed for every individual and returned as a plain list of floats
ED = np.sqrt((xeye_lm4 - xeye_lm5)**2 + (yeye_lm4 - yeye_lm5)**2).tolist()

In [None]:
# Translate coordinate distances into real distances, using the scale provided.
# Scale values in the tps dict are strings. Convert to float before calculating distances.

scale = [float(i) for i in tps['scale']]   # Transform scale to float

# zip() stops at the shortest input without warning, so a record lacking a SCALE
# line would silently truncate the results and pair the remaining scales with the
# wrong individuals. Fail loudly instead.
if not (len(scale) == len(SL) == len(ED)):
    raise ValueError(
        "Mismatched lengths: %d scale values, %d SL values, %d ED values"
        % (len(scale), len(SL), len(ED))
    )

SL_dist = [a*b for a,b in zip(SL,scale)]   # Multiply SL and scale element by element
ED_dist = [a*b for a,b in zip(ED,scale)]   # Multiply ED and scale element by element

In [None]:
# Sanity check on the scaled standard lengths
SL_dist[1:5]   # Looks ok? Note: this slice shows indices 1-4 and skips the first individual (index 0)

In [None]:
# Body-size-corrected eye diameter: scaled eye diameter divided by scaled
# standard length, individual by individual
size_corr_ED_to_scale = [
    eye / length
    for eye, length in zip(ED_dist, SL_dist)
]

In [None]:
# Assemble the results into a dataframe, one row per individual.
# The 'id' column is filled from the IMAGE= field values and the 'scale' column
# keeps the scale values as the strings read from the file.
df = pd.DataFrame(
    data=zip(tps['im'], tps['scale'], SL_dist, ED_dist, size_corr_ED_to_scale),
    columns=['id', 'scale', 'SL', 'ED', 'ED.SL'],
)

In [None]:
# Preview the first rows of the table
df.head()

In [None]:
# Photo IDs can be matched to species and sequences using the study metadata. This information is presented in Supplementary Data 1