In [1]:
import numpy as np
import pandas as pd
import os
from os import listdir
from os.path import isfile, join, getsize, dirname
from collections import Counter, OrderedDict
import shutil
import warnings
import h5py
import pickle
import matplotlib.pyplot as plt
import random
# NOTE: this silences every warning for the whole notebook, including pandas'
# SettingWithCopyWarning and deprecation notices.
warnings.filterwarnings('ignore')

# Data locations, relative to the notebook's working directory.
path = "../../../../../zion/OpenSNP/people"        # genotype files; listed when the file set is built
meta = "../../../../../zion/OpenSNP/original"      # directory holding the phenotype CSV
beacons = "../../../../../zion/OpenSNP/beacon"
phenotype = "../../../../../zion/OpenSNP/phenotypes"

# `display` and `HTML` are part of the public IPython.display module; importing
# them from IPython.core.display is deprecated.
from IPython.display import display, HTML

# Widen the notebook container to 85% of the browser window.
display(HTML("<style>.container { width:85% !important; }</style>"))

In [2]:
def findUserIndex(filename):
    """Return the numeric user id encoded at the start of a file name.

    The first four characters are skipped and everything up to the first
    underscore is parsed as an integer, e.g. ``"user42_file7_..."`` -> ``42``.
    Raises ``ValueError`` when that token is not a valid integer.
    """
    user_token, _, _ = filename[4:].partition("_")
    return int(user_token)

def findFileIndex(filename):
    """Return the numeric file id from the second underscore-separated field.

    The first four characters of that field are skipped and the rest is parsed
    as an integer, e.g. ``"user42_file7_..."`` -> ``7``. Raises ``IndexError``
    when the name contains no underscore.
    """
    fields = filename.split("_")
    file_token = fields[1]
    return int(file_token[4:])

In [3]:
# Corrupted file cut: keep only 23andme exports larger than 15 MB.
MIN_GENOTYPE_BYTES = 15 * 1000000

files = np.array([f for f in listdir(path) if isfile(join(path, f))], dtype=str)
# The format tag is the second-to-last dot-separated field of the file name.
# Names without any dot get an empty tag instead of raising IndexError.
types = np.array([f.split(".")[-2] if "." in f else "" for f in files], dtype=str)
sizes = np.array([getsize(join(path, f)) for f in files], dtype=np.int64)
ind = np.logical_and(types == "23andme", sizes > MIN_GENOTYPE_BYTES)
files = files[ind]

# Deal with multiple file people, select newest one (highest file index).
user_filename = {}
for f in files:
    user_filename.setdefault(findUserIndex(f), []).append(f)
multiple_files = {k: v for (k, v) in user_filename.items() if len(v) > 1}

# Collect every superseded file first, then drop them all with a single mask
# instead of calling np.delete once per file.
superseded = set()
for f_names in multiple_files.values():
    newest = max(f_names, key=findFileIndex)  # first maximum wins on ties
    superseded.update(name for name in f_names if name != newest)
keep = np.array([name not in superseded for name in files], dtype=bool)
files = files[keep]

# Exactly one file per user remains: map user id -> file name.
user_filename = {findUserIndex(f): f for f in files}
user_ind = np.array(list(user_filename.keys()))

In [4]:
# 3034 users remaining
################################################################################################################################

In [5]:
# Phenotype columns to keep, spelled as they are used to index `pheno`
# (`pheno[features]`). The order matters: `renamed` is assigned positionally as
# the new column names, so both lists must stay the same length and order.
features = ['Eye color',
            'Colour Blindness',
            'Hair Type',
            'Hair Color',
            'Ability to Tan',
            'Asthma',
            'Lactose intolerance',
            'Blood type',
            'Earwax type',
            'Freckling',
            'Tongue roller',
            'ring finger longer than index finger',
            'Beard Color',
            'Intolerance: gluten, casein, soy',
            'chrom_sex',
            'Handedness',
            'Widow\'s Peak',
            'ADHD',
            'Acrophobia',
            'hair on fingers',
            'Short-sightedness (Myopia)',
            'Irritable Bowel Syndrome',
            'Index Toe Longer than Big Toe',
            'Photic Sneeze Reflex (Photoptarmis)',
            'Migraine',
             # Duplicate columns that are later merged into 'Blood type' / 'Eye color'.
             # The trailing space in 'ABO Rh ' is part of the name as written here
             # (presumably it matches the CSV header -- verify before changing).
            'ABO Rh ',
            'eye colour'
            # 'RhProtein' is not selected here; it is derived from BloodType later.
             ]

In [6]:
# Short column names assigned to `pheno.columns` after `pheno[features]`.
# Entries correspond to `features` by position, so keep both lists in step.
renamed = [ 'EyeColor',
            'ColorBlindness',
            'HairType',
            'HairColor',
            'TanAbility',
            'Asthma',
            'LactoseIntolerance',
            'BloodType',
            'EarWax',
            'Freckling',
            'TongueRoller',
            'RingFinger',
            'BeardColor',
            'Intolerance',
            'Sex',
            'Handedness',
            'WidowPeak',
            'ADHD',
            'Acrophobia',
            'FingerHair',
            'Myopia',
            'IrritableBowel',
            'IndexLongerBig',
            'Photoptarmis',
            'Migraine',
             # Duplicates: merged into BloodType / EyeColor and then deleted.
            'BloodType2',
            'EyeColor2']

In [7]:
# Read phenotype file: ';'-separated, indexed by user_id, without the two
# columns that are not used as phenotypes.
pheno = (
    pd.read_csv(join(meta, "phenotypes_201811100342.csv"), sep=";")
    .sort_values(by=['user_id'])
    .drop(['date_of_birth', 'openhumans_name'], axis=1)
    .set_index('user_id')
)
# Treat an explicit refusal like a missing answer.
pheno[pheno == "rather not say"] = "-"

# Drop duplicate values (keep the first row per user), then align the table to
# the users that still have a genotype file. `reindex` is used instead of
# `.loc[user_ind]` because `.loc` raises KeyError in pandas >= 1.0 as soon as
# one id has no phenotype row; `reindex` yields an all-NaN row for such ids.
# The index must be unique before reindexing, hence the de-duplication first.
pheno = pheno.loc[~pheno.index.duplicated(keep='first')]
pheno = pheno.reindex(user_ind)

In [8]:
# Test the density: for every phenotype column, count the entries that differ
# from the "-" placeholder, and list the columns from most to least populated.
density = sorted(
    ((column, np.sum(pheno[column] != "-", axis=0)) for column in pheno.columns.values),
    key=lambda entry: entry[1],
    reverse=True,
)
#Counter(pheno["brown hair colour"].values)

In [9]:
# Keep the selected phenotype columns, mark missing answers with "-", order the
# rows by user id, and switch to the short column names.
selected = pheno[features].fillna("-").sort_index()
selected.columns = renamed
pheno = selected
print(pheno.shape)

(3034, 27)


In [10]:
# Merge duplicate columns: where the primary column has no answer ("-") but the
# duplicate column does, take the duplicate's value.
# Whole-column assignment is used because `pheno[col][mask] = ...` is chained
# indexing: it may write to a temporary copy and is a silent no-op under
# pandas Copy-on-Write.
eye_gap = np.logical_and(pheno["EyeColor"] == "-", pheno["EyeColor2"] != "-")
pheno["EyeColor"] = pheno["EyeColor"].mask(eye_gap, pheno["EyeColor2"])

blood_gap = np.logical_and(pheno["BloodType"] == "-", pheno["BloodType2"] != "-")
pheno["BloodType"] = pheno["BloodType"].mask(blood_gap, pheno["BloodType2"])

# The Rh factor is derived later from the same raw blood-type answers.
pheno["RhProtein"] = pheno["BloodType"].copy()

del pheno["BloodType2"]
del pheno["EyeColor2"]

# Eye Color

In [11]:
# Eye colour: collapse free-text answers into canonical classes.
# The column is rebuilt and assigned once; `pheno[col][mask] = value` is
# chained indexing and does nothing under pandas Copy-on-Write.
eye_color_variants = {
    # Blue-Green
    "Blue-green": [
        "Blue, grey, green, changing",
        "blue-green ",
        "Blue-green; amber collarette, and gray-blue ringing",
        "Blue-green ",
        "blue-green",
        "Blue-green-grey",
        "Light blue-green",
        "Blue/green",
    ],
    # Blue (includes blue-grey)
    "Blue": [
        "blue",
        "Blue with yellow parts",
        "Blue with a yellow ring of flecks that make my eyes look green depending on the light or my  mood     ",
        "Dark blue",
        "blue-grey",
        "gray-blue",
        "Blue-grey; broken amber collarette",
        "Gray-blue",
        "Dark Grayish-Blue Eyes (like a stone)",
        "Blue grey",
        "Blue/gray",
        "Blue-grey",
    ],
    # Green
    "Green": [
        "Green ",
        "green",
        "Green-gray",
        "Ambar-Green",
        "Light-mixed Green",
        "Ambar-green",
        "Light-mixed green",
        "Green with blue halo",
        "Green with amber burst and gray outer ring",
        "Brown inner, dark green outer",
    ],
    # Brown
    "Brown": [
        "brown",
        "Grey brown",
        # NOTE(review): the lower-case spelling of this answer is mapped to Hazel below.
        "Brown - Brown and green in bright sunlight",
        "Olive-Brown ringing Burnt Umber-Brown",
        "Olive-brown ringing burnt umber-brown",
        "Brown-(green when external temperature rises)",
        # NOTE(review): "Brown green starburst" (with the final t) is mapped to Hazel below.
        "Brown green starburs",
        "Grey and Amber",
        "Brown/black",
        "Dark brown",
    ],
    # Hazel
    "Hazel": [
        "Green-hazel",
        "Hazel (brown/green)",
        "Hazel/Yellow",
        "Amber/brown",
        "Brown green starburst",
        "brown green starburst",
        "Green yellow",
        "Hazel, olive green with amber starburst",
        "Brown-green",
        "hazel",
        "Hazel/Light Brown",
        "Hazel/light brown",
        "Hazel (light brown, dark green, dark blue)",
        "brown-green",
        "Green-brown",
        "green-brown",
        "Indeterminate brown-green with a subtle grey caste",
        "Brown - brown and green in bright sunlight",
        "indeterminate brown-green with a subtle grey caste",
    ],
    # Other: genotype strings, mixed colours and non-answers
    "Other": [
        "Mixed",
        "Rs12913832 aa",
        "rs12913832 AG (they",
        "Gg",
        "GG",
        "Split - one side dark blue / other side light blue and green",
        "Rs12913832 ag (they",
        "blue-brown heterochromia",
        "Amber - (yellow/ocre  brown)",
        "The very first phenotype listed is \"eye colour\".  you have created a duplicate.  please read the list of phenotypes before adding a new one.",
        "Ice blue mixed with slate blue, with an amber pupil burst in both eyes and a brown spot adjacent to lower left pupil. eyes were green into my 20's.",
        "Blue/green/gold",
        "blue spot of brown",
    ],
}
eye_color_lookup = {
    variant: label
    for label, variants in eye_color_variants.items()
    for variant in variants
}

# Values without a rule (including "-") pass through unchanged.
eye_color = pheno["EyeColor"].map(lambda answer: eye_color_lookup.get(answer, answer))
# Unclassifiable answers become missing (NaN).
eye_color = eye_color.where(eye_color != "Other")
# Not Decided: blue-green is folded into Blue.
eye_color = eye_color.mask(eye_color == "Blue-green", "Blue")
pheno["EyeColor"] = eye_color

Counter(pheno["EyeColor"])

Counter({'Blue': 366,
         'Brown': 322,
         '-': 2045,
         'Green': 104,
         'Hazel': 175,
         nan: 22})

In [12]:
# 2. HAIR COLOR
# Collapse free-text answers into canonical classes. The column is rebuilt and
# assigned once; `pheno[col][mask] = value` is chained indexing and does
# nothing under pandas Copy-on-Write.
hair_color_variants = {
    # Dark Brown
    "DarkBrown": [
        "Dark Brown",
        "Dark brown",
        "Darkest brown to black",
        "Dark brown; red highlights",
        "dark brown",
        "Dark brown; blonde highlights",
        "Blackish brown",
        "Brown-black",
        "Brown-Black",
    ],
    # Brown
    "Brown": [
        "brown",
        "Medium brown",
        "Light to Medium brown",
        "Light to medium brown",
        "Medium brown with highlights",
        "Chestnut brown",
        "Brown and silver",
    ],
    # Blonde
    "Blonde": [
        "Dirty blonde, light brown, something?",
        "Blond",
        "Blond born, today dark brown",
        "blond born, today dark brown",
        "Blond as a child and light brown as an adult",
        "Blonde as a child, light brown as an adult",
        "blonde as a child, to brown as an adult",
        "Blonde as a child, to brown as an adult",
        "Hair darkening with age, starting blonde, ending dark brown",
        "Dirt-brown",
        "Dirt-blonde",
        "Dirt-Brown",
        "Dirt-Blonde",
        "Strawberry blonde",
    ],
    # Dark Blonde
    "DarkBlonde": [
        "Dark blonde (light brown)",
        "Dark blonde with a little of every colour but black.",
        "Dark blonde as a child, dark brown as an adult",
        "Light ashy brown",
        "Light brown",
        "Blonde to light brown as child, medium brown as adult with blonde highlights from sun",
    ],
    # Auburn (includes red)
    "Auburn": [
        "Auburn (Reddish-Brown)",
        "Auburn (reddish-brown)",
        "Strawberry brown",
        "medium brown, red highlights",
        "Reddish-brown",
        "Medium brown, red highlights",
        "Red",
        "Red (gone blond-grey)",
        "red",
    ],
    # Other
    "Other": [
        "CC",
        "Cc",
        "Born pure white, darkens to near black",
    ],
}
hair_color_lookup = {
    variant: label
    for label, variants in hair_color_variants.items()
    for variant in variants
}

# Values without a rule (including "-") pass through unchanged.
hair_color = pheno["HairColor"].map(lambda answer: hair_color_lookup.get(answer, answer))
# Unclassifiable answers become missing (NaN).
hair_color = hair_color.where(hair_color != "Other")

# Fix: fold the dark shades into their base colour.
hair_color = hair_color.mask(hair_color == "DarkBlonde", "Blonde")
hair_color = hair_color.mask(hair_color == "DarkBrown", "Brown")
# Further merges that are currently disabled: Auburn -> Blonde, Black -> Brown.
pheno["HairColor"] = hair_color

Counter(pheno["HairColor"])

Counter({'Blonde': 134,
         '-': 2562,
         'Brown': 259,
         'Auburn': 35,
         'Black': 40,
         nan: 4})

In [13]:
# 3. HAIR TYPE
# Collapse free-text answers into canonical classes. The column is rebuilt and
# assigned once; `pheno[col][mask] = value` is chained indexing and does
# nothing under pandas Copy-on-Write.
hair_type_variants = {
    # Straight
    "Straight": [
        "Mostly straight",
        "Thick, dense, straight",
        "Black, straight",
        "Brown, straight, slight wave",
        "Thick,wavy,straight",
        "straight",
        "Brown, straight",
        "Red, straight",
        "thick,wavy,straight",
        "Thick, straight with slight wave that flips outward near chin/neck length, mostly straight, multiple cow licks in back",
        "Mostly Straight",
        "Red, Straight",
        "Fine but straight",
        "Straight but when moist get little bit wavy",
        "Slightly straighter than average",
        "Stick-straight *everywhere* but pubic region",
    ],
    # Curly
    "Curly": [
        "Fine/brown/curly",
        "Coarse, thick, curly",
        "Brown, curly",
        "brown, fine, curly",
        "Thick, slightly curly",
        "Brown, fine, curly",
        # NOTE(review): this answer says "straight" but is classified as Curly;
        # kept as in the original notebook -- confirm it is intended.
        "Brown, straight,",
        "Fine, thin, curly",
        "Thick, curls with length",
        "Medium, loose curls",
        "Mixed straight in some areas, curly in other sections",
    ],
    # Wavy-Curly
    "WavyCurly": [
        "Wavy-curly",
        "Wavy-Curly",
        "Large volume of fine curly/wavy hair",
        "wavy-curly",
        "Thick, brown, wavy/curly",
        "Red, wavy-curly, dense ",
        "Thick, fine, wavey-to-curly",
        "red, wavy-curly, dense ",
        "thick, brown, wavy/curly",
        "Straight fine until 30's, became wavy to curly after",
        "Fine, thin, loose curls as a child, straight with a minor wave as an adult",
    ],
    # Wavy
    # ("Wavy-curly" used to be listed here as well, but that rule could never
    # fire because the value had already been relabelled WavyCurly above.)
    "Wavy": [
        "Fine, wavy",
        "Medium thickness/wavy",
        "Red, fine, wavy and dense",
        "Brown, slightly wavy",
        "Fine, thin, slightly wavy",
        "Fine, dense, body wave",
        "Coarse,thick,wavy",
        "Dense, slightly wavy",
        "Baby fine/slightly wavy",
        "baby fine, slightly wavy, later straight",
        "Baby fine, slightly wavy, later straight",
        "Near black, thick, slightly wavy",
        "near black, thick, slightly wavy",
    ],
    # Other
    "Other": [
        "Rs17646946 gg",
        "GG",
        "Thin, fine, thinned with age",
        "Fine, never gets long, breaks off at the end",
        "Bald",
    ],
}
hair_type_lookup = {
    variant: label
    for label, variants in hair_type_variants.items()
    for variant in variants
}

# Values without a rule (including "-") pass through unchanged.
hair_type = pheno["HairType"].map(lambda answer: hair_type_lookup.get(answer, answer))
# Unclassifiable answers become missing (NaN).
hair_type = hair_type.where(hair_type != "Other")

# Wavy-curly is folded into Curly. (Folding Curly into Wavy is currently disabled.)
hair_type = hair_type.mask(hair_type == "WavyCurly", "Curly")
pheno["HairType"] = hair_type

Counter(pheno["HairType"])

Counter({'Straight': 145, '-': 2650, 'Wavy': 120, 'Curly': 106, nan: 13})

In [14]:
# 4. TAN ABILITY
# Collapse free-text answers into canonical classes. The column is rebuilt and
# assigned once; `pheno[col][mask] = value` is chained indexing and does
# nothing under pandas Copy-on-Write.
tan_ability_variants = {
    # No
    "No": [
        "None",
        "No tan, just freckles",
        # NOTE(review): this answer reads like the person can tan; kept as "No"
        # to match the original notebook -- confirm.
        "No problem with tanning after i corrected vit. d deficiency",
        "none",
        "Burn then tan",
        "Burn ",
    ],
    # Yes
    "Yes": [
        "I stay dark year round",
        # NOTE(review): the next two look like hair-type answers copied into
        # this cell; kept to match the original notebook.
        "Coarse, thick, curly",
        "Brown, curly",
        "I burn in some places and tan in others",
        "Easily deeply tan & rarely burn",
    ],
    # Medium
    "Medium": [
        "Moderate",
        "moderate",
        "poor",
        "Poor",
        "Freckled, burn very quickly, a bit of tan",
    ],
    # Other
    "Other": [
        "Had Melanoma",
        "Sun sensitivity with history of sun poisoning",
    ],
}
tan_ability_lookup = {
    variant: label
    for label, variants in tan_ability_variants.items()
    for variant in variants
}

# Values without a rule (including "-") pass through unchanged.
tan_ability = pheno["TanAbility"].map(lambda answer: tan_ability_lookup.get(answer, answer))
# Unclassifiable answers become missing (NaN).
pheno["TanAbility"] = tan_ability.where(tan_ability != "Other")

Counter(pheno["TanAbility"])

Counter({'Yes': 112, '-': 2744, 'Medium': 112, 'No': 63, nan: 3})

In [15]:
# 5. ASTHMA
# Collapse free-text answers into Yes / No. The column is rebuilt and assigned
# once; `pheno[col][mask] = value` is chained indexing and does nothing under
# pandas Copy-on-Write.
asthma_variants = {
    # No
    "No": [
        "None",
        "False",
        "no",
        "No (but i'm not sure)",
    ],
    # Yes
    "Yes": [
        "Asthma, secondary to allergies",
        "Late onset asthma",
        "Chronic asthma",
        "Cold-induced, worse when younger",
        "Asthmatic, ",
        "slight",
        "Chronic Asthma",
        "Asthma, mostly during childhood",
        "Chronic Asthma, Allergen & Cold Induced",
        "cough variant asthma",
        "Slight",
        "Chronic asthma, allergen & cold induced",
        "Exercise-induced",
    ],
    # Childhood ?? -- kept as a separate group for readability; it is folded
    # into Yes below, so the final label is the same either way.
    "Childhood": [
        "Asthma, mostly during Childhood",
        "Childhood Asthma; No asthma as an adult",
        "Allergy induced, mainly when younger",
        "Had as a child, not anymore",
        "Childhood asthma; no asthma as an adult",
        "Severely asthmatic as a child, mildly as an adult (allergy and weather induced)",
    ],
}
asthma_lookup = {
    variant: label
    for label, variants in asthma_variants.items()
    for variant in variants
}

# Values without a rule (including "-") pass through unchanged.
asthma = pheno["Asthma"].map(lambda answer: asthma_lookup.get(answer, answer))
# Childhood asthma counts as Yes.
pheno["Asthma"] = asthma.mask(asthma == "Childhood", "Yes")

Counter(pheno["Asthma"])

Counter({'No': 129, '-': 2808, 'Yes': 97})

In [16]:
# 6. LACTOSE INTOLERANCE
# Collapse free-text answers into Tolerant / Intolerant. The column is rebuilt
# and assigned once; `pheno[col][mask] = value` is chained indexing and does
# nothing under pandas Copy-on-Write.
lactose_variants = {
    # Tolerant
    "Tolerant": [
        "Lactose-tolerant",
        "Lactose tolerant",
        "lactose-tolerant",
        "lactose tolerant",
        "False",
    ],
    # Intolerant
    "Intolerant": [
        "Partially lactose intolerant - surfaces with a lot of dairy products in one day",
        "Lactose-intolerant",
        "lactose-intolerant",
        "Lactose intolerant",
        "Genetically intolerant but drink raw milk and eat lots of dairy.",
        "Slight lactose intolerance",
        " Allergic to all forms of dairy ",
        "Genetically intolerant, partial intolerance.",
        " allergic to all forms of dairy ",
    ],
    # Other
    "Other": [
        "AA",
        "Genetically tolerant-extrememly lactose intolerant",
    ],
}
lactose_lookup = {
    variant: label
    for label, variants in lactose_variants.items()
    for variant in variants
}

# Values without a rule (including "-") pass through unchanged.
lactose = pheno["LactoseIntolerance"].map(lambda answer: lactose_lookup.get(answer, answer))
# Unclassifiable answers become missing (NaN).
pheno["LactoseIntolerance"] = lactose.where(lactose != "Other")

Counter(pheno["LactoseIntolerance"])

Counter({'Tolerant': 235, 'Intolerant': 112, '-': 2683, nan: 4})

# Rh Protein

In [17]:
# Rh factor, derived from the raw blood-type answers copied into "RhProtein".
# The column is rebuilt and assigned once; `pheno[col][mask] = value` is
# chained indexing and does nothing under pandas Copy-on-Write.
rh_variants = {
    # +
    "Positive": [
        "A+ Secretor / Saliva Non Secretor!!",
        "A+",
        "B+",
        "0+",
        "AB+",
        "b+",
        "A+ (A/O) ",
        "O +",
        "A+ (a/o) ",
        # NOTE(review): this answer describes ABO/Kidd typing, not Rh; kept as
        # Positive to match the original notebook.
        "ABO  Kidd:  AG \tJk(a+b+)",
        "A+ (AO/+-) Non-Secretor",
        "A2B+",
        "Ab+",
        "A+ (ao/+-) non-secretor",
        "O/o +/-",
        "AO/O +/-",
        "O+",
        "O/O +/-",
    ],
    # -
    "Negative": [
        "Ab-",
        "B-",
        "A-",
        "Rh-",
        "o rh negative",
        "O-",
        # Spellings that the blood-type cell already recognises; added here so
        # both cells accept the same raw answers.
        "0-",
        "AB-",
    ],
    # Other
    "Other": [
        "Dont know",
    ],
}
rh_lookup = {
    variant: label
    for label, variants in rh_variants.items()
    for variant in variants
}

# Values without a rule (including "-") pass through unchanged.
rh_protein = pheno["RhProtein"].map(lambda answer: rh_lookup.get(answer, answer))
# Unclassifiable answers become missing (NaN).
pheno["RhProtein"] = rh_protein.where(rh_protein != "Other")

Counter(pheno["RhProtein"])

Counter({'Positive': 240, '-': 2721, 'Negative': 71, nan: 2})

# Blood Type

In [18]:
# ABO blood group, with the Rh sign stripped.
# The column is rebuilt and assigned once; `pheno[col][mask] = value` is
# chained indexing and does nothing under pandas Copy-on-Write.
blood_type_variants = {
    # A
    "A": [
        "A+ Secretor / Saliva Non Secretor!!",
        "A+ (A/O) ",
        "A+ (a/o) ",
        "A+ (ao/+-) non-secretor",
        "A+ (AO/+-) Non-Secretor",
        "A-",
        "A+",
    ],
    # B
    "B": [
        "b+",
        "B-",
        "B+",
    ],
    # 0
    "O": [
        "O/O +/-",
        "0+",
        "O/o +/-",
        "O+",
        "o rh negative",
        "0-",
        "O-",
        "O +",
    ],
    # AB
    "AB": [
        "Ab+",
        "A2B+",
        "Ab-",
        "AB+",
        # Previously mapped to itself ("AB-"), which left a stray "AB-" class.
        "AB-",
        "ABO  Kidd:  AG \tJk(a+b+)",
    ],
    # Other
    "Other": [
        "Dont know",
        "Rh-",
    ],
}
blood_type_lookup = {
    variant: label
    for label, variants in blood_type_variants.items()
    for variant in variants
}

# Values without a rule (including "-") pass through unchanged.
blood_type = pheno["BloodType"].map(lambda answer: blood_type_lookup.get(answer, answer))
# NOTE(review): AB is folded into B, as in the original notebook. This looks
# like a deliberate class merge (other cells merge small classes the same
# way), but it is not labelled as one -- confirm.
blood_type = blood_type.mask(blood_type == "AB", "B")

# TODO
#Counter(pheno["BloodType2"].values)

# Unclassifiable answers become missing (NaN).
pheno["BloodType"] = blood_type.where(blood_type != "Other")

Counter(pheno["BloodType"])

Counter({'A': 114, '-': 2721, 'B': 66, 'O': 130, nan: 3})

In [19]:
# 8. EARWAX
# Collapse free-text answers into Wet / Mixed ("Dry" answers are left as
# they are). The column is rebuilt and assigned once; `pheno[col][mask] = value`
# is chained indexing and does nothing under pandas Copy-on-Write.
ear_wax_variants = {
    # Wet
    "Wet": [
        "C/T wet",
    ],
    # Mixed (heterozygous / varying answers)
    "Mixed": [
        "Wet-Dry (hetero)",
        "Wet, dry",
        "Varies with humidity/amt. of ambient dust (heterozygous)",
        "Wet-dry (hetero)",
    ],
    # Other
    "Other": [
        "Cc",
        "CC",
    ],
}
ear_wax_lookup = {
    variant: label
    for label, variants in ear_wax_variants.items()
    for variant in variants
}

# Values without a rule (including "-") pass through unchanged.
ear_wax = pheno["EarWax"].map(lambda answer: ear_wax_lookup.get(answer, answer))
# Unclassifiable answers become missing (NaN).
pheno["EarWax"] = ear_wax.where(ear_wax != "Other")

Counter(pheno["EarWax"])

Counter({'Wet': 180, '-': 2786, 'Mixed': 36, nan: 4, 'Dry': 28})

In [20]:
# 9. FRECKLING
# Collapse free-text answers into No / Light / Yes. The column is rebuilt and
# assigned once; `pheno[col][mask] = value` is chained indexing and does
# nothing under pandas Copy-on-Write.
freckling_variants = {
    # No
    "No": [
        "None on face - many on shoulders",
        "None",
        "none",
    ],
    # Light
    "Light": [
        "Light freckles all over, more when exposed to sun.",
        "Light, more as a child",
        "Some",
        "some",
        "Tiny amount",
        "Slight",
        "tiny amount",
        "light, more as a child",
        "I haven´t ,only in hand-light",
        "Freckles only with sun",
        "i haven´t ,only in hand-light",
        "Light reddish freckling with sun",
        "Very few freckles and the ones i do have are not visible",
    ],
    # Yes
    "Yes": [
        "Moderate",
        "Across face, across arms, some light on shoulders. had more in childhood.",
        "Across face, across arms, some light on shoulders. Had more in childhood.",
        "Extensive",
        "Not the cute kinds ",
        "Heavy on face, light on shoulders",
        "Heavy",
        "Medium",
    ],
    # Other
    "Other": [
        "Ct",
        "CT",
    ],
}
freckling_lookup = {
    variant: label
    for label, variants in freckling_variants.items()
    for variant in variants
}

# Values without a rule (including "-") pass through unchanged.
freckling = pheno["Freckling"].map(lambda answer: freckling_lookup.get(answer, answer))
# Unclassifiable answers become missing (NaN).
# (Folding Light into Yes is currently disabled.)
pheno["Freckling"] = freckling.where(freckling != "Other")

Counter(pheno["Freckling"])

Counter({'No': 99, '-': 2755, 'Light': 111, nan: 2, 'Yes': 67})

In [21]:
''' 10. TONGUE ROLLER '''
# Free-text answer -> canonical label.
# Each recode is written with a single `.loc[mask, column]` call so the
# assignment lands on `pheno` itself; the chained form `pheno[col][mask] = v`
# may write to a temporary copy and silently change nothing.
tongue_roller_labels = {
    # No
    "Non-roller": "No",
    "Non-Roller": "No",
    "No, but my siblings (7) can roll": "No",
    "Non ": "No",  # trailing space is part of the raw answer
    "No, but parent and a sibling can.": "No",
    # Yes
    "Tongue roller": "Yes",
    "Roller": "Yes",
    "Tongue folder": "Yes",
    "Tongue roller, can fold tongue into 3-leaf clover shape": "Yes",
    "roller": "Yes",
}
for raw_answer, label in tongue_roller_labels.items():
    pheno.loc[pheno["TongueRoller"] == raw_answer, "TongueRoller"] = label

Counter(pheno["TongueRoller"])

Counter({'Yes': 353, '-': 2600, 'No': 81})

In [22]:
''' 11. BEARD COLOR '''
# Free-text answer -> canonical label. Raw answers that were recoded more than
# once in the earlier version of this cell appear here a single time.
# Each recode is written with a single `.loc[mask, column]` call so the
# assignment lands on `pheno` itself; the chained form `pheno[col][mask] = v`
# may write to a temporary copy and silently change nothing.
beard_color_labels = {
    # Brown
    "Brown-grey": "Brown",
    "Red-brown": "Brown",
    "Was red-brown now mixed with gray,": "Brown",
    "red-brown": "Brown",
    "Red-blonde-brown-black(in diferent parts i have different color,for example near the lips blond-red": "Brown",
    "Red-Blonde-Brown-Black(in diferent parts i have different color,for example near the lips blond-red": "Brown",
    "Brown-gray": "Brown",
    "Brown, some red": "Brown",
    "was red-brown now mixed with gray,": "Brown",
    "Red-blonde-brown-black": "Brown",
    "Red-blonde-brown": "Brown",
    "Red-Blonde-Brown-Black": "Brown",
    "Red": "Brown",
    "Red, moustache still is, beard mostly white": "Brown",
    # Dark Brown
    "Brown-black": "DarkBrown",
    "Dark brown": "DarkBrown",
    "Dark brown with minor blondish-red": "DarkBrown",
    "Dark Brown w/few Blonde & Red Hairs": "DarkBrown",
    "dark brown with minor blondish-red": "DarkBrown",
    "Dark brown w/few blonde & red hairs": "DarkBrown",
    "Black-brown-blonde": "DarkBrown",
    # Black
    "black with few white hairs": "Black",
    "Black with few red hairs": "Black",
    "Black with few white hairs": "Black",
    "Black, graying": "Black",
    # Blonde
    "Blonde/brown-some black-and red on chin-all starting to gray": "Blonde",
    "blonde-brown": "Blonde",
    "Blond-brown": "Blonde",
    "Blonde-brown": "Blonde",
    "Dark blonde with red and light blonde on goatee area.": "Blonde",
    # Other (not usable as a beard colour; blanked out below)
    "every possible color, most hair shafts have more than one color at different points along the shaft": "Other",
    "No beard-female": "Other",
    "never had a beard": "Other",
    "I'm a woman": "Other",
    "i'm a woman": "Other",
    "Never had a beard": "Other",
}
for raw_answer, label in beard_color_labels.items():
    pheno.loc[pheno["BeardColor"] == raw_answer, "BeardColor"] = label

# Rows labelled "Other" become NaN; every other value is kept as is.
pheno["BeardColor"] = pheno["BeardColor"].where(pheno["BeardColor"] != "Other")

Counter(pheno["BeardColor"])

Counter({'Blonde': 16,
         '-': 2854,
         nan: 41,
         'Brown': 75,
         'DarkBrown': 28,
         'Black': 20})

In [23]:
''' 12. RING FINGER '''
# Free-text answer -> canonical label.
# Each recode is written with a single `.loc[mask, column]` call so the
# assignment lands on `pheno` itself; the chained form `pheno[col][mask] = v`
# may write to a temporary copy and silently change nothing.
ring_finger_labels = {
    # No
    "Equal": "No",
    "equal": "No",
    "Left hand: yes / right hand: equal": "No",
    # Yes
    "yes": "Yes",
    "Both ring fingers are longer than my index fingers.": "Yes",
    "Both ring fingers are longer than my index fingers, but my right ring finger is 1-2 mm longer than the left one.": "Yes",
}
for raw_answer, label in ring_finger_labels.items():
    pheno.loc[pheno["RingFinger"] == raw_answer, "RingFinger"] = label

Counter(pheno["RingFinger"])

Counter({'Yes': 174, '-': 2766, 'No': 94})

In [24]:
''' 13. INTOLERANCE '''
# Free-text answer -> canonical label. "rs2300753 CC" was recoded three times
# in the earlier version of this cell; it appears here once.
# Each recode is written with a single `.loc[mask, column]` call so the
# assignment lands on `pheno` itself; the chained form `pheno[col][mask] = v`
# may write to a temporary copy and silently change nothing.
intolerance_labels = {
    # NoIntolerance
    "no intolerance": "NoIntolerance",
    "No intolerance": "NoIntolerance",
    "rs2300753 CC and no intolerance": "NoIntolerance",
    "Rs2300753 CC and no intolerance": "NoIntolerance",
    "Rs2300753 cc and no intolerance": "NoIntolerance",
    "rs4988235 aa no intolerance": "NoIntolerance",
    "No but close family member with gluten intolerance": "NoIntolerance",
    # Intolerant to something
    "Soy Intolerant": "Intolerant",
    "Gluten & casein intolerant, soy tolerant": "Intolerant",
    "Gluten, casein, and soy intolerant": "Intolerant",
    "Casein intolerance": "Intolerant",
    "Gluten intolerance": "Intolerant",
    "gluten & casein intolerant, soy tolerant": "Intolerant",
    "Gluten, soy and corn intolerant ": "Intolerant",  # trailing space is part of the raw answer
    "Lactose": "Intolerant",
    "Soy intolerant": "Intolerant",
    "Wheat intolerant": "Intolerant",
    "Celiac, casein intolerant, soy intolerant": "Intolerant",
    "Possible celiac": "Intolerant",
    "possible celiac": "Intolerant",
    # Other (blanked out below)
    "Rs2300753 cc": "Other",
    " rs4988235 AG likely to be tolerant": "Other",  # leading space is part of the raw answer
    "rs2300753 CC": "Other",
}
for raw_answer, label in intolerance_labels.items():
    pheno.loc[pheno["Intolerance"] == raw_answer, "Intolerance"] = label

# Rows labelled "Other" become NaN; every other value is kept as is.
pheno["Intolerance"] = pheno["Intolerance"].where(pheno["Intolerance"] != "Other")

Counter(pheno["Intolerance"])

Counter({'NoIntolerance': 105, '-': 2889, nan: 9, 'Intolerant': 31})

In [25]:
''' 14. COLOR BLINDNESS '''
# Free-text answer -> canonical label.
# Each recode is written with a single `.loc[mask, column]` call so the
# assignment lands on `pheno` itself; the chained form `pheno[col][mask] = v`
# may write to a temporary copy and silently change nothing.
color_blindness_labels = {
    # No
    "Not colour blind": "No",
    "Not color blind": "No",
    "False": "No",
    "not color blind": "No",
    "carrier (eg: my brother and my son have colour blindness)": "No",
    "Carrier (eg: my brother and my son have colour blindness)": "No",
    # Yes
    "Mild red-green colour blind": "Yes",
    "Red-green colour blind": "Yes",
    "low light blue green colour blindness": "Yes",
    "Tritanomaly": "Yes",
    "Mild blue-black colour blind": "Yes",
    # NOTE(review): this answer used to sit under an "# Other" heading while
    # being assigned "No". The assigned value is kept; confirm that "No"
    # (rather than "Other") is the intended class.
    "Not colour blind, but difficulty telling red/orange/pink apart sometimes": "No",
}
for raw_answer, label in color_blindness_labels.items():
    pheno.loc[pheno["ColorBlindness"] == raw_answer, "ColorBlindness"] = label

# Rows whose value is the literal "Other" become NaN (no recode above produces it).
pheno["ColorBlindness"] = pheno["ColorBlindness"].where(pheno["ColorBlindness"] != "Other")

Counter(pheno["ColorBlindness"])

Counter({'No': 351, '-': 2664, 'Yes': 19})

# Handedness

In [26]:
# Free-text answer -> canonical label.
# Each recode is written with a single `.loc[mask, column]` call so the
# assignment lands on `pheno` itself; the chained form `pheno[col][mask] = v`
# may write to a temporary copy and silently change nothing.
handedness_labels = {
    # Left
    "left-handed": "Left",
    "Left-handed": "Left",
    "Left-handed in Hockey and Dominant left handed clapping; everything else right handed": "Left",
    "Left-handed for small motor movement, writing. right-handed for large motor movement, throwing": "Left",
    "Left-handed in hockey and dominant left handed clapping; everything else right handed": "Left",
    "Left-handed trained right handed as child": "Left",
    "Was born left handed but forced to be right": "Left",
    "Left-handed dominant, right-handed/ambidextrous  for sport.": "Left",
    # Right
    "right-handed": "Right",
    "Right-handed": "Right",
    "Right handed": "Right",
    "right handed": "Right",
    # NOTE(review): this capitalisation goes to "Right" while the lower-case
    # variant "Ambidexterous, write with right hand" goes to "Ambidexter"
    # further down. Both are kept as they were; confirm which class is intended.
    "Ambidexterous, write with Right Hand": "Right",
    "right": "Right",
    "Right handed. Left footed.": "Right",
    "Right handed. left footed.": "Right",
    "Primarily right-handed but for active, more detailed activities (e.g. writing, drawing, using scissors, etc.); less precise, slow, and less focused movements are done with my left hand (e.g. driving/steering, pulling/pushing to open doors, etc.)": "Right",
    "Right-handed - but play cards, pay at cash registers left-handed ": "Right",  # trailing space is part of the raw answer
    "Right-handed but throw ball overarm with my left": "Right",
    "Right hand dominant; play multiple musical instruments,guitar, banjo, mandolin;baseball,switch hitter; ;fine motor skills ambidextrous": "Right",
    # Ambidexter
    "ambidexterous": "Ambidexter",
    "Ambidexterous": "Ambidexter",
    "Ambidexterous, write with right hand": "Ambidexter",
    "Right-handed dominant with left for computer mouse": "Ambidexter",
    "Right-handed. hit left-handed in baseball & golf": "Ambidexter",
    "Right-handed, but slightly ambidextrous.": "Ambidexter",
    "Right-handed, but ambidexterous in sports, music, filing, cutting, etc.": "Ambidexter",
    "Mixed-handed (right for writing / left for other)": "Ambidexter",
    "Cross-dominant, favouring right side somewhat more": "Ambidexter",
    "Right hand dominant but ambidextrous and \"goofy-footed\" in sports/larger motor movement": "Ambidexter",
    # Other (blanked out below)
    "70%": "Other",
    "Right-handed dominant, left-handed for eating, riding and sport when younger.": "Other",
}
for raw_answer, label in handedness_labels.items():
    pheno.loc[pheno["Handedness"] == raw_answer, "Handedness"] = label

# Rows labelled "Other" become NaN; every other value is kept as is.
pheno["Handedness"] = pheno["Handedness"].where(pheno["Handedness"] != "Other")

Counter(pheno["Handedness"].values)

Counter({'Left': 96, 'Right': 554, '-': 2328, 'Ambidexter': 54, nan: 2})

# WidowPeak

In [27]:
# Free-text answer -> canonical label. Only "Yes" variants are recoded in this
# cell; no raw spelling is mapped to "No" here.
# Each recode is written with a single `.loc[mask, column]` call so the
# assignment lands on `pheno` itself; the chained form `pheno[col][mask] = v`
# may write to a temporary copy and silently change nothing.
widow_peak_labels = {
    # Yes
    "Slight": "Yes",
    "yes": "Yes",
}
for raw_answer, label in widow_peak_labels.items():
    pheno.loc[pheno["WidowPeak"] == raw_answer, "WidowPeak"] = label

# Rows whose value is the literal "Other" become NaN (no recode above produces it).
pheno["WidowPeak"] = pheno["WidowPeak"].where(pheno["WidowPeak"] != "Other")

Counter(pheno["WidowPeak"].values)

Counter({'-': 2858, 'No': 117, 'Yes': 59})

# ADHD

In [28]:
# Free-text answer -> canonical label.
# Each recode is written with a single `.loc[mask, column]` call so the
# assignment lands on `pheno` itself; the chained form `pheno[col][mask] = v`
# may write to a temporary copy and silently change nothing.
adhd_labels = {
    # Yes
    "True": "Yes",
    "Diagnosed as \"other hyperkinetic disorder\"": "Yes",
    "Undiagnosed, but probably true": "Yes",
    "yes": "Yes",
    # No
    "no": "No",
    "False": "No",
    # Other (blanked out below)
    "rs1801260": "Other",
    "Rs1801260": "Other",
    "Mthfr c677t": "Other",
    "MTHFR C677T": "Other",
    "Adult Onset": "Other",
    "Not diagnosed": "Other",
}
for raw_answer, label in adhd_labels.items():
    pheno.loc[pheno["ADHD"] == raw_answer, "ADHD"] = label

# Rows labelled "Other" become NaN; every other value is kept as is.
pheno["ADHD"] = pheno["ADHD"].where(pheno["ADHD"] != "Other")

Counter(pheno["ADHD"].values)

Counter({'No': 68, '-': 2862, 'Yes': 86, nan: 18})

# Acrophobia 

In [29]:
# Free-text answer -> canonical label. "True" was recoded three times in the
# earlier version of this cell; it appears here once.
# Each recode is written with a single `.loc[mask, column]` call so the
# assignment lands on `pheno` itself; the chained form `pheno[col][mask] = v`
# may write to a temporary copy and silently change nothing.
acrophobia_labels = {
    # Yes
    "True": "Yes",
    "Slightly": "Yes",
    "Medium": "Yes",
    # No
    "no": "No",
    "Afraid of falling.": "No",
    "No as a child - fear of falling from heights gradually got (a lot) worse as i aged": "No",
    "Counterphobic (skydiver)": "No",
    "Counterphobic (tree and roof-climber, as a kid)": "No",
    "Counterphobic (Skydiver)": "No",
    # Other (blanked out below)
    "(supposedly) a precocious infant climber, rock-climbed through my 30s, now somewhat vertiginous": "Other",
}
for raw_answer, label in acrophobia_labels.items():
    pheno.loc[pheno["Acrophobia"] == raw_answer, "Acrophobia"] = label

# Rows labelled "Other" become NaN; every other value is kept as is.
pheno["Acrophobia"] = pheno["Acrophobia"].where(pheno["Acrophobia"] != "Other")

Counter(pheno["Acrophobia"].values)

Counter({'No': 93, '-': 2878, 'Yes': 62, nan: 1})

# Finger Hair

In [30]:
# Free-text answer -> canonical label.
# Each recode is written with a single `.loc[mask, column]` call so the
# assignment lands on `pheno` itself; the chained form `pheno[col][mask] = v`
# may write to a temporary copy and silently change nothing.
finger_hair_labels = {
    # Yes
    "slight": "Yes",
    "yes": "Yes",
    "Slight": "Yes",
    "not visible but is there, slightly": "Yes",
    "Not visible but is there, slightly": "Yes",
    # No
    "No hair from top of finger to second knuckle.": "No",
    "no": "No",
}
for raw_answer, label in finger_hair_labels.items():
    pheno.loc[pheno["FingerHair"] == raw_answer, "FingerHair"] = label

# Rows whose value is the literal "Other" become NaN (no recode above produces it).
pheno["FingerHair"] = pheno["FingerHair"].where(pheno["FingerHair"] != "Other")

Counter(pheno["FingerHair"].values)

Counter({'No': 57, '-': 2879, 'Yes': 98})

# Myopia

In [31]:
# Free-text answer -> canonical label.
# Each recode is written with a single `.loc[mask, column]` call so the
# assignment lands on `pheno` itself; the chained form `pheno[col][mask] = v`
# may write to a temporary copy and silently change nothing.
myopia_labels = {
    # Low
    "low": "Low",
    "False": "Low",
    "normal": "Low",
    "20/15 vision": "Low",
    # Medium
    "medium": "Medium",
    "Moderate": "Medium",
    "20/25": "Medium",
    # NOTE(review): lower-case "yes" goes to "Medium" while capitalised "Yes"
    # goes to "High" below. Both are kept as they were; confirm the intent.
    "yes": "Medium",
    # High
    "High": "High",  # identity mapping, listed so the High group is complete
    "20/40 corrected  so modeately high": "High",
    "At age 40": "High",
    "High in one eye (right)": "High",
    "myopia age 10": "High",
    "Myopia age 10": "High",
    "20/300": "High",
    "Yes": "High",
    # Other (blanked out below)
    "Adult onset; disease-related": "Other",
    "Don't know- Had cararacts removed": "Other",
    "Don't know- had cararacts removed": "Other",
}
for raw_answer, label in myopia_labels.items():
    pheno.loc[pheno["Myopia"] == raw_answer, "Myopia"] = label

# Rows labelled "Other" become NaN; every other value is kept as is.
pheno["Myopia"] = pheno["Myopia"].where(pheno["Myopia"] != "Other")
Counter(pheno["Myopia"].values)

Counter({'Low': 57, '-': 2879, 'Medium': 38, 'High': 57, nan: 3})

# Irritable Bowel Syndrome

In [32]:
# Free-text answer -> canonical label. The "No issues unless ..." answer was
# recoded twice in the earlier version of this cell; it appears here once.
# Each recode is written with a single `.loc[mask, column]` call so the
# assignment lands on `pheno` itself; the chained form `pheno[col][mask] = v`
# may write to a temporary copy and silently change nothing.
irritable_bowel_labels = {
    # Yes
    "Yes (occasionally, under stress)": "Yes",
    "constipation and bloating": "Yes",
    "Constipation and bloating": "Yes",
    "before 10ft colon resection, yes": "Yes",
    "Yes (mild)": "Yes",
    "Yes (severe)": "Yes",
    # No
    "No issues unless i consume wheat, gluten, dairy and sugar.": "No",
    "No if i cut out all wheat, gluten and sugar": "No",
    "No if I cut out all wheat, gluten and sugar": "No",
}
for raw_answer, label in irritable_bowel_labels.items():
    pheno.loc[pheno["IrritableBowel"] == raw_answer, "IrritableBowel"] = label

# Other
# The mask is built from the IrritableBowel column itself. It was previously
# built from pheno["WidowPeak"], which filtered this column by another trait's
# values.
pheno["IrritableBowel"] = pheno["IrritableBowel"].where(pheno["IrritableBowel"] != "Other")

Counter(pheno["IrritableBowel"].values)

Counter({'-': 2892, 'Yes': 73, 'No': 69})

# IndexLongerBig

In [33]:
# Free-text answer -> canonical label.
# Each recode is written with a single `.loc[mask, column]` call so the
# assignment lands on `pheno` itself; the chained form `pheno[col][mask] = v`
# may write to a temporary copy and silently change nothing.
index_longer_big_labels = {
    # Index Longer
    "index toe longer": "IndexLonger",
    "Index Toe Larger Than Big Toe": "IndexLonger",
    "Index toe longer": "IndexLonger",
    "Index toe larger than big toe": "IndexLonger",
    "Yes": "IndexLonger",
    "2nd toe longer": "IndexLonger",
    "Left index longer than big toe": "IndexLonger",
    # Big Longer
    "No": "BigLonger",
    "no": "BigLonger",
    "Big Toe Larger then Second or Index Toe": "BigLonger",
    "Toe size in order big thru little": "BigLonger",
    "Big toe larger then second or index toe": "BigLonger",
    # Other (blanked out below)
    "Right side index longer, left sidebig toe longer": "Other",
    "Index toe the same height as big toe": "Other",
    "Morton’s neuroma": "Other",
}
for raw_answer, label in index_longer_big_labels.items():
    pheno.loc[pheno["IndexLongerBig"] == raw_answer, "IndexLongerBig"] = label

# Rows labelled "Other" become NaN; every other value is kept as is.
pheno["IndexLongerBig"] = pheno["IndexLongerBig"].where(pheno["IndexLongerBig"] != "Other")

Counter(pheno["IndexLongerBig"].values)

Counter({'BigLonger': 56, '-': 2895, 'IndexLonger': 75, nan: 8})

# Photoptarmis

In [34]:
# Free-text answer -> canonical label. "Photic sneezer" was recoded three
# times in the earlier version of this cell; it appears here once.
# Each recode is written with a single `.loc[mask, column]` call so the
# assignment lands on `pheno` itself; the chained form `pheno[col][mask] = v`
# may write to a temporary copy and silently change nothing.
photoptarmis_labels = {
    # Yes
    "photic sneezer": "Yes",
    "photic sneezer - but do not have snp": "Yes",
    "Instant sneezing using pepper on my plate": "Yes",
    "Photic sneezer - but do not have snp": "Yes",
    "Sometimes": "Yes",
    "Photic sneezer": "Yes",
    "Photic sneezer with the snp": "Yes",
    # No
    "no sneezing": "No",
    "I have the snp but no sneezing": "No",
    "Not in light but do have sneezing fits on occasion": "No",
    "No sneezing": "No",
    # Other (blanked out below)
    "CC": "Other",
    "rs10427255": "Other",
    "Rs10427255": "Other",
    "Bright light helps control my sneezing (moves it along) but not an uncontrollable source of.": "Other",
}
for raw_answer, label in photoptarmis_labels.items():
    pheno.loc[pheno["Photoptarmis"] == raw_answer, "Photoptarmis"] = label

# Rows labelled "Other" become NaN; every other value is kept as is.
pheno["Photoptarmis"] = pheno["Photoptarmis"].where(pheno["Photoptarmis"] != "Other")

Counter(pheno["Photoptarmis"].values)

Counter({'No': 73, '-': 2897, 'Yes': 60, nan: 4})

# Migraine

In [35]:
# Free-text answer -> canonical label.
# Each recode is written with a single `.loc[mask, column]` call so the
# assignment lands on `pheno` itself; the chained form `pheno[col][mask] = v`
# may write to a temporary copy and silently change nothing.
migraine_labels = {
    # Yes
    "yes, basilar": "Yes",
    "Yes, without aura": "Yes",
    "Yes, both with and without aura": "Yes",
    "yes, both with and without aura": "Yes",
    "Yes, hemiplegic with aura": "Yes",
    "yes, hemiplegic with aura": "Yes",
    "Yes, always with aura": "Yes",
    "Vestibular Migraine, with and without aura": "Yes",
    "Vestibular migraine, with and without aura": "Yes",
    # No
    "no": "No",
    # Other (blanked out below)
    "Not sure it really is a migraine": "Other",
    "Occasional": "Other",
}
for raw_answer, label in migraine_labels.items():
    pheno.loc[pheno["Migraine"] == raw_answer, "Migraine"] = label

# Rows labelled "Other" become NaN; every other value is kept as is.
pheno["Migraine"] = pheno["Migraine"].where(pheno["Migraine"] != "Other")

Counter(pheno["Migraine"].values)

Counter({'No': 58, '-': 2900, 'Yes': 71, nan: 5})

In [36]:
# Replace every remaining missing value in `pheno` with the "-" placeholder.
# This mutates the frame in place, so re-running the cell is a no-op.
pheno.fillna("-", inplace=True)
# Cast the index labels to strings.
pheno.index = pheno.index.astype(str)

In [37]:
# Persist the phenotype table as a pickle inside the `beacons` directory.
pheno.to_pickle(join(beacons, "OpenSNP_Phenotype.pickle"))
# Read the same file straight back and rebind `pheno` to the unpickled object.
# NOTE(review): pickle.load can execute arbitrary code from the file; only use
# it on pickles produced by this notebook.
with open(join(beacons, "OpenSNP_Phenotype.pickle"), 'rb') as handle:
    pheno = pickle.load(handle)