In [None]:
library(ggplot2)
library(plyr)
library(tidyverse)
library(GridLMM)
library(snpStats)
library(qq)
library(lme4qtl)
library(sjstats)
library(wesanderson)

In [None]:
# Show the current working directory; the input files read by this notebook
# are referenced by relative path, so they are resolved against it.
getwd()

In [None]:
# Names of the trait columns analysed in this notebook.
traits <- c(
  "movement_count",
  "mean_movement_length",
  "speed_in_movement_mean",
  "dist_travelled"
)

# Load the three input tables from CSV files in the working directory.
pheno_df_ts <- read.csv(file = "pheno_df_ts_f1s.csv")
parents_df <- read.csv(file = "pheno_df_ts_parents.csv")
pheno_df_36 <- read.csv(file = "pheno_df_36.csv")

In [None]:
# Load the relationship matrix (.rel) and label its rows and columns with the
# second column of the matching .rel.id file.
grm <- as.matrix(read.table("female_parents.biSNP.sing.HW.gatk.GQfilter.MAF05.w100s1r08.rel"))
colnames(grm) <- rownames(grm) <- read.table("female_parents.biSNP.sing.HW.gatk.GQfilter.MAF05.w100s1r08.rel.id")[, 2]

# Split the matrix into samples whose label contains "F1" and all the others.
# A logical mask is used rather than negative grep() indices: when no label
# matches, grep() returns integer(0) and `-integer(0)` selects *nothing*, which
# would silently leave p_grm empty instead of containing every sample.
# drop = FALSE keeps the result a matrix even when a single sample is selected.
is_f1 <- grepl("F1", colnames(grm))
f1_grm <- grm[is_f1, is_f1, drop = FALSE]
p_grm <- grm[!is_f1, !is_f1, drop = FALSE]

# Row indices of samples whose label contains "_S".
f1s <- grep("_S", row.names(grm))
parental_grm <- as.matrix(grm)

# Re-label the matrix with the 4th underscore-separated field of each sample
# ID; these labels are what the phenotype tables' Family column is matched
# against further down.
family <- vapply(
  row.names(parental_grm),
  FUN = function(x) {
    strsplit(x, "_")[[1]][4]
  },
  FUN.VALUE = character(1)
)
row.names(parental_grm) <- colnames(parental_grm) <- family

In [None]:
# Keep only rows whose Family has an entry in the relationship matrix.
pheno_df_ts <- pheno_df_ts[pheno_df_ts$Family %in% colnames(parental_grm), ]


# Filtering as needed
# which() is used for every filter so that rows where the condition evaluates
# to NA are dropped instead of being turned into all-NA rows.
combined_filtered_df <- pheno_df_ts
combined_filtered_df <- combined_filtered_df[which(combined_filtered_df$Empty == FALSE), ]
combined_filtered_df <- combined_filtered_df[which(!is.na(combined_filtered_df$Treatment)), ]
combined_filtered_df <- combined_filtered_df[which(!is.na(combined_filtered_df$Batch)), ]
# Account for framerate
combined_filtered_df$dist_travelled <- combined_filtered_df$dist_travelled / 10

# Ensure the flies still move some -- no dead flies!
# combined_filtered_df = combined_filtered_df[which(combined_filtered_df$movement_count > 10),]
# combined_filtered_df = combined_filtered_df[which(combined_filtered_df$movement_count < 10000),]
# combined_filtered_df = combined_filtered_df[which(combined_filtered_df$mean_of_mean_in_movement_speed < 100),]
# Keep rows with 5 < vel_par_in_movement_mean < 100. Indexing a data frame
# with a logical vector containing NA yields rows filled with NA, so the mask
# goes through which() like the filters above.
in_speed_range <- combined_filtered_df$vel_par_in_movement_mean > 5 &
  combined_filtered_df$vel_par_in_movement_mean < 100
combined_filtered_df <- combined_filtered_df[which(in_speed_range), ]
# combined_filtered_df <- combined_filtered_df[which(combined_filtered_df$speed_in_movement_var < 500),]
# combined_filtered_df <- combined_filtered_df[which(combined_filtered_df$mean_mov < 20000),]


pheno_df_ts <- combined_filtered_df

# Append meaningful individual ID: <Batch>_<Family>_<Well_orderAsTracked>
pheno_df_ts$indiv_id <- paste0(pheno_df_ts$Batch, "_", pheno_df_ts$Family, "_", pheno_df_ts$Well_orderAsTracked)

In [None]:
# Keep only rows whose Family has an entry in the relationship matrix.
# The mask is built from pheno_df_36, so it must index pheno_df_36 itself;
# indexing pheno_df_ts here would replace this table with (already filtered
# and rescaled) rows of the other data set.
pheno_df_36 <- pheno_df_36[pheno_df_36$Family %in% colnames(parental_grm), ]


# Filtering as needed
# which() is used for every filter so that rows where the condition evaluates
# to NA are dropped instead of being turned into all-NA rows.
combined_filtered_df <- pheno_df_36
combined_filtered_df <- combined_filtered_df[which(combined_filtered_df$Empty == FALSE), ]
combined_filtered_df <- combined_filtered_df[which(!is.na(combined_filtered_df$Treatment)), ]
combined_filtered_df <- combined_filtered_df[which(!is.na(combined_filtered_df$Batch)), ]
# Account for framerate
combined_filtered_df$dist_travelled <- combined_filtered_df$dist_travelled / 10

# Ensure the flies still move some -- no dead flies!
# combined_filtered_df = combined_filtered_df[which(combined_filtered_df$movement_count > 10),]
# combined_filtered_df = combined_filtered_df[which(combined_filtered_df$movement_count < 10000),]
# combined_filtered_df = combined_filtered_df[which(combined_filtered_df$mean_of_mean_in_movement_speed < 100),]
# Keep rows with 5 < vel_par_in_movement_mean < 100. Indexing a data frame
# with a logical vector containing NA yields rows filled with NA, so the mask
# goes through which() like the filters above.
in_speed_range <- combined_filtered_df$vel_par_in_movement_mean > 5 &
  combined_filtered_df$vel_par_in_movement_mean < 100
combined_filtered_df <- combined_filtered_df[which(in_speed_range), ]
# combined_filtered_df <- combined_filtered_df[which(combined_filtered_df$speed_in_movement_var < 500),]
# combined_filtered_df <- combined_filtered_df[which(combined_filtered_df$mean_mov < 20000),]


pheno_df_36 <- combined_filtered_df

# Append meaningful individual ID: <Batch>_<Family>_<Well_orderAsTracked>
pheno_df_36$indiv_id <- paste0(pheno_df_36$Batch, "_", pheno_df_36$Family, "_", pheno_df_36$Well_orderAsTracked)

In [None]:
# Read the PLINK .bed/.bim/.fam triplet that shares the prefix below.
path <- paste0("female_parents.biSNP.sing.HW.gatk.GQfilter.MAF05.w100s1r08", c(".bed", ".bim", ".fam"))
SNPs <- read.plink(path[1], path[2], path[3])

# Numeric genotype matrix, with rows re-labelled by the 4th
# underscore-separated field of each sample ID.
genotype_mat <- as(SNPs$genotypes, "numeric")
row.names(genotype_mat) <- vapply(
  row.names(genotype_mat),
  FUN = function(x) {
    strsplit(x, "_")[[1]][4]
  },
  FUN.VALUE = character(1)
)

strs <- as.character(unique(pheno_df_ts$Family))

# Select one genotype row per distinct Family in pheno_df_ts (in order of
# first appearance). drop = FALSE keeps a matrix even for a single family.
gt_mat <- genotype_mat[strs, , drop = FALSE]


mean_filled_gt_mat <- gt_mat

# fillNAX = TRUE: replace missing genotypes by the per-SNP (column) mean.
# fillNAX = FALSE: zero out whole SNP columns that contain missing values.
fillNAX <- TRUE
if (anyNA(mean_filled_gt_mat)) {
  if (fillNAX) {
    mean_filled_gt_mat <- apply(mean_filled_gt_mat, 2, function(x) {
      if (anyNA(x)) {
        x[is.na(x)] <- mean(x, na.rm = TRUE)
      }
      x
    })
  } else {
    # The original referenced an undefined object `X` here; the matrix being
    # filled is mean_filled_gt_mat.
    # NOTE(review): the `> 1` threshold leaves columns with exactly one NA
    # untouched -- confirm whether `> 0` was intended.
    mean_filled_gt_mat[, colSums(is.na(mean_filled_gt_mat)) > 1] <- 0
  }
}

# SNP column names are split into "<chr>:<pos>" (everything before the first
# "[") and then into chromosome and numeric position.
name_split <- vapply(
  colnames(gt_mat),
  FUN = function(x) {
    strsplit(x, split = "\\[")[[1]][1]
  },
  FUN.VALUE = character(1)
)
chr <- vapply(
  name_split,
  FUN = function(x) {
    strsplit(x, split = ":")[[1]][1]
  },
  FUN.VALUE = character(1)
)
pos <- as.numeric(vapply(
  name_split,
  FUN = function(x) {
    strsplit(x, split = ":")[[1]][2]
  },
  FUN.VALUE = character(1)
))

# SNP map; ref/alt are filled with the constant placeholders "A" and "T".
map_g <- data.frame(
  snp = name_split,
  chr = chr,
  pos = pos,
  ref = rep("A", ncol(gt_mat)),
  alt = rep("T", ncol(gt_mat))
)
# chr <- mapvalues(map_g$chr,unique(map_g$chr),c(1,2,3,4,5,6))

In [None]:
# Empty results table with one typed column per reported quantity; rows are
# appended to it by the estimation loop.
h2_estimates <- data.frame(
  trait = character(0),
  treatment = character(0),
  h2 = numeric(0),
  slicetime = numeric(0),
  se = numeric(0),
  p = numeric(0)
)

# Trait columns to analyse.
traits <- c(
  "movement_count",
  "mean_movement_length",
  "speed_in_movement_mean",
  "dist_travelled"
)

In [None]:

# For every trait x treatment x slicetime combination: fit a random-intercept
# model on Family with parental_grm as the relationship matrix, bootstrap the
# adjusted ICC to obtain a standard error and p-value, and append one row to
# h2_estimates.
# h2_estimates = list()
for (trait in traits) {
  print(trait)
  for (treatment in c("C", "R", "HS")) {
    print(treatment)
    for (time in c(0, 32)) {
      print(time)
      # which() drops rows where Treatment or slicetime is NA; a raw logical
      # mask would turn them into all-NA rows of the slice.
      slice <- pheno_df_ts[which(pheno_df_ts$Treatment == treatment & pheno_df_ts$slicetime == time), ] # mapping_df[mapping_df$Treatment == treatment & mapping_df$slicetime == time,]
      form <- as.formula(paste0(trait, "~", "1 + (1 | Family)"))
      full_mod <- relmatLmer(form, slice, relmat = list(Family = parental_grm))
      dummy <- slice %>%
        # generate 20 bootstrap replicates of dataset
        bootstrap(20) %>%
        # run mixed effects regression on each bootstrap replicate
        # (same model as `form`, rebuilt inside the closure)
        mutate(models = lapply(.$strap, function(x) {
          relmatLmer(as.formula(paste(trait, '1 + (1|Family)', sep = ' ~ ')), data = x,
                     relmat = list(Family = parental_grm))
        })) %>%
        # compute ICC for each "bootstrapped" regression
        mutate(icc = unlist(lapply(.$models, function(x) {
          icc(x)$ICC_adjusted
        })))
      # [[2]] extracts the bare value of the second column/element, so the
      # row below is built from plain scalars.
      se <- boot_se(dummy, icc)[[2]]
      p <- boot_p(dummy, icc)[[2]]
      # print(se)
      # Append as a list, not c(): c() on mixed character/numeric scalars
      # coerces everything to character and would change the column types.
      h2_estimates[nrow(h2_estimates) + 1, ] <- list(
        trait,
        treatment,
        lme4qtl::VarProp(full_mod)$prop[1],
        time,
        se,
        p
      )
    }
  }
}

In [None]:
# Plotting each
#
# One dodged bar chart per slicetime (0, then 32): traits on the (flipped)
# x axis, one bar per treatment, bootstrap SE drawn as error bars.

# Make sure the plotted columns are numeric regardless of how the rows were
# appended to the results table.
h2_estimates$h2 <- as.numeric(h2_estimates$h2)
h2_estimates$se <- as.numeric(h2_estimates$se)
h2_estimates$slicetime <- factor(h2_estimates$slicetime, levels = c(32, 0))

for (slice_time in c(0, 32)) {
  h2_subset <- h2_estimates[h2_estimates$slicetime == slice_time, ]
  plt <- ggplot(h2_subset, aes(y = h2, fill = treatment, x = trait)) +
    ylim(0, 0.5) +
    # The same error-bar definition is used for both slicetimes. The lower end
    # is clamped at 0 because ylim() discards geoms that extend below the
    # scale limit; the lower whisker is hidden behind the bar drawn on top.
    geom_errorbar(aes(ymin = pmax(h2 - se, 0), ymax = h2 + se),
                  position = "dodge", stat = "identity", colour = "black") +
    geom_bar(position = "dodge", stat = "identity", colour = "black") +
    scale_fill_manual(values = wes_palette("Darjeeling1", 3)) +
    # Each plot shows every trait for a single slicetime, so the title names
    # the slicetime rather than the leftover loop variable `trait`.
    ggtitle(paste0("H2 Estimates by Trait - Slicetime: ", slice_time, " - SE From Bootstrapping")) +
    coord_flip() +
    theme_minimal()
  print(plt)
}



In [None]:
# Largest observed speed_in_movement_mean; na.rm = TRUE so that a single
# missing value does not turn the result into NA.
max(pheno_df_ts$speed_in_movement_mean, na.rm = TRUE)