In [8]:
### Read in data file

In [9]:
datafile <- "data/export_all.csv"
df <- read.csv(datafile,header=FALSE)

fields <- c("ID","FirstName","MiddleName","LastName","Institution")
columnNames <- c(
    paste("Trainee",fields,sep="."),
    paste("Mentor",fields,sep="."),
    "relationshipCode","relationshipType", "year")

colnames(df) <- columnNames
head(df)

Unnamed: 0,Trainee.ID,Trainee.FirstName,Trainee.MiddleName,Trainee.LastName,Trainee.Institution,Mentor.ID,Mentor.FirstName,Mentor.MiddleName,Mentor.LastName,Mentor.Institution,relationshipCode,relationshipType,year,NA
1,pid1,fn1,mn1,ln1,loc1,pid2,fn2,mn2,ln2,loc2,relation,relstring,startyear,stopyear
2,2,Benjamin,Y,Hayden,University of Rochester,3,Jack,L,Gallant,"University of California, Berkeley",1,student,2000,2005
3,4,Benjamin,,Willmore,University of Oxford,3,Jack,L,Gallant,"University of California, Berkeley",2,postdoc,2003,2006
4,6,Ryan,,Prenger,Lawrence Livermore Laboratory,3,Jack,L,Gallant,"University of California, Berkeley",1,student,2002,2008
5,18761,Alan,P,Koretsky,National Institute of Neurological Disorders and Stroke,9,Melvin,P.,Klein,"University of California, Berkeley",1,student,0,1984
6,10,C,Edward,Connor,Johns Hopkins University,16,David,C,Van Essen,"Washington University, Saint Louis",2,postdoc,0,0


In [10]:
dim(df)

In [11]:
### define root persion ID
root.ID <- 52763 ### Peter Schultz

#root.ID <- 62876 ### test on Virginia Cornish
#root.ID <- 63525 ### test on Hening Lin
#root.ID <- 54496  ### test on Chris Walsh 
#root.ID <- 4338 ### Robert Woodward
#root.ID <- 9005 ### George Whitesides (161 direct children)
root.ID


In [12]:
getPerson <- function(ID, df, mentor.ID = NULL, include.children=TRUE, level=1 ) {
#    print(paste("getting person", ID, level))
    if( is.null(mentor.ID) ) {
        p <- df[df$Trainee.ID==ID,c("Trainee.ID","Trainee.FirstName","Trainee.MiddleName","Trainee.LastName","year")]
    } else {
        p <- df[df$Trainee.ID==ID&df$Mentor.ID==mentor.ID,c("Trainee.ID","Trainee.FirstName","Trainee.MiddleName","Trainee.LastName","year")]
    }
    person <- list()
    person$ID <- as.character(p$Trainee.ID[1])
    person$FirstName <- as.character(p$Trainee.FirstName[1])
    person$MiddleName <- as.character(p$Trainee.MiddleName[1])
    person$LastName <- as.character(p$Trainee.LastName[1])
    person$level <- level
    person$year <- as.character(p$year)
    person$name <- paste(c(
        substr(person$FirstName,0,1),
        substr(person$MiddleName,0,1),
        substr(person$LastName,0,1) ), collapse="")
    person$url = paste('http://academictree.org/chemistry/peopleinfo.php?pid=',person$ID,sep="")
    if( include.children ) {
        person$children <- getChildren( person$ID, df, level = level + 1 )
    }

    return(person) 
}

getChildren <- function(root.ID, df, level = 0) {
    root <- getPerson(root.ID, df, include.children=FALSE)
#    print(paste("A: ", root$ID, level))
#    print(root)
#    print(dim(df))
    children.df <- df[df$Mentor.ID == root$ID,]
    children.df <- children.df[order(children.df$year,decreasing = TRUE),]
    children.ids <- unique(children.df$Trainee.ID)
#    print(paste("B: ", children.ids))
#    print(children.df)

    children <- list()
    if( dim(children.df)[1] == 0 ) { return(children); }
#    for( child.id in children.ids ) {
    for( i in 1:dim(children.df)[1] ) {
       child.id <- children.df[i,"Trainee.ID"]
       child.type <- children.df[i,"relationshipType"]
#       print(paste("B: ", child.id, level))
       child <- getPerson(child.id, df, level=level, mentor.ID=root.ID)
       child$type <- child.type
#        print(child$ID)
#       child$children <- getChildren( child$ID, df )
       children[[length(children)+1]] <- child
    }
    return(children)
}

In [13]:
date()
root <- getPerson(root.ID, df)
date()


In [14]:
library(jsonlite)
json <- jsonlite::toJSON(root,pretty=TRUE,auto_unbox=TRUE)
write(json, paste(c("output/output_PGS.json"),collapse=""))
