diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..f839d03 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,18 @@ +Package: easyPSID +Title: Reading, Formatting, and Organizing the Panel Study of Income + Dynamics (PSID) +Version: 0.1.0 +Authors@R: person("Brian", "Aronson", email = "bdaronson@gmail.com", role = c("aut", "cre")) +Description: Provides various functions for reading and preparing the Panel Study of Income Dynamics (PSID) for longitudinal analysis, including functions that read the PSID's fixed width format files directly into R, rename all of the PSID's longitudinal variables so that recurring variables have consistent names across years, simplify assembling longitudinal datasets from cross sections of the PSID Family Files, and export the resulting PSID files into file formats common among other statistical programming languages ('SAS', 'STATA', and 'SPSS'). +Depends: R (>= 3.0.1) +Imports: stringr (>= 1.0.0), LaF (>= 0.6.0), foreign (>= 0.8-67) +License: MIT + file LICENSE +Encoding: UTF-8 +LazyData: true +RoxygenNote: 6.1.0 +NeedsCompilation: no +Packaged: 2019-06-27 17:14:05 UTC; bda13 +Author: Brian Aronson [aut, cre] +Maintainer: Brian Aronson +Repository: CRAN +Date/Publication: 2019-07-01 10:40:03 UTC diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..01330a3 --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2019 +COPYRIGHT HOLDER: Brian Aronson diff --git a/MD5 b/MD5 new file mode 100644 index 0000000..6f71e52 --- /dev/null +++ b/MD5 @@ -0,0 +1,33 @@ +48e637c1ec93b52fa6075a99e45a8455 *DESCRIPTION +02d1618577bd2d0466aa03f413aca193 *LICENSE +9506bff0123601879225c2af3647593c *NAMESPACE +504c78d1143d2eabbd7353bebcc55ee2 *R/10find_desc.R +cfc68a5cdddb81746aafa197f464b6ba *R/1unzip_all_files.R +062d276c9308d1224c420382ee0aaf2a *R/2convert_to_rds.R +1860d15929b916dee6838fc6f6248716 *R/3rename_fam_vars.R +f7c51e1c966398975dc7abbab7a58161 *R/4rename_ind_vars.R +cff7ac15d77e2fd53628e881d932b3d3 *R/5create_custom_panel.R +5eddd106681ec152066276a80931d8fa *R/6create_extract.R +d1b5d0e6a1acb4cc5341eef4f0745346 *R/7convert_from_rdata.R +6c1013c14d817547ce674d85fbd2f7ed *R/8find_name.R +220925bea5f68f355e628352b4e141db *R/9find_years.R +00ee728a84bf1ccda82983f101a23f94 *R/PSID_Package.Documentation_function.R +908cf2deb6da90fdac560e91e9265953 *R/sysdata.rda +90bc0e395f4bcf6dc42cd1070c944d62 *README.md +a3ec50e70b44fb699009d06d6d1c402e *inst/extdata/rds_dir/FAM1968.rds +a24bf4365b653a291af90f9d9fb457d7 *inst/extdata/rds_dir/FAM1969.rds +b775868c5cd850722782a8ce5297228d *inst/extdata/rds_dir/IND2015.rds +d9542fbbf2513e8559a8be7317c492be *inst/extdata/unzip_dir/FAM1984.do +6b31772af4073883c76d77aae74499cb *inst/extdata/unzip_dir/FAM1984.txt +903436bd03d13eee3bdaf84bcaa2456a *inst/extdata/zip_dir/fam1968.zip +afe3f0e80913ceebd730903077fb25b4 *man/convert_from_rds.Rd +dc0873bd81886c48b5df83555ea6e522 *man/convert_to_rds.Rd +8b87c53e7689dd30607c94331317fe78 *man/create_custom_panel.Rd +005e57ff5f826054a68df190e089465b *man/create_extract.Rd +8cca39d80dea448b4043ea520ede9e66 *man/easyPSID-package.Rd +a5fe95482fbcd9ed1354d3a884a7aef3 *man/find_description.Rd +a55e08b7c1a1c1a6e2f714c55e567c71 *man/find_name.Rd +81bae55cab97ac5a020621077ad19eba *man/find_years.Rd +3b7b2ddefacf1506ffd638a87f43f5b7 *man/rename_fam_vars.Rd +e4fb51302e862a2a7c6135eecfd9a30b *man/rename_ind_vars.Rd +342a6ed56d8ddd08f725e28d55e72e8d *man/unzip_all_files.Rd diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..31fcc1e --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,19 @@ +# Generated by roxygen2: do not edit by hand + +export(convert_from_rds) +export(convert_to_rds) +export(create_custom_panel) +export(create_extract) +export(find_description) +export(find_name) +export(find_years) +export(rename_fam_vars) +export(rename_ind_vars) +export(unzip_all_files) +importFrom(LaF,laf_open_fwf) +importFrom(foreign,write.dta) +importFrom(foreign,write.foreign) +importFrom(stringr,str_extract) +importFrom(stringr,str_extract_all) +importFrom(utils,object.size) +importFrom(utils,unzip) diff --git a/R/10find_desc.R b/R/10find_desc.R new file mode 100644 index 0000000..c669c4a --- /dev/null +++ b/R/10find_desc.R @@ -0,0 +1,23 @@ +#' Find description of PSID variable +#' @description Finds the descriptions of selected PSID variables. +#' @param variables Variable names to look up (as individual string or vector of strings) +#' @keywords PSID +#' @export +#' @examples find_description(variables=c("V2","V30")) + +find_description<-function(variables){ + #1) Make sure variable is in character format + variables<-as.character(variables) + #2) Find variables in reference dataset + varrow<-data$psid_desc[match(variables,data$psid_desc[,1]),] + #3) If variables is not in reference dataset, just show the variables name + if(all(is.na(varrow))){ + return(variables) + } + #4) Otherwise, print variables name and description + return(varrow) +} + + + + \ No newline at end of file diff --git a/R/1unzip_all_files.R b/R/1unzip_all_files.R new file mode 100644 index 0000000..48f3365 --- /dev/null +++ b/R/1unzip_all_files.R @@ -0,0 +1,27 @@ +#' Unzip all PSID files +#' @description Unzips all .zip_files files in the specified directory. +#' @param in_direc Directory of .zip files to be unzipped +#' @param out_direc Directory for unzipped PSID files to be placed +#' @keywords PSID +#' @export +#' @importFrom utils object.size unzip +#' @examples +#' unzip_all_files( +#' in_direc=system.file("extdata","zip_dir", package = "easyPSID"), +#' out_direc=tempdir() +#' } + +unzip_all_files<-function(in_direc,out_direc){ + #1) Deprecated - Set directories to current directory if none supplied + # in_direc<-ifelse(is.null(in_direc),getwd(),in_direc) + # out_direc<-ifelse(is.null(out_direc),in_direc,out_direc) + #2) Find all zip_files files in folder + zip_files<-list.files(path=in_direc,pattern= c("*\\.zip$" ),full.names=T) + #3) Unzip_files files to specified directories + dir.create(out_direc,showWarnings = F) + for(i in 1:length(zip_files)){ + unzip(zip_files[i],exdir=out_direc) + } + #4) Indicate that files are unzip_filesped + message("Files Unzipped") +} diff --git a/R/2convert_to_rds.R b/R/2convert_to_rds.R new file mode 100644 index 0000000..298193b --- /dev/null +++ b/R/2convert_to_rds.R @@ -0,0 +1,74 @@ +#' Convert all PSID files from .txt format to .rds format +#' @description Converts all PSID fixed width format .txt files in a selected directory into .rds format. Importantly, this function assumes that all files contained in the original PSID .zip files (especially those ending in .do) are present in the same directory as the PSID .txt files, and that all files within that directory have the same names as when first unzipped. +#' @param in_direc Directory containing unzipped PSID .txt and .do files +#' @param out_direc Directory to place PSID .rds files into +#' @keywords PSID +#' @export +#' @importFrom stringr str_extract str_extract_all +#' @importFrom LaF laf_open_fwf +#' @examples +#' convert_to_rds( +#' in_direc=system.file("extdata","unzip_dir", package = "easyPSID"), +#' out_direc=tempdir() +#' ) + +convert_to_rds<-function(in_direc,out_direc){ + #1 - Deprecated - Specify directory + # in_direc<-ifelse(is.null(in_direc),getwd(),in_direc) + # out_direc<-ifelse(is.null(out_direc),getwd(),out_direc) + #2 - Read do and txt files + do_files<-c(list.files(path=in_direc,pattern= c("*\\.do$" ),full.names=T)) + txt_files<-c(list.files(path=in_direc,pattern= c("*\\.txt$" ),full.names=T)) + txt_files2<-c(list.files(path=in_direc,pattern= c("*\\.txt$" ))) + #3 - Convert Stata .do files to R syntax + for(i in 1:length(do_files)){ + #a) Read stata code + stata_code <- readLines(do_files[i]) + #b) Keep relevant lines + code_start <- min(grep('infix', stata_code)) + code_end <- min(grep('using', stata_code))-1 + stata_keep <- stata_code[code_start:code_end] + stata_keep<-stata_keep[-1] #(just contains "infix") + #c) Extract all variable names + variable_names <- unlist(str_extract_all(stata_keep, '[A-Z][a-zA-Z0-9_]+')) + #d) Extract all variable labels + #i) Find where relevant code starts + labs_start <- min(grep('label', stata_code)) + labs_end <- max(grep('label', stata_code)) + stata_labs <- stata_code[labs_start:labs_end] + #ii) Grab strings between quotes + var_descriptions<-vector(length=length(stata_labs)) + for(j in 1:length(var_descriptions)){ + temp<-str_extract_all(stata_labs[j],"\"(.+)\"")[[1]] + #Remove quotes + var_descriptions[j]<-gsub("\"","",temp) + #Remove extra spaces + var_descriptions[j]<-gsub(" ","",var_descriptions[j]) + } + #iii) Extract all column indices + temp_columns<-unlist(str_extract_all(stata_keep,'[0-9]+ [/-] [0-9]+')) + #iv) Change column ranges to column counts + columns<-vector() + for(j in 1:length(temp_columns)){ + a<-temp_columns[j] + b<-as.numeric(unlist(str_extract_all(a,'[0-9]+'))) + columns[j]<-b[2]-b[1]+1 + } + #4) Use converted stata code to read in txt files and use stata variable labels + temp_data <- laf_open_fwf(txt_files[i], column_widths = columns,column_types = rep("double",length(columns))) + temp_data <- temp_data[,] + names(temp_data) <- variable_names + attr(temp_data,"var.labels")<-var_descriptions + #5) Save files as "[Type] [Year].rds" + #a) Determine file name + temp_year<-str_extract(txt_files2[i],'[0-9]+') + temp_heading<-substr(txt_files2[i],1,3) + temp_name<-paste(temp_heading,temp_year,".rds",sep="") + #b) Save file + dir.create(out_direc,showWarnings = F) + saveRDS(object=temp_data,file=paste(out_direc,"/",temp_name,sep = "")) + #c) Notify user of progress + message(paste(temp_name,"Converted")) + } +} + diff --git a/R/3rename_fam_vars.R b/R/3rename_fam_vars.R new file mode 100644 index 0000000..b82d965 --- /dev/null +++ b/R/3rename_fam_vars.R @@ -0,0 +1,47 @@ +#' Rename longitudinal Family File variables +#' @description Renames all longitudinal variables in every PSID Family File of a given directory, such that variables are labeled with the variable name used when the variable was first made available in the PSID. For example, the "Release Number" variable was first recorded in the PSID dataset in 1968 as variable "V1" but its name in the 1969 family file is "V441". This program changes the "Release Number" variable name to "V1" in 1968 and all subsequent waves. +#' @param in_direc Directory of PSID .rds files to rename +#' @param out_direc Directory for renamed PSID .rds files to be saves to. Warning: If no directory specified, this function will overwrite the Family Files in the current directory. +#' @keywords PSID +#' @export +#' @examples +#' rename_fam_vars( +#' in_direc=system.file("extdata","rds_dir", package = "easyPSID"), +#' out_direc=tempdir() +#' ) + +rename_fam_vars<-function(in_direc,out_direc){ + #1) Deprecate - Set Directory + # in_direc<-ifelse(is.null(in_direc),getwd(),in_direc) + # out_direc<-ifelse(is.null(out_direc),getwd(),out_direc) + #2) Create list of files to alter + file_names<-list.files(path=in_direc,pattern= c("^FAM.*\\.rds$" ),full.names=T) #only files beginning with title "FAM" and ending with ".rds" + file_names2<-list.files(path=in_direc,pattern= c("^FAM.*\\.rds$" )) + #3) For each file: + for(j in 1:length(file_names)){ + #a) Load first file and rename to "temp_data" + temp_data<-readRDS(file_names[j]) + #b) Create vector of variable names + temp_names<-names(temp_data) + #c) Determine which column to search + temp_column<-match(gsub('\\D+','', file_names2[j]),substring(names(data$psid_all),2)) + #d) For each year: + for (i in 1:length(temp_names)){ + #i) Find and list equivalent variables from other years + temp<-as.matrix(data$psid_all[data$psid_all[,temp_column]==temp_names[i],]) + #ii) Remove years where that variable didn't exist + temp<-temp[temp!=""] + #iii) If variable not in list, skip + if (is.na(temp[1])){ + next()} + temp_names[i]<-temp[1] # otherwise replace variable name to name of variable in earliest occurring dataset + } + #4) Change names of dataframes + names(temp_data)<-temp_names + #5) Save output into same directory and with same name as previously + dir.create(out_direc,showWarnings = F) + saveRDS(temp_data,file=paste(out_direc,"/",file_names2[j],sep = "")) + message(paste(file_names2[j],"renamed")) + rm(list=c("temp_data")) + } +} diff --git a/R/4rename_ind_vars.R b/R/4rename_ind_vars.R new file mode 100644 index 0000000..0d1b9e3 --- /dev/null +++ b/R/4rename_ind_vars.R @@ -0,0 +1,123 @@ +#' Renames longitudinal Cross-year Individual variables and saves in long format +#' @description Renames all repeated variables in the Cross-year Individual file so that matching variables across waves have the same name, and transforms the resulting dataset into long format. The longitudinal file does not include rows for respondents who were missing in a given wave, and cross-sectional variables are marked as NA during waves when they were not asked. In addition, the resulting file adds two variables for ease of use: "Year" and "fam_id_68". +#' +#' This function may require up to 8gb of RAM, and will likely throw "cannot allocate memory" errors to users with less RAM on their computer. Users with memory issues should implement the "only_long_vars" or "cust_vars" options. +#' +#' @param in_direc Directory of PSID Cross-year Individual file .rds file +#' @param out_direc Directory for renamed and transformed PSID Cross-year Individual file to be saved to +#' @param only_long_vars Keep only longitudinal variables in dataset +#' @param cust_vars Custom variables to keep in dataset (as character vector). Output will always include "ER30001", "fam_id_68", and "Year" +#' @keywords PSID +#' @export +#' @examples +#' rename_ind_vars( +#' only_long_vars=TRUE, +#' in_direc=system.file("extdata","rds_dir", package = "easyPSID"), +#' out_direc=tempdir() +#' ) + +rename_ind_vars<-function(in_direc,out_direc,only_long_vars=F,cust_vars=NULL){ + #1) Deprecated - Set directory + # in_direc<-ifelse(is.null(in_direc),getwd(),in_direc) + # out_direc<-ifelse(is.null(out_direc),getwd(),out_direc) + #2) Load rds file + temp_file<-list.files(path=in_direc,pattern= c("IND.*\\.rds$" ),full.names=T)[1] + temp_file2<-list.files(path=in_direc,pattern= c("IND.*\\.rds$" ))[1] + df<-readRDS(temp_file) + #3) Identify dimensions of eventual dataframe + ncols<-dim(data$psid_ind)[1] + nyears<-dim(data$psid_ind)[2] + ninds<-nrow(df) + nrows<-nyears*ninds + #4) Create transposed version of data$psid_ind, sorted by time first in dataset + early_name<-apply(data$psid_ind,1,function(x) x[x!=""][1]) + index<-data$psid_ind[order(early_name),] + index<-t(index) + #5) For convenience, create a column in data that is all NAs + df$colna<-NA + #6) Identify whether variable is cross-sectional + cross<-apply(index,2, function(x) length(x[x!=""])) + cross<-ifelse(cross==1,1,0) + #7) Identify whether observation was not present during a given wave + #ER30003 indicates whether non-response in given wave + ER30003<-which(index=="ER30003",arr.ind = T)[2] + #i) Find corresponding variable names + varnames<-as.vector(index[,ER30003]) + #ii) Replace missing variable names with colna + varnames[varnames==""]<-"colna" + #iii) Pull out those column names and convert to vector + resp<-vector() + for(j in 1:length(varnames)){ + resp<-c(resp,df[,varnames[j]]) + } + #iv) Remove missing individuals + resp[resp>0]<-1 + #8) Subset to custom variables or longitudinal variables + if(!is.null(cust_vars)){ + if(!"ER30001" %in% cust_vars){ + cust_vars<-c(cust_vars,"ER30001") + } + indcols<-sapply(cust_vars,function(x) which(index == x,arr.ind = T)[2]) + index<-index[,indcols] + }else if(only_long_vars==T){ + index<-index[,cross==0] + } + #9) For each unique variable in individual file, convert to long format + #i) Find corresponding variable names across waves + ind_longdf<-lapply(as.data.frame(index),as.vector) + #ii) Replace missing variable names with colna + ind_longdf<-lapply(ind_longdf,function(x) ifelse(x=="","colna",x)) + #iii) Pull out those columns + ind_longdf<-lapply(ind_longdf,function(x) df[,x]) + #iv) Convert columns to single vector + fastunlist<-function(x){ + x<-c(as.matrix(x)) + x<-x[resp==1] #(not usually part of function) + } + percent <- function(x, digits = 0, format = "f", ...) { + paste0(formatC(100 * x, format = format, digits = digits, ...), "%") + } + for(i in 1:length(ind_longdf)){ + ind_longdf[[i]]<-fastunlist(ind_longdf[[i]]) + if(i/10==floor(i/10)){ + gc() + message(percent(i/length(ind_longdf))) + } + } + gc() + #v) Convert list to dataframe + ind_longdf<-do.call(cbind,ind_longdf) + gc() + ind_longdf<-as.data.frame(ind_longdf) + gc() + #10) Add metadata to dataframe + #a) Create proper names for columns + if(!is.null(cust_vars)){ + early_name2<-cust_vars + }else if(only_long_vars==T){ + early_name2<-apply(index,2,function(x) x[x!=""][1]) + }else{ + early_name2<-early_name[order(early_name)] + } + names(ind_longdf)<-early_name2 + #b) Bring back variable descriptions + temp<-find_description(names(ind_longdf)) + attr(ind_longdf,"var.labels")<-temp$Description + #c) Add a year variable + uyears<-gsub('\\D+','', row.names(index)) + Year<-rep(uyears,each=ninds) + Year<-Year[resp==1] + ind_longdf$Year<-Year + #d) Add a family id variable + fam_id_68<-ind_longdf$ER30001 + #11) Save file + dir.create(out_direc,showWarnings = F) + temp_file2<-gsub(".rds","",temp_file2) + saveRDS(ind_longdf,file=paste(out_direc,"/",temp_file2," - Long Format.rds",sep="")) + message(percent(1)) +} + + + + + diff --git a/R/5create_custom_panel.R b/R/5create_custom_panel.R new file mode 100644 index 0000000..e982b35 --- /dev/null +++ b/R/5create_custom_panel.R @@ -0,0 +1,90 @@ +#' Create custom longitudinal (panel) dataset with PSID Family Files +#' @description Uses the longitudinal PSID Family Files to create a custom longitudinal dataset in long format based on all PSID .rds Family files in a selected directory. +#' +#' This function can work with data that has been renamed via the rename_fam_vars function or data just converted to .rds format via the convert_to_rds function. It will creates NAs for years when a given variable was not available, and creates a new variable ("Year") to specify the panel of data included in the custom dataset. If a provided variable exists in other waves of the family files under a different name, all waves of that variable will be included in the resulting dataset. +#' +#' To create a longitudinal family file of the PSID with all variables in the PSID Family Files, it is recommended that one uses the create_extract function instead. However, such a file can be very large when using many waves of the PSID. Users with more than five waves of the PSID Family Files are highly recommended to avoid creating a longitudinal dataset with all unique Family File variables. +#' +#' @param var_names Variable names to include in custom longitudinal dataset (as vector of strings) +#' @param in_direc Directory of PSID .rds to use for custom longitudinal dataset +#' @param out_direc Directory to place resulting longitudinal dataset into +#' @keywords PSID +#' @export +#' @examples +#' create_custom_panel( +#' var_names = c("V534", "V442", "V398"), +#' in_direc=system.file("extdata","rds_dir", package = "easyPSID"), +#' out_direc=tempdir() +#' ) + +create_custom_panel<-function(var_names, in_direc, out_direc){ + #1) Assign directories + # in_direc<-ifelse(is.null(in_direc),getwd(),in_direc) + # out_direc<-ifelse(is.null(out_direc),getwd(),out_direc) + filenames<-list.files(path=in_direc,pattern= c("^FAM.*\\.rds$" ),full.names = T) + filenames2<-list.files(path=in_direc,pattern= c("^FAM.*\\.rds$" )) + #2) Error checks + #a) Subset variables dataframe to lines that contain variable names. + variables_temp<-as.matrix(data$psid_all) + #b) Check if each variable is in the dataset. Break if any variable missing. + missings<-var_names[is.na(match(var_names,variables_temp))] + if(length(missings)>0){ + message<-paste("The following variable(s) were not found in the PSID family files:",paste(missings,collapse = ", "),sep="\n\n") + stop(message) + } + #c) Exclude redundant variables from following functions. Post warning. + redundant<-duplicated(var_names) + if(any(redundant)){ + redundant_name<-var_names[redundant] + message<-paste("The following variable(s) were requested more than once. Only one copy of each of these variable(s) will be included in the resulting dataset:",paste(redundant_name,collapse = ", "), sep="\n\n") + var_names<-var_names[!redundant] + warning(message) + } + #3) Temporarily rename each variable to match names of their earliest occurrence. + temp_names<-var_names + #a) Identify row and column of each variable + index<-lapply(temp_names,function(x) which(data$psid_all==x,arr.ind=T)) + #b) Extract rows from index + row<-sapply(index,function(x) x[1]) + #c) Keep index of current variables + new_ind<-data$psid_all[row,] + #d) Rename columms + names(new_ind)<-gsub('\\D+','', names(new_ind)) + #e) Identify earliest name of each variable + early_name<-as.vector(apply(new_ind,1,function(x) x[x!=""][1])) + + #4) Create a list of longitudinal datasets with variables selected above + temp_datal<-list() + for(j in 1:length(filenames)){ + #a) Load first file and rename to "temp_data" + temp_data<-readRDS(filenames[j]) + #b) Create vector of variable names + temp_names<-names(temp_data) + #c) Only keep columns that match possible variable names + #i) Identify dataframe year + temp_year<-gsub('\\D+','', filenames2[j]) + #ii) Identify matchin column in dataset + column<-match(temp_year,names(new_ind)) + #iii) Find variable names of requested variables as of a given year + keep_vars<-match(new_ind[,column],temp_names) + # In case variables already renamed, try above with earliest variable names + keep_varsalt<-match(early_name,temp_names) + if(sum(is.na(keep_varsalt))1){ + var_freq<-apply(tdf,1,function(x) sum(x!="")) + }else{ + var_freq<-sum(tdf[,1]!="") + } + var_rank<-rank(var_freq,ties.method="first") + var_rank<-max(var_rank)-var_rank + var_common<-tdf[var_rank<=num_vars,] + var_names<-as.vector(apply(var_common,1,function(x) x[x!=""][1])) + + #3) Redundant steps to make remainder of formula work. + temp_names<-var_names + #a) Identify row and column of each variable + index<-lapply(temp_names,function(x) which(data$psid_all==x,arr.ind=T)) + #b) Extract rows from index + row<-sapply(index,function(x) x[1]) + #c) Keep index of current variables + new_ind<-data$psid_all[row,] + #d) Rename columms + names(new_ind)<-gsub('\\D+','', names(new_ind)) + #e) Identify earliest name of each variable + early_name<-as.vector(apply(new_ind,1,function(x) x[x!=""][1])) + + #4) Create a list of longitudinal datasets with variables selected above + temp_datal<-list() + for(j in 1:length(filenames)){ + #a) Load first file and rename to "temp_data" + temp_data<-readRDS(filenames[j]) + #b) Create vector of variable names + temp_names<-names(temp_data) + #c) Only keep columns that match possible variable names + #i) Identify dataframe year + temp_year<-gsub('\\D+','', filenames2[j]) + #ii) Identify matchin column in dataset + column<-match(temp_year,names(new_ind)) + #iii) Find variable names of requested variables as of a given year + keep_vars<-match(new_ind[,column],temp_names) + # In case variables already renamed, try above with earliest variable names + keep_varsalt<-match(early_name,temp_names) + if(sum(is.na(keep_varsalt))