diff --git a/DESCRIPTION b/DESCRIPTION index 90dadbc..220340e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: knncat -Version: 1.1.4 -Date: 2005-10-06 +Version: 1.1.7 +Date: 2005-10-12 Title: Nearest-neighbor classification with categorical variables Author: Sam Buttrey Maintainer: Sam Buttrey @@ -9,4 +9,4 @@ Description: This program scales categorical variables in such a way as to variables and prior probabilities, and does intelligent variable selection and estimation of error rates and the right number of NN's. License: GPL version 2 -Packaged: Thu Oct 6 13:01:55 2005; buttrey +Packaged: Wed Oct 12 08:59:07 2005; buttrey diff --git a/R/knncat b/R/knncat new file mode 100644 index 0000000..c783999 --- /dev/null +++ b/R/knncat @@ -0,0 +1,465 @@ +.packageName <- "knncat" +"knncat" <- +function (train, test, k = c(1, 3, 5, 7, 9), xvals = 10, xval.ceil = -1, + knots = 10, + prior.ind = 4, prior, + permute = 10, permute.tail = 1, improvement = .01, ridge = .003, + once.out.always.out = FALSE, classcol = 1, verbose = 0) +{ +# +# knncat: Create a knncat object from a training set, and optionally +# compute the predictions for a test set. +# +# Arguments: +# train: data frame of training data, classification in classcol column +# test: data frame of test data (can be omitted). This should have +# the correct classification in the classcol column, too. +# k: vector of choices for # nn's. Default c(1, 3, 5, 7, 9) +# xvals: number of cross-validations to use to find the best model +# size and number of nn's. Default 10. +# xval.ceil: Maximum number of variables to add. -1 = Use the smallest +# number from any xval; 0 = use the smallest number from the +# first xval; > 0, use that. +# knots: vector of number of knots for numeric variables. Reused if +# necessary. Default: 10 for each. +# prior.ind: Integer telling how to compute priors. 1 = estimated from +# training set; 2 = all equal; 3 = supplied in "prior"; 4 = +# ignored. Default: 4. +# prior: Numeric vector, one entry per unique element in the training +# set's class column, giving prior probabilities. Ignored unless +# prior.ind = 3; then they're normalized to sum to 1 and each +# entry must be strictly > 0. +# permute: Number of permutations for variable selection. Default: 10. +# permute.tail: A variable fails the permutation test if permute.tail or more +# permutations do better than the original. Default: 1. +# improvement: Minimum improvement for variable selection. Used only when +# permute = 0, or when permute is missing and improvement is +# supplied (which sets permute to 0). Default: .01. +# ridge: Amount by which to "ridge" the W matrix for numerical +# stability. Default: .003. +# once.out.always.out: if TRUE, a variable that fails a permutation test +# or doesn't improve by enough is excluded from further +# consideration during that cross-validation run. Default FALSE. +# classcol: Column with classification in it. Default: 1. +# verbose: Controls level of diagnostic output. Higher numbers produce +# more output, sometimes 'way too much. 0 produces no output; +# 1 gives progress report for xvals. Default: 0. +# +# +# Save numbers of rows and columns. If test is missing, pass zeros for +# that data. If not, test.classes will hold the predictions on return.
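+# +# Example (a sketch; "synth.tr" is the synthetic data set used in the +# man page examples, with its class in column 3): +# syncat <- knncat (synth.tr, classcol = 3) +# syncat # prints the training-set misclass rate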
+# +train.data.name <- deparse (substitute(train)) +nrow.train <- nrow(train) +ncol.train <- ncol(train) +if (classcol != 1){ + classcol.name <- names(train)[classcol] + train <- data.frame (train[,classcol], train[,-classcol]) + names(train)[1] <- classcol.name +} +factor.vars <- sapply (train[,-1,drop=FALSE], is.factor) +# +# "Missing.values" holds the values we'll use in case there are any +# missing values in the data. We compute these in C, but we may as well +# do it here. Whether there are missings in the training set or not, we +# need to save them in case the test set has missings. +# +missing.values <- train[1,-1] +missing.values[!factor.vars] <- sapply (train[,-1,drop=FALSE][,!factor.vars], mean, + na.rm=TRUE) +most.common <- function (x) { + tbl <- table (x) + names(tbl)[tbl == max(tbl)][1] +} +missing.values[factor.vars] <- sapply (train[,-1,drop=FALSE][,factor.vars], most.common) +vars.with.na <- sapply (train, function(x) any (is.na(x))) +if (vars.with.na[1] == TRUE) + stop ("No missing values allowed in response variable") +if (any (vars.with.na)) + for (i in 1:(ncol.train - 1)) + if (vars.with.na[i + 1] == TRUE) + train[is.na (train[,i+1]),i + 1] <- missing.values[i] + +train.levs <- lapply (train[,-1], levels) +if (missing (test)) +{ + nrow.test <- ncol.test <- test.classes <- 0 +} +else +{ + nrow.test <- nrow(test) + ncol.test <- ncol (test) + test.classes <- numeric (nrow(test)) +# +# Ensure that all the columns of "train" are in "test," too. If they +# are, re-order the columns of test so they match up. +# + if (any (!is.element (names(train), names(test)))) + stop ("Some train columns aren't present in test.") + test <- test[,names(train)] +# +# Ensure that the sets of levels for the factor variables are the same. +# + test.levs <- lapply (test[,-1,drop=FALSE], levels) + if (!identical (TRUE, all.equal (train.levs, test.levs))) + stop ("Sets of levels in train and test don't match.") +# +# Handle missing values in the test data, filling them as in train. +# +vars.with.na <- sapply (test, function(x) any (is.na(x))) +if (vars.with.na[1] == TRUE) + stop ("No missing values allowed in test set response variable") +if (any (vars.with.na)) + for (i in 1:(ncol.test - 1)) + if (vars.with.na[i + 1] == TRUE) + test[is.na (test[,i+1]),i + 1] <- missing.values[i] +} +# +# Find the number in each class. Table() sorts by levels, so we don't +# need to worry about ordering the levels. +# +number.in.class <- table (train[,1]) +if (any (number.in.class <= 1)) + stop ("Some classes have only one member. Check \"classcol\"") +nclass <- length(number.in.class) +misclass.mat <- numeric (nclass * nclass) +# +# Identify factor variables. Set "increase", the vector that indicates +# which variables have which type, accordingly. Also set up cats.in.var +# (whose ith element tells us how many levels are in predictor variable +# i) and cdata (which says whether i is in or out). By the way, if the +# ith element of cats.in.var is -j, variable i is numeric with j knots. +# +increase <- cats.in.var <- cdata <- numeric (ncol(train) - 1) +increase[!factor.vars] <- -5 +increase[factor.vars] <- -1 +# +# Set up the knots vectors if there are any continuous variables. Even +# though it's a little wasteful, we'll send in a matrix of knots values. +# Each column will contain all the knots for a variable. If the numbers +# of knots differ among columns, some spots will be left as 0's.
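+# +# For example, with the default 10 knots a numeric variable's knot +# values are quantile (x, seq (0, 1, length = 11))[-11], i.e. the 0%, +# 10%, ..., 90% sample quantiles; the loop below does exactly this.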
+# +if (sum (!factor.vars) == 0) +{ + knots.vec <- 0 + knot.values <- 0 +} +else +{ + knots.vec <- numeric (sum (!factor.vars)) + knots.vec[] <- knots # re-use as necessary + knot.values <- matrix (0, max(knots.vec), sum (!factor.vars)) + num.vars <- (1:(ncol(train) - 1))[!factor.vars] + dimnames(knot.values) <- list (NULL, names(train)[-1][!factor.vars]) +# +# Put knot values in columns, so we don't have to transpose this matrix. +# This is a good time to update "train.levs" for numeric variables. +# + num.ctr <- 1 + for (i in 1:length(factor.vars)) + { + if (!factor.vars[i]) + { + qq <- quantile (train[,num.vars[num.ctr]+1], + seq (0, 1, length = knots.vec[num.ctr] + 1)) + qq <- qq[-length(qq)] # chop off the 100% point. + knot.values[1:length(qq),num.ctr] <- qq + train.levs[[i]] <- paste ("knot.", 1:knots.vec[num.ctr], sep="") + num.ctr <- num.ctr + 1 + } + } +} +# +# Count the number of levels in each factor. phi is the vector of coefficients; +# there's one for each level, plus the right number of knots for each numeric +# variable. +# +factor.count <- sapply (train[,-1,drop=FALSE][,factor.vars], function(x) length(unique(x))) +if (length(factor.count) == 0) + factor.count <- 0 +cats.in.var[factor.vars] <- factor.count +cats.in.var[!factor.vars] <- knots.vec +phi <- numeric (sum (factor.count) + sum (knots.vec)) +# +# Make sure that the k's are valid integers. +# +k <- round (k) +if (any (k <= 0)) +{ + warning ("Some invalid k's excluded") + k <- k[k > 0] +} +k.len <- length(k) +best.k <- 0 +if (missing (permute) && !missing(improvement)) + permute <- 0 +classif <- 1 # Required by "ords" +status <- 0 +# +# Set up prior, if necessary +# +if (prior.ind == 3) +{ + if (missing (prior) || any (prior <= 0)) + stop ("Missing or invalid prior") + if (length(prior) != nclass) + stop ("Prior has wrong length") + prior <- prior / sum (prior) +} +else + prior <- numeric (nclass) +# +# Convert factors to zero-based numerics. Save the levels for later. +# +for (i in 1:ncol(train)) + if (is.factor (train[,i])) + train[,i] <- as.numeric (train[,i]) - 1 +if (nrow.test > 0) + for (i in 1:ncol(test)) + if (is.factor (test[,i])) + test[,i] <- as.numeric (test[,i]) - 1 +# +# Right now R has to transpose the data. We should do this in C.
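+# +# For example, c(t(matrix (1:6, 2, 3))) is c(1, 3, 5, 2, 4, 6): the +# first row of the 2 x 3 matrix, then the second, which is the row-major +# layout the C code expects.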
+# +train.names <- names(train) +train <- c(t(matrix (unlist (train), nrow(train), ncol(train)))) +if (nrow.test == 0) + test <- 0 +else +{ + test <- c(t(matrix (unlist (test), nrow(test), ncol(test)))) + names(test) <- NULL +} +thang <- .C("ord", + as.double (train), as.integer (nrow.train), as.integer (ncol.train), + as.double (test), as.integer (nrow.test), as.integer (ncol.test), + test.classes = as.integer (test.classes), + cdata = as.double (cdata), phi = as.double (phi), + as.integer (nclass), as.integer (xvals), + as.integer (increase), as.integer (permute), as.integer (permute.tail), + as.double (ridge), as.double (c(knot.values)), + as.integer (k.len), as.integer (k), + best.k = as.integer (best.k), + as.integer (classif), as.double (improvement), as.integer (cats.in.var), + as.integer (number.in.class), + misclass.mat = as.double (misclass.mat), + as.integer (xval.ceil), + as.integer (once.out.always.out), + prior.ind = as.integer (prior.ind), prior = as.double (prior), + as.integer (verbose), status = as.integer(status), + PACKAGE = "knncat") +thang <- thang[names(thang) != ""] +thang$misclass.mat <- matrix (thang$misclass.mat, nclass, nclass) +if (nrow.test == 0) +{ + thang$misclass.type <- "train" + thang$test.classes <- NULL +} +else +{ + thang$misclass.type <- "test" +} +# +# Store names of variables that were actually used. +# +thang$train <- train.data.name +vars <- train.names[-1][thang$cdata != 0] +factor.vars <- factor.vars[thang$cdata != 0] +thang$vars <- rep ("factor", length(factor.vars)) +thang$vars[!factor.vars] <- "numeric" +names(thang$vars) <- vars +train.levs <- train.levs[thang$cdata != 0] + +phi <- thang$phi[1:(sum (sapply (train.levs, length)))] +# +# Replace levels of numeric variables, which are currently NULL, +# with some useful text. +# +thang$knots.vec <- knots.vec +knots.vec <- knots.vec[thang$cdata != 0] +if (any (!factor.vars)) +{ + num.ctr <- 1 + for (i in 1:length(factor.vars)) + { + if (!factor.vars[i]) + { + knot.levels <- paste ("knot.", 1:knots.vec[num.ctr], sep="") + train.levs[[i]] <- knot.levels + num.ctr <- num.ctr + 1 + } + } +} +# +# Create list of phis by variable, then attach the names from train.levs. +# +phi.list <- split (phi, rep (names(train.levs), sapply (train.levs, length))) +phi.list <- phi.list[names(train.levs)] +for (i in 1:length(phi.list)) +{ + names(phi.list[[i]]) <- train.levs[[i]] +} +thang$phi <- phi.list +thang$build <- numeric (5) +names(thang$build) <- c("Permute", "Improvement", "Once.Out", "Ridge", + "Xval") +thang$build[] <- c(permute, improvement, once.out.always.out, ridge, + xvals) +thang$k <- k +thang$missing <- missing.values[thang$cdata != 0] +if (any (!factor.vars)) +{ + matchers <- is.element (dimnames(knot.values)[[2]], vars) + if (any (matchers)) + thang$knot.values <- knot.values[,matchers, drop=FALSE] +} +oldClass (thang) <- "knncat" +invisible (thang) +} +"plot.knncat" <- +function (x, ...) +{ +# +# Plot.knncat: plot method for knncat objects +# +# Arguments: x: knncat object, from knncat(). +# +y <- unlist (x$phi) +maxx <- length(x$phi) +# +# Set up plot without axes; add y axis +# +ten.pct <- maxx * .10 +plot (c(1-ten.pct, maxx+ten.pct), range(y), xlab = "Variable", ylab = "Phi", + type = "n", axes=FALSE) +box () +axis (2) +# +# Add x axis with proper labels +# +axis (1, at = 1:maxx, labels=names(x$vars)) +# +# Now plot the phi's, using the levels names.
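+# +# (For instance, a factor whose phi entries are named "M" and "F" is +# drawn as those two strings, each at the height of its phi value, in +# that variable's column.)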
+# +for (i in 1:length(x$vars)) +{ + vec <- x$phi[[i]] + text (i, vec, names(vec)) +} +} +"predict.knncat" <- +function(object, train, newdata, train.classcol = 1, newdata.classcol = 1, + return.classes = TRUE, more = FALSE, verbose = 0, ...) +{ +# +# Perform knncat-type classification on new data. +# +# +# First ensure that all the predictor variables in the original appear in the +# newdata. +# +vars <- names(object$vars) +factor.vars <- object$vars == "factor" +not.found <- vars[!is.element (vars, names(newdata))] +if (length(not.found) > 0) + stop (paste ("Variables ", paste (not.found, collapse=", "), + "not found in newdata.")) +# +# Ensure that the sets of levels match up. This might be stronger than +# we need. +# +train <- data.frame (train[,train.classcol], train[,vars]) +if (is.factor (train[,1])) + class.labels = levels(train[,1]) +else + class.labels = seq (0, max(train[,1])) +if (newdata.classcol <= 0) + newdata <- data.frame (class = rep (0, nrow(newdata)), newdata[,vars]) +else + newdata <- data.frame (newdata[,newdata.classcol], newdata[,vars]) +newdata.true.class <- newdata[,1] +if (any (factor.vars)) +{ + train.names <- sapply (object$phi[factor.vars], names) + newdata.names <- sapply (newdata[,vars[factor.vars],drop=FALSE], levels) + if (!identical (TRUE, all.equal (train.names, newdata.names))) + stop ("Some level names differ in train and newdata.") +} +# +# Convert categoricals to numerics in the usual way +# +for (i in 1:ncol(train)) +{ + if (is.factor (train[,i])) + { + train[,i] <- as.numeric (train[,i]) - 1 + newdata[,i] <- as.numeric (newdata[,i]) - 1 + } +} +nrow.train <- nrow(train) +nrow.newdata <- nrow(newdata) +ncol.train <- ncol(train) +ncol.newdata <- ncol(newdata) +train <- c(t(matrix (unlist (train), nrow.train, ncol.train))) +newdata <- c(t(matrix (unlist (newdata), nrow.newdata, ncol.newdata))) +cats.in.var <- sapply (object$phi, length) +cum.cats.this.subset <- c(0, cumsum (cats.in.var)[-length(cats.in.var)]) +cdata <- rep (1, ncol.train - 1) +phidata <- unlist (object$phi) +prior.ind <- object$prior.ind +priordata <- object$prior +number.of.classes <- length(priordata) +if (any (names(object) == "knot.values")) + knots <- c(object$knot.values) +else + knots <- 0 +error.rate <- 0 +increase <- numeric (ncol.train - 1) +increase[!factor.vars] <- -5 +increase[factor.vars] <- -1 +if (return.classes) + classes <- numeric (nrow.newdata) +else + classes <- 0 +status <- 0 +thang <- .C ("donnwrap", + as.double (train), as.integer(nrow.train), as.integer(ncol.train), + as.double (newdata), as.integer(nrow.newdata), as.integer(ncol.newdata), + as.integer (cats.in.var), as.integer(cum.cats.this.subset), + as.double (cdata), as.double (phidata), as.integer (prior.ind), + as.double (priordata), + as.integer (number.of.classes), as.integer (increase), + as.double (knots), rate = as.double (error.rate), + as.integer (object$best.k), + as.integer (return.classes), classes = as.integer (classes), + as.integer (verbose), as.integer (status), PACKAGE="knncat") +if (more == TRUE) + cat ("Test set error rate is ", + paste (signif (100* thang$rate, 3), "%", sep=""), "\n") +if (return.classes) +{ + preds <- factor (class.labels[thang$classes + 1]) + return (preds) +} +} +"print.knncat" <- +function (x, ...) +{ +# +# Print.knncat: print method for knncat objects. +# +# Arguments: x: a knncat classifier, from knncat() +# +# Right now all this does is print the error rate. 
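+# +# Correct classifications sit on the diagonal of misclass.mat, so the +# rate printed below is 1 - sum(diag(m)) / sum(m); sample output (the +# number is illustrative): Training set misclass rate: 18%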
+# +yes <- sum (diag(x$misclass.mat)) +tot <- sum (x$misclass.mat) +# +# Get an upper-case letter to start. Neatness counts. +# +cute <- ifelse (x$misclass.type == "train", "Training", "Test") +cat (paste (cute, " set misclass rate: ", + round (100 * (1 - yes/tot), 2), "%\n", sep="")) +} +.First.lib <- function(libname, pkgname) { +library.dynam ("knncat", pkgname, libname) +} diff --git a/R/plot.knncat.R b/R/plot.knncat.R index 4365a12..11d34f4 100644 --- a/R/plot.knncat.R +++ b/R/plot.knncat.R @@ -5,7 +5,6 @@ function (x, ...) # Plot.knncat: plot method for knncat objects # # Arguments: x: knncat object, from knncat(). -# ...: Other arguments, passed to plot() # y <- unlist (x$phi) maxx <- length(x$phi) @@ -14,7 +13,7 @@ maxx <- length(x$phi) # ten.pct <- maxx * .10 plot (c(1-ten.pct, maxx+ten.pct), range(y), xlab = "Variable", ylab = "Phi", - type = "n", axes=FALSE, ...) + type = "n", axes=FALSE) box () axis (2) # diff --git a/R/predict.knncat.R b/R/predict.knncat.R index 2e6bfb8..ef644ca 100644 --- a/R/predict.knncat.R +++ b/R/predict.knncat.R @@ -4,16 +4,6 @@ function(object, train, newdata, train.classcol = 1, newdata.classcol = 1, { # # Perform knncat-type classification on new data. -# -# Arguments: object: "knncat" object from call to knncat() -# train: Training data used to construct object -# newdata: Test data to be classified -# train.classcol: Number of training set column containing class -# newdata.classcol: Number of test set column containing class -# return.classes: If TRUE, return classes; if not, just print error rate -# more: If TRUE, print error rate -# verbose: Diagnostic printing level (default 0) -# ...: Other arguments (currently ignored) # # # First ensure that all the predictor variables in the original appear in the @@ -67,6 +57,7 @@ cats.in.var <- sapply (object$phi, length) cum.cats.this.subset <- c(0, cumsum (cats.in.var)[-length(cats.in.var)]) cdata <- rep (1, ncol.train - 1) phidata <- unlist (object$phi) +prior.ind <- object$prior.ind priordata <- object$prior number.of.classes <- length(priordata) if (any (names(object) == "knot.values")) @@ -86,7 +77,8 @@ thang <- .C ("donnwrap", as.double (train), as.integer(nrow.train), as.integer(ncol.train), as.double (newdata), as.integer(nrow.newdata), as.integer(ncol.newdata), as.integer (cats.in.var), as.integer(cum.cats.this.subset), - as.double (cdata), as.double (phidata), as.double (priordata), + as.double (cdata), as.double (phidata), as.integer (prior.ind), + as.double (priordata), as.integer (number.of.classes), as.integer (increase), as.double (knots), rate = as.double (error.rate), as.integer (object$best.k), diff --git a/R/print.knncat.R b/R/print.knncat.R index c98df3f..e740de6 100644 --- a/R/print.knncat.R +++ b/R/print.knncat.R @@ -5,7 +5,6 @@ function (x, ...) # Print.knncat: print method for knncat objects. # # Arguments: x: a knncat classifier, from knncat() -# ...: any other arguments (currently ignored) # # Right now all this does is print the error rate. 
# diff --git a/man/knncat.Rd b/man/knncat.Rd index ad12b94..f161f1c 100644 --- a/man/knncat.Rd +++ b/man/knncat.Rd @@ -94,8 +94,8 @@ synpred <- predict (syncat, synth.tr, synth.te, train.classcol=3, table (synpred, synth.te$yc) synpred 0 1 - 0 466 61 - 1 34 439 + 0 460 91 + 1 40 409 # # Or do the whole thing in one pass: # diff --git a/man/plot.knncat.Rd b/man/plot.knncat.Rd index bfcebe4..18b1d5a 100644 --- a/man/plot.knncat.Rd +++ b/man/plot.knncat.Rd @@ -7,7 +7,7 @@ } \arguments{ \item{x}{Knncat object, from \link{knncat}} -\item{\dots}{Other arguments, passed to \link{plot}} +\item{\dots}{Other arguments, currently ignored} } \keyword{models} \details{This plot shows all the estimated numbers associated with each diff --git a/man/predict.knncat.Rd b/man/predict.knncat.Rd index 2c6993f..af45ec7 100644 --- a/man/predict.knncat.Rd +++ b/man/predict.knncat.Rd @@ -3,8 +3,9 @@ \title{Predict on a knncat classifier} \description{Produce predictions for a knncat classifier} \usage{ -\method{predict}{knncat}(object, train, newdata, train.classcol=1, -newdata.classcol = 1, return.classes=TRUE, more=FALSE, verbose = 0, \dots) +\method{predict}{knncat}(object, train, newdata, +train.classcol=1, newdata.classcol=1, return.classes=TRUE, +more=FALSE, verbose = 0, \dots) } \arguments{ \item{object}{Knncat object, from \link{knncat}} @@ -18,11 +19,11 @@ Default: 1. If <= 0, new data has no classifications.} of the newdata set. Default: TRUE} \item{more}{Logical; if TRUE, also print error rate. Default: FALSE} \item{verbose}{Level of verbosity for debugging. Default: 0} -\item{\dots}{Other arguments (currently ignored)} +\item{\dots}{Other arguments, currently ignored} } \keyword{models} \details{This prints the misclassification rate from the knncat classifier, together with an indication as to whether it was based on a training or -test set. By default it returns the vector of predicted classes.} +test set.} \value{None.} \author{Samuel E.
Buttrey, \email{buttrey@nps.edu}} diff --git a/src/Makedeps b/src/Makedeps deleted file mode 100644 index 465d2b6..0000000 --- a/src/Makedeps +++ /dev/null @@ -1,23 +0,0 @@ -com.o: com.c ranlib.h -dodisc.o: dodisc.c matrix.h utils.h ord.h -donn.o: donn.c matrix.h utils.h ord.h donn.h -donnwrap.o: donnwrap.c matrix.h utils.h ord.h donn.h -dsort.o: dsort.c -linpack.o: linpack.c -matrix.o: matrix.c utils.h matrix.h h:/r/include/R.h \ - h:/r/include/Rconfig.h h:/r/include/R_ext/Arith.h \ - h:/r/include/R_ext/libextern.h h:/r/include/R_ext/Boolean.h \ - h:/r/include/R_ext/Complex.h h:/r/include/R_ext/Constants.h \ - h:/r/include/R_ext/Error.h h:/r/include/R_ext/Memory.h \ - h:/r/include/R_ext/Print.h h:/r/include/R_ext/Random.h \ - h:/r/include/R_ext/Utils.h h:/r/include/R_ext/RS.h -objective.o: objective.c matrix.h utils.h ord.h -ords.o: ords.c matrix.h utils.h ord.h ordfuncs.h donn.h dodisc.h \ - h:/r/include/R.h h:/r/include/Rconfig.h h:/r/include/R_ext/Arith.h \ - h:/r/include/R_ext/libextern.h h:/r/include/R_ext/Boolean.h \ - h:/r/include/R_ext/Complex.h h:/r/include/R_ext/Constants.h \ - h:/r/include/R_ext/Error.h h:/r/include/R_ext/Memory.h \ - h:/r/include/R_ext/Print.h h:/r/include/R_ext/Random.h \ - h:/r/include/R_ext/Utils.h h:/r/include/R_ext/RS.h -ranlib.o: ranlib.c ranlib.h -utilsR.o: utilsR.c utils.h diff --git a/src/dodisc.c b/src/dodisc.c index 88606dd..a814f82 100644 --- a/src/dodisc.c +++ b/src/dodisc.c @@ -22,14 +22,14 @@ extern long xval_upper; extern long *am_i_in; extern long *increase; -double sum_of_phis (long, double *, long *, long *, double **, double *); +double sum_of_phis (long, double *, Slong *, Slong *, double **, double *); int expand_vector (MATRIX *, MATRIX *, long, long, - long *, double *); + Slong *, double *); /*=========================== do_discriminant ===========================*/ int do_discriminant (MATRIX *test, MATRIX *eigenvalues, MATRIX *eigenvectors, - MATRIX *make_phi, long *cats_in_var, long *cum_cats_this_subset, + MATRIX *make_phi, Slong *cats_in_var, Slong *cum_cats_this_subset, long dimension, long number_of_variables, double **knots, MATRIX *cost, MATRIX *prior, double *error_rate, MATRIX *misclass_mat, int do_the_omission) @@ -239,7 +239,7 @@ return (TRUE); /*=========================== sum_of_phis ==============================*/ double sum_of_phis (long number_of_variables, double *phi, - long *cats_in_var, long *cum_cats, + Slong *cats_in_var, Slong *cum_cats, double **knots, double *data) { long var_ctr, offset; @@ -276,7 +276,7 @@ return (sum); /*=========================== expand_vector ==============================*/ int expand_vector (MATRIX *new, MATRIX *old, long how_many_holes, - long number_of_variables, long *holes, + long number_of_variables, Slong *holes, double *filler) { /* @@ -330,7 +330,7 @@ hole_ctr = second_var_in; while (new_ctr < new->ncol) { - if (holes != (long *) NULL && holes[hole_ctr] == new_ctr) + if (holes != (Slong *) NULL && (unsigned long) holes[hole_ctr] == new_ctr) { if (filler == (double *) NULL) new->data[new_ctr] = 0.0; diff --git a/src/dodisc.h b/src/dodisc.h index 7ff5f7f..318fb36 100644 --- a/src/dodisc.h +++ b/src/dodisc.h @@ -1,6 +1,6 @@ int do_discriminant (MATRIX *test, MATRIX *eigenvalues, MATRIX *eigenvectors, - MATRIX *make_phi, long *cats_in_var, long *cum_cats, + MATRIX *make_phi, Slong *cats_in_var, Slong *cum_cats, long dimension, long number_of_variables, double **knots, MATRIX *cost, MATRIX *prior, double *error_rate, MATRIX *misclass_mat, int do_the_omission); diff --git 
a/src/donn.c b/src/donn.c index 7937887..66267ae 100644 --- a/src/donn.c +++ b/src/donn.c @@ -23,17 +23,18 @@ void do_nothing(); #endif long verbose; +long save_verbose; long global_test_ctr; long number_of_classes; long *xval_indices; long xval_lower; long xval_upper; -long *increase; +Slong *increase; double c_euclidean (double *, double *, double *, long, double); double f_euclidean (double *vec_1, double *vec_2, double *phi, double *c, - long n, double threshold, long *cats_in_var, - long *cum_cats, + long n, double threshold, Slong *cats_in_var, + Slong *cum_cats, MATRIX *prior, double **knots); double c_absolute (double *, double *, double *, long, double); @@ -41,19 +42,19 @@ double c_absolute (double *, double *, double *, long, double); static double *class_results; static double *class_results_with_ties; static int *tie_marker; -void xpoll (long *classes, double *distances, long *k, long how_many_ks, +void xpoll (long *classes, double *distances, Slong *k, long how_many_ks, long largest_k, long slots, MATRIX * prior, long *outcome); /*=========================== do_nn =================================*/ -int do_nn (long *quit, MATRIX *training, MATRIX *test, - MATRIX *c, long *k, long *in_how_many_ks, - long *theyre_the_same, double *phi, long *cats_in_var, - long *cum_cats_this_subset, - double **knots, - MATRIX *cost, MATRIX *prior, double *error_rates, - MATRIX *misclass_mat, long *return_classes, Slong *classes, +int do_nn (Slong *quit, MATRIX *training, MATRIX *test, + MATRIX *c, Slong *k, long *in_how_many_ks, + Slong *theyre_the_same, double *phi, Slong *cats_in_var, + Slong *cum_cats_this_subset, + double **knots, MATRIX *cost, + Slong *prior_ind, MATRIX *prior, double *error_rates, + MATRIX *misclass_mat, Slong *return_classes, Slong *classes, long *in_xval_lower, long *in_xval_upper, long *in_xval_indices, - long *in_increase, long *in_number_of_classes, long *in_verbose) + Slong *in_increase, Slong *in_number_of_classes, Slong *in_verbose) { /* ** This deluxe version of do_nn became necessary when we started to @@ -82,7 +83,7 @@ long test_item_count; long dist_ctr, move_ctr, k_ctr, number_of_nearest, test_class; -long how_many_ks = 1, number_of_vars; +long how_many_ks, number_of_vars; double dist; @@ -98,6 +99,9 @@ static long slots; if (*quit == TRUE) initialized = TRUE; +if (*prior_ind == IGNORED) + prior = (MATRIX *) NULL; + how_many_ks = *in_how_many_ks; if (initialized == FALSE) @@ -106,7 +110,6 @@ if (initialized == FALSE) for (j = 0; j < how_many_ks; j++) if (k[j] > largest_k) largest_k = k[j]; - if (verbose > 0) Rprintf ("Largest k is now %i\n", largest_k); slots = largest_k + 15; @@ -257,7 +260,7 @@ double f_euclidean (double *vec_1, double *vec_2, double *phi, double *c, ** "nearest-neighbor" distances. As soon as a distance gets above that, we ** know we can stop this comparison. The function returns -1, and we continue. 
*/ - if (verbose > 3 && test_ctr <= 1 & train_ctr <= 1 && dist < 0) + if (verbose > 3 && test_ctr <= 1 && train_ctr <= 1 && dist < 0) Rprintf ("dist was < 0, skip\n"); if (dist < 0) continue; @@ -344,8 +347,18 @@ double f_euclidean (double *vec_1, double *vec_2, double *phi, double *c, nearest_class[j], nearest_distance[j]); } } + save_verbose = verbose; + if (verbose >= 2 && test_ctr <= 1) verbose = 4; + if (verbose >= 4) + { + if (prior == (MATRIX *) NULL) + Rprintf ("About to call xpoll, and prior is so very NULL\n"); + else + Rprintf ("About to call xpoll, and prior is so very *not* NULL\n"); +} xpoll (nearest_class, nearest_distance, k, how_many_ks, largest_k, slots, prior, poll_result); + verbose = save_verbose; if (verbose >= 2 && test_ctr <= 1) { @@ -379,11 +392,15 @@ double f_euclidean (double *vec_1, double *vec_2, double *phi, double *c, += *SUB (cost, test_class, poll_result[k_ctr]); if (nearest_distance[0] == 0.0) misclass_with_distance_zero[k_ctr]++; - if (verbose > 2 && *theyre_the_same == FALSE) + if (test_ctr <= 1 && verbose > 2 && *theyre_the_same == FALSE) { Rprintf ( -"k = %ld: Classif.err test rec. %li (a %li) as %li (nearest: %li, dist. %f)\n", +"k = %ld: Classif.err test rec. %li (a %li) as %li (nearest: %li, dist. %f)", k[k_ctr], test_ctr, test_class, (long) poll_result[k_ctr], (long) nearest_neighbor[0], nearest_distance[0]); + Rprintf ("(next: %li, dist. %f), (next: %li, dist. %f)\n", + (long) nearest_neighbor[1], nearest_distance[1], + (long) nearest_neighbor[2], nearest_distance[2]); + } } else { @@ -429,7 +446,7 @@ return (TRUE); } /* end "do_nn." */ /*============================ poll =====================================*/ -void xpoll (long *classes, double *distances, long *k, long how_many_ks, +void xpoll (long *classes, double *distances, Slong *k, long how_many_ks, long largest_k, long slots, MATRIX * prior, long *outcome) { int i, k_ctr; @@ -489,12 +506,24 @@ for (k_ctr = 0; k_ctr < how_many_ks; k_ctr++) { class_results[i] = 0.0; } +if (verbose >= 4) +Rprintf ("Number of classes is %li\n", number_of_classes); +if (verbose >= 4) +Rprintf ("First two class results are %f and %f\n", +class_results[0], class_results[1]); /* ...and go through the neighbors to fill it up again. When classes[i] ** = j, add one to the j-th entry of class_results. Well, not one, ** exactly; if priors isn't NULL, add 1/(that class' prior). That ** way, classes with large priors contribute less. Which is as it should be. */ +if (verbose >= 4) +{ + if (prior == (MATRIX *) NULL) + Rprintf ("By the way, prior is so very NULL\n"); + else + Rprintf ("By the way, prior is so very *not* NULL\n"); +} for (i = 0; i < k[k_ctr]; i++) { if (prior == (MATRIX *) NULL) @@ -503,6 +532,9 @@ for (k_ctr = 0; k_ctr < how_many_ks; k_ctr++) class_results[classes[i]] += (1.0 / *SUB (prior, classes[i], classes[i])); } +if (verbose >= 4) +Rprintf ("First two class results are %f and %f\n", +class_results[0], class_results[1]); /* ** Okay. It could happen that some other neighbors are tied ** with the kth one. We would know that if their distances equalled the @@ -560,11 +592,16 @@ for (k_ctr = 0; k_ctr < how_many_ks; k_ctr++) if (tie == 0) { +if (verbose > 2) + Rprintf ("Max class was %li, putting that in outcome %li\n", + max_class, k_ctr); + outcome[k_ctr] = max_class; continue; } -/**** printf ("Got a tie.\n"); ****/ +if (verbose > 2) + Rprintf ("Got a tie.\n"); /* Make a note of all tied classes.... 
*/ for (i = 0; i < number_of_classes; i ++) { @@ -606,8 +643,8 @@ return (sum); /*========================= f_euclidean ==================================*/ double f_euclidean (double *vec_1, double *vec_2, double *phi, double *c, - long n, double threshold, long *cats_in_var, - long *cum_cats_this_subset, + long n, double threshold, Slong *cats_in_var, + Slong *cum_cats_this_subset, MATRIX *prior, double **knots) { long col_ctr, knot_ctr, offset; diff --git a/src/donn.h b/src/donn.h index d5bf4b6..6b62b48 100644 --- a/src/donn.h +++ b/src/donn.h @@ -4,13 +4,12 @@ #define Slong long #endif -int do_nn (long *quit, - MATRIX *training, MATRIX *test, - MATRIX *c, long *k, long *how_many_ks, long *theyre_the_same, - double *phi, long *cats_in_var, long *cum_cats_ptr, - double **knots, MATRIX *cost, MATRIX *prior, - double *error_rates, MATRIX *misclass_mat, - long *return_classes, Slong *classes, - long *xval_lower, long *xval_upper, long *xval_indices, - long *increase, long *number_of_classes, long *verbose); - +int do_nn (Slong *quit, MATRIX *training, MATRIX *test, + MATRIX *c, Slong *k, long *in_how_many_ks, + Slong *theyre_the_same, double *phi, Slong *cats_in_var, + Slong *cum_cats_this_subset, + double **knots, MATRIX *cost, + Slong *prior_ind, MATRIX *prior, double *error_rates, + MATRIX *misclass_mat, Slong *return_classes, Slong *classes, + long *in_xval_lower, long *in_xval_upper, long *in_xval_indices, + Slong *in_increase, Slong *in_number_of_classes, Slong *in_verbose); diff --git a/src/donnwrap.c.h b/src/donnwrap.c similarity index 90% rename from src/donnwrap.c.h rename to src/donnwrap.c index c332521..2809048 100644 --- a/src/donnwrap.c.h +++ b/src/donnwrap.c @@ -26,7 +26,8 @@ void donnwrap ( double *traindata, Slong *trainrows, Slong *traincols, double *testdata, Slong *testrows, Slong *testcols, Slong *cats_in_var, Slong *cum_cats_this_subset, - double *cdata, double *phidata, double *priordata, + double *cdata, double *phidata, + Slong *prior_ind, double *priordata, Slong *number_of_classes, Slong *increase, double *in_knots, double *error_rate, Slong *best_k, Slong *return_classes, Slong *classes, @@ -34,7 +35,8 @@ void donnwrap ( { long i; MATRIX *training, *test, *c, *cost, *misclass_mat, *prior; -long quit, how_many_ks, theyre_the_same, xval_lower, xval_upper; +Slong quit, theyre_the_same; +long how_many_ks, xval_lower, xval_upper; long knot_ctr, number_of_vars; double **knots; @@ -78,7 +80,7 @@ do_nn (&quit, training, test, c, best_k, &how_many_ks, &theyre_the_same, phidata, cats_in_var, cum_cats_this_subset, knots, - (MATRIX *) NULL, prior, error_rate, + (MATRIX *) NULL, prior_ind, prior, error_rate, (MATRIX *) NULL, return_classes, classes, &xval_lower, &xval_upper, (long *) NULL, increase, number_of_classes, verbose); @@ -88,7 +90,7 @@ do_nn (&quit, training, test, c, best_k, &how_many_ks, &theyre_the_same, phidata, cats_in_var, cum_cats_this_subset, knots, - (MATRIX *) NULL, prior, error_rate, + (MATRIX *) NULL, prior_ind, prior, error_rate, (MATRIX *) NULL, return_classes, classes, &xval_lower, &xval_upper, (long *) NULL, increase, number_of_classes, verbose); diff --git a/src/ord.h b/src/ord.h index c3ef6c1..ec7c32d 100644 --- a/src/ord.h +++ b/src/ord.h @@ -45,10 +45,10 @@ /* ** Defines for prior probabilities */ -#define ESTIMATED 1 -#define ALL_EQUAL 2 -#define SUPPLIED 3 -#define IGNORED 4 +#define ESTIMATED (Slong) 1 +#define ALL_EQUAL (Slong) 2 +#define SUPPLIED (Slong) 3 +#define IGNORED (Slong) 4 /* Knots per (numeric) variable */ #define KNOTS 5L diff 
--git a/src/ordfuncs.h b/src/ordfuncs.h index 401276a..72a49a1 100644 --- a/src/ordfuncs.h +++ b/src/ordfuncs.h @@ -15,9 +15,9 @@ void matrix_element_divide (MATRIX *result, MATRIX *num, MATRIX *denom); int fill_margin_holder (MATRIX *margin_holder, long *permute_index, long which, - MATRIX *data, long *cats_in_var, - double **knots, long *cum_cats_this_subset, - long *number_in_class, long *increase); + MATRIX *data, Slong *cats_in_var, + double **knots, Slong *cum_cats_this_subset, + long *number_in_class, Slong *increase); void constraint (long *mode, long *how_many, long *vars, long *nrow, double *x, double *value, double *jacobian, long *nstate); @@ -26,22 +26,22 @@ void objective (long *mode, long *n, double *x, double *objvalue, double *gradient, long *nstate); int fill_u (MATRIX *training, - long *cats_in_var, long *cum_cats_this_subset, + Slong *cats_in_var, Slong *cum_cats_this_subset, long *number_in_class, MATRIX *margin_ptr); int fill_row_of_u_submatrices (MATRIX *training, - long starting_row, long *cats_in_var, - long *cum_cats_this_subset, long *number_in_class, MATRIX *margin_ptr); + long starting_row, Slong *cats_in_var, + Slong *cum_cats_this_subset, long *number_in_class, MATRIX *margin_ptr); int fill_w (MATRIX *training, - double **knots, long *cats_in_var, long *cum_cats_this_subset, + double **knots, Slong *cats_in_var, Slong *cum_cats_this_subset, MATRIX *margin_ptr, long i, - long *permute_indices, long *increase); + long *permute_indices, Slong *increase); int fill_row_of_w_submatrices (MATRIX *training, - long starting_row, double **knots, long *cats_in_var, - long *cum_cats_this_subset, MATRIX *margin_ptr, long which_to_permute, - long *permute_indices, long *increase); + long starting_row, double **knots, Slong *cats_in_var, + Slong *cum_cats_this_subset, MATRIX *margin_ptr, long which_to_permute, + long *permute_indices, Slong *increase); int divide_by_root_before_and_after (MATRIX *in, MATRIX *diag); @@ -57,11 +57,11 @@ int adjust_column (unsigned long column, MATRIX *adjustee, long increase, int handle_fascinating_file (FILE *fascinating_file, int *are_any_numeric); double add_ordered_variable (double *result, long which_var, long dimension, - long *cats_in_var, - MATRIX *eigenvectors, long *increase); + Slong *cats_in_var, + MATRIX *eigenvectors, Slong *increase); void deal_with_missing_values (MATRIX *training, double missing_max, - long *increase, long *cats_in_var, + Slong *increase, Slong *cats_in_var, double *missing_values); int insert_missing_values (MATRIX *mat, double missing_max, @@ -74,14 +74,14 @@ int do_the_eigen_thing (MATRIX *eigenval_ptr, int get_a_solution (long permute_ctr, long *permute_indices, long permute_len, long i, MATRIX *margin_ptr, MATRIX *training, - long *cats_in_var, long *cum_cats_ptr, double **knots, + Slong *cats_in_var, Slong *cum_cats_ptr, double **knots, long *number_in_class, MATRIX *eigenval_ptr, MATRIX *eigenvec_ptr, MATRIX *w_inv_m, double ridge, int classification, long number_of_vars, int dimension, long current_cat_total, - MATRIX **prior, int prior_ind, int prior_len, + MATRIX **prior, Slong prior_ind, int prior_len, int first_time_through, int do_the_omission, - long *increase, int quit); + Slong *increase, int quit); int prepare_eigen_matrices ( MATRIX **original_eigenvalues, MATRIX **original_eigenvectors, @@ -94,25 +94,25 @@ int prepare_eigen_matrices ( void count_them_cats (long number_of_vars, long *current_cat_total, long *currently_ordered, long *currently_numeric, - long *orderable_cats, MATRIX *c, long 
*increase); + long *orderable_cats, MATRIX *c, Slong *increase); int get_sequence_of_solutions (long quit_dimension, long number_of_vars, long permute, long permute_len, long *permute_indices, - double improvement, long k_len, long *k, - long *cats_in_var, long *cum_cats_ptr, MATRIX *c, + double improvement, long k_len, Slong *k, + Slong *cats_in_var, Slong *cum_cats_ptr, MATRIX *c, MATRIX **original_eigenvectors, MATRIX **original_w_inv_m_mat, long *number_in_class, long xval_ctr, MATRIX *xval_result, long *xval_ceiling, - MATRIX *cost, MATRIX *prior, long prior_ind, long number_of_classes, - double *misclass_rate, int do_the_omission, long *increase, + MATRIX *cost, MATRIX *prior, Slong prior_ind, Slong number_of_classes, + double *misclass_rate, int do_the_omission, Slong *increase, long *once_out_always_out); int do_the_discriminant_thing (long permute_ctr, MATRIX *eigenval_ptr, MATRIX *eigenvec_ptr, MATRIX *original_w_inv_m_mat, MATRIX *prior, long i, long number_of_vars, int dimension, - long current_cat_total, int do_the_omission, long *increase); + long current_cat_total, int do_the_omission, Slong *increase); /* Nag routines for eigenvalues. */ diff --git a/src/ords.c b/src/ords.c index 71053bd..d282c13 100644 --- a/src/ords.c +++ b/src/ords.c @@ -38,8 +38,8 @@ void do_nothing(); #include "dodisc.h" #include "R.h" -long long_TRUE = (long) TRUE; -long long_FALSE = (long) FALSE; +Slong Slong_TRUE = (Slong) TRUE; +Slong Slong_FALSE = (Slong) FALSE; long zero = 0L; long one = 1L; @@ -88,9 +88,8 @@ MATRIX *phi = (MATRIX *) NULL; long *am_i_in; /* Keeps track of what variables are in. */ long am_i_in_ctr; /* Keeps track of variable rankings. */ -long *last_cum_cats_this_subset; -long *cum_cats_this_subset; /* */ -long *cats_in_var; /* Ith entry: num of levels in var i */ +Slong *cum_cats_this_subset; /* */ +Slong *cats_in_var; /* Ith entry: num of levels in var i */ long current_cat_total; long currently_ordered; /* Number of variables in the current subset */ /* which are ordered */ @@ -99,7 +98,7 @@ long orderable_cats; /* Number of cats in the current subset's */ /* ordered variables */ int all_unordered; int all_numeric; -long theyre_the_same = FALSE; /* Are training and test sets identical? */ +Slong theyre_the_same = FALSE; /* Are training and test sets identical? */ int delete_this_variable_forever = TRUE; int classification = CLASSIFICATION; @@ -149,9 +148,9 @@ MATRIX *original_margin_holder, *permuted_margin_holder; ** + 1) columns. */ long number_of_vars; -long *increase; +Slong *increase; -long number_of_classes = 0L; /* Number of classes. You guessed that. */ +Slong number_of_classes = 0L; /* Number of classes. You guessed that. 
*/ long permute, permute_tail; /* @@ -170,7 +169,7 @@ double smallest_misclass_error; long smallest_misclass_dim, smallest_misclass_k; MATRIX *misclass_mat; -long verbose = 0; +Slong verbose = 0; /*============================= ord ==================================*/ void ord (double *traindata, Slong *trainrows, Slong *traincols, @@ -194,22 +193,22 @@ long var_ctr; /* */ long permute_ctr; /* */ long permute_len; /* */ long *cum_cats_before_var; /* Ith : # of categories in vars 1-(i-1) */ -long *cum_cats_ptr; /* */ +Slong *cum_cats_ptr; /* */ long *number_in_class; /* */ long *number_in_class_ptr; /* */ long total_number_of_cats; /* Number of levels in all categories */ long *permute_indices; /* */ MATRIX *c; /* */ double *misclass_rate; /* */ -long *k, best_k; +Slong *k, best_k; long row_ctr; -long prior_ind; +Slong prior_ind; long dimension = 0L; int first_time_through; int do_the_omission = 0; int do_nn_result, get_seq_result; long xval_ceiling; -long *return_classes = (long *) NULL; +Slong *return_classes = (Slong *) NULL; #define null_mat (MATRIX *) NULL /* Matrices for discrimination stuff. */ @@ -238,34 +237,34 @@ permute = (long) *in_permute; permute_tail = (long) *in_permute_tail; ridge = *in_ridge; /* this is double */ k_len = (long) *in_k_len; -best_k = (long ) *in_best_k; +best_k = (Slong) *in_best_k; /* Loop over the vector of nn's to fill k */ -alloc_some_longs (&k, k_len); +alloc_some_Slongs (&k, k_len); for (i = 0; i < k_len; i++) - k[i] = (long) in_k[i]; + k[i] = (Slong) in_k[i]; xvals = (long) *in_xvals; classification = (long) *in_classification; improvement = *in_improvement; /* this is double */ number_of_vars = (long) *traincols - 1; /* Loop over the set of columns to fill these two */ -alloc_some_longs (&cats_in_var, number_of_vars); -alloc_some_longs (&increase, number_of_vars); +alloc_some_Slongs (&cats_in_var, number_of_vars); +alloc_some_Slongs (&increase, number_of_vars); for (i = 0; i < number_of_vars; i++) { cats_in_var[i] = in_cats_in_var[i]; increase[i] = in_increase[i]; } /* Loop over the set of classes to fill number_in_class */ -number_of_classes = (long) *in_number_of_classes; -alloc_some_longs (&number_in_class, number_of_classes); +number_of_classes = (Slong) *in_number_of_classes; +alloc_some_longs (&number_in_class, (long) number_of_classes); for (i = 0; i < number_of_classes; i++) number_in_class[i] = in_number_in_class[i]; priordata = in_priordata; /* this is double */ -prior_ind = (long) *in_prior_ind; -verbose = (long) *in_verbose; +prior_ind = (Slong) *in_prior_ind; +verbose = (Slong) *in_verbose; once_out_always_out = (long *) in_once_out_always_out; *status = (Slong) 0; @@ -284,13 +283,12 @@ else alloc_some_longs (&permute_indices, train_n); alloc_some_longs (&xval_indices, train_n); alloc_some_longs(&cum_cats_before_var, number_of_vars); -alloc_some_longs(&cum_cats_this_subset, number_of_vars); -alloc_some_longs(&last_cum_cats_this_subset, number_of_vars); +alloc_some_Slongs(&cum_cats_this_subset, number_of_vars); alloc_some_doubles(&missing_values, number_of_vars); for (cat_ctr = 0L; cat_ctr < number_of_vars; cat_ctr++) { cum_cats_before_var [cat_ctr] = 0L; - cum_cats_this_subset[cat_ctr] = 0L; + cum_cats_this_subset[cat_ctr] = (Slong) 0; } alloc_some_longs(&am_i_in, number_of_vars); training = make_matrix ((long) train_n, number_of_vars + 1, @@ -403,7 +401,7 @@ for (xval_ctr = 0L; xval_ctr < xvals; xval_ctr++) ** Fill "permute_indices" with the indices of all the training set ** items that are in this cross-validation group. 
*/ -if (verbose > 0) +if (verbose > 1) Rprintf ("Top of xval loop, this is %i of %i\n", xval_ctr, xvals); permute_ctr = 0L; for (row_ctr = 0; row_ctr < train_n; row_ctr++) @@ -477,7 +475,7 @@ smallest_misclass_error = matrix_min (xval_result, &smallest_misclass_dim, if (verbose > 1) fprintf (stderr, "Best rate is with %li dims and %li nn's\n", - smallest_misclass_dim, k[smallest_misclass_k]); + smallest_misclass_dim, (long) k[smallest_misclass_k]); xval_indices = (long *) NULL; train_n_effective = train_n; @@ -542,12 +540,12 @@ if (classification == CLASSIFICATION) */ if (test_n == 0) { - return_classes = &long_FALSE; - do_nn_result = do_nn (&long_FALSE, training, training, + return_classes = &Slong_FALSE; + do_nn_result = do_nn (&Slong_FALSE, training, training, c, &best_k, &one, - &long_TRUE, /* theyre_the_same */ + &Slong_TRUE, /* theyre_the_same */ phi->data, cats_in_var, - cum_cats_ptr, knots, cost, prior, + cum_cats_ptr, knots, cost, &prior_ind, prior, misclass_rate, misclass_mat, return_classes, test_classes, &one, &zero, (long *) NULL, /* <- xval stuff */ @@ -555,12 +553,12 @@ if (classification == CLASSIFICATION) } else { - return_classes = &long_TRUE; - do_nn_result = do_nn (&long_FALSE, training, test, + return_classes = &Slong_TRUE; + do_nn_result = do_nn (&Slong_FALSE, training, test, c, &best_k, &one, - &long_FALSE, /* theyre_the_same */ + &Slong_FALSE, /* theyre_the_same */ phi->data, cats_in_var, - cum_cats_ptr, knots, cost, prior, + cum_cats_ptr, knots, cost, &prior_ind, prior, misclass_rate, misclass_mat, return_classes, test_classes, &one, &zero, (long *) NULL, /* <- xval stuff */ @@ -581,12 +579,12 @@ if (classification == CLASSIFICATION) smallest_misclass_dim + 1, misclass_rate[0]); /* Now call with "quit" set to TRUE to free up memory */ - return_classes = &long_FALSE; - do_nn (&long_TRUE, training, test, c, &(k[smallest_misclass_k]), &one, - &long_FALSE, phi->data, cats_in_var, cum_cats_ptr, knots, cost, - prior, misclass_rate, misclass_mat, FALSE, (Slong *) NULL, + return_classes = &Slong_FALSE; + do_nn (&Slong_TRUE, training, test, c, &(k[smallest_misclass_k]), &one, + &Slong_FALSE, phi->data, cats_in_var, cum_cats_ptr, knots, cost, + &prior_ind, prior, misclass_rate, misclass_mat, FALSE, (Slong *) NULL, (long *) NULL, (long *) NULL, (long *) NULL, - (long *) NULL, (long *) NULL, (long *) NULL); + (Slong *) NULL, (Slong *) NULL, (Slong *) NULL); free (knots); @@ -658,9 +656,9 @@ for (i = 0L; i < how_many; i++) /*============================== fill_margin_holder ======================*/ int fill_margin_holder (MATRIX *margin_holder, long *permute_index, long which, - MATRIX *data, long *cats_in_var, - double **knots, long *cum_cats_this_subset, - long *number_in_class, long *increase) + MATRIX *data, Slong *cats_in_var, + double **knots, Slong *cum_cats_this_subset, + long *number_in_class, Slong *increase) /* ** Here we fill up the "margin_holder" matrix. 
For a categorical ** variable, this holds what you expect; the entry in the i-th row associated @@ -838,7 +836,7 @@ return TRUE; } /* end "fill_margin_holder" */ /*============================ fill_u ===================================*/ -int fill_u (MATRIX *training, long *cats_in_var, long *cum_cats_this_subset, +int fill_u (MATRIX *training, Slong *cats_in_var, Slong *cum_cats_this_subset, long *number_in_class, MATRIX *margin_ptr) { long cat_row_ctr; @@ -858,8 +856,8 @@ return (TRUE); /*===================== fill_row_of_u_submatrices ========================*/ int fill_row_of_u_submatrices (MATRIX *training, - long starting_row, long *cats_in_var, - long *cum_cats_this_subset, long *number_in_class, MATRIX *margin_ptr) + long starting_row, Slong *cats_in_var, + Slong *cum_cats_this_subset, long *number_in_class, MATRIX *margin_ptr) { long row_ctr, big_col_ctr, col_ctr; long col_start, row_start; @@ -912,8 +910,8 @@ return (TRUE); /*======================= fill_w ======================================*/ int fill_w (MATRIX *training, - double **knots, long *cats_in_var, long *cum_cats_this_subset, - MATRIX *margin_ptr, long i, long *permute_indices, long *increase) + double **knots, Slong *cats_in_var, Slong *cum_cats_this_subset, + MATRIX *margin_ptr, long i, long *permute_indices, Slong *increase) { long cat_row_ctr; @@ -931,9 +929,9 @@ return (TRUE); /*===================== fill_row_of_w_submatrices ========================*/ int fill_row_of_w_submatrices (MATRIX *training, - long starting_row, double **knots, long *cats_in_var, - long *cum_cats_this_subset, MATRIX *margin_ptr, long which_to_permute, - long *permute_indices, long *increase) + long starting_row, double **knots, Slong *cats_in_var, + Slong *cum_cats_this_subset, MATRIX *margin_ptr, long which_to_permute, + long *permute_indices, Slong *increase) { long j, train_n; long col_ctr, knot_row_ctr, knot_col_ctr; @@ -1307,8 +1305,8 @@ return (TRUE); /*====================== add_ordered_variable =========================*/ double add_ordered_variable (double *result, long which_var, - long dimension, long *cats_in_var, MATRIX *phi, - long *increase) + long dimension, Slong *cats_in_var, MATRIX *phi, + Slong *increase) { /* ** Each ordered variable requires (# of cats) lin. constraints. 
(# of cats - 1) @@ -1695,7 +1693,7 @@ return (objective_result); /*====================== deal_with_missing_values =========================*/ void deal_with_missing_values (MATRIX *training, double missing_max, - long *increase, long *cats_in_var, + Slong *increase, Slong *cats_in_var, double *missing_values) { /* @@ -1901,14 +1899,14 @@ return (0); int get_a_solution (long permute_ctr, long *permute_indices, long permute_len, long current_var, MATRIX *margin_ptr, MATRIX *training, - long *cats_in_var, long *cum_cats_this_subset, + Slong *cats_in_var, Slong *cum_cats_this_subset, double **knots, long *number_in_class, MATRIX *eigenval_ptr, MATRIX *eigenvec_ptr, MATRIX *w_inv_m, double local_ridge, int classification, long number_of_vars, int dimension, - long current_cat_total, MATRIX **prior, int prior_ind, + long current_cat_total, MATRIX **prior, Slong prior_ind, int number_of_classes, int first_time_through, - int do_the_omission, long *increase, int quit) + int do_the_omission, Slong *increase, int quit) { /* static MATRIX *u; */ static MATRIX *eigenvalues_imaginary, *eigenvalues_beta; @@ -2147,7 +2145,7 @@ return (TRUE); void count_them_cats (long number_of_vars, long *current_cat_total, long *currently_ordered, long *currently_numeric, - long *orderable_cats, MATRIX *c, long *increase) + long *orderable_cats, MATRIX *c, Slong *increase) { long var_ctr; @@ -2182,7 +2180,7 @@ int do_the_discriminant_thing (long permute_ctr, MATRIX *eigenval_ptr, MATRIX *eigenvec_ptr, MATRIX *original_w_inv_m_mat, MATRIX *prior, long i, long number_of_vars, int dimension, - long current_cat_total, int do_the_omission, long *increase) + long current_cat_total, int do_the_omission, Slong *increase) { static MATRIX *temp_w, *reduced_w, *w_ptr; static MATRIX *w_inv_m_mat = null_mat, @@ -2371,14 +2369,14 @@ return (TRUE); /*========================= get_sequence_of_solutions ===================*/ int get_sequence_of_solutions (long quit_dimension, long number_of_vars, long permute, long permute_len, long *permute_indices, - double improvement, long k_len, long *k, - long *cats_in_var, long *cum_cats_ptr, MATRIX *c, + double improvement, long k_len, Slong *k, + Slong *cats_in_var, Slong *cum_cats_ptr, MATRIX *c, MATRIX **original_eigenvectors, MATRIX **original_w_inv_m_mat, long *number_in_class, long xval_ctr, MATRIX *xval_result, long *xval_ceiling, - MATRIX *cost, MATRIX *prior, long prior_ind, long number_of_classes, - double *misclass_rate, int do_the_omission, long *increase, + MATRIX *cost, MATRIX *prior, Slong prior_ind, Slong number_of_classes, + double *misclass_rate, int do_the_omission, Slong *increase, long *once_out_always_out) { double biggest_permuted_eigenval, last_eigenvalue; @@ -2394,7 +2392,7 @@ int cat_ctr, var_ctr, k_ctr, permute_ctr; int inv_result, do_nn_result; int get_soln_result; MATRIX *margin_ptr; -long *return_classes = (long *) NULL; +Slong *return_classes = (Slong *) NULL; /* MATRIX *original_w_inv_m_mat; */ @@ -2603,12 +2601,13 @@ if (first_time_through) number_of_classes, "Misclass mat", REGULAR, ZERO_THE_MATRIX); ***/ - return_classes = &long_FALSE; - do_nn_result = do_nn (&long_FALSE, training, training, - c, k, &k_len, &long_TRUE, /* theyre_the_same */ + return_classes = &Slong_FALSE; + do_nn_result = do_nn (&Slong_FALSE, training, training, + c, k, &k_len, &Slong_TRUE, /* theyre_the_same */ SUB (best_eigenvector, 0L, 0L), cats_in_var, cum_cats_ptr, knots, cost, - prior, misclass_rate, NULL, return_classes, (Slong *) NULL, + &prior_ind, prior, misclass_rate, NULL, 
return_classes, + (Slong *) NULL, &xval_lower, &xval_upper, xval_indices, increase, &number_of_classes, &verbose); if (do_nn_result == FALSE) @@ -2945,11 +2944,11 @@ free (current_eigenvalue); /* free (am_i_in); */ get_a_solution ((long) NULL, (long *) NULL, (long) NULL, (long) NULL, - (MATRIX *) NULL, (MATRIX *) NULL, (long *) NULL, (long *) NULL, + (MATRIX *) NULL, (MATRIX *) NULL, (Slong *) NULL, (Slong *) NULL, (double **) NULL, (long *) NULL, (MATRIX *) NULL, (MATRIX *) NULL, (MATRIX *) NULL, (double) 0, classification, (long) NULL, (int) NULL, (long) NULL, (MATRIX **) NULL, (int) NULL, (int) NULL, (int) NULL, - (int) NULL, (long *) NULL, TRUE /* quit */); + (int) NULL, (Slong *) NULL, TRUE /* quit */); return (TRUE); diff --git a/src/utils.h b/src/utils.h index c9bf476..2353d3b 100644 --- a/src/utils.h +++ b/src/utils.h @@ -4,6 +4,12 @@ ** Includes handy defines for general use ** */ +#ifdef CALL_FROM_R +#define Slong int +#else +#define Slong long +#endif + #define TRUE 1 #define FALSE 0 #define NOT ! @@ -29,6 +35,7 @@ int alloc_some_doubles (double **my_array, unsigned long how_many); int alloc_some_floats (float **my_array, unsigned long how_many); int alloc_some_long_ptrs (long ***my_array, unsigned long how_many); int alloc_some_longs (long **my_array, unsigned long how_many); +int alloc_some_Slongs (Slong **my_array, unsigned long how_many); int alloc_some_u_longs (unsigned long **my_array, unsigned long how_many); int alloc_some_ints (int **my_array, unsigned long how_many); int alloc_some_char_ptrs (char ***my_array, unsigned long how_many);
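
Note on the pervasive long-to-Slong change in src/: R's .C() interface passes R integer vectors to C as int *, so an integer argument that arrives straight from .C() must be read through an int-sized pointer on builds where int and long differ (most 64-bit platforms). The utils.h block above switches the typedef on CALL_FROM_R, letting one set of sources serve both the R build and the standalone build. A minimal sketch of the pattern; the function below is illustrative, not part of the package:

    #ifdef CALL_FROM_R        /* defined when compiling for R */
    #define Slong int         /* .C() hands R integers over as int * */
    #else
    #define Slong long        /* the standalone build keeps plain long */
    #endif

    /* Matches an R-side call like
    ** .C ("largest_k", as.integer (k), as.integer (length (k)),
    **     ans = integer (1))
    */
    void largest_k (Slong *k, Slong *k_len, Slong *ans)
    {
    long j;                   /* purely internal variables can stay long */

    *ans = k[0];
    for (j = 1; j < *k_len; j++)
        if (k[j] > *ans)
            *ans = k[j];
    }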