# Setup data

In [8]:
devtools:::load_all("../occcompaRe")
devtools:::load_all(".")

source("R_data_preparation//helper_dataPreparation.R")
ls()

Loading occCompare
Loading occCompareExp


## Reference Data

Load the original reference data (this also creates the file "Kulturen_Ids_nPix_mainTypes.txt"):

In [9]:
addForest=TRUE
orig <- dPrep_get_ids_orig(dir="../data/raster", buf=20, rmUn=FALSE, addForest=addForest)

Create a mapping data frame (lut_reclass) between the original IDs and the main crop types that can be used for reclassification:

In [10]:
lut_origUtypes <- read.csv("data/raster/Kulturen_Ids_nPix_mainTypes.txt")
lut_origUtypes

X,Nutzungen,Code,type,nPix
1,Sonstiges,3,others,0
2,Koernermais,10,corn,2374
3,Winterfutterweizen,11,wcereals,58769
4,Winterfuttergerste,12,wcereals,42988
5,Winterfutterroggen,13,wcereals,7489
6,Sommerhafer,14,scereals,9860
7,Wintertriticale,15,wcereals,39713
8,sonstigesGetreide,16,others,683
9,Silomais,17,corn,145260
10,Wintermenggetreide,18,wcereals,1383


In [11]:
lut_origUtypes <- lut_origUtypes[lut_origUtypes$nPix!=0, ]
# IDs/Labels
classnames <- c("unlabeled", "corn", "oilseeds", "pgrass", 
                "root", "scereals", "wcereals", "bforest", "cforest", "others")
lut_reclass <- data.frame(id=0:(length(classnames)-1), 
                  name=classnames, oldId=NA)
lut_reclass

id,name,oldId
0,unlabeled,
1,corn,
2,oilseeds,
3,pgrass,
4,root,
5,scereals,
6,wcereals,
7,bforest,
8,cforest,
9,others,


In [12]:
for (i in 1:nrow(lut_reclass))
  lut_reclass[i, "oldId"] <- paste(sort(
    lut_origUtypes$Code[lut_origUtypes$type==lut_reclass$name[i]]),
    collapse=",")
lut_reclass

id,name,oldId
0,unlabeled,87203206
1,corn,1017
2,oilseeds,212223
3,pgrass,757793
4,root,6167
5,scereals,14263334363738
6,wcereals,11121315182427
7,bforest,241
8,cforest,242
9,others,1631323543576263647071727374768385175185202204205


Reclassify the original ids to the new classes:

In [13]:
ref <- list(ids=dPrep_reclassify(orig$ids, lut=lut_reclass))
ref$cells <- orig$cells
ref$freq <- table(ref$ids)
ref$lut <- lut_reclass[, 1:2]

Get the field information:

In [14]:
# Format data as required for experiments
ref$fids <- raster("data/raster/agri_fid")[ref$cells]
ref$fids[ref$ids==0] <- 0
fidsize <- as.numeric(table(ref$fids))
filen <- lapply(ref$lut$id, function(i) {
  tbl <- table(ref$fids[ref$ids==i])
  list(ufids=as.numeric(names(tbl)),
       len=as.numeric(tbl))})
class_stats <- data.frame(name=ref$lut$name, 
                          id=ref$lut$id,
                          nSmpls=sapply(filen, function(x) sum(x$len)), 
                          nFlds=sapply(filen, function(x) length(x$len)))
class_stats

name,id,nSmpls,nFlds
unlabeled,0,4762912,1
corn,1,147634,1336
oilseeds,2,39816,315
pgrass,3,469643,5685
root,4,8722,87
scereals,5,48701,535
wcereals,6,201480,1934
bforest,7,7721,1
cforest,8,7750,1
others,9,172989,2028


## Image Data

We used four RapidEye (RE) acquisitions and TerraSAR-X (TSX) acquisitions from 2011.

In [15]:
re20 <- brick("data/raster/re_20bands")[]
re20 <- re20[ref$cells, ]; gc(verbose=FALSE)

Unnamed: 0,used,(Mb),gc trigger,(Mb).1,max used,(Mb).2
Ncells,1038658,55.5,3654118,195.2,7250569,387.3
Vcells,82323614,628.1,375783573,2867.1,469326403,3580.7


In [16]:
tsx <- brick("data/raster/tsx139")[]
tsx <- tsx[ref$cells, ]; gc(verbose=FALSE)

Unnamed: 0,used,(Mb),gc trigger,(Mb).1,max used,(Mb).2
Ncells,1038947,55.5,3654118,195.2,7250569,387.3
Vcells,152732196,1165.3,433043475,3303.9,489421122,3734.0


In [17]:
re20Utsx <- cbind(re20, tsx); gc(verbose=FALSE)

Unnamed: 0,used,(Mb),gc trigger,(Mb).1,max used,(Mb).2
Ncells,1038929,55.5,3654118,195.2,7250569,387.3
Vcells,340487867,2597.8,519732170,3965.3,489421122,3734.0


In [18]:
colnames(re20Utsx)

Check/fin seeds such that min. size of samples per class >= 1500 samples:

In [19]:
# ----------------------------------------------------------
# TRAIN, TEST DATA SPLIT SEEDS
nMin <- 1500
pTr <- 0.5
seeds <- numeric(100)
seed <- 0
cat("Adding seeds: ")

Adding seeds: 

In [None]:
while (any(seeds==0)) {
  seed <- seed+1
  tbl <- table(ref$ids[get_tr_fids_idx(ref$ids, ref$fids, pTr, seed=seed)])
  if (min(tbl)>=nMin) {
    cat(seed, ".")
    seeds[which(seeds==0)[1]] <- seed
  }
}

Save all the data required for running the experiments:

In [29]:
outdir <- "data/rdata_agri6cl_tryedToReprod"
if (addForest)
    outdir <- paste0(outdir, "Uforest")
dir.create(outdir)
# ----------------------------------------------------------
# Save
saveRDS(ref$cells, paste0(outdir, "/cells.rds"))
saveRDS(ref$fids, paste0(outdir, "/fids.rds"))
saveRDS(fidsize, paste0(outdir, "/fidsize.rds"))
saveRDS(filen, paste0(outdir, "/filen.rds"))
saveRDS(ref$ids, paste0(outdir, "/ids.rds"))
saveRDS(re20, paste0(outdir, "/re.rds"))
saveRDS(tsx, paste0(outdir, "/tsx.rds"))
saveRDS(re20Utsx, paste0(outdir, "/reUtsx.rds"))
saveRDS(seeds, paste0(outdir, "/seeds.rds"))
saveRDS(class_stats, paste0(outdir, "/class_stats.rds"))

In dir.create(outdir): 'data\rdata_agri6clUforest' existiert bereits

In [30]:
class_stats # here

name,id,nSmpls,nFlds
unlabeled,0,4762912,1
corn,1,147634,1336
oilseeds,2,39816,315
pgrass,3,469643,5685
root,4,8722,87
scereals,5,48701,535
wcereals,6,201480,1934
bforest,7,7721,1
cforest,8,7750,1
others,9,172989,2028
