# MB2 Data Cleaning and Merging

In [3]:
library(tidyverse)
library(RStoolbox)
library(dplyr)

-- [1mAttaching packages[22m --------------------------------------- tidyverse 1.3.0 --

[32mv[39m [34mggplot2[39m 3.3.2     [32mv[39m [34mpurrr  [39m 0.3.4
[32mv[39m [34mtibble [39m 3.0.1     [32mv[39m [34mdplyr  [39m 1.0.0
[32mv[39m [34mtidyr  [39m 1.1.0     [32mv[39m [34mstringr[39m 1.4.0
[32mv[39m [34mreadr  [39m 1.3.1     [32mv[39m [34mforcats[39m 0.5.0

-- [1mConflicts[22m ------------------------------------------ tidyverse_conflicts() --
[31mx[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31mx[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



In [4]:
setwd("C:/Users/s1526/Dropbox/EAGLE_Assessments/MB3_Geoinfo/MB3_FINAL")

## 1. Data Cleaning

### 1.1. Read data

In [103]:
Chris_data <- read_csv(file.path("Data", "Chris_Samples", "Chris_Samples.csv"))
Antonio_data <- read_csv(file.path("Data", "Collection_Antonio", "samples_Antonio.csv"))
Diego_data <- read_csv(file.path("Data", "Collection_Diego", "collection_Diego.csv"))
Kevin_data <- read_csv(file.path("Data", "Collection_kevin", "Collection_kevin.csv"))
Lui_data <- read_csv(file.path("Data", "Luis_FieldSurvey", "Field Survey.csv"))
Nils_data <- read_csv(file.path("Data", "Nils_Point_images", "Nils_samples.csv"))
Jakob_data <- read_csv(file.path("Data", "Samples_Jakob", "Samples_Jakob.csv"))
KM_data <- read_csv(file.path("Data", "Samples_KM", "sample_KM.csv"))
Maurius_data <- read_csv(file.path("Data", "Samples_Maurius", "Samples_Marius.csv"))
Sofia_data <- read_csv(file.path("Data", "Samples_Sofia", "samples_Sofia.csv"))

Parsed with column specification:
cols(
  ID = [32mcol_double()[39m,
  Latitude = [32mcol_double()[39m,
  Longitude = [32mcol_double()[39m,
  ClassID = [31mcol_character()[39m,
  Class = [31mcol_character()[39m,
  Name = [31mcol_character()[39m,
  Note = [31mcol_character()[39m
)

Parsed with column specification:
cols(
  ID = [32mcol_double()[39m,
  latitude = [32mcol_double()[39m,
  longitude = [32mcol_double()[39m,
  ClassID = [32mcol_double()[39m,
  Class = [31mcol_character()[39m,
  Surveyor = [31mcol_character()[39m,
  Notes = [31mcol_character()[39m
)

Parsed with column specification:
cols(
  ID = [32mcol_double()[39m,
  Latitude = [32mcol_double()[39m,
  Longitude = [32mcol_double()[39m,
  Class_id = [32mcol_double()[39m,
  Notes = [31mcol_character()[39m,
  Class = [31mcol_character()[39m,
  DMS = [31mcol_character()[39m,
  UTM = [31mcol_character()[39m,
  MGRS = [31mcol_character()[39m,
  Photo = [32mcol_double()[39m,
  Comment

### 1.2.  Batch rename tables

Standard for dataframe names:

ID, Latitude, Longitude, Class_ID

In [104]:
colnames_vec <- c("ID", "Latitude", "Longitude", "Class_ID")

colnames_vec

In [105]:
colnames(Chris_data)[1:4] <- colnames_vec
colnames(Antonio_data)[1:4] <- colnames_vec
colnames(Diego_data)[1:4] <- colnames_vec
colnames(Kevin_data)[1:4] <- colnames_vec
colnames(Lui_data)[1:4] <- colnames_vec
colnames(Nils_data)[1:4] <- colnames_vec
colnames(Jakob_data)[1:4] <- colnames_vec
colnames(KM_data)[1:4] <- colnames_vec
colnames(Maurius_data)[1:4] <- colnames_vec
colnames(Sofia_data)[1:4] <- colnames_vec

In [107]:
All_data <- list(Chris_data, Antonio_data, Diego_data, Kevin_data, Lui_data, Nils_data, Jakob_data, KM_data, Maurius_data, Sofia_data)

All_data[7]

ID,Latitude,Longitude,Class_ID,Notes,Class,DMS,UTM,MGRS,Photo,Comments
<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>
136,49.78206,9.97465,1,JA,Built-up,49° 46' 55.41'' N | 9° 58' 28.74'' E,570165.0E 5514855.0N 32U,32UNA 70165 14855,,
93,49.78096,9.976711,1,JA,Built-up,49° 46' 51.46'' N | 9° 58' 36.16'' E,570315.0E 5514735.0N 32U,32UNA 70315 14735,,
88,49.78178,9.975061,1,JA,Built-up,49° 46' 54.42'' N | 9° 58' 30.22'' E,570195.0E 5514825.0N 32U,32UNA 70195 14825,,
66,49.7837,9.972182,1,JA,Built-up,49° 47' 1.31'' N | 9° 58' 19.86'' E,569985.0E 5515035.0N 32U,32UNA 69985 15035,,
42,49.78232,9.975905,1,JA,Built-up,49° 46' 56.34'' N | 9° 58' 33.26'' E,570255.0E 5514885.0N 32U,32UNA 70255 14885,,
94,49.78237,9.969655,3,JA,Grassland,49° 46' 56.53'' N | 9° 58' 10.76'' E,569805.0E 5514885.0N 32U,32UNA 69805 14885,,
117,49.7818,9.972978,1,JA,Built-up,49° 46' 54.48'' N | 9° 58' 22.72'' E,570045.0E 5514825.0N 32U,32UNA 70045 14825,,
91,49.78277,9.985499,1,JA,Built-up,49° 46' 57.99'' N | 9° 59' 7.8'' E,570945.0E 5514945.0N 32U,32UNA 70945 14945,,
141,49.78149,9.977556,1,JA,Built-up,49° 46' 53.37'' N | 9° 58' 39.2'' E,570375.0E 5514795.0N 32U,32UNA 70375 14795,,
60,49.78319,9.968005,3,JA,Grassland,49° 46' 59.49'' N | 9° 58' 4.82'' E,569685.0E 5514975.0N 32U,32UNA 69685 14975,,


## 2. Data Merging

### 2.1. Grepping 1st 4 cols of each dataframe

In [132]:
Merge_df <- if (colnames() = "ID", "Latitude", "Longitude", "Class_ID"){
    cbind(Chris_data, Antonio_data, Diego_data, Kevin_data, Lui_data, Nils_data, Jakob_data, KM_data, Maurius_data, Sofia_data)
}

head(Merge_df)

ERROR: Error in parse(text = x, srcfile = src): <text>:1:28: unexpected '='
1: Merge_df <- if (colnames() =
                               ^
