In [19]:
require(rgdal)
require(rgeos)

# read Landkreis Shapefile
lk <- readOGR("data/Kreisgrenzen_2019.shp")
#str(lk@data)

# compute centroid per polygon
cen <- gCentroid(lk, byid=TRUE)
#str(cen)

# make table with coordinates and Landkreis ID
lk_ <- cbind(as.data.frame(cen@coords), RS=as.integer(gsub("^0", "", lk$RS)))
str(lk_)

OGR data source with driver: ESRI Shapefile 
Source: "/home/frantzda/cor/covid19/data/Kreisgrenzen_2019.shp", layer: "Kreisgrenzen_2019"
with 401 features
It has 19 fields
'data.frame':	401 obs. of  3 variables:
 $ x : num  9.44 10.13 10.73 9.98 9.11 ...
 $ y : num  54.8 54.3 53.9 54.1 54.1 ...
 $ RS: int  1001 1002 1003 1004 1051 1053 1054 1055 1056 1057 ...


# GeoService Link
https://services7.arcgis.com/mOBPykOjAyBO2ZKk/arcgis/rest/services/RKI_COVID19/FeatureServer/0/query?outFields=*&where=1%3D1

# GeoJSON link
https://opendata.arcgis.com/datasets/dd4580c810204019a7b8eb3e0b329dd6_0.geojson

In [34]:
# read COVID-19 statistics
rki <- read.csv("data/RKI_COVID19.csv")
#str(rki)

# add new column for Landkreis ID
rki <- cbind(rki, RS=as.integer(rki$IdLandkreis))
#str(rki)

# dissolve Berlin districts
rki$RS[which(rki$RS > 11000 & rki$RS < 12000)] <- 11000L

# fix -1 cases
rki$AnzahlFall[which(rki$AnzahlFall < 1)] <- 1

# add new column with date
rki <- cbind(rki, date=as.POSIXct(rki$Meldedatum))
str(rki)


'data.frame':	138307 obs. of  20 variables:
 $ FID                 : int  7372424 7372425 7372426 7372427 7372428 7372429 7372430 7372431 7372432 7372433 ...
 $ IdBundesland        : int  1 1 1 1 1 1 1 1 1 1 ...
 $ Bundesland          : Factor w/ 16 levels "Baden-Württemberg",..: 15 15 15 15 15 15 15 15 15 15 ...
 $ Landkreis           : Factor w/ 412 levels "LK Ahrweiler",..: 336 336 336 336 336 336 336 336 336 336 ...
 $ Altersgruppe        : Factor w/ 7 levels "A00-A04","A05-A14",..: 3 3 3 3 3 3 3 3 3 3 ...
 $ Geschlecht          : Factor w/ 3 levels "M","unbekannt",..: 1 1 1 1 1 1 1 1 1 3 ...
 $ AnzahlFall          : num  1 1 1 1 1 1 1 1 1 1 ...
 $ AnzahlTodesfall     : int  0 0 0 0 0 0 0 0 0 0 ...
 $ Meldedatum          : Factor w/ 95 levels "2020/01/28 00:00:00",..: 31 36 36 38 44 52 54 55 57 31 ...
 $ IdLandkreis         : int  1001 1001 1001 1001 1001 1001 1001 1001 1001 1001 ...
 $ Datenstand          : Factor w/ 1 level "18.05.2020, 00:00 Uhr": 1 1 1 1 1 1 1 1 1 1 ...
 $ Neue

In [42]:
# compile final table
df <- data.frame(X=0, 
                 Y=0, 
                 T=rep(rki$date,       rki$AnzahlFall),
                 LK=rep(rki$Landkreis, rki$AnzahlFall),
                 ID=rep(rki$RS,        rki$AnzahlFall))
pos <- sapply(df$ID, function(x)which(x==lk_$RS))
df$X <- lk_$x[pos]
df$Y <- lk_$y[pos]
str(df)

'data.frame':	174707 obs. of  5 variables:
 $ X : num  9.44 9.44 9.44 9.44 9.44 ...
 $ Y : num  54.8 54.8 54.8 54.8 54.8 ...
 $ T : POSIXct, format: "2020-03-14" "2020-03-19" ...
 $ LK: Factor w/ 412 levels "LK Ahrweiler",..: 336 336 336 336 336 336 336 336 336 336 ...
 $ ID: int  1001 1001 1001 1001 1001 1001 1001 1001 1001 1001 ...


In [47]:
# write table
write.csv(df, "data/covid19-deu.csv", row.names=FALSE)