## croplearn.ipynb

In [1]:
import os
import sys

# Make the modules in ../cropseg/ importable from this notebook
# (presumably where mlhubdata, grounddata, satellitedata, ... live).
sys.path.insert(0,'../cropseg/')

# Root folder of the local dataset copy. It can be overridden through the
# CROPSEG_DATADIR environment variable so the notebook is not tied to one
# user's home directory; the original location remains the default.
# os.path.join(..., "") guarantees the trailing separator that the plain
# string concatenations building file paths from these entries rely on.
datasetroot = os.path.join(os.environ.get("CROPSEG_DATADIR","/home/ucfaab0/Desktop/su_african_crops_ghana/"),"")

# Paths, collection names and file names describing the dataset layout.
datasetinfo = { "datadir":datasetroot,
                "metadatadir":f"{datasetroot}metadata/",
                "dataset":"su_african_crops_ghana",
                "groundcollection":"su_african_crops_ghana_labels",
                "s1collection":"su_african_crops_ghana_source_s1",
                "s2collection":"su_african_crops_ghana_source_s2",
                "groundlabels":"su_african_crops_ghana_labels_id.json",
                "groundmetadata":"su_african_crops_ghana_labels.json",
                "s1metadata":"su_african_crops_ghana_source_s1.json",
                "s2metadata":"su_african_crops_ghana_source_s2.json",
                "groundname":"labels.tif",
                "s1imagename":"source.tif",
                "s2imagename":"source.tif",
                "s2maskname":"cloudmask.tif",
                "groundshape":[64,64],
                "s1shape":[64,64],
                "s2shape":[64,64],
                "extension":"tif"
              }

# S1 bands; "idx" is presumably the layer position inside each source image
# (TODO confirm against satellitedata).
s1bands = [
            {"band":"vv","idx":0},
            {"band":"vh","idx":1},
          ]

# S2 bands with their "wavelength" (presumably the central wavelength in nm).
s2bands = [
            {"band":"blue","wavelength":490,"idx":0},
            {"band":"green","wavelength":560,"idx":1},
            {"band":"red","wavelength":665,"idx":2},
            {"band":"rded1","wavelength":705,"idx":3},
            {"band":"rded2","wavelength":740,"idx":4},
            {"band":"rded3","wavelength":783,"idx":5},
            {"band":"nir","wavelength":842,"idx":6},
            {"band":"rded4","wavelength":865,"idx":7},
            {"band":"swir1","wavelength":1610,"idx":8},
            {"band":"swir2","wavelength":2190,"idx":9}
          ]

# Names of the derived indices requested from the satellite data loader.
s1indices = ["vhvv"]
s2indices = ["ndvi","rdedci","ndmi"]

from mlhubdata import loadjson

# Load the four JSON metadata files; each file name is looked up in
# datasetinfo and appended to the metadata directory.
groundlabels,groundmetadata,s1metadata,s2metadata = (
    loadjson(f'{datasetinfo["metadatadir"]}{datasetinfo[key]}')
    for key in ("groundlabels","groundmetadata","s1metadata","s2metadata")
)

# Tile ids that are excluded from processing.
skiplist = [
    "001268",
    "002382",
    "003146",
    "003803",
]

In [2]:
#####
# Tunable parameters of the feature extraction.
erosioniterations = 0  # forwarded to erodedfieldmasks for every tile
ncoeff = 15            # number of coefficients requested from dct() per S1 series
#####

import numpy
from osgeo import gdal
import scipy.interpolate

from grounddata import erodedfieldmasks
from mlhubdata import get_tileitems_from_collection
from satellitedata import load_satellite_data_as_array
from satellitedata import load_satellite_cloudmasks_as_array
from compression import dct
from compression import dct_fittingconditions
from compression import doublelogistic
from compression import doublelogistic_fittingconditions
from compression import doublelogistic_parameterconditions
from miscellaneous import fusedataandweight
from dates import datepositions

# Builds one feature row per labelled pixel:
#   [crop label, double-logistic parameters of every S2 index..., S1 DCT coefficients...]
# A pixel is kept only when every S2 index and the S1 series pass their checks.

# Number of parameters of one double-logistic fit (A1, A2, x01, k1, x02, k2),
# matching the six bounded parameters below.
dbllogsize = 6

# Accepted parameter ranges of the double-logistic fit, one entry per element
# of s2indices and in the same order.
dbllogbounds = [
    dict(A1min=0.11,A1max=0.24,A2min=0.26,A2max=0.63,x01min=0.34,x01max=0.88,k1min=7.65,k1max=54.02,x02min=0.61,x02max=1.08,k2min=-43.89,k2max=-16.52),
    dict(A1min=0.17,A1max=0.41,A2min=0.52,A2max=1.80,x01min=0.34,x01max=0.86,k1min=9.73,k1max=60.25,x02min=0.59,x02max=1.07,k2min=-45.27,k2max=-10.78),
    dict(A1min=-0.22,A1max=-0.09,A2min=0.15,A2max=0.49,x01min=0.34,x01max=0.96,k1min=7.88,k1max=84.59,x02min=0.62,x02max=1.08,k2min=-101.88,k2max=-25.03),
]
if len(dbllogbounds) != len(s2indices):
    raise ValueError("dbllogbounds needs exactly one entry per element of s2indices")

# Layer of the (first) S1 index inside each S1 image. Index layers follow the
# band layers, as the S2 lookup below (len(s2bands) + n) shows for S2; the S1
# array comes from the same loader, so the same layout is assumed here.
s1position = len(s1bands)

# Length of a complete feature vector before the label is prepended.
s2featurelength = dbllogsize*len(s2indices)
featurelength = s2featurelength + ncoeff

nrows,ncols = datasetinfo["groundshape"]

coeffs = []
for i,item in enumerate(groundmetadata):
    # The tile id is the last "_"-separated token of the item id.
    tileid = item["id"].split("_")[-1]
    print(i+1,"/",len(groundmetadata),tileid,end="\r")
    if tileid in skiplist:
        continue

    # Ground-truth raster: first band holds the crop labels, 0 is excluded.
    tilepath = f'{datasetinfo["datadir"]}{datasetinfo["groundcollection"]}/{datasetinfo["groundcollection"]}_{tileid}/{datasetinfo["groundname"]}'
    tilehandle = gdal.Open(tilepath)
    if tilehandle is None:
        raise RuntimeError(f"could not open ground-truth raster {tilepath}")
    tiledata = numpy.array(tilehandle.GetRasterBand(1).ReadAsArray(),dtype="int")
    tilehandle = None  # dropping the reference releases the GDAL dataset
    crops = numpy.unique(tiledata[tiledata != 0])
    # NOTE(review): fieldmasks[j] is paired with crops[j] below, i.e. the masks
    # are assumed to come back in the order of the sorted unique labels - confirm.
    fieldmasks = erodedfieldmasks(tiledata,erosioniterations)

    s1items,s1dates = get_tileitems_from_collection(tileid,s1metadata,datasetinfo,verbose=0)
    s1data = load_satellite_data_as_array(s1items,s1bands,s1indices,datasetinfo,datasetinfo["s1shape"])
    s2items,s2dates = get_tileitems_from_collection(tileid,s2metadata,datasetinfo,verbose=0)
    s2data = load_satellite_data_as_array(s2items,s2bands,s2indices,datasetinfo,datasetinfo["s2shape"],rr=4096.)
    s2cloudmasks = load_satellite_cloudmasks_as_array(s2items,datasetinfo,datasetinfo["s2shape"])

    for j in range(len(fieldmasks)):
        for k in range(nrows):
            # Columns run over the second dimension of the ground shape.
            for m in range(ncols):
                if fieldmasks[j][0][k][m] == 0:
                    continue

                # --- S2: double-logistic parameters of every index ---
                coeff = []
                for n in range(len(s2indices)):
                    position = len(s2bands) + n
                    # Keep only the acquisitions whose cloud mask is 0 at this pixel.
                    series = []
                    seriesdates = []
                    for p in range(len(s2items)):
                        if s2cloudmasks[p][k][m] == 0:
                            series.append(s2data[p][position][k][m])
                            seriesdates.append(s2dates[p])
                    # Any failing index makes the pixel unusable (a complete
                    # vector needs all of them), so stop fitting right away.
                    if len(seriesdates) == 0:
                        break
                    series = numpy.array(series)
                    seriesdates = numpy.array(datepositions(seriesdates))
                    if not doublelogistic_fittingconditions(seriesdates,series,durationmin=0.9,gapmax=0.33,amplitudemin=0.25):
                        break
                    # Smooth the series with a cubic spline sampled at 100 points,
                    # combine it with the raw observations and fit the curve.
                    spline = scipy.interpolate.UnivariateSpline(seriesdates,series,s=0.01,k=3)
                    splinedates = numpy.linspace(numpy.min(seriesdates),numpy.max(seriesdates),100)
                    fuseddates,fusedmean,fusedweights = fusedataandweight(seriesdates,series,splinedates,spline(splinedates))
                    dbllog = doublelogistic(fuseddates,fusedmean,weights=fusedweights,bound=True,epsilon=0.1)
                    if not doublelogistic_parameterconditions(dbllog,**dbllogbounds[n]):
                        break
                    coeff = coeff + list(dbllog)
                if len(coeff) != s2featurelength:
                    continue

                # --- S1: DCT coefficients of the index series ---
                s1series = [s1data[n][s1position][k][m] for n in range(len(s1items))]
                if len(s1series) > 0:
                    s1series = numpy.array(s1series)
                    s1seriesdates = numpy.array(datepositions([s1dates[n] for n in range(len(s1items))]))
                    # Reject series containing NaN or values outside (-15, 0).
                    if not numpy.isnan(numpy.sum(s1series)) and numpy.min(s1series) > -15.0 and numpy.max(s1series) < 0.0:
                        if dct_fittingconditions(s1seriesdates,s1series,minduration=0.9,maxgap=0.2):
                            coeff = coeff + list(dct(s1series,ncoeff))

                # Only complete feature vectors become samples; label goes first.
                if len(coeff) == featurelength:
                    coeff.insert(0,crops[j])
                    coeffs.append(coeff)

# Feature matrix: column 0 is the crop label, the remaining columns are features.
data = numpy.array(coeffs)

15 / 4040 000282

  return (data[nir] - data[red]) / (data[nir] + data[red])
  return (data[nir] / data[reded1]) - 1
  return (data[nir] - data[swir1]) / (data[nir] + data[swir1])


116 / 4040 000430

  return (data[nir] / data[reded1]) - 1


159 / 4040 000249

  return A1+A2*(1./(1+numpy.exp(-k1*(x-x01)))-1./(1+numpy.exp(k2*(x-x02))))


1072 / 4040 000927

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.


4040 / 4040 004007 003009 0032154040 003729

In [57]:
# Build a shuffled, class-balanced train/test split from the feature matrix
# `data` (column 0 = crop label, remaining columns = features).
splitseed = 0     # seed of the shuffles, so the split is reproducible
nclasses = 4      # labels after the shift below are 0 .. nclasses-1
nperclass = 1945  # at most this many samples are kept per class
ntest = 200       # number of samples held out for testing

# A local generator keeps the split reproducible without touching numpy's
# global random state.
splitrng = numpy.random.RandomState(splitseed)

d = numpy.copy(data)
splitrng.shuffle(d)
# Shift the labels so that they start at 0 (presumably 1..4 in the rasters),
# as required by the sparse categorical loss used for training.
d[:,0] = d[:,0] - 1
# Truncate every class to the same maximum size, then mix the classes again.
d = numpy.concatenate([d[d[:,0] == label][:nperclass] for label in range(nclasses)])
splitrng.shuffle(d)
# NOTE(review): rows are individual pixels, so pixels of one field or tile can
# end up in both subsets; consider splitting by tile for an independent test.
xtrain, ytrain = d[ntest:,1:],d[ntest:,0]
xtest, ytest = d[:ntest,1:],d[:ntest,0]

In [63]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing

# Seed TensorFlow so that weight initialisation is repeatable between runs
# (full determinism may additionally depend on the backend and hardware).
tf.random.set_seed(0)

# Fully connected classifier: ReLU hidden layers of growing width followed by
# a 4-unit output layer producing one logit per crop class.
# NOTE(review): 2056 breaks the doubling pattern (2048 expected?) - it is left
# unchanged here because altering it would change the trained model.
hiddenwidths = [128,256,512,1024,2056]
model = tf.keras.Sequential(
    [layers.Dense(width,activation='relu') for width in hiddenwidths]
    + [layers.Dense(4)]
)
# from_logits=True because the output layer has no softmax activation.
model.compile(optimizer='Adam',loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),metrics=['accuracy'])
model.fit(xtrain,ytrain,epochs=10)
model.summary()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "sequential_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_71 (Dense)             (None, 128)               4352      
_________________________________________________________________
dense_72 (Dense)             (None, 256)               33024     
_________________________________________________________________
dense_73 (Dense)             (None, 512)               131584    
_________________________________________________________________
dense_74 (Dense)             (None, 1024)              525312    
_________________________________________________________________
dense_75 (Dense)             (None, 2056)              2107400   
_________________________________________________________________
dense_76 (Dense)             (None, 4)                 8228      
Total pa

In [64]:
import sklearn.metrics

# Loss and accuracy on the held-out samples (reported by Keras, verbose=1).
model.evaluate(xtest,ytest,verbose=1)

# The predicted class is the position of the largest logit of each sample.
predictions = model.predict(xtest)
predict = numpy.argmax(predictions,axis = 1)
true = numpy.array(ytest,dtype=int)

# Per-class F1, support-weighted F1 and Cohen's kappa of the predictions.
f1perclass = sklearn.metrics.f1_score(true,predict,average=None)
f1weighted = sklearn.metrics.f1_score(true,predict,average="weighted")
kappa = sklearn.metrics.cohen_kappa_score(true,predict)
print(f1perclass,f1weighted)
print(kappa)

[0.55769231 0.4        0.61682243 0.44680851] 0.5003536105969989
0.3490534706077716


In [65]:
import sklearn.dummy
import sklearn.metrics  # imported here as well so the cell does not rely on an earlier one

# Chance-level baseline: every test sample gets a class drawn uniformly at
# random. random_state pins the draw so the baseline scores are repeatable.
dummy = sklearn.dummy.DummyClassifier(strategy="uniform",random_state=0)
dummy.fit(xtrain,ytrain)
predict = dummy.predict(xtest)
true = numpy.array(ytest,dtype=int)

# Same metrics as for the trained model, plus plain accuracy.
print(sklearn.metrics.accuracy_score(true,predict))
print(sklearn.metrics.f1_score(true,predict,average=None),sklearn.metrics.f1_score(true,predict,average="weighted"))
print(sklearn.metrics.cohen_kappa_score(true,predict))

0.185
[0.17582418 0.22222222 0.17307692 0.16494845] 0.1843193547574991
-0.08543650529400004
