## using NDVI for feature reduction

### NDVI = (NIR - GREEN) / (GREEN + NIR)

### NDVI = (BAND5 - BAND2) / (BAND2 + BAND5)

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import h5py
import pandas as pd
import scipy
from PIL import Image
from scipy import ndimage
import gdal
import os
import geopandas as gpd
from skimage import io
from skimage.io import imread
%matplotlib inline

In [2]:
np.random.seed(1)

In [3]:
from sklearn.datasets.samples_generator import make_blobs
from sklearn.cluster import KMeans



# reading the masked tif images and forming a dataset.

In [4]:
path = ["E:\\Internship_Harvesting\\Dataset\\Bathinda_Cropland\\Masked_images\\I1B2.tif",
       "E:\\Internship_Harvesting\\Dataset\\Bathinda_Cropland\\Masked_images\\I1B5.tif",
       "E:\\Internship_Harvesting\\Dataset\\Bathinda_Cropland\\Masked_images\\I2B2.tif",
       "E:\\Internship_Harvesting\\Dataset\\Bathinda_Cropland\\Masked_images\\I2B5.tif",
       "E:\\Internship_Harvesting\\Dataset\\Bathinda_Cropland\\Masked_images\\I3B2.tif",
       "E:\\Internship_Harvesting\\Dataset\\Bathinda_Cropland\\Masked_images\\I3B5.tif",
       "E:\\Internship_Harvesting\\Dataset\\Bathinda_Cropland\\Masked_images\\I4B2.tif",
       "E:\\Internship_Harvesting\\Dataset\\Bathinda_Cropland\\Masked_images\\I4B5.tif",
       "E:\\Internship_Harvesting\\Dataset\\Bathinda_Cropland\\Masked_images\\I5B2.tif",
       "E:\\Internship_Harvesting\\Dataset\\Bathinda_Cropland\\Masked_images\\I5B5.tif",
       "E:\\Internship_Harvesting\\Dataset\\Bathinda_Cropland\\Masked_images\\I6B2.tif",
       "E:\\Internship_Harvesting\\Dataset\\Bathinda_Cropland\\Masked_images\\I6B5.tif"]

In [5]:
I1B1 = gdal.Open(path[0])
I1B1

<osgeo.gdal.Dataset; proxy of <Swig Object of type 'GDALDatasetShadow *' at 0x000002893C18B1E0> >

In [6]:
I1B1.RasterYSize, I1B1.RasterXSize, I1B1.RasterCount

(8963, 8298, 1)

In [7]:
I1B1_array = I1B1.GetRasterBand(1).ReadAsArray()
I1B1_array

array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])

In [8]:
np.count_nonzero(np.isnan(I1B1_array)), np.count_nonzero(~np.isnan(I1B1_array))

(40574909, 33800065)

# creating an array which only consists the non-nan values indices

In [9]:
# I1B1_nan_index = np.argwhere(np.isnan(I1B1))

I1B1_non_nan_index = np.argwhere(~np.isnan(I1B1_array))

In [10]:
I1B1_non_nan_index

array([[   0, 5738],
       [   0, 5739],
       [   0, 5740],
       ...,
       [8962, 5788],
       [8962, 5789],
       [8962, 5790]], dtype=int64)

In [11]:
I1B1_array[0, 5738], I1B1_array[0, 5739], I1B1_array[8962, 5789], I1B1_array[8962, 5790]

(1200.0, 1182.0, 1130.0, 1144.0)

In [12]:
I1B1_non_nan_index.shape

(33800065, 2)

# we have indices now we will make the dataset using this indices

In [13]:
test_x = np.zeros((1, 33800065))

In [14]:
for p in path:
    print(p)
    
    image = gdal.Open(p)
    
    image_array = image.GetRasterBand(1).ReadAsArray()
    print(image_array.shape)
    sample = []
    
    for i in range(I1B1_non_nan_index.shape[0]):
        sample.append(image_array[I1B1_non_nan_index[i,0], I1B1_non_nan_index[i,1]])
        
    sample = np.array(sample)
    
    sample = sample.reshape(1, 33800065)
        
    test_x = np.concatenate((sample, test_x), axis = 0)
    
    print(test_x.shape)
    
    print("***************")
    

E:\Internship_Harvesting\Dataset\Bathinda_Cropland\Masked_images\I1B2.tif
(8963, 8298)
(2, 33800065)
***************
E:\Internship_Harvesting\Dataset\Bathinda_Cropland\Masked_images\I1B5.tif
(8963, 8298)
(3, 33800065)
***************
E:\Internship_Harvesting\Dataset\Bathinda_Cropland\Masked_images\I2B2.tif
(8963, 8298)
(4, 33800065)
***************
E:\Internship_Harvesting\Dataset\Bathinda_Cropland\Masked_images\I2B5.tif
(8963, 8298)
(5, 33800065)
***************
E:\Internship_Harvesting\Dataset\Bathinda_Cropland\Masked_images\I3B2.tif
(8963, 8298)
(6, 33800065)
***************
E:\Internship_Harvesting\Dataset\Bathinda_Cropland\Masked_images\I3B5.tif
(8963, 8298)
(7, 33800065)
***************
E:\Internship_Harvesting\Dataset\Bathinda_Cropland\Masked_images\I4B2.tif
(8963, 8298)
(8, 33800065)
***************
E:\Internship_Harvesting\Dataset\Bathinda_Cropland\Masked_images\I4B5.tif
(8963, 8298)
(9, 33800065)
***************
E:\Internship_Harvesting\Dataset\Bathinda_Cropland\Masked_images

In [15]:
test_x = test_x[:12, :]

In [16]:
test_x.shape

(12, 33800065)

In [17]:
test_x

array([[2929.5, 3312. , 3197.5, ..., 3368.5, 3290. , 3261. ],
       [1006.5, 1001. ,  955. , ...,  849.5,  850.5,  861. ],
       [2953. , 3448. , 3508. , ..., 3022. , 2969.5, 2969. ],
       ...,
       [1322.5, 1367. , 1398.5, ..., 1120. , 1125. , 1121. ],
       [2622. , 2757. , 2787. , ..., 2800. , 2755.5, 2731. ],
       [1200. , 1182. , 1184. , ..., 1139. , 1130. , 1144. ]])

## forming an array which consist NDVI of each image.

In [18]:
test_NDVI = np.zeros((6, 33800065))

In [19]:
for i in range(test_NDVI.shape[0]):
    
    test_NDVI[i, :] = (test_x[i*2+1, :] - test_x[i*2, :]) / (test_x[i*2+1, :] + test_x[i*2, :])

test_NDVI

array([[-0.48856707, -0.53582193, -0.54003612, ..., -0.59720247,
        -0.58918005, -0.58224163],
       [-0.46405553, -0.54861891, -0.54912784, ..., -0.50310868,
        -0.49164888, -0.4919598 ],
       [-0.40422345, -0.46852352, -0.45760234, ..., -0.3180593 ,
        -0.2977058 , -0.29591281],
       [-0.02903444, -0.03978422, -0.03464514, ..., -0.11183311,
        -0.11402314, -0.11167685],
       [-0.16138237, -0.15591232, -0.16904337, ..., -0.37691238,
        -0.37062937, -0.37163677],
       [-0.37205651, -0.39984768, -0.40367666, ..., -0.42168063,
        -0.41835028, -0.40954839]])

In [20]:
test_NDVI = test_NDVI.T

In [21]:
test_NDVI.shape

(33800065, 6)

# we have our test data, we will apply K-means unsupervised algorithm on this and capture details

# https://towardsdatascience.com/machine-learning-algorithms-part-9-k-means-example-in-python-f2ad05ed5203

# reference for the below mentioned code

In [22]:
kmeans = KMeans(n_clusters=2, init='k-means++', max_iter=50, n_init=5, random_state=0)
test_NDVI_predict = kmeans.fit_predict(test_NDVI)

In [23]:
np.unique(test_NDVI_predict)

array([0, 1])

In [24]:
test_NDVI_predict.shape, test_NDVI.shape

((33800065,), (33800065, 6))

In [25]:
test_NDVI_predict = test_NDVI_predict.reshape(33800065, 1)
test_NDVI_predict.shape

(33800065, 1)

In [26]:
I1B1_non_nan_index.shape

(33800065, 2)

In [27]:
I1B1_non_nan_index

array([[   0, 5738],
       [   0, 5739],
       [   0, 5740],
       ...,
       [8962, 5788],
       [8962, 5789],
       [8962, 5790]], dtype=int64)

In [28]:
result_index = np.concatenate((I1B1_non_nan_index, test_NDVI_predict), axis = 1)
result_index

array([[   0, 5738,    1],
       [   0, 5739,    1],
       [   0, 5740,    1],
       ...,
       [8962, 5788,    0],
       [8962, 5789,    0],
       [8962, 5790,    0]], dtype=int64)

In [29]:
result_index.shape

(33800065, 3)

In [47]:
result = np.zeros((8963, 8298))

In [48]:
result[:] = 128

In [49]:
result = result.astype("int")

In [50]:
for i in range(result_index.shape[0]):
    result[result_index[i,0], result_index[i,1]] = result_index[i, 2]

In [51]:
result

array([[128, 128, 128, ..., 128, 128, 128],
       [128, 128, 128, ..., 128, 128, 128],
       [128, 128, 128, ..., 128, 128, 128],
       ...,
       [128, 128, 128, ..., 128, 128, 128],
       [128, 128, 128, ..., 128, 128, 128],
       [128, 128, 128, ..., 128, 128, 128]])

In [52]:
np.unique(result)

array([  0,   1, 128])

In [53]:
# counting numbers of pixels having value = 0
np.count_nonzero(result == 0)

10394141

In [54]:
# counting numbers of pixels having value = 1
np.count_nonzero(result == 1)

23405924

In [55]:
result[result[:] == 1] = 255

In [56]:
np.unique(result)

array([  0, 128, 255])

In [57]:
# here 0 --> 0 during classification(minority)
# 255 --> 1 during classification(majority)
# 128 --> nan values (non-agri plus outside boundary)

In [58]:
# number of pixels belonging to majority class
np.count_nonzero(result == 255)

23405924

In [59]:
# number of pixels belonging to minority class
np.count_nonzero(result == 0)

10394141

In [60]:
# number of pixels belonging to non-agriculture class
np.count_nonzero(result == 128)

40574909

In [61]:
result = result.astype("uint8")

In [62]:
result.shape

(8963, 8298)

In [63]:
io.imsave("E:\\Internship_Harvesting\\Unsupervised_Result_Image\\NDVI_green_kmeans_result.jpg", result)