In [28]:
import numpy as np
import rasterio as rio
import pickle
import pandas as pd

In [7]:
clusterImg = r"D:\Ishan\imageProcessing\TestData\clusterOut\cluster1.tif"
classImg = r"D:\Ishan\imageProcessing\TestData\clusterOut\classification.tif"
with rio.open(classImg) as dst:
    img = dst.read(1)
    meta = dst.meta
classes = np.unique(img)
print(classes)

[ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10.]


In [9]:
# Create the training data:

for c in classes:
    print(f"Class {c} number of values: {len(img[img==c])}")

Class 1.0 number of values: 73614
Class 2.0 number of values: 107450
Class 3.0 number of values: 312725
Class 4.0 number of values: 123118
Class 5.0 number of values: 64287
Class 6.0 number of values: 135106
Class 7.0 number of values: 6339
Class 8.0 number of values: 24803
Class 9.0 number of values: 92171
Class 10.0 number of values: 219227


In [10]:
img[img==c]

array([10., 10., 10., ..., 10., 10., 10.], dtype=float32)

In [11]:
simImgFile = r"D:\NewImage\Simulation\TestImages\simulatedImg.tif"
with rio.open(simImgFile) as dst:
    simImg = dst.read()
    simProfile = dst.profile
    simMeta = dst.meta
print(simImg.shape)

(30, 1073, 1080)


In [12]:
simProfile

{'driver': 'GTiff', 'dtype': 'float32', 'nodata': None, 'width': 1080, 'height': 1073, 'count': 30, 'crs': CRS.from_epsg(32644), 'transform': Affine(10.0, 0.0, 278160.0,
       0.0, -10.0, 2031040.0), 'blockysize': 1, 'tiled': False, 'interleave': 'pixel'}

In [33]:
def getTraining(classImg, classes,simImg, trainingperclass=10000):
    #Setting the dictionary
    d = {}
    d['Class'] = []
    for i in range(len(simImg)):
        d[f'Val_{i}'] = []
    for c in classes:
        print(f'At class {c}')
        idxs = np.where(classImg==c)
        iterLen = min(trainingperclass, len(idxs[0]))
        print()
        randId = np.random.choice(range(len(idxs[0])), size=iterLen)
        print(f'randId len: {len(randId)}')
        print(f'randID: \n', randId)
        for r in randId:
            arr = simImg[:,idxs[0][r], idxs[1][r]]
            d['Class'].append(c)
            for i in range(len(simImg)):
                d[f'Val_{i}'].append(arr[i])
    return d   

In [34]:
test = getTraining(classImg=img,classes = classes, simImg=simImg)

At class 1.0

randId len: 10000
randID: 
 [68384 14392 24449 ... 62628 27849 42575]
At class 2.0

randId len: 10000
randID: 
 [79453 85152 53200 ... 82201 74473 74963]
At class 3.0

randId len: 10000
randID: 
 [165710 145783 261071 ... 267115 101971  75097]
At class 4.0

randId len: 10000
randID: 
 [ 16456 121152  13000 ...   6528  90467  14123]
At class 5.0

randId len: 10000
randID: 
 [19383 27193 35746 ... 31590 18394 15774]
At class 6.0

randId len: 10000
randID: 
 [ 48134 132035  66495 ... 127906   4693  18424]
At class 7.0

randId len: 6339
randID: 
 [5687 1300 3902 ... 2451 5600 5827]
At class 8.0

randId len: 10000
randID: 
 [21553 20549  2983 ... 11291 14754 24533]
At class 9.0

randId len: 10000
randID: 
 [33292 85733 31653 ... 18674 86490 65213]
At class 10.0

randId len: 10000
randID: 
 [196542 127757  29997 ... 100554 161225 181280]


In [24]:
test

{'Class': [1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.

In [35]:
df = pd.DataFrame.from_dict(test)

In [36]:
df

Unnamed: 0,Class,Val_0,Val_1,Val_2,Val_3,Val_4,Val_5,Val_6,Val_7,Val_8,...,Val_20,Val_21,Val_22,Val_23,Val_24,Val_25,Val_26,Val_27,Val_28,Val_29
0,1.0,0.168797,0.223737,0.087149,0.021011,0.115075,0.133659,0.131639,0.159620,0.160711,...,0.449798,0.276141,0.345660,0.190155,0.313933,0.176596,0.181569,0.268440,0.075615,0.155677
1,1.0,0.133823,0.161933,0.174773,0.079059,0.171956,0.143667,0.140063,0.146458,0.226241,...,0.473287,0.304922,0.317095,0.331674,0.165397,0.235636,0.151813,0.173856,0.170013,0.140633
2,1.0,0.166877,0.029106,0.124516,0.000821,0.151321,0.142650,0.074401,0.147411,0.191937,...,0.390616,0.340140,0.329386,0.320708,0.174519,0.232855,0.108825,0.159534,0.265159,0.069844
3,1.0,0.095215,0.077284,0.050399,0.131350,0.156786,0.076988,0.101956,0.050834,0.171963,...,0.344522,0.356976,0.276964,0.330270,0.246276,0.202508,0.181816,0.192579,0.136432,0.102436
4,1.0,0.249078,0.186422,0.177304,0.101030,0.150675,0.082389,0.029109,0.053456,0.167437,...,0.484504,0.315664,0.322852,0.287815,0.225601,0.208571,0.269056,0.154683,0.185261,0.081046
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96334,10.0,0.047114,0.102382,0.155984,0.082493,0.012617,-0.032269,-0.052804,0.021222,0.053752,...,0.124320,0.020399,-0.046273,-0.014378,0.003939,-0.003052,0.055073,-0.001878,0.059143,0.023239
96335,10.0,0.102878,0.148602,0.026899,0.110019,0.034143,0.020740,0.115815,0.016327,-0.005146,...,-0.018702,-0.019068,0.007237,-0.056633,0.011864,0.032610,0.062218,-0.014626,0.009784,-0.069769
96336,10.0,0.128785,0.102305,0.038295,0.161823,-0.032684,0.035695,0.069019,0.043571,0.030575,...,0.022052,-0.012730,0.025893,0.022576,0.039961,0.017310,0.030089,0.036177,0.043079,0.035250
96337,10.0,0.016306,0.165943,0.092693,0.031509,0.096346,-0.017510,0.140102,0.040992,-0.004525,...,-0.045600,0.040619,0.110549,0.091878,0.000912,0.003553,-0.020223,0.001978,0.040785,0.058488


In [37]:
df.to_csv?

[1;31mSignature:[0m
[0mdf[0m[1;33m.[0m[0mto_csv[0m[1;33m([0m[1;33m
[0m    [0mpath_or_buf[0m[1;33m:[0m [1;34m'FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None'[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0msep[0m[1;33m:[0m [1;34m'str'[0m [1;33m=[0m [1;34m','[0m[1;33m,[0m[1;33m
[0m    [0mna_rep[0m[1;33m:[0m [1;34m'str'[0m [1;33m=[0m [1;34m''[0m[1;33m,[0m[1;33m
[0m    [0mfloat_format[0m[1;33m:[0m [1;34m'str | Callable | None'[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mcolumns[0m[1;33m:[0m [1;34m'Sequence[Hashable] | None'[0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mheader[0m[1;33m:[0m [1;34m'bool_t | list[str]'[0m [1;33m=[0m [1;32mTrue[0m[1;33m,[0m[1;33m
[0m    [0mindex[0m[1;33m:[0m [1;34m'bool_t'[0m [1;33m=[0m [1;32mTrue[0m[1;33m,[0m[1;33m
[0m    [0mindex_label[0m[1;33m:[0m [1;34m'IndexLabel | None'[0m [1;33m=[0m [1;32mNone[0m[1;33m,

In [38]:
savecsvfile = r"D:\NewImage\Simulation\TrainingData\test0.csv"
df.to_csv(path_or_buf=savecsvfile,index=False)

In [46]:
from .. import randomforest

ImportError: attempted relative import with no known parent package