In [4]:
from scipy.stats import expon
from sklearn import svm
from sklearn.model_selection import RandomizedSearchCV
import pickle
import numpy as np
import geopandas as gpd
from geopandas import GeoDataFrame
import matplotlib
from matplotlib import pyplot

In [5]:
def train(X, Y):
    """
    Train classification algorithm.
    
    Train the Support Vector Machine classification algorithm using the
    specified fields. 

    Parameters
    ----------
    X: numpy 2D array
        A 2D numpy array where there is one row for each segment and each
        column represents an attribute of the segments. 

    Y: numpy 1D array
        A 1D numpy array equal in length to the number of records in segments.
        The single column contains actual class values for each of the
        segments.

    output_filename: string
        Output filename of the pickled trained SVM model.

    Returns
    -------
    model: svm.SVC
        Returns a trained SVM model that can be used to classify other data.

    """
    clf = svm.SVC()
        
    # specify parameters and distributions to sample from
    param_dist = {'C': expon(scale=100),
                  'gamma': expon(scale=.1),
                  'kernel': ['rbf'],
                  'class_weight':['balanced', None]}

    # run randomized search
    n_iter_search = 20
    random_search = RandomizedSearchCV(clf, param_distributions=param_dist,
                                   n_iter=n_iter_search)

    random_search.fit(X, Y) # this may take time...
    
    return random_search

In [6]:
def predict(model, X):
    """
    Classify segments using a trained SVM model

    Classify image segments using the trained Support Vector Machine model. 

    Parameters
    ----------
     model: svm.SVC
        A trained SVM model that can be used to classify other data.

    X: numpy 2D array
        A 2D numpy array where there is one row for each segment and each
        column represents an attribute of the segments. Identical to segments
        from the train_classifier function.
    """
    predictions = model.predict(X)

    return predictions

In [7]:
for_training = gpd.read_file("part_class.shp")

In [8]:
for_training.head

<bound method NDFrame.head of      red_min  red_max    red_mean    red_std  green_min  green_max  \
0       92.0    251.0  204.971429  33.633729      114.0      251.0   
1       47.0     97.0   74.396985   5.126689       47.0      104.0   
2       23.0    170.0   69.481375  14.631591       13.0      200.0   
3       65.0     99.0   87.172239   5.063199       68.0       91.0   
4       43.0    150.0   86.477513  10.580502       40.0      168.0   
..       ...      ...         ...        ...        ...        ...   
476     60.0    108.0   75.977507   4.965671       63.0      119.0   
477     53.0    132.0   96.391158  11.817173       53.0      137.0   
478     30.0    232.0   96.959867  17.265619       33.0      243.0   
479     69.0    242.0   86.586800  13.041254       79.0      245.0   
480     61.0    181.0   81.848841  10.411873       68.0      200.0   

     green_mean  green_std  blue_min  blue_max  ...  sobel_max  sobel_mean  \
0    221.867532  27.235874     120.0     251.0  ...

In [9]:
big_train = for_training[for_training["Class"]!=0]

In [10]:
big_train.head

<bound method NDFrame.head of      red_min  red_max    red_mean    red_std  green_min  green_max  \
0       92.0    251.0  204.971429  33.633729      114.0      251.0   
1       47.0     97.0   74.396985   5.126689       47.0      104.0   
2       23.0    170.0   69.481375  14.631591       13.0      200.0   
3       65.0     99.0   87.172239   5.063199       68.0       91.0   
4       43.0    150.0   86.477513  10.580502       40.0      168.0   
..       ...      ...         ...        ...        ...        ...   
473     25.0    247.0   99.803759  33.991699       24.0      245.0   
476     60.0    108.0   75.977507   4.965671       63.0      119.0   
477     53.0    132.0   96.391158  11.817173       53.0      137.0   
478     30.0    232.0   96.959867  17.265619       33.0      243.0   
479     69.0    242.0   86.586800  13.041254       79.0      245.0   

     green_mean  green_std  blue_min  blue_max  ...  sobel_max  sobel_mean  \
0    221.867532  27.235874     120.0     251.0  ...

In [11]:
big_train.columns.values

array(['red_min', 'red_max', 'red_mean', 'red_std', 'green_min',
       'green_max', 'green_mean', 'green_std', 'blue_min', 'blue_max',
       'blue_mean', 'blue_std', 'area', 'perimeter', 'eccentrici',
       'equivalent', 'major_axis', 'minor_axis', 'orientatio',
       'sobel_min', 'sobel_max', 'sobel_mean', 'sobel_std', 'dn',
       'geom_Lengt', 'geom_Area', 'Class', 'Shape_Leng', 'Shape_Area',
       'geometry'], dtype=object)

In [12]:
labels = big_train['Class']

In [13]:
labels

0      1
1      2
2      4
3      4
4      4
      ..
473    4
476    4
477    4
478    4
479    4
Name: Class, Length: 251, dtype: int64

In [14]:
classes = big_train[['red_mean', 'green_mean', "blue_mean", "eccentrici", "orientatio", "sobel_max"]]

In [15]:
classes

Unnamed: 0,red_mean,green_mean,blue_mean,eccentrici,orientatio,sobel_max
0,204.971429,221.867532,223.831169,0.499759,0.363929,0.443004
1,74.396985,62.626466,55.075377,0.650904,-0.044261,0.108257
2,69.481375,64.138968,59.545845,0.827139,0.790657,0.391178
3,87.172239,82.045593,73.581560,0.642298,-0.698151,0.061959
4,86.477513,88.236332,82.322751,0.914234,0.052566,0.300343
5,60.641917,71.370301,67.048872,0.639717,0.704793,0.065098
6,130.298586,137.189909,130.415026,0.987925,0.015316,0.322621
7,144.947399,109.625950,91.652835,0.803932,0.416924,0.382887
9,52.362963,61.870707,58.649832,0.816491,-0.581560,0.341381
10,70.649742,78.104696,74.564592,0.968913,-0.147797,0.232171


In [16]:
type(classes)

pandas.core.frame.DataFrame

In [17]:
type(labels)

pandas.core.series.Series

In [18]:
model = train(classes, labels)



In [20]:
to_predict = for_training[['red_mean', 'green_mean', "blue_mean", "eccentrici", "orientatio", "sobel_max"]]

In [21]:
output = predict(model, to_predict.values)

In [22]:
output

array([1, 2, 4, 4, 4, 4, 4, 2, 1, 4, 4, 1, 1, 4, 2, 4, 1, 5, 4, 4, 4, 1,
       3, 2, 4, 2, 5, 1, 1, 4, 1, 4, 2, 1, 1, 1, 1, 6, 1, 1, 4, 4, 5, 1,
       1, 2, 2, 4, 4, 1, 1, 1, 5, 1, 1, 2, 2, 1, 5, 1, 1, 1, 4, 1, 1, 1,
       1, 1, 1, 2, 1, 6, 2, 2, 2, 1, 4, 4, 4, 1, 1, 6, 1, 1, 1, 1, 1, 5,
       2, 3, 4, 1, 1, 2, 4, 4, 1, 2, 4, 4, 1, 1, 4, 2, 4, 1, 2, 4, 2, 1,
       1, 2, 4, 6, 1, 4, 4, 2, 4, 1, 6, 1, 2, 2, 2, 4, 6, 1, 1, 1, 5, 2,
       6, 5, 6, 6, 1, 2, 4, 1, 1, 6, 1, 6, 2, 2, 1, 5, 1, 1, 4, 2, 2, 4,
       6, 1, 1, 1, 4, 5, 2, 4, 1, 1, 4, 4, 5, 4, 2, 2, 4, 1, 2, 1, 6, 4,
       2, 1, 6, 6, 6, 4, 2, 4, 6, 6, 1, 1, 4, 2, 1, 3, 3, 2, 6, 6, 5, 3,
       4, 6, 1, 2, 4, 6, 6, 5, 6, 3, 4, 4, 1, 6, 2, 2, 3, 4, 2, 2, 3, 6,
       6, 3, 2, 4, 6, 3, 2, 4, 4, 3, 3, 2, 3, 3, 3, 2, 2, 3, 4, 2, 6, 4,
       1, 5, 3, 2, 4, 6, 2, 4, 4, 6, 5, 6, 2, 1, 3, 6, 3, 4, 2, 1, 3, 4,
       2, 5, 3, 2, 2, 4, 2, 2, 6, 1, 6, 4, 6, 4, 3, 2, 6, 4, 6, 1, 4, 3,
       6, 1, 2, 1, 2, 3, 4, 3, 2, 3, 1, 5, 4, 2, 3,

In [23]:
for_training['classified'] = output

In [25]:
for_training

Unnamed: 0,red_min,red_max,red_mean,red_std,green_min,green_max,green_mean,green_std,blue_min,blue_max,...,sobel_mean,sobel_std,dn,geom_Lengt,geom_Area,Class,Shape_Leng,Shape_Area,geometry,classified
0,92.0,251.0,204.971429,33.633729,114.0,251.0,221.867532,27.235874,120.0,251.0,...,0.131967,0.121452,12,4.7,0.0,1,4.7,0.9625,"POLYGON ((367418.9500000002 5807250.949999999,...",1
1,47.0,97.0,74.396985,5.126689,47.0,104.0,62.626466,6.496449,41.0,105.0,...,0.014563,0.016402,15,8.5,0.0,2,8.5,1.4925,"POLYGON ((367426.2000000002 5807250.949999999,...",2
2,23.0,170.0,69.481375,14.631591,13.0,200.0,64.138968,17.686036,7.0,210.0,...,0.032773,0.061715,7,9.5,0.0,4,9.5,1.7450,"POLYGON ((367389.2999999998 5807250.949999999,...",4
3,65.0,99.0,87.172239,5.063199,68.0,91.0,82.045593,3.788607,59.0,88.0,...,0.008431,0.008344,8,11.6,0.0,4,11.6,2.4675,"POLYGON ((367390.25 5807250.949999999, 367390....",4
4,43.0,150.0,86.477513,10.580502,40.0,168.0,88.236332,18.912979,38.0,179.0,...,0.022699,0.033605,9,15.3,0.0,4,15.3,5.6700,"POLYGON ((367393.8499999996 5807250.949999999,...",4
5,50.0,82.0,60.641917,4.451131,55.0,95.0,71.370301,5.198693,50.0,95.0,...,0.014678,0.010548,19,9.7,0.0,4,9.7,2.6600,"POLYGON ((367433.25 5807250.949999999, 367433....",4
6,34.0,238.0,130.298586,52.064209,29.0,243.0,137.189909,60.096381,25.0,239.0,...,0.027450,0.040146,2,42.2,0.0,4,42.2,9.0175,"POLYGON ((367388.2999999998 5807251, 367400.75...",4
7,57.0,176.0,144.947399,15.064158,51.0,174.0,109.625950,10.968384,41.0,164.0,...,0.034046,0.039700,14,15.9,0.0,2,15.9,4.2775,"POLYGON ((367421.7000000002 5807250.949999999,...",2
8,30.0,126.0,101.099068,10.497784,34.0,156.0,133.145688,13.776301,28.0,166.0,...,0.030819,0.037488,24,7.8,0.0,0,7.8,2.1450,"POLYGON ((367406.7000000002 5807250.550000001,...",1
9,9.0,141.0,52.362963,10.574200,8.0,173.0,61.870707,13.253477,7.0,176.0,...,0.016130,0.026918,20,14.6,0.0,4,14.6,3.7125,"POLYGON ((367435.2000000002 5807250.949999999,...",4


In [28]:
for_training.to_file("output.gpkg", layer = "predictions", driver = "GPKG")