In [1]:
from scipy.stats import expon
from sklearn import svm
from sklearn.model_selection import RandomizedSearchCV
import pickle
import numpy as np
import geopandas as gpd
from geopandas import GeoDataFrame
import matplotlib
from matplotlib import pyplot

In [2]:
def train(X, Y):
    """
    Train classification algorithm.
    
    Train the Support Vector Machine classification algorithm using the
    specified fields. 

    Parameters
    ----------
    X: numpy 2D array
        A 2D numpy array where there is one row for each segment and each
        column represents an attribute of the segments. 

    Y: numpy 1D array
        A 1D numpy array equal in length to the number of records in segments.
        The single column contains actual class values for each of the
        segments.

    output_filename: string
        Output filename of the pickled trained SVM model.

    Returns
    -------
    model: svm.SVC
        Returns a trained SVM model that can be used to classify other data.

    """
    clf = svm.SVC()
        
    # specify parameters and distributions to sample from
    param_dist = {'C': expon(scale=100),
                  'gamma': expon(scale=.1),
                  'kernel': ['rbf'],
                  'class_weight':['balanced', None]}

    # run randomized search
    n_iter_search = 20
    random_search = RandomizedSearchCV(clf, param_distributions=param_dist,
                                   n_iter=n_iter_search)

    random_search.fit(X, Y) # this may take time...
    
    return random_search

In [3]:
def predict(model, X):
    """
    Classify segments using a trained SVM model

    Classify image segments using the trained Support Vector Machine model. 

    Parameters
    ----------
     model: svm.SVC
        A trained SVM model that can be used to classify other data.

    X: numpy 2D array
        A 2D numpy array where there is one row for each segment and each
        column represents an attribute of the segments. Identical to segments
        from the train_classifier function.
    """
    predictions = model.predict(X)

    return predictions

In [26]:
for_training = gpd.read_file("ClassifiedChevelon.gpkg")

In [27]:
for_training.head

<bound method NDFrame.head of        blue_min  blue_max    blue_mean    blue_std  green_min  green_max  \
0         959.0     959.0   959.000000    0.000000     1227.0     1227.0   
1        1094.0    1094.0  1094.000000    0.000000     1393.0     1393.0   
2         866.0     866.0   866.000000    0.000000     1093.0     1093.0   
3        1162.0    1162.0  1162.000000    0.000000     1459.0     1459.0   
4         929.0    1169.0  1028.333333   94.270297     1206.0     1444.0   
...         ...       ...          ...         ...        ...        ...   
36799    1117.0    1419.0  1245.955056   68.133529     1395.0     1705.0   
36800       0.0    1274.0   373.324461  455.137399        0.0     1495.0   
36801       0.0    1552.0     1.712042   40.834082        0.0     1839.0   
36802       0.0    1465.0  1137.597938  487.652628        0.0     1717.0   
36803    1215.0    1406.0  1311.653061   40.608535     1502.0     1633.0   

        green_mean   green_std  red_min  red_max  ...  mi

In [28]:
big_train = for_training[~np.isnan(for_training["Class"])]

In [29]:
# big_train = for_training[for_training["Class"]!=np.isnan]

In [30]:
big_train.head

<bound method NDFrame.head of        blue_min  blue_max    blue_mean    blue_std  green_min  green_max  \
0         959.0     959.0   959.000000    0.000000     1227.0     1227.0   
2         866.0     866.0   866.000000    0.000000     1093.0     1093.0   
4         929.0    1169.0  1028.333333   94.270297     1206.0     1444.0   
6         963.0     963.0   963.000000    0.000000     1213.0     1213.0   
9         911.0     911.0   911.000000    0.000000     1190.0     1190.0   
...         ...       ...          ...         ...        ...        ...   
31776     829.0     920.0   861.133333   23.996852     1041.0     1113.0   
32382     865.0     892.0   878.500000   13.500000     1133.0     1143.0   
32872     608.0    1023.0   797.985714   72.170597      817.0     1204.0   
35294     757.0    1241.0  1038.830986  114.101510      989.0     1498.0   
36687     417.0     472.0   448.333333   23.098822      606.0      658.0   

        green_mean   green_std  red_min  red_max  ...  mi

In [31]:
big_train.columns.values

array(['blue_min', 'blue_max', 'blue_mean', 'blue_std', 'green_min',
       'green_max', 'green_mean', 'green_std', 'red_min', 'red_max',
       'red_mean', 'red_std', 'nir_min', 'nir_max', 'nir_mean', 'nir_std',
       'area', 'perimeter', 'eccentricity', 'equivalent_diameter',
       'major_axis_length', 'minor_axis_length', 'orientation',
       'sobel_min', 'sobel_max', 'sobel_mean', 'sobel_std', 'dn', 'Class',
       'ndvi', 'geometry'], dtype=object)

In [32]:
labels = big_train['Class']

In [33]:
labels

0        2.0
2        3.0
4        3.0
6        2.0
9        2.0
        ... 
31776    2.0
32382    2.0
32872    2.0
35294    3.0
36687    1.0
Name: Class, Length: 173, dtype: float64

In [34]:
classes = big_train[['red_mean', 'green_mean', "blue_mean", "nir_mean", "eccentricity", "orientation", "sobel_max"]]

In [35]:
classes

Unnamed: 0,red_mean,green_mean,blue_mean,nir_mean,eccentricity,orientation,sobel_max
0,1545.000000,1227.000000,959.000000,2561.000000,0.789692,1.197769,0.000000
2,1415.000000,1093.000000,866.000000,2318.000000,0.000000,0.785398,0.000000
4,1664.000000,1300.666667,1028.333333,2650.833333,0.925104,-0.673043,0.000000
6,1550.000000,1213.000000,963.000000,2562.000000,0.925104,-0.673043,0.000000
9,1511.000000,1190.000000,911.000000,2478.000000,1.000000,-0.785398,0.000000
11,1221.767857,1003.910714,756.107143,2143.625000,0.956790,0.914942,0.000000
13,1647.000000,1309.000000,1010.000000,2527.000000,0.489054,-1.386899,0.000000
15,1679.500000,1320.500000,1023.000000,2623.500000,1.000000,0.000000,0.000000
18,1506.152174,1196.369565,945.652174,2461.086957,0.875389,0.301837,0.000000
23,1661.150000,1324.375000,1038.100000,2605.425000,0.902664,0.919684,0.000000


In [36]:
type(classes)

pandas.core.frame.DataFrame

In [37]:
type(labels)

pandas.core.series.Series

In [38]:
labels

0        2.0
2        3.0
4        3.0
6        2.0
9        2.0
        ... 
31776    2.0
32382    2.0
32872    2.0
35294    3.0
36687    1.0
Name: Class, Length: 173, dtype: float64

In [39]:
model = train(classes, labels)



In [40]:
to_predict = for_training[['red_mean', 'green_mean', "blue_mean", "nir_mean", "eccentricity", "orientation", "sobel_max"]]

In [41]:
output = predict(model, to_predict.values)

In [42]:
output

array([2., 2., 3., ..., 3., 2., 3.])

In [43]:
for_training['classified'] = output

In [44]:
for_training

Unnamed: 0,blue_min,blue_max,blue_mean,blue_std,green_min,green_max,green_mean,green_std,red_min,red_max,...,orientation,sobel_min,sobel_max,sobel_mean,sobel_std,dn,Class,ndvi,geometry,classified
0,959.0,959.0,959.000000,0.000000,1227.0,1227.0,1227.000000,0.000000,1545.0,1545.0,...,1.197769,0.000000,0.000000,0.000000,0.000000e+00,8,2.0,0.247443,"POLYGON ((519804 3830847, 519804 3830844, 5198...",2.0
1,1094.0,1094.0,1094.000000,0.000000,1393.0,1393.0,1393.000000,0.000000,1786.0,1786.0,...,-0.785398,0.000000,0.000000,0.000000,0.000000e+00,5,,0.201431,"POLYGON ((519819 3830844, 519819 3830841, 5198...",2.0
2,866.0,866.0,866.000000,0.000000,1093.0,1093.0,1093.000000,0.000000,1415.0,1415.0,...,0.785398,0.000000,0.000000,0.000000,0.000000e+00,4,3.0,0.241897,"POLYGON ((519783 3830841, 519783 3830838, 5197...",3.0
3,1162.0,1162.0,1162.000000,0.000000,1459.0,1459.0,1459.000000,0.000000,1860.0,1860.0,...,-0.785398,0.000000,0.000000,0.000000,0.000000e+00,5,,0.199139,"POLYGON ((519822 3830841, 519822 3830838, 5198...",2.0
4,929.0,1169.0,1028.333333,94.270297,1206.0,1444.0,1300.666667,89.240624,1541.0,1832.0,...,-0.673043,0.000000,0.000000,0.000000,0.000000e+00,7,3.0,0.228707,"POLYGON ((519825 3830838, 519825 3830829, 5198...",3.0
5,763.0,903.0,840.333333,35.166515,1032.0,1131.0,1094.179487,23.650090,1298.0,1416.0,...,-0.419124,0.000000,0.000000,0.000000,0.000000e+00,2,,0.256126,"POLYGON ((519774 3830844, 519774 3830838, 5197...",3.0
6,963.0,963.0,963.000000,0.000000,1213.0,1213.0,1213.000000,0.000000,1550.0,1550.0,...,-0.673043,0.000000,0.000000,0.000000,0.000000e+00,7,2.0,0.246109,"POLYGON ((519834 3830829, 519834 3830826, 5198...",2.0
7,793.0,976.0,893.627451,44.732302,1059.0,1227.0,1157.725490,35.170218,1363.0,1544.0,...,0.333665,0.000000,0.000000,0.000000,0.000000e+00,3,,0.239773,"POLYGON ((519789 3830844, 519789 3830838, 5197...",2.0
8,830.0,980.0,905.615385,46.906681,1092.0,1268.0,1166.076923,54.644954,1397.0,1574.0,...,-0.793528,0.000000,0.000000,0.000000,0.000000e+00,9,,0.255844,"POLYGON ((519828 3830826, 519828 3830820, 5198...",2.0
9,911.0,911.0,911.000000,0.000000,1190.0,1190.0,1190.000000,0.000000,1511.0,1511.0,...,-0.785398,0.000000,0.000000,0.000000,0.000000e+00,12,2.0,0.242417,"POLYGON ((519837 3830823, 519837 3830820, 5198...",2.0


In [45]:
for_training.to_file("chev2_output.gpkg", layer = "predictions", driver = "GPKG")