In [2]:
import numpy as np
import os
import pandas as pd
import random
from tqdm import tqdm
import xgboost as xgb
import scipy
from sklearn.metrics import fbeta_score
from PIL import Image

In [3]:
# Set the seed 
random_seed = 1
random.seed(random_seed)
np.random.seed(random_seed)

In [22]:
ROOT_PATH = '/Users/syedsabeeth/Downloads/dataset'
TRAIN_PATH = ROOT_PATH + '/' + 'train_file'
TEST_PATH = ROOT_PATH + '/' + 'test_file'

TRAIN_CSV_PATH = ROOT_PATH + '/' + 'train_label.csv'
TEST_CSV_PATH = ROOT_PATH + '/' + 'test_label.csv'
train = pd.read_csv(TRAIN_CSV_PATH)
test = pd.read_csv(TEST_CSV_PATH)

In [27]:
# Function to extract the image features
def extract_features(df, data_path):
    im_features = df.copy()

    N = len(im_features.image_name.values)

    r_mean = np.zeros(N)
    g_mean = np.zeros(N)
    b_mean = np.zeros(N)

    r_std = np.zeros(N)
    g_std = np.zeros(N)
    b_std = np.zeros(N)

    r_max = np.zeros(N)
    g_max = np.zeros(N)
    b_max = np.zeros(N)

    r_min = np.zeros(N)
    g_min = np.zeros(N)
    b_min = np.zeros(N)

    r_kurtosis = np.zeros(N)
    g_kurtosis = np.zeros(N)
    b_kurtosis = np.zeros(N)
    
    r_skewness = np.zeros(N)
    g_skewness = np.zeros(N)
    b_skewness = np.zeros(N)

    for i, image_name in enumerate(tqdm(im_features.image_name.values, miniters=1000)): 
        im = Image.open(data_path + image_name + '.jpg')
        im = np.array(im)[:,:,:3]

        r = im[:,:,0].ravel()
        g = im[:,:,1].ravel()
        b = im[:,:,2].ravel()
        
        r_mean[i] = np.mean(r)
        g_mean[i] = np.mean(g)
        b_mean[i] = np.mean(b)

        r_std[i] = np.std(r)
        g_std[i] = np.std(g)
        b_std[i] = np.std(b)

        r_max[i] = np.max(r)
        g_max[i] = np.max(g)
        b_max[i] = np.max(b)

        r_min[i] = np.min(r)
        g_min[i] = np.min(g)
        b_min[i] = np.min(b)

        r_kurtosis[i] = scipy.stats.kurtosis(r)
        g_kurtosis[i] = scipy.stats.kurtosis(g)
        b_kurtosis[i] = scipy.stats.kurtosis(b)
        
        r_skewness[i] = scipy.stats.skew(r)
        g_skewness[i] = scipy.stats.skew(g)
        b_skewness[i] = scipy.stats.skew(b)


    im_features['r_mean'] = r_mean
    im_features['g_mean'] = g_mean
    im_features['b_mean'] = b_mean

    im_features['rgb_mean_mean'] = (r_mean + g_mean + b_mean)/3.0

    im_features['r_std'] = r_std
    im_features['g_std'] = g_std
    im_features['b_std'] = b_std

    im_features['rgb_mean_std'] = (r_std + g_std + b_std)/3.0

    im_features['r_max'] = r_max
    im_features['g_max'] = g_max
    im_features['b_max'] = b_max

    im_features['rgb_mean_max'] = (r_max + r_max + b_max)/3.0

    im_features['r_min'] = r_min
    im_features['g_min'] = g_min
    im_features['b_min'] = b_min

    im_features['rgb_mean_min'] = (r_min + g_min + b_min)/3.0

    im_features['r_range'] = r_max - r_min
    im_features['g_range'] = g_max - g_min
    im_features['b_range'] = b_max - b_min

    im_features['r_kurtosis'] = r_kurtosis
    im_features['g_kurtosis'] = g_kurtosis
    im_features['b_kurtosis'] = b_kurtosis
    
    im_features['r_skewness'] = r_skewness
    im_features['g_skewness'] = g_skewness
    im_features['b_skewness'] = b_skewness
    
    return im_features

In [29]:
# Extract features
print('Extracting train features')
train_features = extract_features(train,'/Users/syedsabeeth/Downloads/dataset/train_file/')
print('Extracting test features')
test_features = extract_features(test,'/Users/syedsabeeth/Downloads/dataset/test_file/')

  0%|          | 0/32383 [00:00<?, ?it/s]

Extracting train features


100%|██████████| 32383/32383 [03:49<00:00, 141.29it/s]
  0%|          | 0/8096 [00:00<?, ?it/s]

Extracting test features


100%|██████████| 8096/8096 [00:56<00:00, 144.17it/s]


In [31]:
# # pickle the data

# import pickle
# train_feat = open('train_features_xgb.pickle', 'wb')
# pickle.dump(train_features, train_feat, protocol=4)
# train_feat.close()

# test_feat = open('test_features_xgb.pickle', 'wb')
# pickle.dump(test_features, test_feat, protocol=4)
# test_feat.close()

In [32]:
#load the pickled features:
import pickle
pickle_XGtrain = open('train_features_xgb.pickle', 'rb')
train_features = pickle.load(pickle_XGtrain)

pickle_yGtrain = open('test_features_xgb.pickle', 'rb')
test_features = pickle.load(pickle_yGtrain)

In [33]:
# Prepare data
X = np.array(train_features.drop(['image_name', 'tags'], axis=1))
y_train = []

flatten = lambda l: [item for sublist in l for item in sublist]
labels = np.array(list(set(flatten([l.split(' ') for l in train_features['tags'].values]))))

label_map = {l: i for i, l in enumerate(labels)}
inv_label_map = {i: l for l, i in label_map.items()}

In [34]:
for tags in tqdm(train.tags.values, miniters=1000):
    targets = np.zeros(17)
    for t in tags.split(' '):
        targets[label_map[t]] = 1 
    y_train.append(targets)
    
y = np.array(y_train, np.uint8)

100%|██████████| 32383/32383 [00:00<00:00, 424239.66it/s]


In [35]:
print('X.shape = ' + str(X.shape))
print('y.shape = ' + str(y.shape))

X.shape = (32383, 25)
y.shape = (32383, 17)


In [36]:
#for i in range(50):
print(y[4])
#Multiclass indicators in y are present

[1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0]


In [37]:
n_classes = y.shape[1]

#Create X train and test dataset

X_test = np.array(test_features.drop(['image_name', 'tags'], axis=1))
X_train = np.array(train_features.drop(['image_name', 'tags'], axis=1))

In [38]:
# Train and predict with one-vs-all strategy
y_pred = np.zeros((X_test.shape[0], n_classes))
train_pred = np.zeros((X_train.shape[0], n_classes))

In [39]:
print('Wait for it ....')
for class_i in tqdm(range(n_classes), miniters=1): 
    model = xgb.XGBClassifier(max_depth=5, learning_rate=0.1, n_estimators=100, \
                              silent=True, objective='binary:logistic', nthread=-1, \
                              gamma=0, min_child_weight=1, max_delta_step=0, \
                              subsample=1, colsample_bytree=1, colsample_bylevel=1, \
                              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, \
                              base_score=0.5, seed=random_seed, missing=None)
    model.fit(X, y[:, class_i])
    y_pred[:, class_i] = model.predict_proba(X_test)[:, 1]
    train_pred[:, class_i] = model.predict_proba(X_train)[:, 1]

  0%|          | 0/17 [00:00<?, ?it/s]

Wait for it ....
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




  6%|▌         | 1/17 [00:05<01:33,  5.87s/it]

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




 12%|█▏        | 2/17 [00:11<01:26,  5.78s/it]

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




 18%|█▊        | 3/17 [00:16<01:18,  5.61s/it]

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




 24%|██▎       | 4/17 [00:21<01:08,  5.23s/it]

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




 29%|██▉       | 5/17 [00:26<01:02,  5.22s/it]

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




 35%|███▌      | 6/17 [00:31<00:57,  5.18s/it]

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




 41%|████      | 7/17 [00:37<00:53,  5.36s/it]

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




 47%|████▋     | 8/17 [00:42<00:48,  5.36s/it]

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




 53%|█████▎    | 9/17 [00:47<00:43,  5.42s/it]

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




 59%|█████▉    | 10/17 [00:52<00:36,  5.23s/it]

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




 65%|██████▍   | 11/17 [00:58<00:32,  5.37s/it]

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




 71%|███████   | 12/17 [01:03<00:26,  5.27s/it]

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




 76%|███████▋  | 13/17 [01:09<00:21,  5.35s/it]

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




 82%|████████▏ | 14/17 [01:14<00:16,  5.47s/it]

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




 88%|████████▊ | 15/17 [01:20<00:10,  5.49s/it]

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




 94%|█████████▍| 16/17 [01:26<00:05,  5.55s/it]

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




100%|██████████| 17/17 [01:31<00:00,  5.40s/it]


In [40]:
y_pred[10]

array([9.94925261e-01, 4.87024290e-03, 8.64038710e-04, 1.49306710e-04,
       5.40878566e-04, 1.23659600e-04, 8.36878549e-04, 1.12133194e-03,
       2.74723163e-03, 4.20916127e-04, 9.98564780e-01, 4.82067007e-05,
       3.52773437e-04, 7.06552807e-03, 9.99794574e-04, 8.23378470e-03,
       1.74472257e-02])

In [41]:
train_pred.shape

(32383, 17)

In [42]:
xg_results = pd.DataFrame(y_pred, columns = labels)
xg_results[:10]

Unnamed: 0,clear,haze,blooming,conventional_mine,selective_logging,slash_burn,partly_cloudy,bare_ground,cultivation,blow_down,primary,artisinal_mine,cloudy,road,habitation,agriculture,water
0,0.998341,0.000315,0.000329,7.3e-05,6.5e-05,0.000114,0.001345,0.000403,0.001744,0.000142,0.999667,4.9e-05,0.000224,0.005614,0.000494,0.002974,0.008357
1,0.995323,0.001,0.000202,7.6e-05,0.000102,9.1e-05,0.000468,0.000415,0.006018,9.7e-05,0.999574,4.8e-05,0.000248,0.010772,0.000375,0.009449,0.038274
2,0.890882,0.000344,0.006274,0.000398,0.007108,0.017153,0.067683,0.013973,0.365004,0.002031,0.999589,0.00066,0.000235,0.081552,0.033002,0.66851,0.228518
3,0.998472,0.000252,0.00286,6.3e-05,0.000194,0.000109,0.000546,0.000307,0.001513,0.000221,0.999427,5.4e-05,0.000169,0.005056,0.000577,0.002784,0.007021
4,0.978246,0.002394,0.000507,0.006535,0.000713,0.001995,0.013906,0.083064,0.085349,0.000224,0.992847,0.000495,0.000484,0.541024,0.057571,0.777353,0.512305
5,0.996188,0.000334,0.024992,6.3e-05,0.00068,0.000145,0.003682,0.000572,0.003001,0.001948,0.999485,5.4e-05,0.000198,0.008104,0.00182,0.008736,0.025754
6,0.957331,0.000458,0.000992,0.00517,0.026554,0.00769,0.023057,0.027906,0.307997,0.020988,0.998927,0.002662,0.000125,0.622697,0.92738,0.561388,0.400289
7,0.601991,0.041488,0.00022,0.00245,0.001949,0.001462,0.439706,0.092196,0.116436,0.000611,0.924812,0.001123,0.002551,0.734563,0.306583,0.827225,0.206105
8,0.875645,0.018891,0.007095,0.000909,0.007357,0.04182,0.097733,0.045964,0.374879,0.005474,0.997043,6.4e-05,0.000193,0.284766,0.182164,0.751313,0.185466
9,0.986003,0.001522,0.029218,0.001487,0.01653,0.006204,0.01339,0.0046,0.401443,0.000711,0.999674,0.000119,0.000165,0.10145,0.001765,0.245209,0.426771


In [43]:
xg_train_res = pd.DataFrame(train_pred, columns = labels)
xg_train_res[:10]

Unnamed: 0,clear,haze,blooming,conventional_mine,selective_logging,slash_burn,partly_cloudy,bare_ground,cultivation,blow_down,primary,artisinal_mine,cloudy,road,habitation,agriculture,water
0,0.243691,0.537098,0.000263,0.000102,0.004613,0.000354,0.110202,0.002239,0.159694,0.000582,0.980574,8.1e-05,0.01066,0.243954,0.156625,0.56863,0.134333
1,0.996312,0.00044,0.000867,6.4e-05,0.000405,0.000232,0.000415,0.000351,0.002222,0.000179,0.999122,5.4e-05,0.001018,0.003406,0.000348,0.003109,0.00614
2,0.983728,0.000384,0.022148,0.000225,0.084303,0.019704,0.01598,0.021317,0.172788,0.026234,0.999617,6.9e-05,0.000185,0.285623,0.065269,0.491435,0.192996
3,0.997905,0.000233,0.001709,7.6e-05,0.000104,0.000109,0.000568,0.000342,0.003633,0.001881,0.999667,4.6e-05,0.000151,0.005932,0.002179,0.009913,0.02575
4,0.998829,0.000351,0.001119,6.3e-05,0.000194,0.000296,0.001292,0.000979,0.005659,0.000448,0.999393,5.4e-05,0.000174,0.004608,0.000525,0.00567,0.028705
5,0.064405,0.000871,0.011737,0.00027,0.028095,0.001856,0.927209,0.00565,0.046233,0.001139,0.99954,0.00012,0.000299,0.095922,0.006057,0.043219,0.109918
6,0.734244,0.002392,0.008751,0.000172,0.029138,0.003507,0.207602,0.009302,0.182307,0.00454,0.997012,6.1e-05,0.000181,0.144552,0.034107,0.236053,0.135172
7,0.998593,0.000232,0.001356,6.3e-05,0.000151,0.000154,0.000417,0.000353,0.001595,0.000239,0.999478,5.4e-05,0.000225,0.003839,0.000541,0.003089,0.010229
8,0.946485,0.032685,0.000845,8.6e-05,0.000773,0.000116,0.000235,0.00057,0.001973,0.000218,0.996651,4.8e-05,0.002013,0.004193,0.00063,0.005117,0.024188
9,0.011373,0.003451,9e-05,0.004859,0.000119,0.000536,0.224775,0.012778,0.038144,9.4e-05,0.398304,0.003852,0.360448,0.021172,0.010437,0.169243,0.519069


In [44]:
xg_results.to_csv('result_xgb_0.3.csv', index=False)

In [45]:
#Pickle the result
# import pickle
res = open('result_xgb_0.5.pickle', 'wb')
pickle.dump(xg_results, res, protocol=4)
res.close()

In [46]:
#Check predicted rare labels
print("Blow downs: ",len(xg_results[xg_results['blow_down']>0.2]))
print("conventional_mine: ",len(xg_results[xg_results['conventional_mine']>0.2]))
print("selective_logging: ",len(xg_results[xg_results['selective_logging']>0.2]))
print("slash_burn: ",len(xg_results[xg_results['slash_burn']>0.2]))
print("Cultivation: ",len(xg_results[xg_results['cultivation']>0.2]))

Blow downs:  2
conventional_mine:  7
selective_logging:  18
slash_burn:  1
Cultivation:  1529


In [47]:
#create prediction flag
preds = [' '.join(labels[y_pred_row > 0.2]) for y_pred_row in y_pred]

In [48]:
#Create a  dataframe wiht test images and labels 
subm = pd.DataFrame()
subm['image_name'] = test_features.image_name.values
subm['tags'] = preds
subm[:10]

Unnamed: 0,image_name,tags
0,test_32752.jpg,clear primary
1,test_7767.jpg,clear primary
2,test_4254.jpg,clear cultivation primary agriculture water
3,test_23119.jpg,clear primary
4,test_22101.jpg,clear primary road agriculture water
5,test_26818.jpg,clear primary
6,test_25502.jpg,clear cultivation primary road habitation agri...
7,test_35301.jpg,clear partly_cloudy primary road habitation ag...
8,test_29796.jpg,clear cultivation primary road agriculture
9,test_36349.jpg,clear cultivation primary agriculture water


In [49]:
#Create flags for prediction flags
train_preds = [' '.join(labels[y_pred_row > 0.2]) for y_pred_row in train_pred]

In [50]:
#Create a  dataframe wiht train images and labels 
trainfrm = pd.DataFrame()
trainfrm['image_name'] = train_features.image_name.values
trainfrm['pred_tags'] = train_preds
trainfrm[:10]

Unnamed: 0,image_name,pred_tags
0,train_3577.jpg,clear haze primary road agriculture
1,train_10327.jpg,clear primary
2,train_1243.jpg,clear primary road agriculture
3,train_17066.jpg,clear primary
4,train_15959.jpg,clear primary
5,train_32931.jpg,partly_cloudy primary
6,train_4151.jpg,clear partly_cloudy primary agriculture
7,train_37844.jpg,clear primary
8,train_28283.jpg,clear primary
9,train_35805.jpg,partly_cloudy primary cloudy water


In [52]:
df_train= train
df_train[:10]

Unnamed: 0,image_name,tags
0,train_3577.jpg,haze primary
1,train_10327.jpg,clear primary
2,train_1243.jpg,clear primary water
3,train_17066.jpg,clear primary
4,train_15959.jpg,clear primary
5,train_32931.jpg,partly_cloudy primary
6,train_4151.jpg,partly_cloudy primary
7,train_37844.jpg,clear primary
8,train_28283.jpg,clear primary
9,train_35805.jpg,agriculture partly_cloudy primary water


In [53]:
# Join to train_df to get the original 
df_train=df_train.merge(trainfrm,on='image_name')
df_train[:5]

Unnamed: 0,image_name,tags,pred_tags
0,train_3577.jpg,haze primary,clear haze primary road agriculture
1,train_10327.jpg,clear primary,clear primary
2,train_1243.jpg,clear primary water,clear primary road agriculture
3,train_17066.jpg,clear primary,clear primary
4,train_15959.jpg,clear primary,clear primary


In [54]:
#Create one hot encoding to compare the predicted and original labels 
df_train['tags1'] = df_train['tags'].apply(lambda x: x.split(' '))
df_train['pred_tags1'] = df_train['pred_tags'].apply(lambda x: x.split(' '))

In [55]:
df_train_actual=df_train[['image_name','tags1']]
df_train_actual[:2]


Unnamed: 0,image_name,tags1
0,train_3577.jpg,"[haze, primary]"
1,train_10327.jpg,"[clear, primary]"


In [56]:
from sklearn.preprocessing import MultiLabelBinarizer
xgbst = MultiLabelBinarizer()

actual_results = pd.DataFrame(xgbst.fit_transform(df_train_actual['tags1']),
                   columns=xgbst.classes_,
                   index=df_train_actual.index)

actual_results

Unnamed: 0,agriculture,artisinal_mine,bare_ground,blooming,blow_down,clear,cloudy,conventional_mine,cultivation,habitation,haze,partly_cloudy,primary,road,selective_logging,slash_burn,water
0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0
1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
2,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1
3,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
4,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32378,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
32379,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
32380,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0
32381,1,0,0,0,0,1,0,0,1,1,0,0,1,1,1,0,0


In [57]:
df_train_pred=df_train[['image_name','pred_tags1']]
df_train_pred[:2]

Unnamed: 0,image_name,pred_tags1
0,train_3577.jpg,"[clear, haze, primary, road, agriculture]"
1,train_10327.jpg,"[clear, primary]"


In [58]:
from sklearn.preprocessing import MultiLabelBinarizer
xgbst = MultiLabelBinarizer()

pred_results = pd.DataFrame(xgbst.fit_transform(df_train_pred['pred_tags1']),
                   columns=xgbst.classes_,
                   index=df_train_pred.index)

pred_results

Unnamed: 0,agriculture,artisinal_mine,bare_ground,blooming,blow_down,clear,cloudy,conventional_mine,cultivation,habitation,haze,partly_cloudy,primary,road,selective_logging,slash_burn,water
0,1,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,0
1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
2,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0
3,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
4,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32378,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1
32379,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
32380,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0
32381,1,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,1


In [59]:
#get precision -recall report
from sklearn.metrics import classification_report
import numpy as np
print(classification_report(actual_results, pred_results, target_names=labels))

                   precision    recall  f1-score   support

            clear       0.62      0.96      0.76      9855
             haze       0.77      0.90      0.83       278
         blooming       0.66      0.44      0.53       694
conventional_mine       0.73      0.30      0.42       259
selective_logging       1.00      0.43      0.60        72
       slash_burn       0.89      0.99      0.94     22647
    partly_cloudy       0.74      0.96      0.84      1708
      bare_ground       0.97      0.77      0.86        77
      cultivation       0.41      0.70      0.52      3550
        blow_down       0.55      0.83      0.66      2925
          primary       0.60      0.88      0.71      2185
   artisinal_mine       0.76      0.96      0.85      5842
           cloudy       0.96      1.00      0.98     29979
             road       0.54      0.91      0.68      6452
       habitation       0.80      0.25      0.38       270
      agriculture       1.00      0.19      0.31       