# Train/Test

Perform simple experiments training a naive and an XgBoost classifiers to classify Urban Atlas land-use categories from Foursquare POIs.

### Setup

In [1]:
%matplotlib inline

import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point, Polygon
import os
import re
import zipfile,fnmatch
import matplotlib.pyplot as plt
import pathlib
import sys

import xgboost
from sklearn.svm import SVR

from sklearn.model_selection import train_test_split
from sklearn.model_selection import learning_curve
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import learning_curve
from sklearn.metrics import mean_squared_error
from sklearn.svm import LinearSVR
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import confusion_matrix
import sklearn.metrics as metrics


In [1]:
# SET CITY NAME
CITIES= ["barcelona", "london","paris","milan","amsterdam","lisbon"]
CITY_NAME= CITIES[0]

In [2]:
tessellation_sizes = [50,100,200,250]
SIZE = 200

In [4]:
# Base directory
BASE_DIR = os.path.abspath(".")
# base directory for data files
BASE_DIR_DATA = os.path.join(BASE_DIR, "data")
# Base dir for a city data
BASE_DIR_CITY = os.path.join(BASE_DIR_DATA , CITY_NAME)
# Base dir for the city tesselation/grid
BASE_DIR_TESSELLATION = os.path.join(BASE_DIR_CITY, "tessellation")
# Empty tesselation/grid 
TESSELLATION = os.path.join(BASE_DIR_TESSELLATION, "tessellation_"+CITY_NAME +"-Square-"+ str(SIZE) +".geojson")
# Base dir landuse from Urban Atlas
BASE_DIR_LANDUSE = os.path.join(BASE_DIR_CITY, 'landuse')
# Base dir landuse FourSquare
BASE_DIR_FOURSQUARE = os.path.join(BASE_DIR_CITY, 'foursquare_raw')
# City shape fron Nominatim API
OUTPUT_CITY_SHAPE = os.path.join(BASE_DIR_CITY,'osm_raw', CITY_NAME + ".geojson")
#  POIs in Polygon(city shape), output of the clipping operation
OUTPUT_POIS = os.path.join(BASE_DIR_CITY, CITY_NAME +"_POIs.csv")
# Foursquare raw data (formerly known as FOURSQUARE_GRID)
FOURSQUARE_RAW_DATA = os.path.join(BASE_DIR_FOURSQUARE, CITY_NAME +"_poi.csv")
# Foursquare mapped to the grid/tessellation
FOURSQUARE_TESSELLATION = os.path.join(BASE_DIR_CITY, 'mapped', CITY_NAME +"_fs_tessellation_"+ str(SIZE) +".csv")
# Count of Foursquare features per each cell in the tessellation
FOURSQUARE_COUNT = os.path.join(BASE_DIR_CITY, 'count', CITY_NAME +"_fs_count_"+ str(SIZE) +".csv")
# Count of Urban Atlas features per each cell in the tessellation
UA_COUNT = os.path.join(BASE_DIR_CITY, 'count', CITY_NAME +"_ua_count_"+ str(SIZE) +".csv")


### Merge  Urban Atlas + Foursquare POI datasets

Merging **FourSquare POI** derived from count stage onto **UA landuse** (both mapped with `poi_mapping.py`). Then divide them in Train/Test.

In [19]:
def mergeUA_POI(CITY_NAME, SIZE):
    
    # input Foursquare data mapped to grid
    FOURSQUARE_COUNT = os.path.join(BASE_DIR_CITY, 'count', CITY_NAME +"_fs_count_"+ SIZE +".csv")
    # input UA data mapped to grid
    UA_COUNT = os.path.join(BASE_DIR_CITY, 'count', CITY_NAME +"_ua_count_"+ SIZE +".csv")

    # load Foursquare Count
    foursquare_tessellation = pd.read_csv(FOURSQUARE_COUNT)
    foursquare_tessellation.columns = list(map(lambda x : 'f_fs_'+x if x != "cellID" else x, foursquare_tessellation.columns))
    # foursquare_tessellation['f_fs_Highway'] = osm_tessellation['f_osm_Highway']
    foursquare_tessellation.head(2)

    # load UA mapped onto grid
    ua_tessellation = pd.read_csv(UA_COUNT)
    ua_tessellation.columns = list(map(lambda x : 't_'+x if x != "cellID" else x, ua_tessellation.columns))


    # select only relevant columns
    ua_tessellation_target = ua_tessellation.loc[:,['cellID','t_predominant']]
    
    # Merge the UA and Foursquare dataframes
    df_ua_fs = ua_tessellation_target.merge(foursquare_tessellation, on="cellID", how='left')
#     print('SPEREM----------------------------------------------------------')
#     print(df_ua_fs[df_ua_fs['cellID']==39269])

    return df_ua_fs

###  Divide Train/Test

Split the dataset in the usual percentages of 80-20

In [20]:
def split_train_test(df_ua_fs, CITY_NAME, SIZE):

    # output UA data mapped to grid
    OUTPUT_TRAIN,OUTPUT_TEST = [os.path.join(BASE_DIR_CITY, STEP, CITY_NAME +"_fs_"+ SIZE +".csv") for STEP in ["train","test"]]
    OUTPUT_TRAIN_SCALED = '_scaled_.'.join(OUTPUT_TRAIN.split('.'))
    OUTPUT_TEST_SCALED = '_scaled_.'.join(OUTPUT_TEST.split('.'))
    print(OUTPUT_TRAIN, OUTPUT_TEST)
    print(OUTPUT_TRAIN_SCALED, OUTPUT_TEST_SCALED)
    # Divide train/test General
    df_feat = df_ua_fs[[x for x in df_ua_fs.columns if x.startswith('f_')]+['cellID']].set_index('cellID')
    df_target = df_ua_fs[[x for x in df_ua_fs.columns if x.startswith('t_')]+['cellID']].set_index('cellID')
    df_X_train, df_X_test, df_y_train, df_y_test = train_test_split(df_feat, df_target, test_size=0.2, random_state=42, stratify=df_target)

    df_X_train.dropna(inplace=True)    
    df_X_test.dropna(inplace=True)    
    #     df_X_test.fillna(0, inplace=True)
    
    # save datasets
    df_train = df_X_train.merge(df_y_train, left_index=True, right_index=True)
    df_test = df_X_test.merge(df_y_test, left_index=True, right_index=True)    
    df_train.to_csv(OUTPUT_TRAIN, index_label="cellID", sep="\t",float_format='%.6f')
    df_test.to_csv(OUTPUT_TEST, index_label="cellID", sep="\t",float_format='%.6f')
    

    # Create scaled version of train and test
    X_scaler = StandardScaler()
    df_X_train_scaled = X_scaler.fit_transform(df_X_train)
    df_X_train_scaled = pd.DataFrame(df_X_train_scaled, index=df_X_train.index, columns=df_X_train.columns)
    df_X_test_scaled = X_scaler.transform(df_X_test)
    df_X_test_scaled = pd.DataFrame(df_X_test_scaled, index=df_X_test.index, columns=df_X_test.columns)

    df_train_scaled = df_X_train_scaled.merge(df_y_train,left_index=True, right_index=True)
    df_test_scaled = df_X_test_scaled.merge(df_y_test,left_index=True, right_index=True)

    df_train_scaled.to_csv(OUTPUT_TRAIN_SCALED, index_label="cellID", sep="\t",float_format='%.6f')
    df_test_scaled.to_csv(OUTPUT_TEST_SCALED, index_label="cellID", sep="\t",float_format='%.6f')

    
    return df_train,df_test

### Run Experiments

In [21]:
def printEvalutationMetrics(df_y_test,y_pred):
    print( metrics.classification_report(df_y_test.values, y_pred))
    print( "ACCURACY: {}".format(metrics.accuracy_score(df_y_test.values, y_pred)))
    print("F1 SCORE: {}".format(metrics.f1_score(df_y_test.values, y_pred, average='macro')))
    
def runExperiment(df_train, df_test):

    dfs = []
    dim = 200
    df = {} #{"area": boro, "cell": dim}

    suffix_train = "General"
    suffix_test = "General"
    
    df_y_train = df_train['t_predominant']
    df_y_test = df_test['t_predominant']
    
    # Baseline
    df_train['t_predominant'].value_counts().max()
    y_pred = [df_train['t_predominant'].value_counts().idxmax()] * len(df_y_test)
    
    print( "*****************************"+CITY_NAME+"  "+SIZE+"*********************************")
    
    print("****** BASELINE ******")
    # Print Metrics
    printEvalutationMetrics(df_y_test,y_pred)
    df['model'] = "baseline"

    df['accuracy'] = metrics.accuracy_score(df_y_test.values, y_pred) #metrics.accuracy_score(df_y_test.values, y_pred)
    df['f1-score'] = metrics.f1_score(df_y_test.values, y_pred,average='macro') #metrics.accuracy_score(df_y_test.values, y_pred)
    df['precision'] = metrics.precision_score(df_y_test.values, y_pred,average='macro') #metrics.accuracy_score(df_y_test.values, y_pred)
    df['recall'] = metrics.recall_score(df_y_test.values, y_pred,average='macro') #metrics.accuracy_score(df_y_test.values, y_pred)

    dfs.append(df)
    print( "**********************")

    # # xgboost Classifier
    df = {}
    print( "****** XGBOOST ******")
    df_X_train = df_train[[c for c in df_train.columns if c.startswith('f_')]]
    df_X_test = df_test[[c for c in df_test.columns if c.startswith('f_')]]
    
    clf = xgboost.XGBClassifier()#colsample_bytree=0.8, scale_pos_weight=1, learning_rate=0.1, min_child_weight=5,n_estimators=177, subsample=0.8, max_depth=3, gamma=0)
    clf.fit(df_X_train.as_matrix(), df_y_train.values)
    y_pred = clf.predict(df_X_test.as_matrix())
    # Print Metrics
    printEvalutationMetrics(df_y_test,y_pred)
    df['model'] = 'GBT'
    df['accuracy'] = metrics.accuracy_score(df_y_test.values, y_pred) #metrics.accuracy_score(df_y_test.values, y_pred)
    df['f1_score'] = metrics.f1_score(df_y_test.values, y_pred, average='macro') 
    print('********* CONFUSION MATRIX *******************')
    print(confusion_matrix(df_y_test.values, y_pred))
    
    print("********************************************************************************")

In [22]:
# iterate over city name
for CITY_NAME in CITIES:

    BASE_DIR_CITY =  os.path.join(BASE_DIR_DATA, CITY_NAME)    
    BASE_DIR_TESSELLATION = os.path.join(BASE_DIR_CITY, "tessellation")
    
    #  iterate over grid size
    for INPUT_TESSELLATION in os.listdir(BASE_DIR_TESSELLATION):
        
        INPUT_DIR = os.path.join(BASE_DIR_TESSELLATION,INPUT_TESSELLATION)
        SIZE = INPUT_DIR.split('-')[-1].split('.')[0]
        
        # merge FS + UA
        df_ua_fs = mergeUA_POI(CITY_NAME, SIZE)
            
        # divide Train/Test 
        df_train,df_test =split_train_test(df_ua_fs, CITY_NAME, SIZE)

        # run experiment
        runExperiment(df_train, df_test)

/home/public/geotk/data/barcelona/train/barcelona_fs_250.csv /home/public/geotk/data/barcelona/test/barcelona_fs_250.csv
/home/public/geotk/data/barcelona/train/barcelona_fs_250_scaled_.csv /home/public/geotk/data/barcelona/test/barcelona_fs_250_scaled_.csv
*****************************barcelona  250*********************************
****** BASELINE ******
             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00         2
    Forests       0.00      0.00      0.00        11
Green_Urban       0.00      0.00      0.00        23
         HD       0.54      1.00      0.70       216
 Industrial       0.00      0.00      0.00        72
         LD       0.00      0.00      0.00        12
         MD       0.00      0.00      0.00        13
     Sports       0.00      0.00      0.00        14
  Transport       0.00      0.00      0.00        36

avg / total       0.29      0.54      0.38       399

ACCURACY: 0.5413533834586466
F1 SCORE: 0.078048780487804

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00         2
    Forests       0.23      0.27      0.25        11
Green_Urban       0.20      0.09      0.12        23
         HD       0.75      0.92      0.83       216
 Industrial       0.42      0.47      0.44        72
         LD       0.00      0.00      0.00        12
         MD       0.00      0.00      0.00        13
     Sports       0.29      0.14      0.19        14
  Transport       0.45      0.25      0.32        36

avg / total       0.55      0.62      0.58       399

ACCURACY: 0.6215538847117794
F1 SCORE: 0.23917348084014753
********* CONFUSION MATRIX *******************
[[  0   1   0   0   1   0   0   0   0]
 [  0   3   1   1   4   0   0   0   2]
 [  0   1   2  11   5   0   0   3   1]
 [  0   0   3 198  14   0   0   1   0]
 [  0   3   1  27  34   1   1   1   4]
 [  0   2   1   2   5   0   0   0   2]
 [  0   1   0   6   4   0   0   0   2]
 [  0   0   2   6   3   1   0   2  

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


*****************************barcelona  50*********************************
****** BASELINE ******
             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        13
    Forests       0.00      0.00      0.00         9
Green_Urban       0.00      0.00      0.00        91
         HD       0.68      1.00      0.81      2640
 Industrial       0.00      0.00      0.00       421
         LD       0.00      0.00      0.00        38
         MD       0.00      0.00      0.00        75
     Sports       0.00      0.00      0.00       107
  Transport       0.00      0.00      0.00       467

avg / total       0.47      0.68      0.56      3861

ACCURACY: 0.6837606837606838
F1 SCORE: 0.09024252679075014
**********************
****** XGBOOST ******


  'precision', 'predicted', average, warn_for)
  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        13
    Forests       0.00      0.00      0.00         9
Green_Urban       0.00      0.00      0.00        91
         HD       0.69      1.00      0.81      2640
 Industrial       0.71      0.04      0.08       421
         LD       0.00      0.00      0.00        38
         MD       0.00      0.00      0.00        75
     Sports       0.00      0.00      0.00       107
  Transport       0.00      0.00      0.00       467

avg / total       0.55      0.69      0.56      3861

ACCURACY: 0.6868686868686869
F1 SCORE: 0.09889475426058633
********* CONFUSION MATRIX *******************
[[   0    0    0   13    0    0    0    0    0]
 [   0    0    0    9    0    0    0    0    0]
 [   0    0    0   91    0    0    0    0    0]
 [   0    0    0 2635    5    0    0    0    0]
 [   0    0    0  404   17    0    0    0    0]
 [   0    0    0   38    0    0    0    0    0]
 [   0    0    0   7

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


*****************************barcelona  100*********************************
****** BASELINE ******
             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        10
    Forests       0.00      0.00      0.00         9
Green_Urban       0.00      0.00      0.00        53
         HD       0.63      1.00      0.77      1082
 Industrial       0.00      0.00      0.00       261
         LD       0.00      0.00      0.00        16
         MD       0.00      0.00      0.00        52
     Sports       0.00      0.00      0.00        64
  Transport       0.00      0.00      0.00       170
      Water       0.00      0.00      0.00         2

avg / total       0.40      0.63      0.49      1719

ACCURACY: 0.629435718440954
F1 SCORE: 0.07725812209925027
**********************
****** XGBOOST ******


  'precision', 'predicted', average, warn_for)
  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        10
    Forests       0.00      0.00      0.00         9
Green_Urban       0.00      0.00      0.00        53
         HD       0.65      0.99      0.78      1082
 Industrial       0.59      0.12      0.20       261
         LD       0.00      0.00      0.00        16
         MD       0.00      0.00      0.00        52
     Sports       0.25      0.02      0.03        64
  Transport       0.40      0.05      0.08       170
      Water       0.00      0.00      0.00         2

avg / total       0.55      0.65      0.53      1719

ACCURACY: 0.6451425247236765
F1 SCORE: 0.1101225832866859
********* CONFUSION MATRIX *******************
[[   0    0    0   10    0    0    0    0    0    0]
 [   0    0    0    9    0    0    0    0    0    0]
 [   0    0    0   49    2    0    0    0    2    0]
 [   0    0    0 1068   11    0    0    0    3    0]
 [   0    0    0  224   32    0    0    2   

  'precision', 'predicted', average, warn_for)
  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        12
    Forests       0.00      0.00      0.00        10
Green_Urban       0.18      0.26      0.21        27
         HD       0.77      0.91      0.83       322
 Industrial       0.37      0.45      0.40       101
         LD       0.00      0.00      0.00        23
         MD       0.00      0.00      0.00        19
     Sports       0.31      0.15      0.20        27
  Transport       0.57      0.42      0.48        50

avg / total       0.55      0.62      0.58       591

ACCURACY: 0.6243654822335025
F1 SCORE: 0.23670845651843195
********* CONFUSION MATRIX *******************
[[  0   0   6   0   4   0   0   1   1]
 [  0   0   4   2   1   0   0   1   2]
 [  0   0   7   3  13   0   0   3   1]
 [  0   0   3 292  21   0   0   1   5]
 [  0   0   6  42  45   0   0   3   5]
 [  0   0   4   6  12   0   0   0   1]
 [  0   0   2   7  10   0   0   0   0]
 [  0   0   4  11   7   0   0   4  

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


*****************************london  250*********************************
****** BASELINE ******
             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        78
    Forests       0.00      0.00      0.00        16
Green_Urban       0.00      0.00      0.00       270
         HD       0.52      1.00      0.69      2274
 Industrial       0.00      0.00      0.00       757
         LD       0.00      0.00      0.00        32
         MD       0.00      0.00      0.00       475
     Sports       0.00      0.00      0.00       206
  Transport       0.00      0.00      0.00       177
      Water       0.00      0.00      0.00        62

avg / total       0.27      0.52      0.36      4347

ACCURACY: 0.52311939268461
F1 SCORE: 0.06869053013140008
**********************
****** XGBOOST ******


  'precision', 'predicted', average, warn_for)
  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        78
    Forests       0.00      0.00      0.00        16
Green_Urban       0.23      0.06      0.09       270
         HD       0.53      0.98      0.69      2274
 Industrial       0.49      0.04      0.08       757
         LD       0.00      0.00      0.00        32
         MD       0.00      0.00      0.00       475
     Sports       0.00      0.00      0.00       206
  Transport       0.79      0.11      0.19       177
      Water       0.00      0.00      0.00        62

avg / total       0.41      0.53      0.39      4347

ACCURACY: 0.5270301357257879
F1 SCORE: 0.10538558419941349
********* CONFUSION MATRIX *******************
[[   0    0    0   78    0    0    0    0    0    0]
 [   0    0    1   15    0    0    0    0    0    0]
 [   0    0   16  250    4    0    0    0    0    0]
 [   0    0   20 2222   27    0    0    1    4    0]
 [   0    0    3  719   34    0    0    0  

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


*****************************london  100*********************************
****** BASELINE ******
             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        51
    Forests       0.00      0.00      0.00        14
Green_Urban       0.00      0.00      0.00       485
         HD       0.52      1.00      0.68      4663
 Industrial       0.00      0.00      0.00      2108
         LD       0.00      0.00      0.00        55
         MD       0.00      0.00      0.00       680
     Sports       0.00      0.00      0.00       278
  Transport       0.00      0.00      0.00       548
      Water       0.00      0.00      0.00        96

avg / total       0.27      0.52      0.36      8978

ACCURACY: 0.519380708398307
F1 SCORE: 0.06836742174327395
**********************
****** XGBOOST ******


  'precision', 'predicted', average, warn_for)
  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        51
    Forests       0.00      0.00      0.00        14
Green_Urban       0.00      0.00      0.00       485
         HD       0.52      0.99      0.69      4663
 Industrial       0.60      0.03      0.06      2108
         LD       0.00      0.00      0.00        55
         MD       0.00      0.00      0.00       680
     Sports       0.00      0.00      0.00       278
  Transport       0.64      0.09      0.16       548
      Water       0.00      0.00      0.00        96

avg / total       0.45      0.53      0.38      8978

ACCURACY: 0.5268433949654712
F1 SCORE: 0.0901200919934249
********* CONFUSION MATRIX *******************
[[   0    0    0   51    0    0    0    0    0    0]
 [   0    0    0   14    0    0    0    0    0    0]
 [   0    0    0  478    7    0    0    0    0    0]
 [   0    0    0 4619   28    0    0    0   16    0]
 [   0    0    0 2038   61    0    0    0   

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


*****************************london  50*********************************
****** BASELINE ******
             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00       463
   Airports       0.00      0.00      0.00       206
    Forests       0.00      0.00      0.00       148
Green_Urban       0.00      0.00      0.00       821
         HD       0.59      1.00      0.74      6100
         LD       0.00      0.00      0.00       227
         MD       0.00      0.00      0.00      1492
     Sports       0.00      0.00      0.00       689
      Water       0.00      0.00      0.00       256

avg / total       0.34      0.59      0.43     10402

ACCURACY: 0.5864256873678139
F1 SCORE: 0.08214492519425254
**********************
****** XGBOOST ******


  'precision', 'predicted', average, warn_for)
  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00       463
   Airports       0.62      0.07      0.13       206
    Forests       0.00      0.00      0.00       148
Green_Urban       0.00      0.00      0.00       821
         HD       0.59      1.00      0.74      6100
         LD       0.00      0.00      0.00       227
         MD       0.00      0.00      0.00      1492
     Sports       0.00      0.00      0.00       689
      Water       0.00      0.00      0.00       256

avg / total       0.36      0.59      0.44     10402

ACCURACY: 0.5870986348779081
F1 SCORE: 0.09665940680222324
********* CONFUSION MATRIX *******************
[[   0    1    0    0  462    0    0    0    0]
 [   0   15    0    0  191    0    0    0    0]
 [   0    0    0    0  148    0    0    0    0]
 [   0    0    0    0  821    0    0    0    0]
 [   0    7    0    0 6092    0    1    0    0]
 [   0    0    0    0  227    0    0    0    0]
 [   0    1    0    

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


*****************************london  200*********************************
****** BASELINE ******
             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        61
    Forests       0.00      0.00      0.00        13
Green_Urban       0.00      0.00      0.00       349
         HD       0.52      1.00      0.69      2783
 Industrial       0.00      0.00      0.00      1000
         LD       0.00      0.00      0.00        44
         MD       0.00      0.00      0.00       565
     Sports       0.00      0.00      0.00       228
  Transport       0.00      0.00      0.00       213
      Water       0.00      0.00      0.00        65

avg / total       0.27      0.52      0.36      5321

ACCURACY: 0.5230219883480549
F1 SCORE: 0.06868213228035538
**********************
****** XGBOOST ******


  'precision', 'predicted', average, warn_for)
  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        61
    Forests       0.00      0.00      0.00        13
Green_Urban       0.33      0.00      0.01       349
         HD       0.53      0.99      0.69      2783
 Industrial       0.44      0.03      0.06      1000
         LD       0.00      0.00      0.00        44
         MD       0.00      0.00      0.00       565
     Sports       0.67      0.01      0.02       228
  Transport       0.83      0.11      0.20       213
      Water       0.00      0.00      0.00        65

avg / total       0.44      0.53      0.38      5321

ACCURACY: 0.5282841571133245
F1 SCORE: 0.09726990288985154
********* CONFUSION MATRIX *******************
[[   0    0    0   61    0    0    0    0    0    0]
 [   0    0    0   13    0    0    0    0    0    0]
 [   0    0    1  341    6    0    0    0    1    0]
 [   0    0    1 2750   30    0    0    0    2    0]
 [   0    0    0  964   34    0    0    1  

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


*****************************paris  100*********************************
****** BASELINE ******
             precision    recall  f1-score   support

Green_Urban       0.00      0.00      0.00       156
         HD       0.70      1.00      0.83      2098
 Industrial       0.00      0.00      0.00       384
     Sports       0.00      0.00      0.00        57
  Transport       0.00      0.00      0.00       252
      Water       0.00      0.00      0.00        38

avg / total       0.49      0.70      0.58      2985

ACCURACY: 0.7028475711892798
F1 SCORE: 0.13758279231425014
**********************
****** XGBOOST ******


  'precision', 'predicted', average, warn_for)
  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


             precision    recall  f1-score   support

Green_Urban       0.35      0.25      0.29       156
         HD       0.72      0.98      0.83      2098
 Industrial       0.54      0.04      0.07       384
     Sports       0.00      0.00      0.00        57
  Transport       0.00      0.00      0.00       252
      Water       0.00      0.00      0.00        38

avg / total       0.60      0.71      0.61      2985

ACCURACY: 0.7072026800670017
F1 SCORE: 0.19846329667505
********* CONFUSION MATRIX *******************
[[  39  117    0    0    0    0]
 [  29 2058   11    0    0    0]
 [  15  355   14    0    0    0]
 [  15   42    0    0    0    0]
 [  12  239    1    0    0    0]
 [   3   35    0    0    0    0]]
********************************************************************************
/home/public/geotk/data/paris/train/paris_fs_250.csv /home/public/geotk/data/paris/test/paris_fs_250.csv
/home/public/geotk/data/paris/train/paris_fs_250_scaled_.csv /home/public/geotk/data/

  'precision', 'predicted', average, warn_for)
  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

Green_Urban       0.37      0.54      0.44        56
         HD       0.78      0.96      0.86       450
 Industrial       0.36      0.19      0.25        91
     Sports       0.00      0.00      0.00        29
  Transport       0.25      0.02      0.04        45
      Water       0.67      0.10      0.17        20

avg / total       0.62      0.70      0.63       691

ACCURACY: 0.6960926193921853
F1 SCORE: 0.2926535388168545
********* CONFUSION MATRIX *******************
[[ 30  18   8   0   0   0]
 [  9 431  10   0   0   0]
 [ 12  59  17   0   2   1]
 [ 15   8   6   0   0   0]
 [  7  33   4   0   1   0]
 [  8   7   2   0   1   2]]
********************************************************************************
/home/public/geotk/data/paris/train/paris_fs_50.csv /home/public/geotk/data/paris/test/paris_fs_50.csv
/home/public/geotk/data/paris/train/paris_fs_50_scaled_.csv /home/public/geotk/data/paris/test/paris_fs_50_scaled_.csv


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


*****************************paris  50*********************************
****** BASELINE ******
             precision    recall  f1-score   support

Green_Urban       0.00      0.00      0.00       265
         HD       0.74      1.00      0.85      4858
 Industrial       0.00      0.00      0.00       678
     Sports       0.00      0.00      0.00        78
  Transport       0.00      0.00      0.00       662
      Water       0.00      0.00      0.00        63

avg / total       0.54      0.74      0.62      6604

ACCURACY: 0.7356147789218656
F1 SCORE: 0.14127842726691095
**********************
****** XGBOOST ******


  'precision', 'predicted', average, warn_for)
  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt


             precision    recall  f1-score   support

Green_Urban       0.00      0.00      0.00       265
         HD       0.74      1.00      0.85      4858
 Industrial       0.33      0.00      0.00       678
     Sports       0.00      0.00      0.00        78
  Transport       0.00      0.00      0.00       662
      Water       0.00      0.00      0.00        63

avg / total       0.58      0.74      0.62      6604

ACCURACY: 0.7353119321623258
F1 SCORE: 0.141729948710509
********* CONFUSION MATRIX *******************
[[   0  265    0    0    0    0]
 [   0 4855    2    0    1    0]
 [   0  677    1    0    0    0]
 [   0   78    0    0    0    0]
 [   0  662    0    0    0    0]
 [   0   63    0    0    0    0]]
********************************************************************************
/home/public/geotk/data/paris/train/paris_fs_200.csv /home/public/geotk/data/paris/test/paris_fs_200.csv
/home/public/geotk/data/paris/train/paris_fs_200_scaled_.csv /home/public/geotk/data

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
  'precision', 'predicted', average, warn_for)


*****************************paris  200*********************************
****** BASELINE ******
             precision    recall  f1-score   support

Green_Urban       0.00      0.00      0.00        70
         HD       0.67      1.00      0.80       682
 Industrial       0.00      0.00      0.00       137
     Sports       0.00      0.00      0.00        28
  Transport       0.00      0.00      0.00        72
      Water       0.00      0.00      0.00        24

avg / total       0.45      0.67      0.54      1013

ACCURACY: 0.6732477788746298
F1 SCORE: 0.13411996066863324
**********************
****** XGBOOST ******


  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


             precision    recall  f1-score   support

Green_Urban       0.34      0.27      0.30        70
         HD       0.74      0.96      0.84       682
 Industrial       0.30      0.14      0.19       137
     Sports       0.00      0.00      0.00        28
  Transport       0.50      0.04      0.08        72
      Water       0.50      0.04      0.08        24

avg / total       0.61      0.69      0.62      1013

ACCURACY: 0.6910167818361304
F1 SCORE: 0.24745319495319493
********* CONFUSION MATRIX *******************
[[ 19  39  12   0   0   0]
 [  7 658  16   0   0   1]
 [ 11 105  19   0   2   0]
 [  7  16   5   0   0   0]
 [  7  53   9   0   3   0]
 [  5  15   2   0   1   1]]
********************************************************************************
/home/public/geotk/data/milan/train/milan_fs_250.csv /home/public/geotk/data/milan/test/milan_fs_250.csv
/home/public/geotk/data/milan/train/milan_fs_250_scaled_.csv /home/public/geotk/data/milan/test/milan_fs_250_scaled_.c

  'precision', 'predicted', average, warn_for)
  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        37
Green_Urban       0.11      0.09      0.10        44
         HD       0.60      0.91      0.72       422
 Industrial       0.34      0.18      0.24       144
         LD       0.00      0.00      0.00        15
         MD       0.00      0.00      0.00        27
     Sports       0.40      0.06      0.11        31
  Transport       0.54      0.14      0.22        50

avg / total       0.45      0.55      0.46       770

ACCURACY: 0.548051948051948
F1 SCORE: 0.17389673455021715
********* CONFUSION MATRIX *******************
[[  0   7  25   5   0   0   0   0]
 [  0   4  31   7   0   0   2   0]
 [  0   9 383  27   0   0   1   2]
 [  0   4 112  26   0   0   0   2]
 [  0   1  13   0   0   0   0   1]
 [  0   4  19   4   0   0   0   0]
 [  0   6  20   2   0   0   2   1]
 [  0   0  37   6   0   0   0   7]]
********************************************************************************


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


*****************************milan  50*********************************
****** BASELINE ******
             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        16
    Forests       0.00      0.00      0.00         1
Green_Urban       0.00      0.00      0.00       162
         HD       0.62      1.00      0.76      2940
 Industrial       0.00      0.00      0.00       714
         LD       0.00      0.00      0.00        24
         MD       0.00      0.00      0.00        80
     Sports       0.00      0.00      0.00        58
  Transport       0.00      0.00      0.00       750
      Water       0.00      0.00      0.00        15

avg / total       0.38      0.62      0.47      4760

ACCURACY: 0.6176470588235294
F1 SCORE: 0.07636363636363637
**********************
****** XGBOOST ******


  'precision', 'predicted', average, warn_for)
  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        16
    Forests       0.00      0.00      0.00         1
Green_Urban       0.00      0.00      0.00       162
         HD       0.62      0.99      0.77      2940
 Industrial       0.58      0.04      0.08       714
         LD       0.00      0.00      0.00        24
         MD       0.00      0.00      0.00        80
     Sports       0.00      0.00      0.00        58
  Transport       0.57      0.03      0.07       750
      Water       0.00      0.00      0.00        15

avg / total       0.56      0.62      0.49      4760

ACCURACY: 0.6224789915966387
F1 SCORE: 0.0908655614889067
********* CONFUSION MATRIX *******************
[[   0    0    0   16    0    0    0    0    0    0]
 [   0    0    0    1    0    0    0    0    0    0]
 [   0    0    0  162    0    0    0    0    0    0]
 [   0    0    0 2907   14    0    0    0   17    2]
 [   0    0    0  681   30    0    0    0   

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


*****************************milan  100*********************************
****** BASELINE ******
             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        28
Green_Urban       0.00      0.00      0.00       105
         HD       0.63      1.00      0.77      1596
 Industrial       0.00      0.00      0.00       411
         LD       0.00      0.00      0.00        17
         MD       0.00      0.00      0.00        70
     Sports       0.00      0.00      0.00        61
  Transport       0.00      0.00      0.00       257
      Water       0.00      0.00      0.00         2

avg / total       0.39      0.63      0.48      2547

ACCURACY: 0.6266195524146054
F1 SCORE: 0.0856062434628691
**********************
****** XGBOOST ******


  'precision', 'predicted', average, warn_for)
  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        28
Green_Urban       0.00      0.00      0.00       105
         HD       0.63      0.99      0.77      1596
 Industrial       0.51      0.04      0.08       411
         LD       0.00      0.00      0.00        17
         MD       0.00      0.00      0.00        70
     Sports       0.00      0.00      0.00        61
  Transport       0.57      0.03      0.06       257
      Water       0.00      0.00      0.00         2

avg / total       0.54      0.63      0.50      2547

ACCURACY: 0.6305457400863761
F1 SCORE: 0.10131199115415791
********* CONFUSION MATRIX *******************
[[   0    0   28    0    0    0    0    0    0]
 [   0    0  104    1    0    0    0    0    0]
 [   0    0 1580   14    0    0    0    2    0]
 [   0    0  388   18    0    0    1    4    0]
 [   0    0   17    0    0    0    0    0    0]
 [   0    0   70    0    0    0    0    0    0]
 [   0    0   59    

  'precision', 'predicted', average, warn_for)
  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        30
Green_Urban       0.25      0.02      0.03        58
         HD       0.58      0.97      0.72       596
 Industrial       0.43      0.09      0.15       199
         LD       0.00      0.00      0.00        11
         MD       0.00      0.00      0.00        43
     Sports       0.00      0.00      0.00        39
  Transport       0.25      0.02      0.04        81
      Water       0.00      0.00      0.00         1

avg / total       0.44      0.57      0.44      1058

ACCURACY: 0.5652173913043478
F1 SCORE: 0.10536450458514687
********* CONFUSION MATRIX *******************
[[  0   0  28   2   0   0   0   0   0]
 [  0   1  56   1   0   0   0   0   0]
 [  0   2 577  12   0   0   0   5   0]
 [  0   0 179  18   0   0   1   1   0]
 [  0   0  11   0   0   0   0   0   0]
 [  0   0  40   3   0   0   0   0   0]
 [  0   1  37   1   0   0   0   0   0]
 [  0   0  74   5   0   0   0   2  

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


*****************************amsterdam  100*********************************
****** BASELINE ******
             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        31
    Forests       0.00      0.00      0.00         4
Green_Urban       0.00      0.00      0.00       126
         HD       0.46      1.00      0.63      1113
 Industrial       0.00      0.00      0.00       490
         LD       0.00      0.00      0.00        21
         MD       0.00      0.00      0.00        59
     Sports       0.00      0.00      0.00        80
  Transport       0.00      0.00      0.00       322
      Water       0.00      0.00      0.00       194

avg / total       0.21      0.46      0.29      2440

ACCURACY: 0.45614754098360655
F1 SCORE: 0.06265128060793695
**********************
****** XGBOOST ******


  'precision', 'predicted', average, warn_for)
  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        31
    Forests       0.00      0.00      0.00         4
Green_Urban       0.00      0.00      0.00       126
         HD       0.46      0.97      0.63      1113
 Industrial       0.51      0.07      0.12       490
         LD       0.00      0.00      0.00        21
         MD       0.00      0.00      0.00        59
     Sports       0.22      0.05      0.08        80
  Transport       0.58      0.03      0.06       322
      Water       0.20      0.01      0.01       194

avg / total       0.41      0.46      0.32      2440

ACCURACY: 0.4627049180327869
F1 SCORE: 0.09048785988835573
********* CONFUSION MATRIX *******************
[[   0    0    0   31    0    0    0    0    0    0]
 [   0    0    0    4    0    0    0    0    0    0]
 [   0    0    0  121    1    0    0    3    1    0]
 [   0    0    0 1079   21    0    0    8    2    3]
 [   0    0    0  453   34    0    0    1  

  'precision', 'predicted', average, warn_for)
  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        28
    Forests       0.00      0.00      0.00         4
Green_Urban       0.00      0.00      0.00        74
         HD       0.47      0.91      0.62       488
 Industrial       0.42      0.27      0.33       195
         LD       0.00      0.00      0.00         9
         MD       0.00      0.00      0.00        41
     Sports       0.34      0.18      0.23        57
  Transport       0.47      0.06      0.11       146
      Water       0.00      0.00      0.00        84

avg / total       0.36      0.46      0.35      1126

ACCURACY: 0.46003552397868563
F1 SCORE: 0.129331193714963
********* CONFUSION MATRIX *******************
[[  0   0   0  27   0   0   0   1   0   0]
 [  0   0   0   3   0   0   0   0   1   0]
 [  0   0   0  63   3   0   0   6   2   0]
 [  0   0   1 446  35   0   0   3   3   0]
 [  0   0   0 139  53   0   0   1   1   1]
 [  0   0   0   8   1   0   0   0   0   0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


*****************************amsterdam  50*********************************
****** BASELINE ******
             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        27
    Forests       0.00      0.00      0.00         3
Green_Urban       0.00      0.00      0.00       152
         HD       0.48      1.00      0.64      1953
 Industrial       0.00      0.00      0.00       812
         LD       0.00      0.00      0.00        24
         MD       0.00      0.00      0.00        72
     Sports       0.00      0.00      0.00        97
  Transport       0.00      0.00      0.00       674
      Water       0.00      0.00      0.00       297

avg / total       0.23      0.48      0.31      4111

ACCURACY: 0.47506689369982974
F1 SCORE: 0.06441292875989446
**********************
****** XGBOOST ******


  'precision', 'predicted', average, warn_for)
  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        27
    Forests       0.00      0.00      0.00         3
Green_Urban       0.00      0.00      0.00       152
         HD       0.48      0.99      0.65      1953
 Industrial       0.59      0.05      0.09       812
         LD       0.00      0.00      0.00        24
         MD       0.00      0.00      0.00        72
     Sports       0.00      0.00      0.00        97
  Transport       0.47      0.02      0.05       674
      Water       0.00      0.00      0.00       297

avg / total       0.42      0.48      0.33      4111

ACCURACY: 0.4813913889564583
F1 SCORE: 0.07794570085702472
********* CONFUSION MATRIX *******************
[[   0    0    0   27    0    0    0    0    0    0]
 [   0    0    0    3    0    0    0    0    0    0]
 [   0    0    0  149    2    0    0    0    1    0]
 [   0    0    0 1924   16    0    0    0   13    0]
 [   0    0    0  772   39    0    0    0  

  'precision', 'predicted', average, warn_for)
  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00        37
    Forests       0.00      0.00      0.00         4
Green_Urban       0.00      0.00      0.00        63
         HD       0.45      0.90      0.60       352
 Industrial       0.49      0.35      0.41       147
         LD       0.00      0.00      0.00         4
         MD       0.00      0.00      0.00        27
     Sports       0.23      0.11      0.15        44
  Transport       0.32      0.07      0.11       103
      Water       0.50      0.01      0.02        79

avg / total       0.36      0.45      0.34       860

ACCURACY: 0.44534883720930235
F1 SCORE: 0.1300976636044493
********* CONFUSION MATRIX *******************
[[  0   0   0  31   4   0   0   2   0   0]
 [  0   0   0   2   0   0   0   1   1   0]
 [  0   0   0  54   5   0   0   3   0   1]
 [  0   0   1 318  23   0   0   2   8   0]
 [  0   0   0  90  52   0   0   3   2   0]
 [  0   0   0   4   0   0   0   0   0   

  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00         5
    Forests       0.00      0.00      0.00         8
Green_Urban       0.50      0.06      0.10        18
         HD       0.59      0.92      0.72       165
 Industrial       0.47      0.31      0.37        75
         LD       0.00      0.00      0.00         4
         MD       0.00      0.00      0.00         9
     Sports       0.00      0.00      0.00        12
  Transport       0.61      0.37      0.46        38

avg / total       0.49      0.57      0.50       334

ACCURACY: 0.5688622754491018
F1 SCORE: 0.1829961408281754
********* CONFUSION MATRIX *******************
[[  0   0   0   4   1   0   0   0   0]
 [  0   0   1   6   1   0   0   0   0]
 [  0   0   1  12   3   0   0   0   2]
 [  0   0   0 152  10   0   0   0   3]
 [  0   0   0  48  23   0   0   1   3]
 [  0   0   0   2   2   0   0   0   0]
 [  0   0   0   6   2   0   0   0   1]
 [  0   0   0  10   2   0   0   0   

  'precision', 'predicted', average, warn_for)
  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00         6
    Forests       0.00      0.00      0.00         3
Green_Urban       0.00      0.00      0.00        51
         HD       0.58      0.95      0.72       613
 Industrial       0.52      0.15      0.23       234
         LD       0.00      0.00      0.00         8
         MD       0.00      0.00      0.00        23
     Sports       0.00      0.00      0.00        28
  Transport       0.56      0.07      0.13       127
      Water       0.00      0.00      0.00         1

avg / total       0.50      0.57      0.47      1094

ACCURACY: 0.5722120658135283
F1 SCORE: 0.10703960460097153
********* CONFUSION MATRIX *******************
[[  0   0   0   6   0   0   0   0   0   0]
 [  0   0   0   3   0   0   0   0   0   0]
 [  0   0   0  50   1   0   0   0   0   0]
 [  0   0   0 583  26   0   0   0   4   0]
 [  0   0   0 197  34   0   0   0   3   0]
 [  0   0   0   8   0   0   0   0   0   

  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00         7
    Forests       0.00      0.00      0.00         6
Green_Urban       0.00      0.00      0.00        22
         HD       0.53      0.90      0.67       227
 Industrial       0.52      0.28      0.36       118
         LD       0.00      0.00      0.00         3
         MD       0.00      0.00      0.00         8
     Sports       0.00      0.00      0.00        18
  Transport       0.61      0.21      0.31        53
      Water       0.00      0.00      0.00         2

avg / total       0.46      0.53      0.46       464

ACCURACY: 0.5344827586206896
F1 SCORE: 0.13433524979238715
********* CONFUSION MATRIX *******************
[[  0   0   0   7   0   0   0   0   0   0]
 [  0   0   0   6   0   0   0   0   0   0]
 [  0   0   0  19   3   0   0   0   0   0]
 [  0   0   0 204  19   0   0   0   4   0]
 [  0   0   0  82  33   0   0   0   3   0]
 [  0   0   0   2   1   0   0   0   0   

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import matplotlib.pyplot as plt
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


*****************************lisbon  50*********************************
****** BASELINE ******
             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00         6
    Forests       0.00      0.00      0.00         4
Green_Urban       0.00      0.00      0.00        83
         HD       0.56      1.00      0.72      1108
 Industrial       0.00      0.00      0.00       367
         LD       0.00      0.00      0.00        10
         MD       0.00      0.00      0.00        48
     Sports       0.00      0.00      0.00        53
  Transport       0.00      0.00      0.00       302
      Water       0.00      0.00      0.00         1

avg / total       0.31      0.56      0.40      1982

ACCURACY: 0.5590312815338042
F1 SCORE: 0.07171521035598705
**********************
****** XGBOOST ******


  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

       Agri       0.00      0.00      0.00         6
    Forests       0.00      0.00      0.00         4
Green_Urban       0.00      0.00      0.00        83
         HD       0.57      0.96      0.72      1108
 Industrial       0.53      0.14      0.22       367
         LD       0.00      0.00      0.00        10
         MD       0.00      0.00      0.00        48
     Sports       0.00      0.00      0.00        53
  Transport       0.42      0.04      0.07       302
      Water       0.00      0.00      0.00         1

avg / total       0.48      0.57      0.45      1982

ACCURACY: 0.5696266397578204
F1 SCORE: 0.10063782419594325
********* CONFUSION MATRIX *******************
[[   0    0    0    6    0    0    0    0    0    0]
 [   0    0    0    4    0    0    0    0    0    0]
 [   0    0    0   80    3    0    0    0    0    0]
 [   0    0    0 1067   31    0    0    0   10    0]
 [   0    0    0  312   51    0    0    0  

  if diff:
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
