# EU crop map - Benchmarking on the time period for the mask classes
## 1) Split by polygons - accuracy per pixel and per polygon
## 2) Split by pixels - accuracy per pixel
### Set up

In [2]:
# ---------------------------------------------------------------------------
# Storage locations on the JEODPP platform
# ---------------------------------------------------------------------------
# Project folder holding the lookup tables read by this notebook ('table/...')
project_path = '/eos/jeodpp/data/projects/REFOCUS/classification/'
# Same location as project_path, kept under its own name
results = '/eos/jeodpp/data/projects/REFOCUS/classification/'

# Sentinel-1 10-day pixel extractions (Map v7)
data_path = '/eos/jeodpp/data/projects/REFOCUS/data/S1_GS/all-10days/Map_v7/'
# Attribute table of the LUCAS polygons (v7)
path_pol = '/eos/jeodpp/data/projects/REFOCUS/data/polygons/v7'

# User folder; the benchmark outputs are written below <local>/result/
local = '/eos/jeodpp/home/users/verheas/data/LUCAS/v7/'

# Working directory
pwd = project_path

# !pip install matplotlib --user
# !pip install sklearn --user
#import 
import pandas as pd
from pandas import Series,DataFrame
import csv
import numpy as np
import time
import sklearn
import scipy
import matplotlib.pyplot as plt
import os
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

import glob
import os

### Load the data

In [3]:
## Load the data
# 1) Sentinel-1 10-day values extracted in GEE for all polygons.
#    level_1 / level_2 hold the class codes and are forced to int at read time.
pd_lucas = pd.read_csv(
    os.path.join(data_path,'S1_point_allV7_10days_10m_1Jan-31Dec_EU_ratio-db.csv'),
    dtype={'level_1':int,'level_2':int},
)
print('pd_lucas',pd_lucas.shape)

# Number of rows per class, for both legend levels
print(pd_lucas.level_1.value_counts())
print(pd_lucas.level_2.value_counts())

# 2) Attribute table of the LUCAS polygons. The train/test split of
#    experiment A is drawn on this table so that all rows sharing a POINT_ID
#    end up on the same side of the split.
lucas_polygons = pd.read_csv(os.path.join(path_pol,'LUCAS_2018_Copernicus_attributes_cropmap_level1-2.csv'))

# Last expression of the cell -> rendered as the cell output
lucas_polygons.head()

pd_lucas (2956889, 116)
300    1216530
200    1000318
500     732964
600       3856
100       3221
Name: level_1, dtype: int64
300    1216530
500     732964
211     290116
213     142886
216     125644
232      64452
290      63609
250      59053
214      35215
231      34577
240      34067
215      34005
212      28825
222      23174
218      19274
221      15815
230      12000
233       7094
223       4920
219       4724
600       3856
100       3221
217        868
Name: level_2, dtype: int64


  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0.1,Unnamed: 0,POINT_ID,NUTS0,NUTS1,NUTS2,NUTS3,TH_LAT,TH_LONG,OFFICE_PI,EX_ANTE,...,LU2_LABEL,LU1_TYPE_LABEL,LU2_TYPE_LABEL,CPRN_LC_LABEL,CPRN_LC_SAME_LC1,LUCAS_CORE_INTERSECT,COPERNICUS_CLEANED,stratum,level_2,level_1
0,0,34562080,ES,ES2,ES24,ES243,41.288386,-0.319428,0,0,...,Not relevant,,,Other bare soil,True,True,True,2,290,200
1,1,31243520,IE,IE0,IE04,IE042,53.422977,-8.226052,0,0,...,Not relevant,,,Spontaneously re-vegetated surfaces,True,True,True,1,500,500
2,2,33661774,ES,ES5,ES52,ES521,38.434388,-0.905705,0,0,...,Not relevant,,,Permanent crops: fruit trees,True,True,True,2,300,300
3,3,28922250,ES,ES1,ES11,ES113,41.867145,-7.320304,0,0,...,Not relevant,,,Shrubland with sparse tree cover,True,True,True,2,300,300
4,4,35082906,FR,FRD,FRD1,FRD12,48.71519,-1.09219,0,0,...,Not relevant,,,Grassland without tree/shrub cover,True,True,True,1,500,500


### Parameters

In [5]:
##################################Parameters##################################################
# Class lists are stored column-wise in the legend table 'legend-lucas-all';
# columns have different lengths, hence the dropna() on every column.
# NOTE(review): this cell used to read 'legend-lucas-all-v7.csv' and then
# immediately overwrite it with 'legend-lucas-all-v2.csv', so only v2 was ever
# used. The effective behaviour (v2) is kept here - confirm which version is intended.
table_class=pd.read_csv(os.path.join(project_path,'table/legend-lucas-all-v2.csv'),dtype=pd.Int64Dtype())

#names of the columns holding the level 1 / level 2 class codes
level_1='level_1'
level_2='level_2'

classes_L1=list(table_class['classes_L1'].dropna())
classes_L2=list(table_class['classes_L2'].dropna())

#remap classes and selection of classes to map
classes_in_L1 = list(table_class['classes_all'].dropna())
#fixed: a trailing comma used to turn this into a 1-tuple wrapping the list
classes_in_L2 = list(table_class['classes_all'].dropna())

#classes affected by biome selection
classes_L1_B= list(table_class['classes_L1_B'].dropna())
# NOTE(review): reads column 'classes_L2', not 'classes_L2_B' - looks like it
# could be a typo, left unchanged because the table schema is not visible here.
classes_L2_B= list(table_class['classes_L2'].dropna())

#Classes non affected by biome selection
#Classes from (A) Artificial, (F) Bare lands and (H) Wetlands can be considered in each models - no biome dependent
classes_L1_NB=list(table_class['classes_L1_NB'].dropna())
classes_L2_NB=[]

#summary of the classes used in the classification: class codes combined with
#the biome, and the plain class each of them maps back to (parallel lists)
classes_classif_L1= list(table_class['L1_BIOME'].dropna())
classes_classif_L1_simplify=list(table_class['L1_B_harmon'].dropna())

classes_classif_L2=list(table_class['L2_BIOME'].dropna())
classes_classif_L2_simplify=list(table_class['L2_B_harmon'].dropna())

###################################Choose parameters for this run #############################################
#classes for the classification and biome/no biome differentiation if needed
classes_B=classes_L1_B
print ('classes_B',classes_B)

classes_NB=classes_L1_NB
print ('classes_NB',classes_NB)

#level
level=level_1
print('level',level)
#classes of the selected level
classes=classes_L1
#fixed: this line used to be labelled 'level' although it prints the classes
print('classes',classes)

#summary of the classes used for the classification
classes_classif=classes_classif_L1
print ('classes_classif',classes_classif)
classes_classif_simplify=classes_classif_L1_simplify
print ('classes_classif_simplify',classes_classif_simplify)

###################################Labels of the classes #############################################
labels_csv = pd.read_csv(os.path.join(project_path,'table/legend-lucas2.csv'))
labels=labels_csv[labels_csv['class'].isin(classes)] # select only the used labels
# NOTE(review): labels_s is built with exactly the same filter as labels
labels_s=labels_csv[labels_csv['class'].isin(classes)] # select only the used labels
print(classes)
print(labels)
print(labels_s)

classes_B [200, 300, 500]
classes_NB [100, 600]
level level_1
level [100, 200, 300, 500, 600]
classes_classif [1000, 2001, 2002, 2003, 2004, 3001, 3002, 3003, 3004, 5001, 5002, 5003, 5004, 6000]
classes_classif_simplify [100, 200, 200, 200, 200, 300, 300, 300, 300, 500, 500, 500, 500, 600]
[100, 200, 300, 500, 600]
   class                          label
0    100                Artificial land
1    200  Cropland without trees/shrubs
2    300                       Woodland
4    600                      Bare land
   class                          label
0    100                Artificial land
1    200  Cropland without trees/shrubs
2    300                       Woodland
4    600                      Bare land


### Prepare the data

In [7]:
##############################################################
#### 2) Prepare the data for the classification ##############
##############################################################

# 2.1 Pixel table: copy the class codes of the selected level into a column
#     'Classif', which is the target used in the rest of the notebook.
pd_lucas['Classif']=pd_lucas[level]
print(pd_lucas.shape)

# Keep only the biome-dependent classes (classes_B). The biome-independent
# classes (classes_NB) are not used in this run.
pd_lucas_biome=pd_lucas[pd_lucas.Classif.isin(classes_B)]
pd_lucas_b=pd_lucas_biome

# 2.2 Polygon table: same target column and same class selection.
#     Uses `level` like the pixel table (it used to be hard-coded to level_1,
#     which would silently disagree with the pixels when `level` is changed).
lucas_polygons['Classif']=lucas_polygons[level]
lucas_polygons_biome=lucas_polygons[lucas_polygons.Classif.isin(classes_B)]

print(lucas_polygons_biome.shape)
print(lucas_polygons_biome.Classif.value_counts())

(2956889, 117)
(57897, 114)
300    22396
200    18376
500    17125
Name: Classif, dtype: int64


### Parameters of the benchmarking on the time period


In [8]:
# One row per benchmark run. Two derived columns are added:
#   name  - identifier of the run (indice name followed by the date range name)
#   regex - column-selection pattern (indice pattern followed by the time pattern)
parameters = pd.read_csv(
    os.path.join(project_path,'table/RF-parameters-table-DATE-VV-VH.csv')
).assign(
    name=lambda tbl: tbl['NAME_indice']+tbl['NAME_date'],
    regex=lambda tbl: tbl['REGEX_indice']+tbl['REGEX_time'],
)
print(parameters)

   NAME_indice    NAME_date                         REGEX_indice  \
0        VV-VH   MONTH[1-1]  (((?<![\w\d])VH_)|((?<![\w\d])VV_))   
1        VV-VH   MONTH[1-2]  (((?<![\w\d])VH_)|((?<![\w\d])VV_))   
2        VV-VH   MONTH[1-3]  (((?<![\w\d])VH_)|((?<![\w\d])VV_))   
3        VV-VH   MONTH[1-4]  (((?<![\w\d])VH_)|((?<![\w\d])VV_))   
4        VV-VH   MONTH[1-5]  (((?<![\w\d])VH_)|((?<![\w\d])VV_))   
5        VV-VH   MONTH[1-6]  (((?<![\w\d])VH_)|((?<![\w\d])VV_))   
6        VV-VH   MONTH[1-7]  (((?<![\w\d])VH_)|((?<![\w\d])VV_))   
7        VV-VH   MONTH[1-8]  (((?<![\w\d])VH_)|((?<![\w\d])VV_))   
8        VV-VH   MONTH[1-9]  (((?<![\w\d])VH_)|((?<![\w\d])VV_))   
9        VV-VH  MONTH[1-10]  (((?<![\w\d])VH_)|((?<![\w\d])VV_))   
10       VV-VH  MONTH[1-11]  (((?<![\w\d])VH_)|((?<![\w\d])VV_))   
11       VV-VH  MONTH[1-12]  (((?<![\w\d])VH_)|((?<![\w\d])VV_))   

                 REGEX_time   TEST  month              name  \
0              (20180[1-1])  dates      1   VV-VHMON

### A) Split on polygons


In [9]:
# Name of the experiment; also the name of its output folder.
manip='DATE-BIOME-STRATIFY-LEVEL1_pol'
# Every output of the experiment is written to <local>/result/<manip>, so that
# is the folder that has to exist (the check used to look at the relative path
# 'result/<manip>' instead). makedirs also creates a missing 'result' parent
# and does not fail when the folder is already there.
os.makedirs(os.path.join(local,'result',manip), exist_ok=True)

In [None]:
# Benchmark on the time period - split on polygons.
# For every row of `parameters` (one feature subset selected by a regex) a
# Random Forest is trained separately for biome/stratum 1 and 2. The test
# predictions of the two biomes are then pooled to compute the overall results.
# Accuracy is reported per pixel and per polygon (majority vote of the pixels
# that share a POINT_ID).

# Seed of the Random Forest; same value as the train/test split so that a
# re-run of the benchmark gives the same numbers.
RF_SEED = 5


def _export_accuracy(y_ref, y_pred, out_dir, prefix, suffix, labels):
    """Compute the overall accuracy and write the three evaluation files.

    Files written in `out_dir`:
      <prefix>_1_1_Accuracy<suffix>.txt              overall accuracy in percent
      <prefix>_3_classification_report<suffix>.csv   sklearn classification report
      <prefix>_4_confusion_matrix_class<suffix>.csv  confusion matrix

    Parameters
    ----------
    y_ref, y_pred : reference and predicted classes, same length and order
    out_dir       : existing output folder
    prefix        : start of the file names (run name + biome or regroup tag)
    suffix        : '' for the pixel results, '_pol' for the polygon results
    labels        : class codes used as rows/columns of the confusion matrix

    Returns
    -------
    The overall accuracy in percent. It is also printed.
    """
    ref_values = np.asarray(y_ref)
    pred_values = np.asarray(y_pred)
    accuracy = 100.0*(ref_values == pred_values).sum()/ref_values.shape[0]
    print('Accuracy is :' + str(round(accuracy,2)))

    with open(os.path.join(out_dir,prefix+'_1_1_Accuracy'+suffix+'.txt'),"w") as accuracy_file:
        accuracy_file.write(str(accuracy)+"\n")

    report = classification_report(y_ref, y_pred, output_dict=True)
    pd.DataFrame(report).transpose().to_csv(
        os.path.join(out_dir,prefix+'_3_classification_report'+suffix+'.csv'))

    # NOTE(review): `labels` is the full class list of the level; classes that
    # are absent from the run show up as empty rows/columns.
    confusion_mat = confusion_matrix(y_ref, y_pred, labels=labels)
    pd.DataFrame(confusion_mat,index=labels,columns=labels).to_csv(
        os.path.join(out_dir,prefix+'_4_confusion_matrix_class'+suffix+'.csv'))
    return accuracy


# All outputs of the experiment go to <local>/result/<manip>
out_dir = os.path.join(local,'result',manip)
os.makedirs(out_dir, exist_ok=True)

for i_test in range(0,len(parameters['name'])):
    run_name = parameters['name'][i_test]
    run_regex = parameters['regex'][i_test]
    print('processing : '+manip+'  ' +run_name)

    # test references / predictions of each biome, pooled after the biome loop
    pixel_ref_parts = []
    pixel_pred_parts = []
    pol_ref_parts = []
    pol_pred_parts = []

    for biome in range(1,3):
        print(biome)
        biome_prefix = run_name+'_'+str(biome)

        # 1/ processing log of this run and biome; the handle is closed by the
        #    `with` block even when a step fails
        log_path = os.path.join(out_dir,biome_prefix+'_remap_1_1_Processing_Log.txt')
        with open(log_path,"w") as log_file:
            log_file.write('Processing summary \n')
            log_file.write("Date and time start: "+ datetime.now().strftime("%d/%m/%Y %H:%M:%S")+"\n")
            log_file.write("Classes : "+ str(classes)+"\n")
            log_file.write("Regex : "+ str(run_regex)+"\n")
            log_file.write("Name : "+ str(run_name)+"\n")

            # polygons of the current biome
            lucas_polygons_b=lucas_polygons_biome[lucas_polygons_biome.stratum.isin([biome])]
            print(lucas_polygons_b.Classif.value_counts())

            X_featuresP=lucas_polygons_b.filter(items=['POINT_ID','Classif'])
            y_classP=lucas_polygons_b['Classif']
            log_file.write("Input DB polygons shape  : "+ str(X_featuresP.shape)+"\n")
            log_file.write("Input DB polygons columns  : "+ str(list(X_featuresP.columns))+"\n")

            # 2/ train/test split drawn on the polygons (stratified by class), so
            #    that the pixels of one polygon never sit on both sides
            X_trainP,X_testP,y_trainP,y_testP  = train_test_split(X_featuresP,y_classP, test_size=0.2,random_state=5,stratify=y_classP)
            log_file.write("X_trainP.shape  : "+ str(X_trainP.shape)+"\n")
            log_file.write("X_testP.shape  : "+ str(X_testP.shape)+"\n")
            log_file.write("y_trainP.shape  : "+ str(y_trainP.shape)+"\n")
            log_file.write("y_testP.shape  : "+ str(y_testP.shape)+"\n")

            # 3/ pixels belonging to the train / test polygons; the features are
            #    the columns matching the regex of the run
            indexPOINItrain=pd_lucas_b['POINT_ID'].isin(X_trainP['POINT_ID'])
            indexPOINItest=pd_lucas_b['POINT_ID'].isin(X_testP['POINT_ID'])

            X_train=pd_lucas_b[indexPOINItrain].filter(regex=run_regex)
            X_test=pd_lucas_b[indexPOINItest].filter(regex=run_regex)

            # polygon id + class of every pixel, needed to aggregate per polygon
            y_train_pol=pd_lucas_b.loc[indexPOINItrain,['POINT_ID','Classif']]
            y_test_pol=pd_lucas_b.loc[indexPOINItest,['POINT_ID','Classif']]
            y_train=y_train_pol['Classif']
            y_test=y_test_pol['Classif']

            log_file.write("Input DB X_train pixels shape  : "+ str(X_train.shape)+"\n")
            log_file.write("Input DB X_train pixels columns  : "+ str(list(X_train.columns))+"\n")

            # 4/ class distribution of the training and testing sets, counted in
            #    polygons (one class per POINT_ID) and in pixels
            x = pd.DataFrame({"count_pol": y_train_pol.groupby('POINT_ID')['Classif'].max().value_counts(),
                              "count_pixel": y_train.value_counts()}).rename_axis('class')
            x.to_csv(os.path.join(out_dir,biome_prefix+'_remap_1_2_Training_class_count_polygons.csv'))
            x = pd.DataFrame({"count_pol": y_test_pol.groupby('POINT_ID')['Classif'].max().value_counts(),
                              "count_pixel": y_test.value_counts()}).rename_axis('class')
            x.to_csv(os.path.join(out_dir,biome_prefix+'_remap_1_3_Testing_class_count_polygons.csv'))

            # 5/ fit the Random Forest
            #    max_features='sqrt' is what the former 'auto' meant for a classifier
            t = time.time()
            clf = RandomForestClassifier(bootstrap=False, criterion='gini', max_depth=None, max_features='sqrt',
                                         min_samples_leaf=12, min_samples_split=3, n_estimators=500, n_jobs=40,
                                         random_state=RF_SEED)
            clf.fit(X_train, y_train)
            training_time=time.time() - t
            log_file.write("Elapsed time for training  : "+ str(round(training_time))+" sec \n")

        # 6/ feature importances as CSV
        x = pd.DataFrame(list(zip(clf.feature_importances_,X_train.columns)),columns=["Importance","Feature_Name"])
        x.to_csv(os.path.join(out_dir,biome_prefix+'_remap_2_Feature_importance.csv') )

        # 7/ pixel accuracy on the test set; classes carrying a biome code are
        #    mapped back to the plain classes before the comparison
        y_test_pred=clf.predict(X_test)
        y_test_s=pd.Series(y_test, dtype='float').replace(classes_classif,classes_classif_simplify)
        y_test_pred_s=pd.Series(y_test_pred, dtype='float').replace(classes_classif,classes_classif_simplify)
        accuracy = _export_accuracy(y_test_s, y_test_pred_s, out_dir,
                                    run_name+'biome'+str(biome), '', classes)

        # 8/ polygon accuracy: majority rule (mode) over the pixels of a polygon
        y_test_results=pd.DataFrame({'POINT_ID':y_test_pol['POINT_ID'],'ref':y_test,'predict':y_test_pred})
        y_test_results=y_test_results.groupby(['POINT_ID'])[['predict','ref']].agg(lambda x: x.mode()[0])
        accuracy_pol = _export_accuracy(y_test_results['ref'], y_test_results['predict'], out_dir,
                                        run_name+'biome'+str(biome), '_pol', classes)

        pixel_ref_parts.append(y_test_s)
        pixel_pred_parts.append(y_test_pred_s)
        pol_ref_parts.append(y_test_results['ref'])
        pol_pred_parts.append(y_test_results['predict'])

    # 9/ results of the biomes put back together
    y_test_s_all=pd.concat(pixel_ref_parts)
    y_test_pred_s_all=pd.concat(pixel_pred_parts)
    y_test_s_all_pol=pd.concat(pol_ref_parts)
    y_test_pred_s_all_pol=pd.concat(pol_pred_parts)

    accuracy = _export_accuracy(y_test_s_all, y_test_pred_s_all, out_dir,
                                run_name+'_regroup_remap', '', classes)
    accuracy_pol = _export_accuracy(y_test_s_all_pol, y_test_pred_s_all_pol, out_dir,
                                    run_name+'_regroup_remap', '_pol', classes)

    # end time goes to the log of the regrouped run (appended on every execution)
    with open(os.path.join(out_dir,run_name+'_regroup_remap_1_1_Processing_Log.txt'),"a") as log_file:
        log_file.write("Date and time end: "+ datetime.now().strftime("%d/%m/%Y %H:%M:%S")+"\n")

    # release the fitted forest before the next run
    del clf

processing : DATE-BIOME-STRATIFY-LEVEL1_pol  VV-VHMONTH[1-1]
1
300    15526
200    14142
500    14034
Name: Classif, dtype: int64
Accuracy is :76.51
Accuracy is :77.73
2
300    6870
200    4234
500    3091
Name: Classif, dtype: int64
Accuracy is :70.79
Accuracy is :69.81
Accuracy is :75.39
Accuracy is :75.79
processing : DATE-BIOME-STRATIFY-LEVEL1_pol  VV-VHMONTH[1-2]
1
300    15526
200    14142
500    14034
Name: Classif, dtype: int64


### B) Split on pixels

In [None]:
# Name of the experiment; also the name of its output folder.
manip='DATE-BIOME-STRATIFY-LEVEL1_pix'
# The outputs are written under <local>/result/<manip>, so that is the folder
# to check and create (the relative 'result/<manip>' was used before).
# makedirs also creates a missing 'result' parent.
if not os.path.exists(os.path.join(local,'result',manip)):
    os.makedirs(os.path.join(local,'result',manip))