## Libary Set-up

In [1]:
import numpy as np
import pandas as pd
from PIL import Image
import os
import warnings
import rasterio as rio

%matplotlib inline

from netCDF4 import Dataset
from pyhdf.SD import SD, SDC
from skimage.transform import resize

In [2]:
from collections import Counter

from sklearn.utils import shuffle
from sklearn.pipeline import Pipeline
from sklearn.metrics import  confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
from skimage.transform import resize
from sklearn.metrics import mean_squared_error

ncores = os.cpu_count()

## 2010 cropland boundaries - crop dataset

In [3]:
lidar_dem_path = 'lower_scaled_gfsad.tif'
with rio.open(lidar_dem_path) as lidar_dem:
    im_array = lidar_dem.read()
    lidar_dem.bounds

In [4]:
# Get a list of cropland and their classes
im_array = im_array.reshape((2160,4320))

def apply_mask(pixel):
    if pixel == 9:
        return 0
    else:
        return pixel

filter_function = np.vectorize(apply_mask)
unmasked_pixels = filter_function(im_array)

land_pixels = np.nonzero(unmasked_pixels) 
# print(np.unique(imarray))
land_pixel_classes = im_array[land_pixels].tolist()
print(len(land_pixel_classes))

702138


In [5]:
land_indices = land_pixels 
non_zero_indices = np.array(land_indices)
clean_frame = non_zero_indices.T
print(clean_frame.shape)
n = clean_frame.shape[0]
non_zeros = np.nonzero(im_array)

clean_frame_2 = clean_frame 
clean_frame_df = pd.DataFrame({'lon': clean_frame[:,0], 'lat': clean_frame[:,1]})
clean_frame_df['labels'] = land_pixel_classes

(702138, 2)


In [9]:
clean_frame_df

Unnamed: 0,lon,lat,labels
0,294,2469,8
1,295,2467,8
2,295,2468,8
3,300,2466,8
4,300,2467,8
...,...,...,...
702133,1745,1328,8
702134,1745,1334,8
702135,1745,1339,8
702136,1745,1340,8


In [6]:
## Add siebert labels

In [7]:
tifs = ['1985', '1990', '1995', '2000', '2005']
years = ['1985', '1990', '1995', '2000', '2005', '2010', '2015']
lidar_dem_path = 'C:\\Users\\Elle\\Documents\\w210\\1985.tif'
times_series_labels = np.zeros((5, 2160, 4320))
for i in range(len(tifs)):
    lidar_dem_path = tifs[i] +'.tif'
    with rio.open(lidar_dem_path) as lidar_dem:
        array = lidar_dem.read() 
        array = array.reshape(2160, 4320)
        clean_frame_df[str(tifs[i])] = array[land_indices].reshape(n,1)

times_series_labels[0]

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [8]:
clean_frame_df[clean_frame_df['1985']>2000].groupby(['labels']).count()

Unnamed: 0_level_0,lon,lat,1985,1990,1995,2000,2005
labels,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,10944,10944,10944,10944,10944,10944,10944
2,6774,6774,6774,6774,6774,6774,6774
3,8528,8528,8528,8528,8528,8528,8528
4,2280,2280,2280,2280,2280,2280,2280
5,1718,1718,1718,1718,1718,1718,1718
6,495,495,495,495,495,495,495
7,4680,4680,4680,4680,4680,4680,4680
8,1456,1456,1456,1456,1456,1456,1456


In [9]:
df = clean_frame_df[['labels','1985', '1990', '1995', '2000', '2005']]

In [10]:
df[df['labels']>3].quantile(.75)

labels     8.000
1985      65.040
1990      73.682
1995      78.392
2000      79.313
2005      83.540
Name: 0.75, dtype: float64

In [11]:
clean_frame_df[df['labels']<4].quantile(.25)

lon        598.000
lat       2701.000
labels       1.000
1985        50.897
1990        81.681
1995       107.460
2000       125.600
2005       133.300
Name: 0.25, dtype: float64

In [12]:
df_85, df_90, df_95, df_00, df_05, df_10, df_15  =  pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame()


df_lists = [df_85, df_90, df_95, df_00, df_05, df_10, df_15]

for i in range(len(df_lists)):
    df_lists[i]['lat'] = clean_frame_df['lat']
    df_lists[i]['lon'] = clean_frame_df['lon']
    if i < 5:
        df_lists[i]['labels'] = clean_frame_df[tifs[i]]
    

In [13]:
for i in range(len(df_lists[:5])):
    df_lists[i]['year'] = years[i]

In [14]:
measures = ['max_y1', 'min_y1', 'mean_y1', 'var_y1', 'max_y2', 'min_y2', 'mean_y2', 'var_y2']
ndvi_list = os.listdir('ndvi')
def retrieve_ndvi(indices, length, year):
            path = 'ndvi/ndvi3g_geo_v1_'
            file_1h = path + year + '_0106.nc4'
            file_2h = path + year + '_0712.nc4' 
            ds_1, ds_2 = np.array(Dataset(file_1h)['ndvi']) , np.array(Dataset(file_2h)['ndvi'])
            max_y1 = np.max(ds_1, axis = 0)[indices].reshape(length,1)
            min_y1 = np.min(ds_1, axis = 0)[indices].reshape(length,1)
            var_y1 = np.var(ds_1, axis = 0)[indices].reshape(length,1)
            mean_y1= np.mean(ds_1, axis = 0)[indices].reshape(length,1)
            max_y2 = np.max(ds_2, axis = 0)[indices].reshape(length,1)
            min_y2 = np.min(ds_2, axis = 0)[indices].reshape(length,1)
            var_y2 = np.var(ds_2, axis = 0)[indices].reshape(length,1)
            mean_y2 = np.mean(ds_2, axis = 0)[indices].reshape(length,1)
            return max_y1, min_y1, mean_y1, var_y1, max_y2, min_y2, mean_y2, var_y2

for i in range(len(df_lists)):
    df_lists[i]['max_y1_ndvi'], df_lists[i]['min_y1_ndvi'], df_lists[i]['mean_y1_ndvi'], df_lists[i]['var_y1_ndvi'], df_lists[i]['max_y2_ndvi'], df_lists[i]['min_y2_ndvi'], df_lists[i]['mean_y2_ndvi'], df_lists[i]['var_y2_ndvi'] = retrieve_ndvi(land_indices, n, years[i])


cannot be safely cast to variable data type
  import sys


In [15]:
print(n)

702138


In [16]:
measures = ['aet', 'def', 'pet', 'ppt', 'srad', 'tmax', 'tmin', 'vap', 'vpd'] 

def extract_nc(indices, length, year, variable):
    path = 'climate/' + year + '/' + variable + '/'
    full_path = path + 'TerraClimate_' + variable +'_' + year + '.nc'
    ds = np.array(Dataset(full_path)[variable])
    max_y1 = np.max(ds, axis = 0)
    min_y1 = np.min(ds, axis = 0)
    var_y1 = np.var(ds, axis = 0)
    mean_y1 = np.mean(ds, axis = 0)
    max_y1 = resize(max_y1, (2160, 4320))[indices].reshape(length,1)
    min_y1 = resize(min_y1, (2160, 4320))[indices].reshape(length,1)
    var_y1 = resize(var_y1, (2160, 4320))[indices].reshape(length,1)
    mean_y1 = resize(mean_y1, (2160, 4320))[indices].reshape(length,1)
    return max_y1, min_y1, var_y1, mean_y1


for i in measures:
    for j in range(len(df_lists)):
        df_lists[j][i+'_max'], df_lists[j][i+'_min'], df_lists[j][i+'_var'], df_lists[j][i+'_mean'] = extract_nc(land_indices, n, years[j], i)
        print(i+","+str(j))

aet,0
aet,1
aet,2
aet,3
aet,4
aet,5
aet,6
def,0
def,1
def,2
def,3
def,4
def,5
def,6
pet,0
pet,1
pet,2
pet,3
pet,4
pet,5
pet,6
ppt,0
ppt,1
ppt,2
ppt,3
ppt,4
ppt,5
ppt,6
srad,0
srad,1
srad,2
srad,3
srad,4
srad,5
srad,6
tmax,0
tmax,1
tmax,2
tmax,3
tmax,4
tmax,5
tmax,6
tmin,0
tmin,1
tmin,2
tmin,3
tmin,4
tmin,5
tmin,6
vap,0
vap,1
vap,2
vap,3
vap,4
vap,5
vap,6
vpd,0
vpd,1
vpd,2
vpd,3
vpd,4
vpd,5
vpd,6


In [None]:
for j in df_lists:
    for i in measures_2:
        j[i+'_lt_max'] = clean_frame_df[i+'_lt_max'] 
        j[i+'_lt_min'] = clean_frame_df[i+'_lt_min'] 
        j[i+'_lt_var'] = clean_frame_df[i+'_lt_var']
        j[i+'_lt_mean'] = clean_frame_df[i+'_lt_mean']

In [17]:
df_95

Unnamed: 0,lat,lon,labels,year,max_y1_ndvi,min_y1_ndvi,mean_y1_ndvi,var_y1_ndvi,max_y2_ndvi,min_y2_ndvi,...,tmin_var,tmin_mean,vap_max,vap_min,vap_var,vap_mean,vpd_max,vpd_min,vpd_var,vpd_mean
0,2469,294,0.000,1995,8125,1355,3456.166667,6.154483e+06,8354,1757,...,72.708703,-2.309421,1.052261,0.190029,0.096392,0.602155,0.655236,0.060001,0.043997,0.257991
1,2467,295,0.000,1995,8234,1013,2970.250000,7.409264e+06,8532,1013,...,71.795190,-2.016761,1.070505,0.200001,0.097947,0.613331,0.648065,0.052740,0.044318,0.257815
2,2468,295,0.000,1995,8164,1789,3760.000000,5.416447e+06,8384,2036,...,72.464317,-2.046715,1.067528,0.199254,0.098399,0.611172,0.656253,0.055240,0.044871,0.261548
3,2466,300,12.671,1995,7921,1602,3387.833333,4.432473e+06,8055,1978,...,70.747906,-1.139078,1.127174,0.220447,0.103092,0.646327,0.695474,0.050030,0.051355,0.270614
4,2467,300,10.419,1995,8364,2066,4057.083333,4.549127e+06,8269,2525,...,69.954438,-1.288086,1.119525,0.214760,0.103172,0.639063,0.691459,0.054466,0.047753,0.268260
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
702133,1328,1745,0.000,1995,6965,3985,5416.166667,9.403010e+05,6335,3757,...,7.356412,0.090630,0.912536,0.521160,0.015502,0.732485,0.253341,0.024216,0.006231,0.144536
702134,1334,1745,0.000,1995,7712,5112,6512.333333,8.967406e+05,6732,4542,...,7.688577,1.994188,3.051164,2.665147,0.016046,2.873173,2.427776,2.198685,0.005796,2.319364
702135,1339,1745,0.000,1995,8395,4063,6706.750000,1.273591e+06,7141,4469,...,8.642017,4.519217,4.982084,4.606134,0.015706,4.802324,4.427008,4.136039,0.009057,4.297690
702136,1340,1745,0.000,1995,7581,5566,7030.750000,2.755637e+05,7510,5030,...,8.545514,4.709310,5.216053,4.833997,0.015893,5.028848,4.660798,4.368007,0.008984,4.533811


NameError: name 'feat' is not defined

In [None]:
combined_df = pd.concat([df_85, df_90, df_95, df_00, df_05])
len(combined_df.columns)

In [20]:
df_95.columns

Index(['lat', 'lon', 'labels', 'year', 'max_y1_ndvi', 'min_y1_ndvi',
       'mean_y1_ndvi', 'var_y1_ndvi', 'max_y2_ndvi', 'min_y2_ndvi',
       'mean_y2_ndvi', 'var_y2_ndvi', 'aet_max', 'aet_min', 'aet_var',
       'aet_mean', 'def_max', 'def_min', 'def_var', 'def_mean', 'pet_max',
       'pet_min', 'pet_var', 'pet_mean', 'ppt_max', 'ppt_min', 'ppt_var',
       'ppt_mean', 'srad_max', 'srad_min', 'srad_var', 'srad_mean', 'tmax_max',
       'tmax_min', 'tmax_var', 'tmax_mean', 'tmin_max', 'tmin_min', 'tmin_var',
       'tmin_mean', 'vap_max', 'vap_min', 'vap_var', 'vap_mean', 'vpd_max',
       'vpd_min', 'vpd_var', 'vpd_mean'],
      dtype='object')

In [29]:
feat = ['labels', 'max_y1_ndvi', 'min_y1_ndvi',
       'mean_y1_ndvi', 'var_y1_ndvi', 'max_y2_ndvi', 'min_y2_ndvi',
       'mean_y2_ndvi', 'var_y2_ndvi', 'aet_max', 'aet_min', 'aet_var',
       'aet_mean', 'def_max', 'def_min', 'def_var', 'def_mean', 'pet_max',
       'pet_min', 'pet_var', 'pet_mean', 'ppt_max', 'ppt_min', 'ppt_var',
       'ppt_mean', 'srad_max', 'srad_min', 'srad_var', 'srad_mean', 'tmax_max',
       'tmax_min', 'tmax_var', 'tmax_mean', 'tmin_max', 'tmin_min', 'tmin_var',
       'tmin_mean', 'vap_max', 'vap_min', 'vap_var', 'vap_mean', 'vpd_max',
       'vpd_min', 'vpd_var', 'vpd_mean']
features = feat[1:]

In [33]:
df_4 = df_05[feat] - df_00[feat]
df_3 = df_00[feat] - df_95[feat]
df_2 = df_95[feat] - df_90[feat]
df_1 = df_90[feat] - df_85[feat]
df_1['time'] = 1
df_2['time'] = 2
df_3['time'] = 3
df_4['time'] = 4

In [34]:
combined_df = pd.concat([df_1, df_2, df_3, df_4])

In [35]:
combined_df['labels'].mean()

20.48073398220863

In [40]:
combined_df['labels'][combined_df['labels']>20].count()

389414

In [41]:
combined_df['labels'][combined_df['labels']==0].count()

1771948

In [39]:
combined_df['labels'][combined_df['labels']<-20].count()

225213

In [38]:
combined_df['labels'].count()

2808552

In [None]:
combined_df['labels'][combined_df['labels']]

In [68]:
def apply_label(pixel):
    if pixel <-10 :
        return  -1
    elif pixel > 10:
        return 1
    else:
        return 0

In [69]:
combined_df['irrigated'] = combined_df['labels'].map(apply_label)

In [79]:
test, train = combined_df[combined_df['time'] == 4], combined_df[combined_df['time'] != 4]
train_irr = train[train['irrigated'] !=0]
train_non = train[train['irrigated'] == 0]
train_non = train_non.sample(frac=0.05, replace=False, random_state=1)
train = pd.concat([train_irr, train_non])

test_labels, train_labels = test['irrigated'], train['irrigated']
test_data, train_data = test[features], train[features]

In [74]:
train_labels.value_counts()

 0    471746
 1    361776
-1    172153
Name: irrigated, dtype: int64

In [71]:
combined_df['irrigated']

0         0
1         0
2         0
3         0
4         0
         ..
702133    0
702134    0
702135    0
702136    0
702137    0
Name: irrigated, Length: 2808552, dtype: int64

In [None]:
trai

In [80]:
clf_unb = RandomForestClassifier(n_estimators=100 , min_samples_leaf=2, n_jobs=ncores)
%time clf_unb.fit(train_data, train_labels)

CPU times: user 12min 55s, sys: 171 ms, total: 12min 55s
Wall time: 28.8 s


RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=2, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=32, oob_score=False, random_state=None, verbose=0,
                       warm_start=False)

In [81]:
%time predictions= clf_unb.predict(test_data)
print("acc",accuracy_score(test_labels, predictions))
print("kappa", cohen_kappa_score(test_labels, predictions))
print(classification_report(test_labels, predictions))

CPU times: user 14.7 s, sys: 236 ms, total: 14.9 s
Wall time: 862 ms
acc 0.5107927501431343
kappa 0.020124390003934844
              precision    recall  f1-score   support

          -1       0.15      0.02      0.04     87646
           0       0.72      0.62      0.67    504623
           1       0.18      0.41      0.25    109869

    accuracy                           0.51    702138
   macro avg       0.35      0.35      0.32    702138
weighted avg       0.57      0.51      0.52    702138



In [None]:
combined_df['labels'].max()

In [None]:
combined_df['irrigated'] = combined_df['labels'].map(apply_label)

In [None]:
combined_df['irrigated'].shape

In [None]:
combined_df['irrigated'].value_counts()

In [None]:
# combined_df['irrigated'] = [1 if x > 100 else 0 for x in combined_df['labels']]

In [None]:
combined_df[combined_df['irrigated'] == 1].shape

In [None]:
data = combined_df[features]
labels = combined_df['labels']
ncores = os.cpu_count()
train_data_unb, test_data_unb, train_labels_unb, test_labels_unb = train_test_split(data, labels, test_size=0.7)
clf_unb = RandomForestRegressor(n_estimators=20, max_depth=20, min_samples_leaf = 50)


In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
%time clf_unb.fit(train_data_unb, train_labels_unb)

In [None]:
%time predictions_unb = clf_unb.predict(test_data_unb)
from sklearn.metrics import mean_squared_error

print("mse",mean_squared_error(test_labels_unb, predictions_unb)))

In [None]:
data = combined_df[features]
labels = combined_df['irrigated']

train_data_unb, test_data_unb, train_labels_unb, test_labels_unb = train_test_split(data, labels, test_size=0.2)
clf_unb = RandomForestClassifier(n_estimators=100 , min_samples_leaf=10, max_depth=50, n_jobs=ncores)
%time clf_unb.fit(train_data_unb, train_labels_unb)
# Train the Classifier to take the training features and learn how they relate
# to the training y (the species)


In [None]:
%time predictions_unb = clf_unb.predict(test_data_unb)
print("acc",accuracy_score(test_labels_unb, predictions_unb))
print("kappa", cohen_kappa_score(test_labels_unb, predictions_unb))
print(classification_report(test_labels_unb, predictions_unb))

In [None]:
combined_df.columns

In [None]:
test, train = combined_df[combined_df['year'] == '2005'], combined_df[combined_df['year'] != '2005']
test_labels, train_labels = test['irrigated'], train['irrigated']
test_data, train_data = test[features], train[features]

In [None]:
clf_unb = RandomForestClassifier(n_estimators=100 , min_samples_leaf=10, max_depth=50, n_jobs=ncores)
%time clf_unb.fit(train_data, train_labels)

In [None]:
%time predictions = clf_unb.predict(test_data)
print("acc",accuracy_score(test_labels, predictions))
print("kappa", cohen_kappa_score(test_labels, predictions))
print(classification_report(test_labels, predictions))

In [None]:
train_irr = train[train['irrigated'] !=0]
train_non = train[train['irrigated'] == 0]
train_non = train_non.sample(frac=0.5, replace=False, random_state=1)
train = pd.concat([train_irr, train_non])

test_labels, train_labels = test['irrigated'], train['irrigated']
test_data, train_data = test[features], train[features]

clf_unb = ExtraTreesClassifier(n_estimators=500, min_samples_leaf=2, max_depth=50, n_jobs=ncores)

# Train the Classifier to take the training features and learn how they relate
# to the training y (the species)
%time clf_unb.fit(train_data, train_labels)

In [None]:
%time predictions= clf_unb.predict(test_data)
print("acc",accuracy_score(test_labels, predictions))
print("kappa", cohen_kappa_score(test_labels, predictions))
print(classification_report(test_labels, predictions))

In [None]:
test, train = combined_df[combined_df['year'] == '2005'], combined_df[combined_df['year'] != '2005']
train_irr = train[train['irrigated'] == 1]
train_non = train[train['irrigated'] == 0]
train_non = train_non.sample(frac=0.5, replace=False, random_state=1)
train = pd.concat([train_irr, train_non])
test_labels, train_labels = test['irrigated'], train['irrigated']
test_data, train_data = test[features], train[features]

In [None]:
clf_unb = ExtraTreesClassifier(n_estimators=500, min_samples_leaf=2, max_depth=50, n_jobs=ncores)

# Train the Classifier to take the training features and learn how they relate
# to the training y (the species)
%time clf_unb.fit(train_data, train_labels)
%time predictions= clf_unb.predict(test_data)

In [None]:
sample_df = combined_df.sample(n=2000000, random_state=1)

In [None]:
sample_df.shape

In [None]:


test, train = combined_df[combined_df['year'] == '2005'], combined_df[combined_df['year'] != '2005']

test_labels, train_labels = test['labels'], train['labels']
test_data, train_data = test[features], train[features]





In [None]:
clf_unb = RandomForestRegressor(n_estimators=50, max_depth=50, min_samples_leaf=2, n_jobs=ncores)
%time clf_unb.fit(train_data, train_labels)

In [None]:
%time predictions_unb = clf_unb.predict(test_data_unb)

print("mse",mean_squared_error(test_labels, predictions_unb))

In [None]:
import sklearn.linear_model

In [None]:
clf_unb = LogisticRegression(penalty='l2', tol=0.0001, C=1.0, n_jobs=ncores)
%time clf_unb.fit(train_data, train_labels)

In [None]:
lab_2000 = combined_df[combined_df['year'] == '2000']['labels']
lab_2005 = combined_df[combined_df['year'] == '2005']['labels']

In [None]:
lab_2000

In [None]:
print("mse",mean_squared_error(lab_2000, lab_2005))

In [None]:
clean_frame_df['2005_binary'] = [1 if x > 100 else 0 for x in clean_frame_df['2005']]
clean_frame_df['2000_binary'] = [1 if x > 100 else 0 for x in clean_frame_df['2000']]
clean_frame_df['1995_binary'] = [1 if x > 100 else 0 for x in clean_frame_df['1995']]
clean_frame_df['1990_binary'] = [1 if x > 100 else 0 for x in clean_frame_df['1990']]
clean_frame_df['1985_binary'] = [1 if x > 100 else 0 for x in clean_frame_df['1985']]

In [None]:
clean_frame_df['predictions'][clean_frame_df['predictions']==1].count() 

In [None]:
clean_frame_df['predictions'][clean_frame_df['predictions']==1].count() 

In [None]:
clean_frame_df['predictions'][clean_frame_df['predictions']==1].count() 

In [None]:
clean_frame_df['2005_binary'][clean_frame_df['2005_binary']==1].count() 

In [None]:
clean_frame_df['2000_binary'][clean_frame_df['2000_binary']==1].count() 

In [None]:
clean_frame_df['2005_binary'][clean_frame_df['2005_binary']==1].count() 

In [None]:
clean_frame_df['2005_binary'][clean_frame_df['2005_binary']==1].count() 

In [None]:
clean_frame_df['1990_binary'][clean_frame_df['1990_binary']==1].count() 

In [None]:
clean_frame_df['1985_binary'][clean_frame_df['1985_binary']==1].count() 

In [None]:
clean_frame_df['1990_binary'][clean_frame_df['1990_binary']==1].count() 

In [None]:
clean_frame_df['2000_binary'][clean_frame_df['2000_binary']==1].count() 

In [None]:
df_import = pd.DataFrame()
df_import['feature'] = features
df_import['importances'] = clf_unb.feature_importances_
df_import=df_import.sort_values('importances', ascending=False)
df_import.head(n = 10)