In [27]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import warnings
warnings.simplefilter('ignore')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Create Meta Dataset

**Features:**


* **v1**
    - Area
    - Subregion (one-hot encoded)
    
* **v2**:
    - X, Y coordinates
    - 5 Nearest Neighbor features
        - `Area` - Area of neighbour
        - `Distance` - Distance to neighbour
        - `class count` - Count of surrounding classes
    ***v2.1**:
        - 10 nearest neighbour features
        - `nn_count` - Count of neighbours in 500m
    

## Set Dataset Version

In [28]:
dataset_version = 'v2.1'

### Load Data

In [50]:
import numpy as np
import pandas as pd

from src.utils import read_shapefile

train_shp = read_shapefile('train')
test_shp = read_shapefile('test')

# project to GMT for Lat Long coords
train_shp = train_shp.to_crs({'init': 'epsg:4326'})
test_shp = test_shp.to_crs({'init': 'epsg:4326'})

train_shp.head()

Unnamed: 0_level_0,Area,Subregion,Crop_Id_Ne,geometry,y
Field_Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0.915905,3,8,POLYGON ((22.16935378930472 -28.97707248953319...,Vineyard
2,2.06441,3,6,POLYGON ((22.17427414231474 -28.97676532206398...,Pecan
3,1.0803,3,8,POLYGON ((22.17341929705008 -28.97728122949355...,Vineyard
4,1.31619,3,8,POLYGON ((22.17588169099293 -28.97691670488457...,Vineyard
7,5.52922,3,8,"POLYGON ((22.1762250942478 -28.97449847161998,...",Vineyard


In [51]:
# Convert to Pandas
train_df = pd.DataFrame(train_shp[['Area','Subregion']])

train_labels = train_shp.y
labels_one_hot = pd.get_dummies(train_labels)

test_df = test_shp[['Area','Subregion']]

### One-hot Encode Subregion

In [52]:
from category_encoders import OneHotEncoder

encoder = OneHotEncoder(use_cat_names=True, return_df=True, cols=['Subregion'])

train_df = encoder.fit_transform(train_df)
test_df = encoder.transform(test_df)

train_df.head()

Unnamed: 0_level_0,Area,Subregion_3.0,Subregion_1.0,Subregion_4.0,Subregion_2.0,Subregion_5.0,Subregion_6.0,Subregion_0.0,Subregion_7.0
Field_Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,0.915905,1,0,0,0,0,0,0,0
2,2.06441,1,0,0,0,0,0,0,0
3,1.0803,1,0,0,0,0,0,0,0
4,1.31619,1,0,0,0,0,0,0,0
7,5.52922,1,0,0,0,0,0,0,0


## Nearest Neighbour Analysis

Find the 5 closest farms _in the training set_ and return the following information:

- distance to farm centroid
- farm class
- farm area
- farm subregion

In [53]:
from pysal.lib.cg import KDTree, RADIUS_EARTH_KM

In [54]:
# Add centroids
train_shp['centroid'] = train_shp.geometry.centroid
test_shp['centroid'] = test_shp.geometry.centroid

# Add centroid coords
train_df['lat'] = train_shp.centroid.x
train_df['lon'] = train_shp.centroid.y

test_df['lat'] = test_shp.centroid.x
test_df['lon'] = test_shp.centroid.y

train_df.head()

Unnamed: 0_level_0,Area,Subregion_3.0,Subregion_1.0,Subregion_4.0,Subregion_2.0,Subregion_5.0,Subregion_6.0,Subregion_0.0,Subregion_7.0,lat,lon
Field_Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0.915905,1,0,0,0,0,0,0,0,22.170005,-28.977219
2,2.06441,1,0,0,0,0,0,0,0,22.174372,-28.97595
3,1.0803,1,0,0,0,0,0,0,0,22.172834,-28.977781
4,1.31619,1,0,0,0,0,0,0,0,22.175138,-28.977433
7,5.52922,1,0,0,0,0,0,0,0,22.175166,-28.973708


In [55]:
# Create a KDTree from the farms in the training set
# Use earth's radius in meters
train_kdtree = KDTree(train_df[['lat','lon']].values, distance_metric='ARC', radius=RADIUS_EARTH_KM*1000)

coords = tuple(test_df.iloc[0][['lat','lon']])

In [56]:
train_kdtree.query_ball_point(coords, r=500)

[2245, 1, 3, 4, 5, 6, 12, 2248, 2493, 2, 47]

In [57]:
import time
from multiprocessing import Pool
from functools import partial

nn_count_dist = 500

def get_neighbours(farm_id, dataset, k):
    """
    Get the neighbours in the training set
    """
    
    train = dataset=='train'
    test = dataset=='test'
    
    # Get the coordinates of this farm
    if train:
        coords = tuple(train_df[['lat','lon']].loc[farm_id])
    elif test:
        coords = tuple(test_df[['lat','lon']].loc[farm_id])
    else:
        raise ValueError('Cannot find ID: {} in dataset: {}'.format(farm_id, dataset))
    
    # Get closest farms
    distances, indexes = train_kdtree.query(coords, k=k+1)
    
    # The training set will always return this farm as the first (closest) farm
    # So need to remove it from the list
    if train:
        distances = distances[1:]
        indexes = indexes[1:]
    else:
        distances = distances[:-1]
        indexes = indexes[:-1]
    

    neighbours_df = train_df[['Area']].iloc[indexes]
    neighbours_df['distance'] = distances

    # Some index magic
    neighbours_df.index=map(str,range(len(neighbours_df)))

    # Flatten the neighbours data
    n_flat = neighbours_df.unstack().to_frame().T
    n_flat.columns = n_flat.columns.map('_'.join)

    labels_count = labels_one_hot.iloc[indexes].sum(axis=0)
    # Use name 0 for merging with neighbours data
    labels_count.name = 0
    
    result = n_flat.join(labels_count.to_frame().T)
    
    # Get the number of neighbours within a certain radius
    result['nn_count'] = len(train_kdtree.query_ball_point(coords, r=nn_count_dist))
    
    result.index = [farm_id]

    return result
    

## Multi process all the things!

In [58]:
def knn_features(ids_list, dataset, k):
    
    partials = partial(get_neighbours, dataset=dataset, k=k)
    
    pool = Pool(processes=10)
    result = pool.map_async(partials, ids_list)

    i = 0
    t_start = time.time()
    while not result.ready():
        if i == 10:
            i=0
            e = int(time.time() - t_start)
            print('\n Elapsed: {:02d}:{:02d}:{:02d}'.format(e // 3600, (e % 3600 // 60), e % 60))
        print('.',end='')
        i += 1
        time.sleep(0.5)
        
    df = pd.concat(result.get(), axis=0)
    
    return df

## Generate KNN Features

### Run for train set

In [59]:
%%time

k=10

print('-'*25,'Train','-'*25)
train_ids = train_df.index.to_list()
train_knn_features = knn_features(train_ids, dataset='train', k=k)

print('\n\n')
print('-'*25,'Test','-'*25)
test_ids = test_df.index.to_list()
test_knn_features = knn_features(test_ids, dataset='test', k=k)

------------------------- Train -------------------------
........


------------------------- Test -------------------------
....CPU times: user 3.09 s, sys: 385 ms, total: 3.47 s
Wall time: 7.36 s


## Join KNN Features with others

In [60]:
pd.set_option('display.max_columns',None)

train_features_data = train_df.join(train_knn_features)
test_features_data = test_df.join(test_knn_features)

# Show head of train_features
train_features_data.head()

Unnamed: 0_level_0,Area,Subregion_3.0,Subregion_1.0,Subregion_4.0,Subregion_2.0,Subregion_5.0,Subregion_6.0,Subregion_0.0,Subregion_7.0,lat,lon,Area_0,Area_1,Area_2,Area_3,Area_4,Area_5,Area_6,Area_7,Area_8,Area_9,distance_0,distance_1,distance_2,distance_3,distance_4,distance_5,distance_6,distance_7,distance_8,distance_9,Cotton,Dates,Grass,Lucern,Maize,Pecan,Vacant,Vineyard,"Vineyard & Pecan (""Intercrop"")",nn_count
Field_Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1
1,0.915905,1,0,0,0,0,0,0,0,22.170005,-28.977219,1.0803,0.774865,1.81556,4.62284,2.06441,5.82118,1.31619,1.59031,2.03598,1.43812,282.149095,309.726013,357.634133,443.962361,447.60952,467.795875,499.838735,582.179327,589.531139,606.000154,2,0,0,1,0,2,0,4,1,8
2,2.06441,1,0,0,0,0,0,0,0,22.174372,-28.97595,1.81556,1.43812,1.31619,4.62284,1.0803,5.52922,1.48271,0.774865,2.03598,0.915905,90.197077,163.249105,180.910239,197.627907,252.603794,261.082736,317.420623,364.403883,440.843881,447.60952,0,0,0,1,0,1,0,7,1,13
3,1.0803,1,0,0,0,0,0,0,0,22.172834,-28.977781,1.81556,1.31619,2.06441,5.82118,0.915905,4.62284,0.774865,1.43812,1.59031,5.52922,182.14984,227.423921,252.603794,255.491683,282.149095,373.366213,400.470025,410.444118,483.619231,506.554948,2,0,0,1,0,2,0,4,1,10
4,1.31619,1,0,0,0,0,0,0,0,22.175138,-28.977433,2.06441,1.81556,1.0803,1.43812,5.82118,4.62284,5.52922,1.48271,0.915905,0.774865,180.910239,205.428381,227.423921,264.680854,316.604451,377.641327,414.274624,426.015864,499.838735,515.230647,1,0,0,1,0,2,0,5,1,10
7,5.52922,1,0,0,0,0,0,0,0,22.175166,-28.973708,1.48271,1.43812,4.62284,2.06441,1.01882,2.43183,2.03598,1.81556,1.31619,0.774865,131.300151,166.446162,200.212759,261.082736,293.414491,293.785881,304.353489,325.649965,414.274624,418.595577,0,0,0,1,0,2,0,6,1,12


In [61]:
train_features_data.tail()

Unnamed: 0_level_0,Area,Subregion_3.0,Subregion_1.0,Subregion_4.0,Subregion_2.0,Subregion_5.0,Subregion_6.0,Subregion_0.0,Subregion_7.0,lat,lon,Area_0,Area_1,Area_2,Area_3,Area_4,Area_5,Area_6,Area_7,Area_8,Area_9,distance_0,distance_1,distance_2,distance_3,distance_4,distance_5,distance_6,distance_7,distance_8,distance_9,Cotton,Dates,Grass,Lucern,Maize,Pecan,Vacant,Vineyard,"Vineyard & Pecan (""Intercrop"")",nn_count
Field_Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1
3598,4.66092,0,0,0,0,0,0,0,1,21.416851,-28.433257,1.94837,4.82975,1.67616,7.54383,9.91648,7.10855,4.03062,1.43219,8.73261,2.31705,201.854948,308.376576,375.441834,376.186829,447.887561,467.866076,478.49189,493.587258,494.313,495.7403,0,0,0,1,2,4,1,2,0,11
3599,4.82975,0,0,0,0,0,0,0,1,21.413747,-28.43375,1.43219,4.66092,2.31705,1.4755,7.10855,1.94837,2.12326,8.73261,3.44555,7.54383,303.47612,308.376576,331.410912,409.450371,448.940683,498.535324,530.820062,585.774191,645.337099,650.294734,0,0,0,1,2,3,2,2,0,7
3600,7.54383,0,0,0,0,0,0,0,1,21.420298,-28.434759,1.94837,4.03062,9.91648,1.80795,4.66092,8.73261,2.06707,1.7149,1.67616,1.42959,182.797565,231.487533,249.134407,356.323159,376.186829,400.668203,430.290413,438.774659,443.252225,521.24813,0,0,0,2,4,3,0,1,0,10
3602,5.14485,0,0,0,0,0,1,0,0,21.448342,-28.412573,2.88294,1.60659,3.07007,3.67649,3.20698,1.19806,3.44046,2.99134,1.83395,2.98785,168.410991,238.44778,311.524217,321.398891,323.37719,328.500811,351.189946,373.044312,374.644096,378.313791,0,0,0,1,0,0,2,7,0,15
3603,0.774865,1,0,0,0,0,0,0,0,22.170977,-28.974566,4.62284,2.03598,0.915905,1.81556,0.565466,2.06441,2.43183,1.0803,5.52922,1.43812,225.378437,283.274187,309.726013,309.911653,349.685868,364.403883,364.603347,400.470025,418.595577,468.465873,0,0,0,1,0,1,0,7,1,11


### Cluster lat long

In [62]:
# from sklearn.cluster import DBSCAN

In [63]:
# TODO

## Save Raw Dataset

In [64]:
from pathlib import Path
from src.utils import safe_create_dir

from config import processed_data_dir

out_dir = processed_data_dir / 'meta_data' / dataset_version
safe_create_dir(out_dir)

train_features_data.to_csv(out_dir / 'train.csv')
test_features_data.to_csv(out_dir / 'test.csv')

In [65]:
# Save to references for Stefan :D
ref_dir = Path('../references/')
out_dir = ref_dir / 'meta_data' / dataset_version
safe_create_dir(out_dir)

train_features_data.to_csv(out_dir / 'train.csv')
test_features_data.to_csv(out_dir / 'test.csv')

## Dataset Processing 

### Feature Scaling

In [66]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

cols = ['Area', 'lat', 'lon', 'nn_count'] + [c for c in train_features_data.columns if 'distance' in c] + ['Cotton','Dates','Grass','Lucern','Maize','Pecan','Vacant','Vineyard','Vineyard & Pecan ("Intercrop")']

train_features_data[cols] = scaler.fit_transform(train_features_data[cols])
test_features_data[cols] = scaler.transform(test_features_data[cols])

train_features_data[cols].head()

Unnamed: 0_level_0,Area,lat,lon,nn_count,distance_0,distance_1,distance_2,distance_3,distance_4,distance_5,distance_6,distance_7,distance_8,distance_9,Cotton,Dates,Grass,Lucern,Maize,Pecan,Vacant,Vineyard,"Vineyard & Pecan (""Intercrop"")"
Field_Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
1,-0.589766,1.739183,-1.652844,-1.102456,2.459255,1.570768,1.426737,1.721516,1.299743,1.096452,0.995017,1.2638,1.034786,0.906862,0.990434,-0.146653,-0.482659,-0.58047,-0.672012,1.226142,-0.779975,0.106967,0.889687
2,-0.157671,1.761063,-1.646056,-0.207195,-0.55707,-0.156189,-0.379945,-0.511104,-0.304332,-0.472433,-0.283296,-0.170441,0.119416,-0.019905,-0.412259,-0.146653,-0.482659,-0.58047,-0.672012,0.357886,-0.779975,1.340142,0.889687
3,-0.527917,1.753355,-1.655849,-0.744352,0.887871,0.600429,0.352992,0.013337,-0.061299,0.379761,0.298681,0.132774,0.382756,0.324994,0.990434,-0.146653,-0.482659,-0.58047,-0.672012,1.226142,-0.779975,0.106967,0.889687
4,-0.439169,1.764898,-1.65399,-0.744352,0.868392,0.341103,0.095573,0.096621,0.222124,0.412208,0.395418,0.235328,0.482609,0.375756,0.289088,-0.146653,-0.482659,-0.58047,-0.672012,1.226142,-0.779975,0.518026,0.889687
7,1.145875,1.765042,-1.634054,-0.386247,0.088822,-0.118496,-0.182612,0.06401,0.031368,-0.224227,-0.374865,-0.425669,-0.044154,-0.189669,-0.412259,-0.146653,-0.482659,-0.58047,-0.672012,1.226142,-0.779975,0.929084,0.889687


### Save Scaled Data

In [67]:
out_dir = processed_data_dir / 'meta_data' / dataset_version
safe_create_dir(out_dir)

train_features_data.to_csv(out_dir / 'train_scaled.csv')
test_features_data.to_csv(out_dir / 'test_scaled.csv')


# Save to references for Stefan :D
ref_dir = Path('../references/')
out_dir = ref_dir / 'meta_data' / dataset_version
safe_create_dir(out_dir)

train_features_data.to_csv(out_dir / 'train_scaled.csv')
test_features_data.to_csv(out_dir / 'test_scaled.csv')

In [68]:
train_features_data.tail()

Unnamed: 0_level_0,Area,Subregion_3.0,Subregion_1.0,Subregion_4.0,Subregion_2.0,Subregion_5.0,Subregion_6.0,Subregion_0.0,Subregion_7.0,lat,lon,Area_0,Area_1,Area_2,Area_3,Area_4,Area_5,Area_6,Area_7,Area_8,Area_9,distance_0,distance_1,distance_2,distance_3,distance_4,distance_5,distance_6,distance_7,distance_8,distance_9,Cotton,Dates,Grass,Lucern,Maize,Pecan,Vacant,Vineyard,"Vineyard & Pecan (""Intercrop"")",nn_count
Field_Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1
3598,0.819199,0,0,0,0,0,0,0,1,-2.034321,1.257978,1.94837,4.82975,1.67616,7.54383,9.91648,7.10855,4.03062,1.43219,8.73261,2.31705,1.197517,1.554858,1.608789,1.107241,1.30203,1.096984,0.845427,0.680344,0.44859,0.261716,-0.412259,-0.146653,-0.482659,-0.58047,0.48825,2.962654,-0.011706,-0.715149,-0.405631,-0.5653
3599,0.882717,0,0,0,0,0,0,0,1,-2.04987,1.25534,1.43219,4.66092,2.31705,1.4755,7.10855,1.94837,2.12326,8.73261,3.44555,7.54383,2.794387,1.554858,1.158652,1.408721,1.310693,1.329754,1.212122,1.287475,1.378347,1.166036,-0.412259,-0.146653,-0.482659,-0.58047,0.48825,2.094398,0.756563,-0.715149,-0.405631,-1.281508
3600,1.90382,0,0,0,0,0,0,0,1,-2.017049,1.24994,1.94837,4.03062,9.91648,1.80795,4.66092,8.73261,2.06707,1.7149,1.67616,1.42959,0.89805,0.648339,0.317524,0.92721,0.712235,0.586975,0.507651,0.319356,0.134242,0.410966,-0.412259,-0.146653,-0.482659,-0.077799,1.648512,2.094398,-0.779975,-1.126207,-0.405631,-0.744352
3602,1.001266,0,0,0,0,0,1,0,0,-1.876541,1.368658,2.88294,1.60659,3.07007,3.67649,3.20698,1.19806,3.44046,2.99134,1.83395,2.98785,0.67198,0.7304,0.955347,0.610678,0.277835,0.039248,-0.046654,-0.113536,-0.288133,-0.425364,-0.412259,-0.146653,-0.482659,-0.58047,-0.672012,-0.51037,0.756563,1.340142,-0.405631,0.150909
3603,-0.642829,1,0,0,0,0,0,0,0,1.74405,-1.638649,4.62284,2.03598,0.915905,1.81556,0.565466,2.06441,2.43183,1.0803,5.52922,1.43812,1.567164,1.258902,0.936963,0.506565,0.494244,0.31174,0.047342,0.067086,-0.017553,0.102129,-0.412259,-0.146653,-0.482659,-0.58047,-0.672012,0.357886,-0.779975,1.340142,0.889687,-0.5653


In [49]:
nans = lambda df: df[df.isnull().any(axis=1)]
nans(test_features_data)

Unnamed: 0_level_0,Area,Subregion_3.0,Subregion_1.0,Subregion_4.0,Subregion_2.0,Subregion_5.0,Subregion_6.0,Subregion_0.0,Subregion_7.0,lat,lon,Area_0,Area_1,Area_2,Area_3,Area_4,Area_5,Area_6,Area_7,Area_8,Area_9,distance_0,distance_1,distance_2,distance_3,distance_4,distance_5,distance_6,distance_7,distance_8,distance_9,Cotton,Dates,Grass,Lucern,Maize,Pecan,Vacant,Vineyard,"Vineyard & Pecan (""Intercrop"")",nn_count
Field_Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1
