## Spatial Features Vs Urban Footprint Vs Population Size

In [1]:
import sklearn
import pandas as pd
import numpy as np
import csv
import scipy.stats as stats
from statistics import pstdev
from statistics import mean
from sklearn import preprocessing
from sklearn.linear_model import ElasticNetCV
from sklearn.linear_model import ElasticNet, Lasso
from sklearn.datasets import make_regression
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from itertools import product
import copy


### Read the Features data

In [2]:
!ls spfeas

ls: spfeas: No such file or directory


In [3]:
spfeas = pd.read_excel('sri_spfeas_v3.xlsx', sheet_name=0)

In [4]:
spfeas['gid'] = spfeas['gid'].astype(int)
spfeas = spfeas.set_index('gid')

In [5]:
spfeas.head()

Unnamed: 0_level_0,fid,gnd_n,gnd_c,fourier_sc31_mean_mean,fourier_sc31_mean_std,fourier_sc31_mean_sum,fourier_sc31_variance_mean,fourier_sc31_variance_std,fourier_sc31_variance_sum,fourier_sc51_mean_mean,...,sfs_sc71_min_ll_sum,sfs_sc71_roa_mean,sfs_sc71_roa_std,sfs_sc71_roa_sum,sfs_sc71_std_mean,sfs_sc71_std_std,sfs_sc71_std_sum,sfs_sc71_w_mean_mean,sfs_sc71_w_mean_std,sfs_sc71_w_mean_sum
gid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
43,0,Karagaskada South,2148065,69.453586,3.136141,237670.2,18.62913,0.97624,63748.88,78.4939,...,3933,136.394391,0.938602,466878,16.233955,3.64845,55568.828125,1.228705,0.707158,4205.858398
564,1,Panvila,2115070,59.535572,7.845283,1400872.0,17.086123,2.204723,402036.5,69.651317,...,40076188,137.910516,2.093903,3245724,14.498129,5.118067,341213.46875,2.838843,3.471181,66812.164062
7917,2,Ganepalla,9227135,50.730112,5.682764,3625884.0,14.450501,2.109653,1032835.0,60.710447,...,7900571136,139.556597,2.014015,9974110,9.856942,7.202832,704475.625,3.388618,5.864246,242184.515625
2263,3,Akkaraipattu 11,5236065,73.670361,1.821871,97318.55,18.843218,1.23169,24891.89,82.54471,...,1331,136.077853,0.267941,180031,10.558998,4.329045,13969.554688,0.514668,0.239074,680.906372
8,4,Delgasthenna,2109125,66.878083,2.710715,265773.5,18.708531,1.015577,74347.7,75.957167,...,4809,136.681028,1.37759,542487,15.218429,4.46516,60401.945312,1.180995,0.868924,4687.369141


### Load GUF

In [6]:
guf = pd.read_csv('sri_ghs.csv')

In [7]:
guf['gid'] = guf['gid'].astype(int)
guf = guf.set_index('gid')
guf.head()

Unnamed: 0_level_0,gnd_n,gnd_c,prov_c,dis_c,dsd_c,ghs_count,ghs_sum,ghs_mean
gid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
43,Karagaskada South,2148065,2,21,2148,235,635,2.702128
564,Panvila,2115070,2,21,2115,1626,6326,3.890529
7917,Ganepalla,9227135,9,92,9227,4929,6929,1.405762
2263,Akkaraipattu 11,5236065,5,52,5236,89,5889,66.168539
8,Delgasthenna,2109125,2,21,2109,274,2874,10.489051


### Merge population data with urbanfootprint and featuures

In [8]:
#guf['builtup_pct'] = guf['ghs_sum']/guf['ghs_count']
#guf.describe()

In [9]:
#spfeas_world_pop_merged = spfeas.merge(world_pop, left_on='gid', right_on='gid', how='outer')
spfeas_guf = spfeas.merge(guf, left_on='gid', right_on="gid", how='outer')

In [10]:
spfeas_guf = spfeas_guf.round(3)
spfeas_guf.head()

Unnamed: 0_level_0,fid,gnd_n_x,gnd_c_x,fourier_sc31_mean_mean,fourier_sc31_mean_std,fourier_sc31_mean_sum,fourier_sc31_variance_mean,fourier_sc31_variance_std,fourier_sc31_variance_sum,fourier_sc51_mean_mean,...,sfs_sc71_w_mean_std,sfs_sc71_w_mean_sum,gnd_n_y,gnd_c_y,prov_c,dis_c,dsd_c,ghs_count,ghs_sum,ghs_mean
gid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
43,0,Karagaskada South,2148065,69.454,3.136,237670.172,18.629,0.976,63748.883,78.494,...,0.707,4205.858,Karagaskada South,2148065,2,21,2148,235,635,2.702
564,1,Panvila,2115070,59.536,7.845,1400872.0,17.086,2.205,402036.469,69.651,...,3.471,66812.164,Panvila,2115070,2,21,2115,1626,6326,3.891
7917,2,Ganepalla,9227135,50.73,5.683,3625884.0,14.451,2.11,1032835.125,60.71,...,5.864,242184.516,Ganepalla,9227135,9,92,9227,4929,6929,1.406
2263,3,Akkaraipattu 11,5236065,73.67,1.822,97318.547,18.843,1.232,24891.891,82.545,...,0.239,680.906,Akkaraipattu 11,5236065,5,52,5236,89,5889,66.169
8,4,Delgasthenna,2109125,66.878,2.711,265773.5,18.709,1.016,74347.703,75.957,...,0.869,4687.369,Delgasthenna,2109125,2,21,2109,274,2874,10.489


### Filter Dataset by Builtup Surface. 

Select Rows where  builtup is greater than or equal to 10 percent

In [11]:
#filter_by_builtup = spfeas_guf_wp[spfeas_guf_wp['ghs_mean']>=10]

In [12]:
#filter_by_builtup.shape

In [13]:
#np.sum(filter_by_builtup['stats_sum_y'])

In [14]:
#pop_density = filter_by_builtup['stats_sum_y']/filter_by_builtup['GN_area']

In [15]:
#pop_count = filter_by_builtup['LKA_ppp_v2b_2015_UNadj_sum']
#np.sum(pop_count)

In [16]:
# import matplotlib.pyplot as plt
# plt.subplots(figsize=(8,8))
# y = pop_count
# x = filter_by_builtup['ghs_mean']
# plt.scatter(x, y)

# z = np.polyfit(x, y, 1)
# p = np.poly1d(z)
# plt.plot(x,p(x),"r--")
# plt.show()

In [17]:
#filter_by_builtup.shape

In [18]:
spfeas_guf.shape

(14021, 434)

### Analysis

Get the list of dependent variables from the DataFrame to store in list y_vars

In [19]:
#y_var = list(filter_by_builtup.axes[1])[440]

y_var = list(spfeas_guf.axes[1])[433]
y_var

'ghs_mean'

Get a list of all independent variables from the DataFrame in list all_x

In [20]:
all_x = list(spfeas_guf.axes[1])[3:423]

#Check
all_x

['fourier_sc31_mean_mean',
 'fourier_sc31_mean_std',
 'fourier_sc31_mean_sum',
 'fourier_sc31_variance_mean',
 'fourier_sc31_variance_std',
 'fourier_sc31_variance_sum',
 'fourier_sc51_mean_mean',
 'fourier_sc51_mean_std',
 'fourier_sc51_mean_sum',
 'fourier_sc51_variance_mean',
 'fourier_sc51_variance_std',
 'fourier_sc51_variance_sum',
 'fourier_sc71_mean_mean',
 'fourier_sc71_mean_std',
 'fourier_sc71_mean_sum',
 'fourier_sc71_variance_mean',
 'fourier_sc71_variance_std',
 'fourier_sc71_variance_sum',
 'gabor_sc3_filter_1_mean',
 'gabor_sc3_filter_1_std',
 'gabor_sc3_filter_1_sum',
 'gabor_sc3_filter_10_mean',
 'gabor_sc3_filter_10_std',
 'gabor_sc3_filter_10_sum',
 'gabor_sc3_filter_11_mean',
 'gabor_sc3_filter_11_std',
 'gabor_sc3_filter_11_sum',
 'gabor_sc3_filter_12_mean',
 'gabor_sc3_filter_12_std',
 'gabor_sc3_filter_12_sum',
 'gabor_sc3_filter_13_mean',
 'gabor_sc3_filter_13_std',
 'gabor_sc3_filter_13_sum',
 'gabor_sc3_filter_14_mean',
 'gabor_sc3_filter_14_std',
 'gabor_sc3

### Compute Coorelation of features with population density

Store all features with the least correlation (stat. significance (p < 0.05)) 

The Pearson correlation coefficient **measures the linear relationship
between two datasets.** Strictly speaking, Pearson's correlation requires
that each dataset be **normally distributed, and not necessarily zero-mean.**

Like other correlation coefficients, this one varies between -1 and +1
with 0 implying no correlation. Correlations of -1 or +1 imply an exact
linear relationship. Positive correlations imply that as x increases, so
does y. Negative correlations imply that as x increases, y decreases.

The p-value roughly indicates **the probability of an uncorrelated system**
producing datasets that have a Pearson correlation at least as extreme
as the one computed from these datasets. 

***The p-values are not entirely
reliable but are probably reasonable for datasets larger than 500 or so.***

In [21]:
spfeas_guf[y_var] = spfeas_guf[y_var].fillna(0)
spfeas_guf[y_var].isnull().values.any()

False

In [22]:
y_dict = {}
x = []

for x_var in all_x:
    
    #Calculate the Pearson statistics, 
    # returns the Pearson value and p value
    
    p = stats.skew(spfeas_guf[x_var])
    
    #print(y_var, x_var, p)
    # print back for mike
    #print (y_var + " , " + x_var + " , " +  p)

In [42]:
y_dict = {}
x = []

for x_var in all_x:
    
    #Calculate the Pearson statistics, 
    # returns the Pearson value and p value
    
    p = stats.pearsonr(spfeas_guf[x_var],spfeas_guf[y_var])
    
    # print back for mike
    #print (y_var + " , " + x_var + " , " +  str(p[0]) + " , " + str(p[1]))
    
    #If p < 0.05 append to list x
    if p[1] < 0.05:
        x.append([x_var,abs(p[0])])

#List x is made into a DataFrame 
# which is sorted by the absolute values of the Pearson values
x_df = pd.DataFrame(x,columns=["x_var","abs_r2"]).sort_values("abs_r2",ascending=False)


#The dependent variable dictionary is given an entry 
# where the key is the name of the dependent variable
# and the value is a list of top 200 most significant values

y_dict[y_var] = list(x_df["x_var"][0:200])
#y_dict[y_var]
#Print out each dependent variable and 
#the number of x values that remain to check completion

In [43]:
x_df.head(15)

Unnamed: 0,x_var,abs_r2
186,hog_sc7_kurtosis_mean,0.793919
281,mean_sc3_variance_mean,0.768958
174,hog_sc5_kurtosis_mean,0.766846
192,hog_sc7_skew_mean,0.748357
371,sfs_sc31_mean_mean,0.734304
63,gabor_sc3_variance_mean,0.726576
33,gabor_sc3_filter_14_mean,0.726186
36,gabor_sc3_filter_2_mean,0.72507
180,hog_sc5_skew_mean,0.724472
27,gabor_sc3_filter_12_mean,0.724284


In [44]:
#check 

for key in y_dict.keys():
    print(key,len(y_dict[key]))


ghs_mean 200


### Correlation Significance

For each dependent variable y in the list of all dependent values, calibrate the model.
Add new key to the output dictionary where y is the dependent variable curently being processed and the values are empty for now

In [45]:
#Initialize the output dictionary, Y_D, 
# with each key being a dependent variable and the values being the results of the analyses

Y_D = {}

Y_D[y_var]={}

#Dictionary Models is used to store each result object for later use if needed

Models ={}

#Get independent variables from the variable dictionary and store in list x_vars
x_vars = y_dict[y_var]


vars_df = pd.DataFrame()

vars_df[y_var] = spfeas_guf[y_var]


for x in x_vars:
    vars_df[x] = spfeas_guf[x]

In [46]:
vars_df.head()

Unnamed: 0_level_0,ghs_mean,hog_sc7_kurtosis_mean,mean_sc3_variance_mean,hog_sc5_kurtosis_mean,hog_sc7_skew_mean,sfs_sc31_mean_mean,gabor_sc3_variance_mean,gabor_sc3_filter_14_mean,gabor_sc3_filter_2_mean,hog_sc5_skew_mean,...,gabor_sc7_filter_10_std,gabor_sc7_filter_6_std,gabor_sc7_filter_11_std,orb_sc51_skew_std,gabor_sc7_filter_12_std,gabor_sc7_filter_3_std,orb_sc51_mean_std,sfs_sc31_std_mean,gabor_sc7_filter_4_std,orb_sc51_max_std
gid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
43,2.702,1232837.607,3.93,332095.664,5621.894,4.617,1.111,1.095,1.094,2818.422,...,10.627,9.938,0.545,4458.473,10.235,0.494,0.025,6.886,9.323,0.654
564,3.891,582042.462,2.222,180522.707,3684.138,3.057,0.754,0.737,0.746,2076.286,...,12.589,12.906,0.582,3583.986,12.216,0.603,0.02,6.168,12.926,0.74
7917,1.406,191063.09,1.108,89183.994,2482.967,1.646,0.448,0.448,0.446,1514.059,...,6.762,6.661,0.346,0.0,6.563,0.341,0.0,4.184,6.385,0.0
2263,66.169,1713686.816,6.59,424855.492,5412.451,6.65,1.931,1.904,1.903,3254.023,...,24.696,34.773,0.812,0.0,21.315,1.507,0.0,6.442,44.066,0.0
8,10.489,1341415.447,3.319,306488.279,4976.435,4.773,1.041,1.048,1.036,2506.209,...,18.068,17.722,0.776,2556.833,17.332,0.747,0.007,7.101,16.609,0.261


### Scale/Normalize Data

In [47]:
minmax_scaler = preprocessing.MinMaxScaler()
standard_scaler = preprocessing.StandardScaler()

names = vars_df.columns
scaled_df = standard_scaler.fit_transform(vars_df)
scaled_df = pd.DataFrame(scaled_df, columns=names)
scaled_df.head()

Unnamed: 0,ghs_mean,hog_sc7_kurtosis_mean,mean_sc3_variance_mean,hog_sc5_kurtosis_mean,hog_sc7_skew_mean,sfs_sc31_mean_mean,gabor_sc3_variance_mean,gabor_sc3_filter_14_mean,gabor_sc3_filter_2_mean,hog_sc5_skew_mean,...,gabor_sc7_filter_10_std,gabor_sc7_filter_6_std,gabor_sc7_filter_11_std,orb_sc51_skew_std,gabor_sc7_filter_12_std,gabor_sc7_filter_3_std,orb_sc51_mean_std,sfs_sc31_std_mean,gabor_sc7_filter_4_std,orb_sc51_max_std
0,-0.405118,1.399884,0.799661,1.4445,1.531891,1.075406,0.576564,0.556021,0.557298,1.509675,...,-0.297894,-0.365046,-0.061538,1.693247,-0.294011,-0.221706,0.236982,0.975934,-0.38793,0.139299
1,-0.355411,-0.066859,-0.208227,-0.164669,-0.206404,-0.018677,-0.247507,-0.279333,-0.257276,-0.189176,...,-0.113674,-0.083272,0.061047,1.165526,-0.098695,0.14681,0.078307,0.173998,-0.02615,0.241595
2,-0.459298,-0.948036,-0.865597,-1.134362,-1.283933,-1.008261,-0.953854,-0.953684,-0.959496,-1.47619,...,-0.660794,-0.676156,-0.720845,-0.997278,-0.65605,-0.738982,-0.556395,-2.041938,-0.682936,-0.638623
3,2.248177,2.483608,2.369324,2.42928,1.344007,2.501221,2.469389,2.443736,2.450949,2.506825,...,1.0231,1.992722,0.82306,-0.997278,0.798418,3.203129,-0.556395,0.480029,3.100637,-0.638623
4,-0.079575,1.644594,0.439111,1.17264,0.952872,1.184814,0.414981,0.446352,0.421536,0.794977,...,0.400771,0.373946,0.703789,0.545677,0.405716,0.633657,-0.334249,1.216069,0.343662,-0.328168


In [48]:
# #Create a new dataframe for scaled and centered values
# scaled_df = pd.DataFrame()

# #Scale and center the values
# scaled_df[y_var] = scale(pop_count, with_mean=True, with_std=True)


# for x in x_vars:
#     scaled_df[x] = scale(spfeas_world_pop_merged[x], with_mean=True, with_std=True)

# scaled_df = scaled_df.round(3)

In [49]:


# Y = preprocessing.minmax_scale(vars_df[y_var])
# name='pop_sum'
# Y = pd.DataFrame(Y)
# #X = pd.DataFrame()

# X = scaler.fit_transform(vars_df[x_vars])
# X = pd.DataFrame(X, columns=x_vars)
# Y.head()

In [50]:
#X.head()

### Scale the variables

### Set Elastic net's parameters

In [51]:
enet_result = ElasticNetCV(max_iter=1e8,
                    alphas = [0.0005, 0.001, 0.01, 0.03, 0.05, 0.1],
                    l1_ratio =[.1, .5, .7, .9, .95, .99, 1],
                    verbose= False,
                    n_jobs = -1, 
                    cv=5, 
                    selection = 'random',
                    fit_intercept=False)


In [52]:
scaled_df.shape

(14021, 201)

In [53]:
# Fit the mode

In [57]:
#Fit the model with the scaled data
X_train, X_test, y_train, y_test = train_test_split(scaled_df[x_vars],scaled_df[y_var], test_size=0.34)
enet_result.fit(X_train,y_train)
#Append the model to the Models dictionary
Models[y_var] = enet_result


In [58]:
enet_result

ElasticNetCV(alphas=[0.0005, 0.001, 0.01, 0.03, 0.05, 0.1], copy_X=True, cv=5,
       eps=0.001, fit_intercept=False,
       l1_ratio=[0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1], max_iter=100000000.0,
       n_alphas=100, n_jobs=-1, normalize=False, positive=False,
       precompute='auto', random_state=None, selection='random',
       tol=0.0001, verbose=False)

In [59]:
opt_alpha, opt_l1_ratio = enet_result.alpha_, enet_result.l1_ratio_

#Print update to ensure that the script is progressing properly
print("R2: {:.2f} Alpha: {} l1_ratio: {}"
      .format(enet_result.score(scaled_df[x_vars],scaled_df[y_var]),
              enet_result.alpha_, enet_result.l1_ratio_))

R2: 0.81 Alpha: 0.0005 l1_ratio: 0.1


Record the overall R squared score and optimal alpha 
and l1 ratio values and store them in the output dictionary


In [38]:
Y_D[y_var]['Total R2'] = enet_result.score(scaled_df[x_vars],scaled_df[y_var])
Y_D[y_var]['Alpha'] = opt_alpha
Y_D[y_var]['l1_ratio'] = opt_l1_ratio

### Ten Fold Cross validated regression

In [39]:
#Create a list R2s to store out of sample R squared values

R2s = []

#Specify the number of trials to run

trials = 10

#Run the number of trials specified in trials, 
#for each trial 66% of the observations are randomly selected to train the model
#Testing is done on the remaining 33% of observations and the R squared values are recorded

for i in range(trials):    
    X_train, X_test, y_train, y_test = train_test_split(scaled_df[x_vars],scaled_df[y_var], test_size=0.34)
    enet_regr = ElasticNetCV(max_iter=1e8,
                    alphas = [opt_alpha],
                    l1_ratio =[opt_l1_ratio],
                    n_jobs = -1, 
                    cv=5, 
                    selection = 'random',
                    fit_intercept=False)
    enet_regr.fit(X_train,y_train)
    R2s.append(enet_regr.score(X_test,y_test))

#print("Mean R2: {:.2f} StDev: {:.4f}".format(mean(R2s),pstdev(R2s)))
#Record the out of sample R squared values
Y_D[y_var]['Sampling']={'trials':trials,'R2':mean(R2s),'StDev':pstdev(R2s),'R2s':R2s}
coefs = [i for i in zip(list(scaled_df[x_vars].axes[1]),enet_result.coef_)]
remaining = [i for i in coefs if abs(i[1])>0.0]
Y_D[y_var]["Coefficients"]=remaining

In [40]:

Y_D[y_var]

{'Total R2': 0.8133796952192605,
 'Alpha': 0.01,
 'l1_ratio': 0.1,
 'Sampling': {'trials': 10,
  'R2': 0.8127367522549835,
  'StDev': 0.004876911949278502,
  'R2s': [0.8172555343673286,
   0.8082334554863153,
   0.8127897744080191,
   0.8042827592991347,
   0.8084479130523512,
   0.8143859182837081,
   0.8094488205972372,
   0.8197821402844279,
   0.8133957761343042,
   0.8193454306370094]},
 'Coefficients': [('hog_sc7_kurtosis_mean', 0.18936066528190254),
  ('mean_sc3_variance_mean', 0.13322126184690722),
  ('hog_sc5_kurtosis_mean', 0.08487440383463327),
  ('hog_sc7_skew_mean', 0.03127327861971739),
  ('gabor_sc3_filter_14_mean', 0.03829858747537948),
  ('hog_sc5_skew_mean', 0.03435877289137731),
  ('gabor_sc3_filter_12_mean', 0.045079139127919686),
  ('gabor_sc3_filter_10_mean', 0.010947240870508742),
  ('hog_sc3_kurtosis_mean', 0.06365143345903068),
  ('fourier_sc31_mean_mean', 0.024943235428885338),
  ('hog_sc3_variance_std', -0.03630845213608007),
  ('ndvi_sc3_variance_mean', 0.26

In [63]:
y_df = pd.DataFrame([i for i in zip(list(scaled_df[x_vars].axes[1]),enet_result.coef_)], 
                    columns=["features","Coeff"]).sort_values("Coeff", ascending=False)

y_df.head(50)

Unnamed: 0,features,Coeff
14,hog_sc3_kurtosis_mean,0.792804
45,hog_sc3_skew_std,0.615455
56,ndvi_sc5_variance_mean,0.591431
183,gabor_sc7_filter_14_std,0.454253
1,mean_sc3_variance_mean,0.438303
6,gabor_sc3_filter_14_mean,0.407949
87,orb_sc71_mean_mean,0.389849
177,gabor_sc7_variance_std,0.383471
32,lbpm_sc3_kurtosis_mean,0.358033
92,gabor_sc7_filter_5_mean,0.35409


## HOG

In [65]:
filter_var = [col for col in scaled_df if col.startswith('hog')]
X_train, X_test, y_train, y_test = train_test_split(scaled_df[filter_var],scaled_df[y_var], test_size=0.34)
enet_result.fit(X_train,y_train)

Models[y_var] = enet_result

opt_alpha, opt_l1_ratio = enet_result.alpha_, enet_result.l1_ratio_


print("R2: {:.2f} Alpha: {} l1_ratio: {}"
      .format(enet_result.score(scaled_df[filter_var],scaled_df[y_var]),
              enet_result.alpha_, enet_result.l1_ratio_))


R2: 0.70 Alpha: 0.0005 l1_ratio: 1.0


## LBPM

In [67]:
filter_var = [col for col in scaled_df if col.startswith('lbpm')]
X_train, X_test, y_train, y_test = train_test_split(scaled_df[filter_var],scaled_df[y_var], test_size=0.34)
enet_result.fit(X_train,y_train)

Models[y_var] = enet_result

opt_alpha, opt_l1_ratio = enet_result.alpha_, enet_result.l1_ratio_


print("R2: {:.2f} Alpha: {} l1_ratio: {}"
      .format(enet_result.score(scaled_df[filter_var],scaled_df[y_var]),
              enet_result.alpha_, enet_result.l1_ratio_))


R2: 0.65 Alpha: 0.0005 l1_ratio: 1.0


## Gabor

In [68]:
filter_var = [col for col in scaled_df if col.startswith('gabor')]
X_train, X_test, y_train, y_test = train_test_split(scaled_df[filter_var],scaled_df[y_var], test_size=0.34)
enet_result.fit(X_train,y_train)

Models[y_var] = enet_result

opt_alpha, opt_l1_ratio = enet_result.alpha_, enet_result.l1_ratio_


print("R2: {:.2f} Alpha: {} l1_ratio: {}"
      .format(enet_result.score(scaled_df[filter_var],scaled_df[y_var]),
              enet_result.alpha_, enet_result.l1_ratio_))


R2: 0.64 Alpha: 0.0005 l1_ratio: 0.1


## Fourier

In [69]:
ilter_var = [col for col in scaled_df if col.startswith('fourier')]
X_train, X_test, y_train, y_test = train_test_split(scaled_df[filter_var],scaled_df[y_var], test_size=0.34)
enet_result.fit(X_train,y_train)

Models[y_var] = enet_result

opt_alpha, opt_l1_ratio = enet_result.alpha_, enet_result.l1_ratio_


print("R2: {:.2f} Alpha: {} l1_ratio: {}"
      .format(enet_result.score(scaled_df[filter_var],scaled_df[y_var]),
              enet_result.alpha_, enet_result.l1_ratio_))


R2: 0.64 Alpha: 0.0005 l1_ratio: 0.1
