In [2]:
from sklearn.linear_model import (LinearRegression, Ridge, 
                                  Lasso, RandomizedLasso)
from sklearn.feature_selection import RFE, f_regression
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
import pandas as pd
import numpy as np
from minepy import MINE

In [3]:
# Read the CSV once and split it by column position: column 1 is the
# regression target, columns 2 onward are the candidate features
# (column 0 is skipped here).
df = pd.read_csv("./data/nci60.csv")
Y = df.iloc[:, 1]
X = df.iloc[:, 2:]

In [7]:
# Feature labels, in the same order as the columns of X.
names = list(X.columns)

# Collects one {feature name: scaled score} dict per ranking method.
ranks = dict()
 
def rank_to_dict(ranks, names, order=1):
    """Min-max scale a vector of scores and key the result by feature name.

    Parameters
    ----------
    ranks : sequence of float
        One raw score per feature (coefficients, importances, ...).
    names : sequence
        Feature labels, paired positionally with ``ranks``.
    order : int or float, default 1
        Multiplier applied to the scores before scaling; pass ``-1`` to
        invert the ordering.

    Returns
    -------
    dict
        ``{name: score}`` where each score has been rescaled with
        ``MinMaxScaler`` and rounded to two decimals.
    """
    # MinMaxScaler works column-wise, so present the scores as one column.
    column = order * np.array([ranks]).T
    scaled = MinMaxScaler().fit_transform(column).T[0]
    return {name: round(score, 2) for name, score in zip(names, scaled)}
 
# Fixed seed for the stochastic estimators below (randomized lasso and the
# random forest) so the ranking table is identical on every
# Restart & Run All instead of changing from run to run.
SEED = 0

# NOTE(review): `RandomizedLasso` and `LinearRegression(normalize=...)` only
# exist in older scikit-learn releases -- confirm the pinned version before
# re-running this notebook in a fresh environment.

# Ordinary least squares: score = absolute coefficient.
lr = LinearRegression(normalize=True)
lr.fit(X, Y)
ranks["Linear reg"] = rank_to_dict(np.abs(lr.coef_), names)

# Ridge (L2 penalty): score = absolute coefficient.
ridge = Ridge(alpha=7)
ridge.fit(X, Y)
ranks["Ridge"] = rank_to_dict(np.abs(ridge.coef_), names)

# Lasso (L1 penalty): score = absolute coefficient.
lasso = Lasso(alpha=.05)
lasso.fit(X, Y)
ranks["Lasso"] = rank_to_dict(np.abs(lasso.coef_), names)

# Stability selection via randomized lasso; seeded for reproducibility.
rlasso = RandomizedLasso(alpha=0.04, random_state=SEED)
rlasso.fit(X, Y)
ranks["Stability"] = rank_to_dict(np.abs(rlasso.scores_), names)

# Random forest feature importances; seeded for reproducibility.
rf = RandomForestRegressor(random_state=SEED)
rf.fit(X, Y)
ranks["RF"] = rank_to_dict(rf.feature_importances_, names)

# Univariate regression test: only the F values are ranked, the p-values
# are kept in `pval` but not used here.
f, pval = f_regression(X, Y, center=True)
ranks["Corr."] = rank_to_dict(f, names)

# Maximal information coefficient of each feature column against Y.
mine = MINE()
mic_scores = []
for i in range(X.shape[1]):
    mine.compute_score(X.iloc[:, i], Y)
    m = mine.mic()
    mic_scores.append(m)

ranks["MIC"] = rank_to_dict(mic_scores, names)
 
 
# Average every feature's scaled score over all methods collected so far
# ("Mean" itself is not in `ranks` yet), rounded to two decimals.
r = {
    name: round(np.mean([scores[name] for scores in ranks.values()]), 2)
    for name in names
}

# Sort the method names before "Mean" exists so that it ends up as the
# last column of the table rather than in alphabetical position.
methods = sorted(ranks)
methods.append("Mean")
ranks["Mean"] = r

# Tab-separated table: a header row, then one row per feature.
header = "\t".join(methods)
print("\t%s" % header)
for name in names:
    row = "\t".join(str(ranks[method][name]) for method in methods)
    print("%s\t%s" % (name, row))

	Corr.	Lasso	Linear reg	MIC	RF	Ridge	Stability	Mean
A1BG	0.03	0.0	0.02	0.26	0.0	0.21	0.0	0.07
A1CF	0.11	0.0	0.18	0.26	0.0	0.14	0.0	0.1
A2ML1	0.0	0.0	0.05	0.13	0.0	0.0	0.0	0.03
A4GALT	0.14	0.0	0.02	0.26	0.0	0.03	0.0	0.06
A4GNT	0.0	0.0	0.06	0.0	0.0	0.0	0.0	0.01
AAAS	0.28	0.0	0.2	0.18	0.0	0.48	0.0	0.16
AACS	0.06	0.0	0.05	0.26	0.0	0.05	0.0	0.06
AADACL2	0.0	0.0	0.12	0.26	0.0	0.03	0.0	0.06
AADACL3	0.01	0.0	0.01	0.19	0.0	0.0	0.0	0.03
AADACL4	0.06	0.0	0.1	0.21	0.0	0.26	0.0	0.09
AADAT	0.0	0.0	0.02	0.0	0.0	0.0	0.0	0.0
AAED1	0.02	0.0	0.15	0.0	0.0	0.11	0.0	0.04
AAGAB	0.0	0.0	0.18	0.26	0.0	0.04	0.0	0.07
AAMP	0.01	0.0	0.07	0.18	0.0	0.04	0.0	0.04
AARS	0.02	0.0	0.1	0.17	0.0	0.15	0.0	0.06
AARS2	0.0	0.0	0.03	0.2	0.0	0.01	0.0	0.03
AARSD1	0.0	0.0	0.03	0.0	0.0	0.01	0.0	0.01
AASDH	0.0	0.0	0.01	0.0	0.0	0.04	0.0	0.01
AASDHPPT	0.02	0.0	0.13	0.26	0.0	0.0	0.0	0.06
AASS	0.03	0.0	0.03	0.18	0.0	0.16	0.0	0.06
AATF	0.0	0.0	0.02	0.26	0.0	0.03	0.0	0.04
ABCA1	0.01	0.0	0.02	0.34	0.0	0.04	0.0	0.06
ABCA10	0.29	0.0	0.08	0.3

In [28]:
# Variance (np.var default, ddof=0) of the scaled MIC scores across features.
list_values = list(ranks["MIC"].values())
np.var(list_values)

0.021813093822264647

In [29]:
# Variance (np.var default, ddof=0) of the scaled "Corr." scores across features.
list_values = list(ranks["Corr."].values())
np.var(list_values)

0.0056781931475460758

In [30]:
# Variance (np.var default, ddof=0) of the scaled random-forest scores across features.
list_values = list(ranks["RF"].values())
np.var(list_values)

0.00057924659246441396

In [31]:
# Variance (np.var default, ddof=0) of the scaled "Stability" scores across features.
list_values = list(ranks["Stability"].values())
np.var(list_values)

0.00018001567627465762

In [32]:
# Variance (np.var default, ddof=0) of the scaled ridge scores across features.
list_values = list(ranks["Ridge"].values())
np.var(list_values)

0.0083906022372528619

In [33]:
# Variance (np.var default, ddof=0) of the scaled lasso scores across features.
list_values = list(ranks["Lasso"].values())
np.var(list_values)

0.00013543967064768818

In [35]:
# Variance (np.var default, ddof=0) of the scaled linear-regression scores across features.
list_values = list(ranks["Linear reg"].values())
np.var(list_values)

0.0041769674264613612

In [36]:
# Variance (np.var default, ddof=0) of the per-feature "Mean" scores.
list_values = list(ranks["Mean"].values())
np.var(list_values)

0.0016729679372206141

In [34]:
# Show the column order used for the printed ranking table: the method
# names sorted alphabetically, with "Mean" appended last.
methods

['Corr.', 'Lasso', 'Linear reg', 'MIC', 'RF', 'Ridge', 'Stability', 'Mean']