-
Notifications
You must be signed in to change notification settings - Fork 0
/
FeatureSelector.py
113 lines (75 loc) · 2.86 KB
/
FeatureSelector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import csv
import decimal
import pandas
# How many of the best-scoring feature columns to keep.
columnsToInclude = 10
# Column indices chosen by the Fisher selector; populated further down.
selectedColumns = list()
def search(list, platform):
    """Return True if any element of *list* equals *platform*, else False.

    Elements are compared with ``==`` in iteration order and the scan stops
    at the first match.  Any iterable is accepted, not only indexable
    sequences.

    The first parameter keeps its original name (which shadows the builtin
    ``list`` inside this function) so existing keyword callers keep working.
    """
    return any(item == platform for item in list)
# Score each of the first 37 columns of the input CSV and choose the
# `columnsToInclude` best-scoring ones, plus the label column (index 37).
#
# Every non-blank row must hold 37 numeric feature values followed by a class
# label; a label of '-1' marks the negative class, anything else the positive.
featureCount = 37
labelColumn = featureCount
gama = -0.5

negativeData = []
positiveData = []
sumOfNegative = [0.0] * featureCount
sumOfPositive = [0.0] * featureCount
squareOfData = [0.0] * featureCount
sumOfData = [0.0] * featureCount
lengthOfData = 0

# newline='' is what the csv module asks for when handing it a file object.
with open('prefix hijax rc004.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    for row in csv_reader:
        if not row:
            # csv.reader yields [] for a blank line; there is nothing to add.
            continue
        lengthOfData += 1
        # Convert each cell once; indexing (not slicing) keeps the IndexError
        # for rows that are too short instead of silently truncating them.
        values = [float(row[i]) for i in range(featureCount)]
        if row[labelColumn] == '-1':
            negativeData.append(row)
            classSums = sumOfNegative
        else:
            positiveData.append(row)
            classSums = sumOfPositive
        for i, value in enumerate(values):
            sumOfData[i] += value
            squareOfData[i] += value * value
            classSums[i] += value

if lengthOfData == 0:
    raise ValueError("'prefix hijax rc004.csv' contains no data rows")

# Class sizes must be taken after the rows have been read; the per-class
# terms below are means over the samples of each class, so they divide by
# these counts (not by the number of feature columns).
lengthOfNegative = len(negativeData)
lengthOfPositive = len(positiveData)

firstDivision = 1 / lengthOfData
secondDivision = gama / lengthOfData
thirdDivision = (gama - 1) / (lengthOfData * lengthOfData)

theta = []
for i in range(featureCount):
    # An empty class contributes 0 rather than dividing by zero.
    negativeMean = sumOfNegative[i] / lengthOfNegative if lengthOfNegative else 0.0
    positiveMean = sumOfPositive[i] / lengthOfPositive if lengthOfPositive else 0.0
    firstPortion = negativeMean + positiveMean
    secondPortion = secondDivision * squareOfData[i]
    # NOTE(review): this term uses the plain column sum, not its square --
    # confirm against the intended scoring formula.
    thirdPortion = thirdDivision * sumOfData[i]
    theta.append(firstDivision * (firstPortion - secondPortion + thirdPortion))

sortedTheta = sorted(theta, reverse=True)
print(theta)
print('sortedTheta:::')
print(sortedTheta)

# Rank column indices by score instead of matching score values back to
# columns: value matching selects every tied column once per tied score,
# giving duplicate indices and more columns than requested.  The sort is
# stable, so tied columns keep their left-to-right order.
rankedColumns = sorted(range(featureCount), key=theta.__getitem__, reverse=True)
selectedColumns.extend(rankedColumns[:columnsToInclude])
# Always carry the label column along with the chosen features.
selectedColumns.append(labelColumn)

# Ascending column order for the later column-subset read.
sortedSelectedColumns = sorted(selectedColumns)
print('selectedColumns::')
print(sortedSelectedColumns)
# Re-read the input keeping only the selected columns and save that subset.
with open('prefix hijax rc004.csv') as csv_file:
    # header=None: the file has no header row (the scoring pass parses its
    # very first line as numeric data), so pandas must not consume that line
    # as column names and drop it from the data.
    csv_data = pandas.read_csv(csv_file, header=None, usecols=sortedSelectedColumns)
print(csv_data)
# read_csv already returns a DataFrame, so it is written out directly.
# header=False keeps the output header-less like the input; the index column
# is still written, so the output's column layout is unchanged.
csv_data.to_csv('writeData.csv', header=False)