Skip to content

Commit

Permalink
Coding style conforms to PEP 8 (符合 PEP8)
Browse files Browse the repository at this point in the history
  • Loading branch information
fukuball committed Nov 4, 2016
1 parent 4fc8955 commit b8ae99f
Show file tree
Hide file tree
Showing 25 changed files with 115 additions and 97 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ install:

script:
- coverage run --source=FukuML setup.py test
- pep8 FukuML/*.py --ignore=E501

after_success:
- codecov
Expand Down
8 changes: 4 additions & 4 deletions FukuML/AdaBoostStump.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#encoding=utf8
# encoding=utf8

import os
import numpy as np
Expand Down Expand Up @@ -132,7 +132,7 @@ def calculate_alpha_u(self, weak_learner, u):
epsiloin += (u[i] * 1.0)

epsiloin = epsiloin / np.sum(u)
tune_alpha = np.sqrt((1.0-epsiloin)/epsiloin)
tune_alpha = np.sqrt((1.0 - epsiloin) / epsiloin)
alpha = np.log(tune_alpha)

new_u = []
Expand Down Expand Up @@ -160,9 +160,9 @@ def train(self):

for t in range(self.run_t):

#np.random.choice(np.arange(self.data_num), self.data_num, p=(u/sum(u)))
# np.random.choice(np.arange(self.data_num), self.data_num, p=(u/sum(u)))

print("Round "+str(t+1))
print("Round " + str(t + 1))

decision_stump_bc = decision_stump.BinaryClassifier()
decision_stump_bc.status = 'load_train_data'
Expand Down
16 changes: 8 additions & 8 deletions FukuML/Blending.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#encoding=utf8
# encoding=utf8

from __future__ import division
from abc import ABCMeta, abstractmethod
Expand Down Expand Up @@ -26,14 +26,14 @@ def calculate_avg_error(self, input_data_file=''):

with open(input_data_file) as f:
for line in f:
data_num = data_num+1
data_num = data_num + 1
data = line.split()
answer = data[-1]
prediction = self.prediction(line)
if float(prediction['prediction']) != float(answer):
error_num = error_num+1
error_num = error_num + 1

avg_error = float(error_num/data_num)
avg_error = float(error_num / data_num)

return avg_error

Expand All @@ -58,14 +58,14 @@ def calculate_avg_error(self, input_data_file=''):

with open(input_data_file) as f:
for line in f:
data_num = data_num+1
data_num = data_num + 1
data = line.split()
answer = data[-1]
prediction = self.prediction(line)
error = (float(prediction['prediction']) - float(answer)) ** 2
error_sum = error_sum+error
error_sum = error_sum + error

avg_error = float(error_sum/data_num)
avg_error = float(error_sum / data_num)

return avg_error

Expand Down Expand Up @@ -129,7 +129,7 @@ def prediction(self, input_data='', mode='test_data'):
prediction = model.prediction(input_data, mode)
prediction_sum = prediction_sum + prediction['prediction']

prediction_return = float(prediction_sum/len(self.models))
prediction_return = float(prediction_sum / len(self.models))

if mode == 'future_data':
data = input_data.split()
Expand Down
10 changes: 5 additions & 5 deletions FukuML/DecisionStump.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#encoding=utf8
# encoding=utf8

from __future__ import division
import os
Expand Down Expand Up @@ -148,7 +148,7 @@ def train(self):

self.status = 'train'

error_in = self.data_num/self.data_num
error_in = self.data_num / self.data_num

for i in range(0, self.train_X.shape[1]):

Expand All @@ -161,16 +161,16 @@ def train(self):
sort_dim_X = sort_dim_XY[:, 0]
sort_dim_Y = sort_dim_XY[:, 1]

thetas = np.array([float("-inf")] + [(sort_dim_X[j] + sort_dim_X[j+1])/2 for j in range(0, self.data_num-1)] + [float("inf")])
thetas = np.array([float("-inf")] + [(sort_dim_X[j] + sort_dim_X[j + 1]) / 2 for j in range(0, self.data_num - 1)] + [float("inf")])
error_in_i = sum(sort_u)
sign_i = 1
theta_i = 0.0

for theta in thetas:
y_positive = np.where(sort_dim_X > theta, 1, -1)
y_negative = np.where(sort_dim_X < theta, 1, -1)
error_positive = sum((y_positive != sort_dim_Y)*sort_u)
error_negative = sum((y_negative != sort_dim_Y)*sort_u)
error_positive = sum((y_positive != sort_dim_Y) * sort_u)
error_negative = sum((y_negative != sort_dim_Y) * sort_u)
if error_positive > error_negative:
if error_in_i > error_negative:
error_in_i = error_negative
Expand Down
40 changes: 21 additions & 19 deletions FukuML/DecisionTree.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#encoding=utf8
# encoding=utf8

import os
import collections
Expand Down Expand Up @@ -143,7 +143,7 @@ def prune_by_height(self, tree, tree_height_limit):
for v, c in tree.false_branch.each_class_counts.items():
false_branch += [v] * c

true_false_branch = np.array(true_branch+false_branch)
true_false_branch = np.array(true_branch + false_branch)
true_branch = np.array(true_branch)
false_branch = np.array(false_branch)

Expand Down Expand Up @@ -172,12 +172,12 @@ def prune(self, tree):
true_branch += [v] * c
for v, c in tree.false_branch.each_class_counts.items():
false_branch += [v] * c
true_false_branch = np.array(true_branch+false_branch)
true_false_branch = np.array(true_branch + false_branch)
true_branch = np.array(true_branch)
false_branch = np.array(false_branch)

p = float(len(true_branch)) / len(true_false_branch)
delta = self.impurity(true_false_branch) - p*self.impurity(true_branch) - (1-p)*self.impurity(false_branch)
delta = self.impurity(true_false_branch) - p * self.impurity(true_branch) - (1 - p) * self.impurity(false_branch)
if delta < self.prune_gain:
if self.prune_notify:
print('A branch was pruned: gain = %f' % delta)
Expand Down Expand Up @@ -212,13 +212,13 @@ def classify_with_missing_data(self, x, tree):
false_branch = self.classify_with_missing_data(x, tree.false_branch)
true_branch_count = sum(true_branch.values())
false_branch_count = sum(false_branch.values())
true_branch_weight = float(true_branch_count)/(true_branch_count + false_branch_count)
false_branch_weight = float(false_branch_count)/(true_branch_count + false_branch_count)
true_branch_weight = float(true_branch_count) / (true_branch_count + false_branch_count)
false_branch_weight = float(false_branch_count) / (true_branch_count + false_branch_count)
each_class_counts = collections.defaultdict(int)
for k, v in true_branch.items():
each_class_counts[k] += v*true_branch_weight
each_class_counts[k] += v * true_branch_weight
for k, v in false_branch.items():
each_class_counts[k] += v*false_branch_weight
each_class_counts[k] += v * false_branch_weight
return dict(each_class_counts)
else:
branch = None
Expand Down Expand Up @@ -284,14 +284,14 @@ def regression_with_missing_data(self, x, tree):
true_branch_sum = 0
for key, value in list(true_branch.items()):
true_branch_count += value
true_branch_sum += key*value
true_branch_sum += key * value
false_branch_count = 0
false_branch_sum = 0
for key, value in list(false_branch.items()):
false_branch_count += value
false_branch_sum += key*value
false_branch_sum += key * value

mean = (float(true_branch_sum)+float(false_branch_sum))/(true_branch_count+false_branch_count)
mean = (float(true_branch_sum) + float(false_branch_sum)) / (true_branch_count + false_branch_count)
return {mean: 1}
else:
branch = None
Expand Down Expand Up @@ -328,13 +328,13 @@ def impurity(self, Y):
total_data_num = len(Y)
each_class_counts = self.each_class_counts(Y)
for k in each_class_counts:
impurity -= (float(each_class_counts[k])/total_data_num)**2
impurity -= (float(each_class_counts[k]) / total_data_num)**2
elif self.learn_type == 'regression':
if len(Y) == 0:
return 0
data = [float(y) for y in Y]
mean = sum(data) / len(data)
variance = sum([(d-mean)**2 for d in data]) / len(data)
variance = sum([(d - mean)**2 for d in data]) / len(data)
impurity = variance

return impurity
Expand All @@ -355,10 +355,12 @@ def divide_set(self, X, Y, column, value):

if value_is_float:
# for int and float values
splitting_function = lambda row: float(row[column]) >= value
def splitting_function(row):
return float(row[column]) >= value
else:
# for strings
splitting_function = lambda row: row[column] == value
def splitting_function(row):
return row[column] == value

list1 = [row for row in XY if splitting_function(row)]
list2 = [row for row in XY if not splitting_function(row)]
Expand All @@ -380,7 +382,7 @@ def grow_decision_tree_from(self, X, Y, height_position):
best_set = None

for col in range(1, self.data_demension):
column_values = X[:, col:col+1]
column_values = X[:, col:col + 1]
for value in column_values:
self.divide_set(X, Y, col, value[0])
(set1, set2) = self.divide_set(X, Y, col, value[0])
Expand All @@ -398,15 +400,15 @@ def grow_decision_tree_from(self, X, Y, height_position):
set2Y = np.array([])

p = float(len(set1Y)) / len(Y)
gain = impurity_score - p*self.impurity(set1Y) - (1-p)*self.impurity(set2Y)
gain = impurity_score - p * self.impurity(set1Y) - (1 - p) * self.impurity(set2Y)
if gain > best_gain and len(set1Y) > 0 and len(set2Y) > 0:
best_gain = gain
best_attribute = (col, value[0])
best_set = (set1X, set1Y, set2X, set2Y)

if best_gain > 0:
true_branch = self.grow_decision_tree_from(best_set[0], best_set[1], height_position+1)
false_branch = self.grow_decision_tree_from(best_set[2], best_set[3], height_position+1)
true_branch = self.grow_decision_tree_from(best_set[0], best_set[1], height_position + 1)
false_branch = self.grow_decision_tree_from(best_set[2], best_set[3], height_position + 1)
is_leaf = False
if true_branch is None and false_branch is None:
is_leaf = True
Expand Down
8 changes: 4 additions & 4 deletions FukuML/KernelLogisticRegression.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#encoding=utf8
# encoding=utf8

import random
import numpy as np
Expand Down Expand Up @@ -100,7 +100,7 @@ def error_function(self, x, y, W):
x = x[1:]
original_X = self.train_X[:, 1:]
score = np.sum(self.beta * utility.Kernel.kernel_matrix_xX(self, x, original_X))
error = np.log(1 + np.exp((-1)*y*score))
error = np.log(1 + np.exp((-1) * y * score))

return error

Expand All @@ -116,7 +116,7 @@ def calculate_gradient(self, X, Y, beta):
original_X = self.train_X[:, 1:]
K = utility.Kernel.kernel_matrix_xX(self, original_x, original_X)

gradient_average = ((2*self.lambda_p)/data_num)*np.dot(beta, K) + np.dot(self.theta((-1)*Y*np.dot(beta, K))*((-1)*Y), K)/data_num
gradient_average = ((2 * self.lambda_p) / data_num) * np.dot(beta, K) + np.dot(self.theta((-1) * Y * np.dot(beta, K)) * ((-1) * Y), K) / data_num

print('calculate gradient descent...')

Expand All @@ -140,7 +140,7 @@ def train(self):

for i in range(0, self.updates):
if self.feed_mode == 'stochastic':
stochastic_i = random.randint(0, self.data_num-1)
stochastic_i = random.randint(0, self.data_num - 1)
x = self.train_X[stochastic_i]
y = self.train_Y[stochastic_i]
gradient = self.calculate_gradient(x, y, self.beta)
Expand Down
2 changes: 1 addition & 1 deletion FukuML/KernelRidgeRegression.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#encoding=utf8
# encoding=utf8

import os
import itertools
Expand Down
8 changes: 4 additions & 4 deletions FukuML/L2RLogisticRegression.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#encoding=utf8
# encoding=utf8

#import operator
#import itertools
# import operator
# import itertools
import numpy as np
import FukuML.LogisticRegression as logistic_regression

Expand Down Expand Up @@ -70,7 +70,7 @@ def calculate_gradient(self, X, Y, W):
else:
data_num = 1

gradient_average = ((2*self.lambda_p)/data_num)*self.W + np.dot(self.theta((-1)*Y*np.dot(W, X.transpose()))*((-1)*Y), X)/data_num
gradient_average = ((2 * self.lambda_p) / data_num) * self.W + np.dot(self.theta((-1) * Y * np.dot(W, X.transpose())) * ((-1) * Y), X) / data_num

return gradient_average

Expand Down
2 changes: 1 addition & 1 deletion FukuML/LeastSquaresSVM.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#encoding=utf8
# encoding=utf8

import FukuML.KernelRidgeRegression as kernel_ridge_regression

Expand Down
2 changes: 1 addition & 1 deletion FukuML/LinearRegression.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#encoding=utf8
# encoding=utf8

import os
import itertools
Expand Down
16 changes: 8 additions & 8 deletions FukuML/LogisticRegression.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
#encoding=utf8
# encoding=utf8

import os
import random
import operator
import itertools
#import collections
# import collections
import numpy as np
import FukuML.Utility as utility
import FukuML.MLBase as ml
import FukuML.LinearRegression as linear_regression
#np.set_printoptions(threshold=np.nan)
# np.set_printoptions(threshold=np.nan)


class LogisticRegression(ml.Learner):
Expand Down Expand Up @@ -123,7 +123,7 @@ def theta(self, s):

s = np.where(s < -709, -709, s)

return 1/(1 + np.exp((-1)*s))
return 1 / (1 + np.exp((-1) * s))

def score_function(self, x, W):
# needs refactoring
Expand All @@ -143,7 +143,7 @@ def error_function(self, x, y, W):
Error function to calculate error: cross entropy error
'''

error = np.log(1 + np.exp((-1)*y*np.inner(x, W)))
error = np.log(1 + np.exp((-1) * y * np.inner(x, W)))

return error

Expand All @@ -154,7 +154,7 @@ def calculate_gradient(self, X, Y, W):
else:
data_num = 1

gradient_average = np.dot(self.theta((-1)*Y*np.dot(W, X.transpose()))*((-1)*Y), X)/data_num
gradient_average = np.dot(self.theta((-1) * Y * np.dot(W, X.transpose())) * ((-1) * Y), X) / data_num

return gradient_average

Expand Down Expand Up @@ -203,7 +203,7 @@ def train(self):

for i in range(0, self.updates):
if self.feed_mode == 'stochastic':
stochastic_i = random.randint(0, self.data_num-1)
stochastic_i = random.randint(0, self.data_num - 1)
x = self.train_X[stochastic_i]
y = self.train_Y[stochastic_i]
gradient = self.calculate_gradient(x, y, self.W)
Expand Down Expand Up @@ -577,7 +577,7 @@ def prediction(self, input_data='', mode='test_data'):
self.temp_W = {}

if self.decomposition == 'ovo':
#counter = collections.Counter(ovo_vote)
# counter = collections.Counter(ovo_vote)
prediction_return = max(set(ovo_vote), key=ovo_vote.count)
elif self.decomposition == 'ova':
prediction_return = max(prediction_list.items(), key=operator.itemgetter(1))[0]
Expand Down
2 changes: 1 addition & 1 deletion FukuML/MLBase.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#encoding=utf8
# encoding=utf8

from abc import ABCMeta, abstractmethod
import numpy as np
Expand Down
Loading

0 comments on commit b8ae99f

Please sign in to comment.