Coding style conforms to PEP8 (Coding Style 符合 PEP8)

fukuball · Nov 4, 2016 · b8ae99f · b8ae99f
1 parent 4fc8955
commit b8ae99f
Show file tree

Hide file tree

Showing 25 changed files with 115 additions and 97 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -22,6 +22,7 @@ install:
 
 script:
   - coverage run --source=FukuML setup.py test
+  - pep8 FukuML/*.py --ignore=E501
 
 after_success:
   - codecov

diff --git a/FukuML/AdaBoostStump.py b/FukuML/AdaBoostStump.py
@@ -1,4 +1,4 @@
-#encoding=utf8
+# encoding=utf8
 
 import os
 import numpy as np
@@ -132,7 +132,7 @@ def calculate_alpha_u(self, weak_learner, u):
                 epsiloin += (u[i] * 1.0)
 
         epsiloin = epsiloin / np.sum(u)
-        tune_alpha = np.sqrt((1.0-epsiloin)/epsiloin)
+        tune_alpha = np.sqrt((1.0 - epsiloin) / epsiloin)
         alpha = np.log(tune_alpha)
 
         new_u = []
@@ -160,9 +160,9 @@ def train(self):
 
         for t in range(self.run_t):
 
-            #np.random.choice(np.arange(self.data_num), self.data_num, p=(u/sum(u)))
+            # np.random.choice(np.arange(self.data_num), self.data_num, p=(u/sum(u)))
 
-            print("Round "+str(t+1))
+            print("Round " + str(t + 1))
 
             decision_stump_bc = decision_stump.BinaryClassifier()
             decision_stump_bc.status = 'load_train_data'

diff --git a/FukuML/Blending.py b/FukuML/Blending.py
@@ -1,4 +1,4 @@
-#encoding=utf8
+# encoding=utf8
 
 from __future__ import division
 from abc import ABCMeta, abstractmethod
@@ -26,14 +26,14 @@ def calculate_avg_error(self, input_data_file=''):
 
         with open(input_data_file) as f:
             for line in f:
-                data_num = data_num+1
+                data_num = data_num + 1
                 data = line.split()
                 answer = data[-1]
                 prediction = self.prediction(line)
                 if float(prediction['prediction']) != float(answer):
-                    error_num = error_num+1
+                    error_num = error_num + 1
 
-        avg_error = float(error_num/data_num)
+        avg_error = float(error_num / data_num)
 
         return avg_error
 
@@ -58,14 +58,14 @@ def calculate_avg_error(self, input_data_file=''):
 
         with open(input_data_file) as f:
             for line in f:
-                data_num = data_num+1
+                data_num = data_num + 1
                 data = line.split()
                 answer = data[-1]
                 prediction = self.prediction(line)
                 error = (float(prediction['prediction']) - float(answer)) ** 2
-                error_sum = error_sum+error
+                error_sum = error_sum + error
 
-        avg_error = float(error_sum/data_num)
+        avg_error = float(error_sum / data_num)
 
         return avg_error
 
@@ -129,7 +129,7 @@ def prediction(self, input_data='', mode='test_data'):
             prediction = model.prediction(input_data, mode)
             prediction_sum = prediction_sum + prediction['prediction']
 
-        prediction_return = float(prediction_sum/len(self.models))
+        prediction_return = float(prediction_sum / len(self.models))
 
         if mode == 'future_data':
             data = input_data.split()

diff --git a/FukuML/DecisionStump.py b/FukuML/DecisionStump.py
@@ -1,4 +1,4 @@
-#encoding=utf8
+# encoding=utf8
 
 from __future__ import division
 import os
@@ -148,7 +148,7 @@ def train(self):
 
         self.status = 'train'
 
-        error_in = self.data_num/self.data_num
+        error_in = self.data_num / self.data_num
 
         for i in range(0, self.train_X.shape[1]):
 
@@ -161,16 +161,16 @@ def train(self):
             sort_dim_X = sort_dim_XY[:, 0]
             sort_dim_Y = sort_dim_XY[:, 1]
 
-            thetas = np.array([float("-inf")] + [(sort_dim_X[j] + sort_dim_X[j+1])/2 for j in range(0, self.data_num-1)] + [float("inf")])
+            thetas = np.array([float("-inf")] + [(sort_dim_X[j] + sort_dim_X[j + 1]) / 2 for j in range(0, self.data_num - 1)] + [float("inf")])
             error_in_i = sum(sort_u)
             sign_i = 1
             theta_i = 0.0
 
             for theta in thetas:
                 y_positive = np.where(sort_dim_X > theta, 1, -1)
                 y_negative = np.where(sort_dim_X < theta, 1, -1)
-                error_positive = sum((y_positive != sort_dim_Y)*sort_u)
-                error_negative = sum((y_negative != sort_dim_Y)*sort_u)
+                error_positive = sum((y_positive != sort_dim_Y) * sort_u)
+                error_negative = sum((y_negative != sort_dim_Y) * sort_u)
                 if error_positive > error_negative:
                     if error_in_i > error_negative:
                         error_in_i = error_negative

diff --git a/FukuML/DecisionTree.py b/FukuML/DecisionTree.py
@@ -1,4 +1,4 @@
-#encoding=utf8
+# encoding=utf8
 
 import os
 import collections
@@ -143,7 +143,7 @@ def prune_by_height(self, tree, tree_height_limit):
             for v, c in tree.false_branch.each_class_counts.items():
                 false_branch += [v] * c
 
-            true_false_branch = np.array(true_branch+false_branch)
+            true_false_branch = np.array(true_branch + false_branch)
             true_branch = np.array(true_branch)
             false_branch = np.array(false_branch)
 
@@ -172,12 +172,12 @@ def prune(self, tree):
                 true_branch += [v] * c
             for v, c in tree.false_branch.each_class_counts.items():
                 false_branch += [v] * c
-            true_false_branch = np.array(true_branch+false_branch)
+            true_false_branch = np.array(true_branch + false_branch)
             true_branch = np.array(true_branch)
             false_branch = np.array(false_branch)
 
             p = float(len(true_branch)) / len(true_false_branch)
-            delta = self.impurity(true_false_branch) - p*self.impurity(true_branch) - (1-p)*self.impurity(false_branch)
+            delta = self.impurity(true_false_branch) - p * self.impurity(true_branch) - (1 - p) * self.impurity(false_branch)
             if delta < self.prune_gain:
                 if self.prune_notify:
                     print('A branch was pruned: gain = %f' % delta)
@@ -212,13 +212,13 @@ def classify_with_missing_data(self, x, tree):
                 false_branch = self.classify_with_missing_data(x, tree.false_branch)
                 true_branch_count = sum(true_branch.values())
                 false_branch_count = sum(false_branch.values())
-                true_branch_weight = float(true_branch_count)/(true_branch_count + false_branch_count)
-                false_branch_weight = float(false_branch_count)/(true_branch_count + false_branch_count)
+                true_branch_weight = float(true_branch_count) / (true_branch_count + false_branch_count)
+                false_branch_weight = float(false_branch_count) / (true_branch_count + false_branch_count)
                 each_class_counts = collections.defaultdict(int)
                 for k, v in true_branch.items():
-                    each_class_counts[k] += v*true_branch_weight
+                    each_class_counts[k] += v * true_branch_weight
                 for k, v in false_branch.items():
-                    each_class_counts[k] += v*false_branch_weight
+                    each_class_counts[k] += v * false_branch_weight
                 return dict(each_class_counts)
             else:
                 branch = None
@@ -284,14 +284,14 @@ def regression_with_missing_data(self, x, tree):
                 true_branch_sum = 0
                 for key, value in list(true_branch.items()):
                     true_branch_count += value
-                    true_branch_sum += key*value
+                    true_branch_sum += key * value
                 false_branch_count = 0
                 false_branch_sum = 0
                 for key, value in list(false_branch.items()):
                     false_branch_count += value
-                    false_branch_sum += key*value
+                    false_branch_sum += key * value
 
-                mean = (float(true_branch_sum)+float(false_branch_sum))/(true_branch_count+false_branch_count)
+                mean = (float(true_branch_sum) + float(false_branch_sum)) / (true_branch_count + false_branch_count)
                 return {mean: 1}
             else:
                 branch = None
@@ -328,13 +328,13 @@ def impurity(self, Y):
             total_data_num = len(Y)
             each_class_counts = self.each_class_counts(Y)
             for k in each_class_counts:
-                impurity -= (float(each_class_counts[k])/total_data_num)**2
+                impurity -= (float(each_class_counts[k]) / total_data_num)**2
         elif self.learn_type == 'regression':
             if len(Y) == 0:
                 return 0
             data = [float(y) for y in Y]
             mean = sum(data) / len(data)
-            variance = sum([(d-mean)**2 for d in data]) / len(data)
+            variance = sum([(d - mean)**2 for d in data]) / len(data)
             impurity = variance
 
         return impurity
@@ -355,10 +355,12 @@ def divide_set(self, X, Y, column, value):
 
         if value_is_float:
             # for int and float values
-            splitting_function = lambda row: float(row[column]) >= value
+            def splitting_function(row):
+                return float(row[column]) >= value
         else:
             # for strings
-            splitting_function = lambda row: row[column] == value
+            def splitting_function(row):
+                return row[column] == value
 
         list1 = [row for row in XY if splitting_function(row)]
         list2 = [row for row in XY if not splitting_function(row)]
@@ -380,7 +382,7 @@ def grow_decision_tree_from(self, X, Y, height_position):
         best_set = None
 
         for col in range(1, self.data_demension):
-            column_values = X[:, col:col+1]
+            column_values = X[:, col:col + 1]
             for value in column_values:
                 self.divide_set(X, Y, col, value[0])
                 (set1, set2) = self.divide_set(X, Y, col, value[0])
@@ -398,15 +400,15 @@ def grow_decision_tree_from(self, X, Y, height_position):
                     set2Y = np.array([])
 
                 p = float(len(set1Y)) / len(Y)
-                gain = impurity_score - p*self.impurity(set1Y) - (1-p)*self.impurity(set2Y)
+                gain = impurity_score - p * self.impurity(set1Y) - (1 - p) * self.impurity(set2Y)
                 if gain > best_gain and len(set1Y) > 0 and len(set2Y) > 0:
                     best_gain = gain
                     best_attribute = (col, value[0])
                     best_set = (set1X, set1Y, set2X, set2Y)
 
         if best_gain > 0:
-            true_branch = self.grow_decision_tree_from(best_set[0], best_set[1], height_position+1)
-            false_branch = self.grow_decision_tree_from(best_set[2], best_set[3], height_position+1)
+            true_branch = self.grow_decision_tree_from(best_set[0], best_set[1], height_position + 1)
+            false_branch = self.grow_decision_tree_from(best_set[2], best_set[3], height_position + 1)
             is_leaf = False
             if true_branch is None and false_branch is None:
                 is_leaf = True

diff --git a/FukuML/KernelLogisticRegression.py b/FukuML/KernelLogisticRegression.py
@@ -1,4 +1,4 @@
-#encoding=utf8
+# encoding=utf8
 
 import random
 import numpy as np
@@ -100,7 +100,7 @@ def error_function(self, x, y, W):
         x = x[1:]
         original_X = self.train_X[:, 1:]
         score = np.sum(self.beta * utility.Kernel.kernel_matrix_xX(self, x, original_X))
-        error = np.log(1 + np.exp((-1)*y*score))
+        error = np.log(1 + np.exp((-1) * y * score))
 
         return error
 
@@ -116,7 +116,7 @@ def calculate_gradient(self, X, Y, beta):
             original_X = self.train_X[:, 1:]
             K = utility.Kernel.kernel_matrix_xX(self, original_x, original_X)
 
-        gradient_average = ((2*self.lambda_p)/data_num)*np.dot(beta, K) + np.dot(self.theta((-1)*Y*np.dot(beta, K))*((-1)*Y), K)/data_num
+        gradient_average = ((2 * self.lambda_p) / data_num) * np.dot(beta, K) + np.dot(self.theta((-1) * Y * np.dot(beta, K)) * ((-1) * Y), K) / data_num
 
         print('calculate gradient descent...')
 
@@ -140,7 +140,7 @@ def train(self):
 
         for i in range(0, self.updates):
             if self.feed_mode == 'stochastic':
-                stochastic_i = random.randint(0, self.data_num-1)
+                stochastic_i = random.randint(0, self.data_num - 1)
                 x = self.train_X[stochastic_i]
                 y = self.train_Y[stochastic_i]
                 gradient = self.calculate_gradient(x, y, self.beta)

diff --git a/FukuML/KernelRidgeRegression.py b/FukuML/KernelRidgeRegression.py
@@ -1,4 +1,4 @@
-#encoding=utf8
+# encoding=utf8
 
 import os
 import itertools

diff --git a/FukuML/L2RLogisticRegression.py b/FukuML/L2RLogisticRegression.py
@@ -1,7 +1,7 @@
-#encoding=utf8
+# encoding=utf8
 
-#import operator
-#import itertools
+# import operator
+# import itertools
 import numpy as np
 import FukuML.LogisticRegression as logistic_regression
 
@@ -70,7 +70,7 @@ def calculate_gradient(self, X, Y, W):
         else:
             data_num = 1
 
-        gradient_average = ((2*self.lambda_p)/data_num)*self.W + np.dot(self.theta((-1)*Y*np.dot(W, X.transpose()))*((-1)*Y), X)/data_num
+        gradient_average = ((2 * self.lambda_p) / data_num) * self.W + np.dot(self.theta((-1) * Y * np.dot(W, X.transpose())) * ((-1) * Y), X) / data_num
 
         return gradient_average
 

diff --git a/FukuML/LeastSquaresSVM.py b/FukuML/LeastSquaresSVM.py
@@ -1,4 +1,4 @@
-#encoding=utf8
+# encoding=utf8
 
 import FukuML.KernelRidgeRegression as kernel_ridge_regression
 

diff --git a/FukuML/LinearRegression.py b/FukuML/LinearRegression.py
@@ -1,4 +1,4 @@
-#encoding=utf8
+# encoding=utf8
 
 import os
 import itertools

diff --git a/FukuML/LogisticRegression.py b/FukuML/LogisticRegression.py
@@ -1,15 +1,15 @@
-#encoding=utf8
+# encoding=utf8
 
 import os
 import random
 import operator
 import itertools
-#import collections
+# import collections
 import numpy as np
 import FukuML.Utility as utility
 import FukuML.MLBase as ml
 import FukuML.LinearRegression as linear_regression
-#np.set_printoptions(threshold=np.nan)
+# np.set_printoptions(threshold=np.nan)
 
 
 class LogisticRegression(ml.Learner):
@@ -123,7 +123,7 @@ def theta(self, s):
 
         s = np.where(s < -709, -709, s)
 
-        return 1/(1 + np.exp((-1)*s))
+        return 1 / (1 + np.exp((-1) * s))
 
     def score_function(self, x, W):
         # need refactor
@@ -143,7 +143,7 @@ def error_function(self, x, y, W):
         Error function to calculate error: cross entropy error
         '''
 
-        error = np.log(1 + np.exp((-1)*y*np.inner(x, W)))
+        error = np.log(1 + np.exp((-1) * y * np.inner(x, W)))
 
         return error
 
@@ -154,7 +154,7 @@ def calculate_gradient(self, X, Y, W):
         else:
             data_num = 1
 
-        gradient_average = np.dot(self.theta((-1)*Y*np.dot(W, X.transpose()))*((-1)*Y), X)/data_num
+        gradient_average = np.dot(self.theta((-1) * Y * np.dot(W, X.transpose())) * ((-1) * Y), X) / data_num
 
         return gradient_average
 
@@ -203,7 +203,7 @@ def train(self):
 
         for i in range(0, self.updates):
             if self.feed_mode == 'stochastic':
-                stochastic_i = random.randint(0, self.data_num-1)
+                stochastic_i = random.randint(0, self.data_num - 1)
                 x = self.train_X[stochastic_i]
                 y = self.train_Y[stochastic_i]
                 gradient = self.calculate_gradient(x, y, self.W)
@@ -577,7 +577,7 @@ def prediction(self, input_data='', mode='test_data'):
             self.temp_W = {}
 
         if self.decomposition == 'ovo':
-            #counter = collections.Counter(ovo_vote)
+            # counter = collections.Counter(ovo_vote)
             prediction_return = max(set(ovo_vote), key=ovo_vote.count)
         elif self.decomposition == 'ova':
             prediction_return = max(prediction_list.items(), key=operator.itemgetter(1))[0]

diff --git a/FukuML/MLBase.py b/FukuML/MLBase.py
@@ -1,4 +1,4 @@
-#encoding=utf8
+# encoding=utf8
 
 from abc import ABCMeta, abstractmethod
 import numpy as np