In [1]:
import warnings
# Install a global 'ignore' filter: every warning raised after this point
# (including deprecation notices from third-party libraries) is suppressed.
warnings.filterwarnings('ignore')

**Logistic Regression**

[UCI的Adult数据集](https://archive.ics.uci.edu/ml/datasets/Adult)

In [2]:
from process import UCIadult_process
from mlearn import lr
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score

# Load the train / test splits through the project's Adult pre-processing helper.
train = UCIadult_process.process('data/UCI_adult/adult.data', None)
test = UCIadult_process.process('data/UCI_adult/adult.test', [0])

# This column is removed from the training frame only.
# NOTE(review): presumably it has no counterpart in the test frame — confirm.
train = train.drop('native_country_Holand-Netherlands', axis=1)

# Everything except the `salary` label column is treated as a feature.
train_features = train.drop('salary', axis=1)
test_features = test.drop('salary', axis=1)

X_train, y_train = train_features.values, train['salary'].values
X_test, y_test = test_features.values, test['salary'].values

# The project's LogisticRegression receives the feature column names up front.
clf = lr.LogisticRegression(train_features.columns, max_iter=1000)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Threshold the predictions in place at 0.5 to obtain hard 0/1 labels.
y_pred[y_pred >= 0.5] = 1
y_pred[y_pred < 0.5] = 0

acc = accuracy_score(y_test, y_pred)
print('accuracy:', acc)

accuracy: 0.8160743938890734


In [3]:
from sklearn import linear_model
from process import UCIadult_process

# Baseline: scikit-learn's LogisticRegression on the Adult train / test splits.
train = UCIadult_process.process('data/UCI_adult/adult.data', None)
test = UCIadult_process.process('data/UCI_adult/adult.test', [0])

# Drop this column from the training frame only.
# NOTE(review): presumably it is absent from the test frame — confirm.
del train['native_country_Holand-Netherlands']

# Use `.values` rather than `.as_matrix()`: the latter was deprecated in
# pandas 0.23 and removed in pandas 1.0, so it fails on current pandas.
X_train = train.drop(['salary'], axis=1).values
y_train = train.salary.values

X_test = test.drop(['salary'], axis=1).values
y_test = test.salary.values

# L2-penalised model fitted with the newton-cg solver; verbose=2 prints solver progress.
clf = linear_model.LogisticRegression(penalty='l2',verbose=2,solver='newton-cg').fit(X_train, y_train)
# Kept as the last expression so the notebook displays the test-set score.
clf.score(X_test, y_test)

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.5s finished


0.8458319495184324

**Perceptron**

[UCI的Adult数据集](https://archive.ics.uci.edu/ml/datasets/Adult)

In [4]:
from process import UCIadult_process
from mlearn import perceptron
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score

# Load the train / test splits through the project's Adult pre-processing helper.
train = UCIadult_process.process('data/UCI_adult/adult.data', None)
test = UCIadult_process.process('data/UCI_adult/adult.test', [0])

# This column is removed from the training frame only.
# NOTE(review): presumably it has no counterpart in the test frame — confirm.
train = train.drop('native_country_Holand-Netherlands', axis=1)

train_features = train.drop('salary', axis=1)
test_features = test.drop('salary', axis=1)

# Train on the first 5000 rows only: per the original author's note, solving
# the dual problem blows up once the training set exceeds a certain size.
X_train = train_features.values[:5000, :]
y_train = train['salary'].values[:5000]
# Re-encode label 0 as -1 (in place) so the labels are {-1, 1}.
y_train[y_train == 0] = -1

X_test = test_features.values
y_test = test['salary'].values
y_test[y_test == 0] = -1

# The project's Perceptron receives the feature column names up front.
clf = perceptron.Perceptron(train_features.columns, max_iter=1000)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

acc = accuracy_score(y_test, y_pred)
print('accuracy:', acc)

accuracy: 0.8159415476585852


In [5]:
from sklearn import linear_model
import sys
from process import UCIadult_process

# Baseline: scikit-learn's Perceptron on the full Adult training split.
train = UCIadult_process.process('data/UCI_adult/adult.data', None)
test = UCIadult_process.process('data/UCI_adult/adult.test', [0])

# This column is removed from the training frame only.
# NOTE(review): presumably it has no counterpart in the test frame — confirm.
train = train.drop('native_country_Holand-Netherlands', axis=1)

# Features are every column except the `salary` label.
X_train, y_train = train.drop('salary', axis=1).values, train['salary'].values
X_test, y_test = test.drop('salary', axis=1).values, test['salary'].values

# shuffle=True is passed without a random_state, so the score can vary between runs.
clf = linear_model.Perceptron(max_iter=1000, shuffle=True)
clf.fit(X_train, y_train)

# Kept as the last expression so the notebook displays the test-set score.
clf.score(X_test, y_test)

0.8075722351378279

**拉格朗日乘子法**

![](https://i.loli.net/2018/09/18/5ba0eec766bac.png)

In [6]:
from sympy import *
from mlearn import optimize


# Symbolic unknowns of the optimisation problem.
x1, x2 = symbols('x1 x2')

# Objective plus the constraint expressions handed to the project's helper.
# NOTE(review): presumably each `cons_e` entry means `expr == 0` and each
# `cons_le` entry means `expr <= 0` — confirm against mlearn.optimize.
f = 10 - x1**2 - x2**2
cons_e = [x1 + x2]
cons_le = [x1**2 - x2]

result = optimize.lagrange_multiplier(f, cons_e, cons_le, x1, x2)
print(result)

[((-1, 1, 6, 4), 8)]


f(x,y,z)=L²=x²+y²+z²

曲面方程化为 x²+2y²-3z²-4=0

设辅助系数为 a,则对应的拉格朗日辅助函数为

f(x,y,z,a)=x²+y²+z²+a(x²+2y²-3z²-4)

In [7]:
from sympy import *
from mlearn import optimize

# Complex-valued solutions exist for this problem.

x, y, z = symbols('x y z')

f = x**2 + y**2 + z**2
cons_e = [x**2 + 2*y**2 - 3*z**2 - 4]  # single equality-constraint expression
cons_le = []  # no inequality constraints

# NOTE(review): the return value is unpacked into exactly two names, so this
# line only works when the helper returns a two-element sequence. The other
# lagrange_multiplier call in this notebook prints a list of (solution, value)
# pairs, so `loss` may actually hold a second solution — confirm.
result, loss = optimize.lagrange_multiplier(f, cons_e, cons_le, x, y, z)
print(result)

((0, 0, -2*sqrt(3)*I/3, 1/3), -4/3)


**SVM**

In [8]:
from process import UCIadult_process
from mlearn import svm
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score

# Load the train / test splits through the project's Adult pre-processing helper.
train = UCIadult_process.process('data/UCI_adult/adult.data', None)
test = UCIadult_process.process('data/UCI_adult/adult.test', [0])

# This column is removed from the training frame only.
# NOTE(review): presumably it has no counterpart in the test frame — confirm.
train = train.drop('native_country_Holand-Netherlands', axis=1)

train_features = train.drop('salary', axis=1)
test_features = test.drop('salary', axis=1)

# Train on the first 5000 rows only: per the original author's note, solving
# the dual problem blows up once the training set exceeds a certain size.
X_train = train_features.values[:5000, :]
y_train = train['salary'].values[:5000]
# Re-encode label 0 as -1 (in place) so the labels are {-1, 1}.
y_train[y_train == 0] = -1

X_test = test_features.values
y_test = test['salary'].values
y_test[y_test == 0] = -1

# Project SVM with a linear kernel.
clf = svm.SVM(
    features=train_features.columns,
    max_iter=100,
    C=0.1,
    kernel='linear',
)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

acc = accuracy_score(y_test, y_pred)
print('accuracy:', acc)

accuracy: 0.4179342411159083


In [15]:
# Project-local modules are imported from `process` / `mlearn`, the package
# names used by the rest of this notebook, so the cell runs on a fresh kernel.
from process import UCIadult_process
from mlearn import svm
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score

# Load the train / test splits through the project's Adult pre-processing helper.
train = UCIadult_process.process('data/UCI_adult/adult.data', None)
test = UCIadult_process.process('data/UCI_adult/adult.test', [0])

# Drop this column from the training frame only.
# NOTE(review): presumably it is absent from the test frame — confirm.
del train['native_country_Holand-Netherlands']

# Train on the first 5000 rows only: per the original author's note, solving
# the dual problem blows up once the training set exceeds a certain size.
X_train = train.drop(['salary'], axis=1).values[:5000,:]
y_train = train.salary.values[:5000]
# Re-encode label 0 as -1 (in place) so the labels are {-1, 1}.
y_train[y_train == 0] = -1

X_test = test.drop(['salary'], axis=1).values
y_test = test.salary.values
y_test[y_test == 0] = -1

# Project SVM with an RBF kernel.
clf = svm.SVM(features=train.drop(['salary'], axis=1).columns, max_iter=5000, C=3, kernel='rbf')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

acc = accuracy_score(y_test, y_pred)
print('accuracy:', acc)

accuracy: 0.7541680504815675


In [9]:
from process import UCIadult_process
from mlearn import svm
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score

# Load the train / test splits through the project's Adult pre-processing helper.
train = UCIadult_process.process('data/UCI_adult/adult.data', None)
test = UCIadult_process.process('data/UCI_adult/adult.test', [0])

# This column is removed from the training frame only.
# NOTE(review): presumably it has no counterpart in the test frame — confirm.
train = train.drop('native_country_Holand-Netherlands', axis=1)

train_features = train.drop('salary', axis=1)
test_features = test.drop('salary', axis=1)

# Train on the first 5000 rows only: per the original author's note, solving
# the dual problem blows up once the training set exceeds a certain size.
X_train = train_features.values[:5000, :]
y_train = train['salary'].values[:5000]
# Re-encode label 0 as -1 (in place) so the labels are {-1, 1}.
y_train[y_train == 0] = -1

X_test = test_features.values
y_test = test['salary'].values
y_test[y_test == 0] = -1

# Project SVM with a sigmoid kernel.
clf = svm.SVM(
    features=train_features.columns,
    max_iter=100,
    C=0.1,
    kernel='sigmoid',
)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

acc = accuracy_score(y_test, y_pred)
print('accuracy:', acc)

accuracy: 0.48794420458319493


In [None]:
from sklearn import svm
from process import UCIadult_process

# Baseline: scikit-learn's SVC with default settings on the full Adult training split.
train = UCIadult_process.process('data/UCI_adult/adult.data', None)
test = UCIadult_process.process('data/UCI_adult/adult.test', [0])

# This column is removed from the training frame only.
# NOTE(review): presumably it has no counterpart in the test frame — confirm.
train = train.drop('native_country_Holand-Netherlands', axis=1)

# Features are every column except the `salary` label.
X_train, y_train = train.drop('salary', axis=1).values, train['salary'].values
X_test, y_test = test.drop('salary', axis=1).values, test['salary'].values

clf = svm.SVC()
clf.fit(X_train, y_train)

# Kept as the last expression so the notebook displays the test-set score.
clf.score(X_test, y_test)