In [1]:
import argparse
import cv2
import numpy as np
import os
from imantics import Mask
from matplotlib import pyplot as plt
from tqdm.notebook import tqdm

# 读取数据

In [27]:
x_list = []
y_list = []
for pic in tqdm(os.listdir('obj_aug')):
    pic_path = './obj_aug/' + pic
    # 读取RGB三通道图像(640, 640, 3)
#     pic_data = cv2.imread(pic_path, cv2.IMREAD_COLOR)
    # 读取灰度单通道图像(640, 640)
    pic_data = cv2.imread(pic_path, cv2.IMREAD_GRAYSCALE)
    # 下采样(160, 160, 3)
    pic_data = cv2.resize(pic_data, (160, 160))
    x_list.append(pic_data)
    y_list.append(int(pic[:2]))
x_list = np.array(x_list)  # (15000, 160, 160, 3)
y_list = np.array(y_list)

  0%|          | 0/15000 [00:00<?, ?it/s]

In [28]:
from sklearn.model_selection import StratifiedShuffleSplit

# 假设标签数据保存在label_list中，其中每个标签是一个整数
X = x_list
y = y_list

# 分层抽样，其中train_size和test_size分别表示训练集和测试集的比例
# n_splits表示抽取的次数，random_state表示随机数种子
split = StratifiedShuffleSplit(n_splits=1, train_size=0.8, test_size=0.2, random_state=42)
train_index, test_index = next(split.split(X, y))

# 得到训练集和测试集
X_train, X_test = [X[i] for i in train_index], [X[i] for i in test_index]
y_train, y_test = [y[i] for i in train_index], [y[i] for i in test_index]

# 加载数据集
train_data = np.array(X_train)
train_labels = np.array(y_train)
test_data = np.array(X_test)
test_labels = np.array(y_test)

# 将图片矩阵转换为向量
train_data = train_data.reshape(train_data.shape[0], -1)
test_data = test_data.reshape(test_data.shape[0], -1)

In [32]:
train_data.shape

(12000, 25600)

# 特征提取方法
## 增量PCA

In [33]:
from sklearn.decomposition import IncrementalPCA

n_batches = 10
inc_pca = IncrementalPCA(n_components=20)

for X_batch in tqdm(np.array_split(train_data, n_batches)):
    inc_pca.partial_fit(X_batch)

  0%|          | 0/10 [00:00<?, ?it/s]

In [34]:
train_data_inc_pca = inc_pca.transform(train_data)
test_data_inc_pca = inc_pca.transform(test_data)

## 随机PCA

In [36]:
from sklearn.decomposition import PCA

pca = PCA(n_components=20, svd_solver='randomized')
pca.fit(train_data)
train_data_inc_pca = pca.transform(train_data)
test_data_inc_pca = pca.transform(test_data)

## t-SNE

In [39]:
from sklearn.manifold import TSNE

tsne = TSNE(n_components=3, learning_rate='auto', init='random', perplexity=3)
train_data_inc_pca = tsne.fit_transform(train_data)
test_data_inc_pca = tsne.fit_transform(test_data)

## FactorAnalysis

In [43]:
from sklearn.decomposition import FactorAnalysis

fa = FactorAnalysis(n_components=20)
fa.fit(train_data)
train_data_inc_pca = fa.transform(train_data)
test_data_inc_pca = fa.transform(test_data)

## MiniBatchSparsePCA

In [41]:
from sklearn.decomposition import MiniBatchSparsePCA

pca = MiniBatchSparsePCA(n_components=20)
pca.fit(train_data)
train_data_inc_pca = pca.transform(train_data)
test_data_inc_pca = pca.transform(test_data)

# 分类器

In [5]:
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(random_state=0)
clf.fit(train_data_inc_pca, train_labels)
clf.score(test_data_inc_pca, test_labels)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.18833333333333332

In [6]:
from sklearn.linear_model import PassiveAggressiveClassifier

clf = PassiveAggressiveClassifier(random_state=0)
clf.fit(train_data_inc_pca, train_labels)
clf.score(test_data_inc_pca, test_labels)

0.07533333333333334

In [7]:
from sklearn.linear_model import Perceptron

clf = Perceptron(random_state=0)
clf.fit(train_data_inc_pca, train_labels)
clf.score(test_data_inc_pca, test_labels)

0.06566666666666666

In [8]:
from sklearn.linear_model import RidgeClassifier

clf = RidgeClassifier(random_state=0)
clf.fit(train_data_inc_pca, train_labels)
clf.score(test_data_inc_pca, test_labels)

0.15766666666666668

In [9]:
from sklearn.linear_model import SGDClassifier

clf = SGDClassifier(random_state=0)
clf.fit(train_data_inc_pca, train_labels)
clf.score(test_data_inc_pca, test_labels)

0.053

In [10]:
from sklearn.dummy import DummyClassifier

clf = DummyClassifier(random_state=0)
clf.fit(train_data_inc_pca, train_labels)
clf.score(test_data_inc_pca, test_labels)

0.06666666666666667

In [11]:
from sklearn.ensemble import AdaBoostClassifier

clf = AdaBoostClassifier(random_state=0)
clf.fit(train_data_inc_pca, train_labels)
clf.score(test_data_inc_pca, test_labels)

0.13066666666666665

In [44]:
from sklearn.ensemble import BaggingClassifier

clf = BaggingClassifier(random_state=0)
clf.fit(train_data_inc_pca, train_labels)
clf.score(test_data_inc_pca, test_labels)

0.41633333333333333

In [13]:
from sklearn.ensemble import ExtraTreesClassifier

clf = ExtraTreesClassifier(random_state=0)
clf.fit(train_data_inc_pca, train_labels)
clf.score(test_data_inc_pca, test_labels)

0.344

In [14]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(random_state=0)
clf.fit(train_data_inc_pca, train_labels)
clf.score(test_data_inc_pca, test_labels)

0.317

In [15]:
from sklearn.neighbors import KNeighborsClassifier

clf = KNeighborsClassifier()
clf.fit(train_data_inc_pca, train_labels)
clf.score(test_data_inc_pca, test_labels)

0.23633333333333334

In [16]:
from sklearn.neural_network import MLPClassifier

clf = MLPClassifier(random_state=0)
clf.fit(train_data_inc_pca, train_labels)
clf.score(test_data_inc_pca, test_labels)



0.41833333333333333

In [17]:
from sklearn.tree import DecisionTreeClassifier

clf = DecisionTreeClassifier(random_state=0)
clf.fit(train_data_inc_pca, train_labels)
clf.score(test_data_inc_pca, test_labels)

0.17