/
mnist_image_classifier.py
108 lines (90 loc) · 3.93 KB
/
mnist_image_classifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict
import pandas as pd
from evadb.functions.abstract.pytorch_abstract_function import (
PytorchAbstractClassifierFunction,
)
from evadb.utils.generic_utils import try_to_import_torch, try_to_import_torchvision
class MnistImageClassifier(PytorchAbstractClassifierFunction):
    """Classify handwritten-digit frames with a small pretrained MLP.

    The network is a 784 -> 256 -> 256 -> 10 multilayer perceptron whose
    weights are downloaded from the pytorch-playground model URL in
    :meth:`setup`. Predictions are returned as string labels ``"0"``-``"9"``.
    """

    @property
    def name(self) -> str:
        """Return the identifier of this function."""
        return "MnistImageClassifier"

    def setup(self):
        """Build the MLP, load its pretrained weights and enter eval mode.

        Raises:
            TypeError: if the downloaded checkpoint is neither an
                ``nn.Module`` nor a state-dict mapping.
        """
        try_to_import_torch()
        try_to_import_torchvision()
        import torch
        import torch.nn as nn

        # NOTE(review): the checkpoint is fetched over plain HTTP. The file
        # name looks like it embeds a hash prefix, so passing
        # ``check_hash=True`` to ``load_url`` may be possible -- confirm
        # against the hosted file before enabling.
        model_urls = {
            "mnist": "http://ml.cs.tsinghua.edu.cn/~chenxi/pytorch-models/mnist-b07bb66b.pth"  # noqa
        }

        # Architecture taken from
        # https://github.com/aaron-xichen/pytorch-playground/blob/master/
        class MLP(nn.Module):
            """Fully connected network: (Linear -> ReLU -> Dropout)* -> Linear."""

            def __init__(self, input_dims, n_hiddens, n_class):
                super().__init__()
                # Explicit raise instead of ``assert`` so the check survives
                # running Python with ``-O``.
                if not isinstance(input_dims, int):
                    raise TypeError("Please provide int for input_dims")
                self.input_dims = input_dims

                if isinstance(n_hiddens, int):
                    hidden_sizes = [n_hiddens]
                else:
                    hidden_sizes = list(n_hiddens)

                # The layer names (fc1, relu1, drop1, ..., out) become the
                # state-dict keys, so they must stay exactly as they are for
                # the pretrained checkpoint to load.
                layers = OrderedDict()
                current_dims = input_dims
                for idx, n_hidden in enumerate(hidden_sizes, start=1):
                    layers[f"fc{idx}"] = nn.Linear(current_dims, n_hidden)
                    layers[f"relu{idx}"] = nn.ReLU()
                    layers[f"drop{idx}"] = nn.Dropout(0.2)
                    current_dims = n_hidden
                layers["out"] = nn.Linear(current_dims, n_class)
                self.model = nn.Sequential(layers)

            def forward(self, x):
                # Flatten everything except the batch dimension.
                x = x.view(x.size(0), -1)
                if x.size(1) != self.input_dims:
                    raise ValueError(
                        f"Expected {self.input_dims} features per sample "
                        f"after flattening, got {x.size(1)}"
                    )
                # Call the module itself (not ``.forward``) so that any
                # registered hooks run.
                return self.model(x)

        def mnist(input_dims=784, n_hiddens=(256, 256), n_class=10, pretrained=False):
            """Create the MLP and optionally load the pretrained weights."""
            model = MLP(input_dims, n_hiddens, n_class)
            if pretrained:
                # Imported lazily: only needed when weights are downloaded.
                import torch.utils.model_zoo as model_zoo

                checkpoint = model_zoo.load_url(
                    model_urls["mnist"], map_location=torch.device("cpu")
                )
                # The checkpoint may be a pickled module or a bare state dict.
                if isinstance(checkpoint, nn.Module):
                    state_dict = checkpoint.state_dict()
                else:
                    state_dict = checkpoint
                if not isinstance(state_dict, dict):
                    raise TypeError(
                        f"Unexpected checkpoint type: {type(state_dict)}"
                    )
                model.load_state_dict(state_dict)
            return model

        self.model = mnist(pretrained=True)
        # Inference only: eval mode disables the dropout layers.
        self.model.eval()

    @property
    def labels(self) -> list:
        """Return the class labels, the digit strings ``"0"`` to ``"9"``."""
        return [str(num) for num in range(10)]

    def transform(self, images):
        """Convert one frame into a normalized single-channel batch tensor.

        The frame's last axis is reversed (OpenCV channel order), the image
        is converted to grayscale, turned into a tensor, normalized with
        mean 0.1307 / std 0.3081, and given a leading batch dimension.

        No resizing is done here, and the model flattens its input to 784
        values, so the frame must already contain 784 pixels (e.g. 28x28).

        Args:
            images: a single image array indexed as ``[row, col, channel]``.

        Returns:
            The transformed tensor with an extra dimension at position 0.
        """
        from PIL import Image
        from torchvision.transforms import Compose, Grayscale, Normalize, ToTensor

        composed = Compose(
            [
                Grayscale(num_output_channels=1),
                ToTensor(),
                Normalize((0.1307,), (0.3081,)),
            ]
        )
        # reverse the channels from opencv
        rgb_image = Image.fromarray(images[:, :, ::-1])
        return composed(rgb_image).unsqueeze(0)

    def forward(self, frames) -> pd.DataFrame:
        """Run the model on a batch and return the predicted digit per row.

        Args:
            frames: batch tensor accepted by the model; each sample must
                flatten to 784 values.

        Returns:
            A DataFrame with a single ``label`` column holding the string
            form of the arg-max class for every sample in ``frames``.
        """
        predictions = self.model(frames)
        outcome = [
            {"label": str(self.as_numpy(prediction.detach().argmax()))}
            for prediction in predictions
        ]
        return pd.DataFrame(outcome, columns=["label"])