Permalink
Cannot retrieve contributors at this time
#define STB_IMAGE_IMPLEMENTATION | |
#include"stb_image.h" | |
#include<string> | |
#include<iostream> | |
#include<fstream> | |
#include <sstream> | |
#include<cmath> | |
#include <vector> | |
#include <algorithm> | |
#include"template.h" | |
#include"spellchecker.h" | |
// for debug | |
#ifndef ALLOW_DEBUG_MSG // set to show debug message (in console) | |
#define ALLOW_DEBUG_MSG false | |
#endif | |
#ifndef ALLOW_DEBUG_FILE_STORAGE // set to save every image generated (including segmentation cache) | |
#define ALLOW_DEBUG_FILE_STORAGE false | |
#endif | |
static int debug_alp_count = 0; | |
/*****************************************************************************
***                                                                        ***
***                  Text Extractor and Recognition                        ***
***      This can help to split alphabets (characters) from an image.      ***
***                    by Kanch at http://akakanch.com                     ***
***                         kanchisme@gmail.com                            ***
***                                                                        ***
***                                                                        ***
*****************************************************************************/
/* ==============================================Types Definition============================================ */
#ifndef FEATURE_DEFENDED | |
/* Basic types: every image is built from 8-bit samples and raw pointers to them. */
using BASE       = uint8_t;          // one sample
using MATRIX     = BASE***;          // rows -> pixels -> channels
using ROW        = BASE**;           // one row of pixels
using COLUMN     = BASE**;           // same shape as ROW; used when allocating a MATRIX
using PIXEL      = BASE*;            // the channel array of one pixel
using ALPHABERTS = BASE*;
using CHANNEL    = BASE;             // a single channel value
using IMAGE      = MATRIX;           // image with a channel array per pixel
using LIST       = std::vector<int>;
using IMAGE2D    = ROW;              // image with one sample per pixel
using ROW2D      = PIXEL;            // one row of an IMAGE2D
/* Basic image information: dimensions in pixels and the number of channels per pixel.
   Plain aggregate; member order (width, height, channel) is relied on by brace initialisation. */
struct ImageData{
    int width;      // number of columns
    int height;     // number of rows
    int channel;    // samples per pixel
};
/* struct stores image matrix and its basic information */ | |
struct ImagePack{ | |
IMAGE image; | |
ImageData properties; | |
}; | |
struct ImagePack2D{ | |
IMAGE2D image; | |
ImageData properties; | |
}; | |
/* Feature set describing one character image (or one template). */
struct Features{
    std::vector<int> x_sum;         // per-column projection counts
    std::vector<int> y_sum;         // per-row projection counts
    std::vector<int> ninesampling;  // samples taken at nine fixed positions
    int circle;                     // 1 when a circle-like shape was detected, otherwise 0
    int width;                      // image width the features were computed from
    int height;                     // image height the features were computed from
    double wh_ratio;                // width / height
    std::string label;              // text this feature set stands for
};
typedef std::vector<ImagePack> LISTIMAGEPACK; | |
typedef std::vector<ImagePack2D> LISTIMAGEPACK2D; | |
typedef std::vector<LISTIMAGEPACK> DLISTIMAGEPACK; | |
typedef std::vector<ImagePack>::iterator LISTIMAGEPACKITERATOR; | |
typedef std::vector<ImagePack2D>::iterator LISTIMAGEPACK2DITERATOR; | |
typedef std::vector<LISTIMAGEPACK>::iterator DLISTIMAGEPACKITERATOR; | |
#endif | |
void save_string(const std::string data,const std::string path); | |
/* ==============================================Helper Function============================================ */ | |
/* Convert any streamable value to a string by writing it through an output string stream.
   Kept as a local helper for compatibility with the Android app build.
   source: https://stackoverflow.com/questions/22774009/android-ndk-stdto-string-support */
template <typename T>
std::string to_string(T value){
    std::ostringstream formatter;
    formatter << value;
    return formatter.str();
}
/* Spin for as long as `pause` is true.
   The flag is a by-value parameter that nothing inside the loop changes, so
   pause(true) never returns and pause(false) returns immediately. */
void pause(bool pause=true){
    for(;pause;){
        // intentionally empty: busy-wait
    }
}
/* used to display debug message, | |
you can turn it off by setting macro ALLOW_DEBUG_MSG to 0 */ | |
template <typename T> | |
void klog(T msg,const bool newline=true){ | |
if(ALLOW_DEBUG_MSG){ | |
std::cout<<">>>>>"<<msg; | |
if(newline){ | |
std::cout<<std::endl; | |
} | |
} | |
} | |
template <typename T> | |
void printVector(std::vector<T> v,std::string name=""){ | |
if(ALLOW_DEBUG_MSG){ | |
std::cout<<"Vector "<<name<<" : "; | |
for(int i=0;i<v.size();++i){ | |
std::cout<<v[i]<<","; | |
} | |
std::cout<<std::endl; | |
} | |
} | |
/* Calucate average of a given array (row vector) */ | |
BASE avg(const PIXEL p,const uint8_t channels){ | |
int sum = 0; | |
for(int i=0;i<channels;++i){ | |
sum+=p[i]; | |
} | |
return sum/channels; | |
} | |
/* sum up an array */ | |
long sum(const PIXEL p,const uint8_t channels){ | |
long sum = 0; | |
for(int i=0;i<channels;++i){ | |
sum+=p[i]; | |
} | |
return sum; | |
} | |
/* save image to file */ | |
void save2File(const IMAGE image,const ImageData &id,const std::string des="image.txt"){ | |
std::string data = ""; | |
for(int i=0;i<id.height;++i){ | |
std::string rowstr = ""; | |
for(int j=0;j<id.width;++j){ | |
std::string pixel = ""; | |
for(int c=0;c<id.channel;c++){ | |
pixel += to_string(int(image[i][j][c])) + ","; | |
} | |
pixel = "[" + pixel.substr(0,pixel.length()-1) + "]"; | |
rowstr += pixel + ","; | |
} | |
rowstr = "[" + rowstr.substr(0,rowstr.length()-1) + "]"; | |
data += rowstr + "\r\n"; | |
} | |
data = "[" + data.substr(0,data.length()-1) + "]"; | |
std::fstream fs; | |
fs.open(des.c_str(),std::ios_base::out); | |
fs<<data; | |
fs.close(); | |
} | |
/* print Matrix to console */ | |
/* can print pixelize and depixelize */ | |
void printMatrix(const MATRIX mat3d,const ImageData&id,const IMAGE2D mat2d=nullptr){ | |
if(ALLOW_DEBUG_MSG){ | |
std::cout<<"[\n"; | |
if(id.channel > 0){ | |
MATRIX mat = mat3d; | |
for(int i =0;i<id.height;++i){ | |
std::cout<<"["; | |
for(int j=0;j<id.width;++j){ | |
std::cout<<"["; | |
for(int c=0;c<id.channel;c++){ | |
std::cout<< int(mat[i][j][c]) <<","; | |
} | |
std::cout<<"\b] " ; | |
} | |
std::cout<<"\b] "<<std::endl; | |
} | |
}else{ | |
IMAGE2D mat = mat2d; | |
for(int i =0;i<id.height;++i){ | |
std::cout<<"["; | |
for(int j=0;j<id.width;++j){ | |
std::cout<< int(mat[i][j]) <<","; | |
} | |
std::cout<<"\b] "<<std::endl; | |
} | |
} | |
std::cout<<"]"<<std::endl; | |
} | |
} | |
/* perform mean normailize to pictures */ | |
const IMAGE normalize(IMAGE image,const ImageData& id,BASE* mean){ | |
long long sumx = 0; | |
for(int i=0;i<id.height;++i){ | |
for(int j=0;j<id.width;++j){ | |
sumx += sum( image[i][j],id.channel ); | |
} | |
} | |
BASE avgv = BASE(sumx/(id.height*id.width)); | |
*mean = avgv; | |
for(int i =0;i<id.height;++i){ | |
for(int j=0;j<id.width;++j){ | |
image[i][j][0] -= avgv; | |
} | |
} | |
return image; | |
} | |
/* this function can set all pixels which values large than a specified value to 1 */ | |
const IMAGE set_largeThan2Value(IMAGE image,const ImageData& id,const uint8_t value,const int threshold){ | |
for(int i=0;i<id.height;++i){ | |
for(int j=0;j<id.width;++j){ | |
if( int(image[i][j][0]) >= threshold ){ | |
image[i][j][0] = value; | |
} | |
} | |
} | |
return image; | |
} | |
/* this function can set all pixels which values less than a specified value to 1 */ | |
const IMAGE set_lessThan2Value(IMAGE image,const ImageData& id,const uint8_t value,const int threshold){ | |
for(int i=0;i<id.height;++i){ | |
for(int j=0;j<id.width;++j){ | |
if( int(image[i][j][0]) <= threshold ){ | |
image[i][j][0] = value; | |
} | |
} | |
} | |
return image; | |
} | |
/* this function will enhancing the image (maxiumize the gray pixel value) */ | |
/* this function should be use only after the normailization and background deleted */ | |
/* for one channel image */ | |
const IMAGE enhanceImage(IMAGE image,const ImageData& id){ | |
for(int i=0;i<id.height;++i){ | |
for(int j=0;j<id.width;++j){ | |
if(int(image[i][j][0])>0){ | |
image[i][j][0] = 255; | |
} | |
} | |
} | |
return image; | |
} | |
/* Invert a binary image in place: channel-0 values equal to 0 become 1 and every
   other value becomes 0. Returns the same image. */
const IMAGE reverseImageBit(IMAGE image,const ImageData& id){
    for(int r=0;r<id.height;++r){
        for(int c=0;c<id.width;++c){
            BASE& sample = image[r][c][0];
            sample = (int(sample) == 0) ? 1 : 0;
        }
    }
    return image;
}
/* Tranfera row vector into a matrix */ | |
const IMAGE to_Martix(uint8_t* img,const ImageData & id){ | |
MATRIX mat = new COLUMN[id.height]; | |
for(int i= 0;i<id.height;++i){ | |
ROW row = new PIXEL[id.width]; | |
for(int j=0;j<id.width;++j){ | |
PIXEL pixel = new CHANNEL[id.channel]; | |
for(int c=0;c<id.channel;c++){ | |
pixel[c] = int(img[ 3*(i*id.width + j) +c ]); | |
} | |
row[j] = pixel; | |
} | |
mat[i] = row; | |
} | |
return mat; | |
} | |
/* set background to 1, text to 0 */ | |
const IMAGE Thresholding(IMAGE& img,const ImageData& id){ | |
std::string hist = ""; | |
//first count 0 to 255 pixel value count | |
std::vector<int> pixelcount(256); | |
for(int i=0;i<id.height;++i){ | |
for(int j=0;j<id.width;++j){ | |
++pixelcount[ img[i][j][0] ]; | |
} | |
} | |
for(auto&a : pixelcount){ | |
hist += to_string(a); | |
hist += ","; | |
} | |
save_string( "[" + hist.substr(0,hist.length() -1 ) + "]" ,"cache/hist.txt"); | |
// start split backgroud color and forecolor | |
int minvalue = 10; | |
for(std::vector<int>::iterator it=pixelcount.begin()+1;it<pixelcount.end();++it){ | |
if(*it >= pixelcount[0]){ | |
minvalue = std::distance(pixelcount.begin(),it)/5*4; | |
break; | |
} | |
} | |
// set background to 1, data to 1 | |
img = set_lessThan2Value(img,id,1,minvalue); | |
img = set_largeThan2Value(img,id,0,minvalue); | |
return img; | |
} | |
/* get a slice of an matrix, like a[:,:] in Python */ | |
/* return `nullptr` if invailed data dectected */ | |
/* double nagative value in row or column stands for all select all rows and columns */ | |
const MATRIX sliceSubMatrix3D(MATRIX mat,const ImageData&id,const int rs=0,const int re=0,const int cs=0,const int ce=0){ | |
int row_start,row_end,col_start,col_end; | |
row_start = rs; | |
row_end = re; | |
col_start = cs; | |
col_end = ce; | |
if( rs > re || cs > ce || ( (rs>=0)^(re>=0) ) || ( (cs>=0)^(ce>=0) ) ){ | |
return nullptr; | |
} | |
if( row_end < 0 && row_start < 0 ){ // all rows | |
row_start = 0; | |
row_end = id.height; | |
} | |
if( col_end < 0 && col_start < 0 ){ // all columns | |
col_start = 0; | |
col_end = id.width; | |
} | |
int height = row_end - row_start; | |
int width = col_end - col_start; | |
MATRIX submatrix = new COLUMN[height]; | |
int colbase = 0; | |
for(int i=0;i<id.height;++i){ | |
if( i >= row_start && i < row_end){ | |
ROW row = new PIXEL[width]; | |
int base = 0; | |
for(int j=0;j<id.width;++j){ | |
if( j >=col_start && j < col_end ){ | |
PIXEL pixel = new BASE[id.channel]; | |
for(int c=0;c<id.channel;c++){ | |
pixel[c] = mat[i][j][c]; | |
} | |
row[base] = pixel; | |
base++; | |
} | |
} | |
submatrix[colbase] = row; | |
colbase++; | |
} | |
} | |
return submatrix; | |
} | |
/* Delete resource taken up by matrix */ | |
void deleteMatrix(MATRIX mat,const ImageData& id){ | |
for(int i=0;i<id.height;++i){ | |
for(int j=0;j<id.width;++j){ | |
delete[] mat[i][j]; | |
} | |
delete[] mat[i]; | |
} | |
delete[] mat; | |
} | |
/* Grayscale an image */ | |
/* only 1 color channel will be left after this function */ | |
const ImagePack to_grayScale(const IMAGE image,const ImageData& id){ | |
IMAGE grayscale = new COLUMN[id.height]; | |
for(int i=0;i<id.height;++i){ | |
ROW row = new PIXEL[id.width]; | |
for(int j=0;j<id.width;++j){ | |
int buf = int(image[i][j][0]*0.11 + image[i][j][1]*0.59 + image[i][j][2]*0.3 ); | |
PIXEL p = new CHANNEL[1]{ BASE(buf) }; | |
row[j] = p; | |
} | |
grayscale[i] = row; | |
} | |
ImagePack result = {grayscale,{id.width,id.height,1}}; | |
return result; | |
} | |
/* make pixel in image from a array to single BASE variable */ | |
const ImagePack2D depixelize(const IMAGE image,const ImageData& id){ | |
ImagePack2D i2d; | |
i2d.properties.width = id.width; | |
i2d.properties.height = id.height; | |
i2d.properties.channel = 0; | |
if(id.channel != 1){ | |
i2d.image = nullptr; | |
return i2d; | |
} | |
IMAGE2D buf = new ROW2D[id.height]; | |
for(int i=0;i<id.height;++i){ | |
ROW2D row = new BASE[id.width]; | |
for(int j=0;j<id.width;++j){ | |
row[j] = image[i][j][0]; | |
} | |
buf[i] = row; | |
} | |
i2d.image = buf; | |
return i2d; | |
} | |
/* make an matrix avaliable to show as image in python */ | |
const std::string numpylize(const IMAGE mat3d,const ImageData& id,const IMAGE2D mat2d=nullptr){ | |
std::string numpylizedstring = ""; | |
if(ALLOW_DEBUG_FILE_STORAGE){ | |
if(id.channel > 0){ | |
MATRIX mat = mat3d; | |
for(int i =0;i<id.height;++i){ | |
numpylizedstring += "["; | |
for(int j=0;j<id.width;++j){ | |
numpylizedstring += "["; | |
for(int c=0;c<id.channel;c++){ | |
numpylizedstring += to_string(int(mat[i][j][c])) + ","; | |
} | |
numpylizedstring.pop_back(); | |
numpylizedstring += "],"; | |
} | |
numpylizedstring.pop_back(); | |
numpylizedstring += "],"; | |
} | |
}else{ | |
IMAGE2D mat = mat2d; | |
for(int i =0;i<id.height;++i){ | |
numpylizedstring += "["; | |
for(int j=0;j<id.width;++j){ | |
numpylizedstring += to_string(int(mat[i][j])) + ","; | |
} | |
numpylizedstring.pop_back(); | |
numpylizedstring += "],"; | |
} | |
} | |
numpylizedstring.pop_back(); | |
} | |
numpylizedstring += "]"; | |
return "[" + numpylizedstring; | |
} | |
/* save string to file */ | |
void save_string(const std::string data,const std::string path){ | |
if(ALLOW_DEBUG_FILE_STORAGE){ | |
std::fstream fs; | |
fs.open(path.c_str(),std::ios_base::out); | |
fs<<data; | |
fs.close(); | |
} | |
} | |
/* Type cast: copy `len` native ints into a newly allocated uint8_t array.
   Each value is converted to uint8_t (values outside 0..255 wrap).
   The caller owns the returned array and must release it with delete[]. */
uint8_t * cast2uint8_t(const int * src,const int len){
    uint8_t * const narrowed = new uint8_t[len];
    int k = 0;
    while(k < len){
        narrowed[k] = static_cast<uint8_t>(src[k]);
        ++k;
    }
    return narrowed;
}
/* Split `data` at every occurrence of the delimiter `sym`.
   data : text to split
   sym  : delimiter (may be longer than one character); an empty delimiter never matches
   returns the pieces in order. Empty pieces between adjacent delimiters and before a
   leading delimiter are kept; an empty piece after a trailing delimiter is dropped.
   The previous hand-written scan skipped the character following each delimiter, so
   adjacent delimiters ("a@@b") were mis-parsed, and it could compare past the end of
   `data`; std::string::find avoids both problems. */
std::vector<std::string> split_string(const std::string& data,const std::string&sym){
    std::vector<std::string> result;
    if(sym.empty()){
        if(!data.empty()){
            result.push_back(data);
        }
        return result;
    }
    std::string::size_type start = 0;
    std::string::size_type hit = data.find(sym,start);
    while(hit != std::string::npos){
        result.push_back( data.substr(start,hit-start) );
        start = hit + sym.length();
        hit = data.find(sym,start);
    }
    if(start != data.length()){
        result.push_back( data.substr(start) );
    }
    return result;
}
/* strinfy the result list */ | |
/* format: [array of image data]@width@height<!> */ | |
/* <!> this symbol is used to split differnt images */ | |
const std::string strinfy(DLISTIMAGEPACK dpack,int&sum){ | |
std::string result = ""; | |
for(int i=0;i<dpack.size();++i){ // extract line container | |
LISTIMAGEPACK linecontainer = dpack[i]; | |
for(int j=0;j<linecontainer.size();++j){ // extract alpberts image on every line | |
// we're processing ImagePack here | |
std::string buf = "["; | |
std::string properties = to_string(linecontainer[j].properties.width) + "@" | |
+ to_string(linecontainer[j].properties.height) + "<!>"; | |
//start processing evert channel's pixel now,only work for 1 channel image | |
// that is grayscale image | |
//int mmm=0; | |
if( linecontainer[j].properties.channel ==1 ){ | |
for(int z=0;z<linecontainer[j].properties.height;z++){ | |
for(int l=0;l<linecontainer[j].properties.width;l++){ | |
buf += to_string(int(linecontainer[j].image[z][l][0])) + ","; | |
} | |
} | |
buf = buf.substr(0,buf.length() - 1) + "]@"; | |
buf += properties; | |
result += buf; | |
sum++; | |
} | |
} | |
} | |
return result; | |
} | |
/* delete empty line of an image */ | |
const IMAGE delteEmptyline(IMAGE image,ImageData&id,const int del_target=1){ | |
int countx = 0; | |
int del_line=0; | |
// scan on top | |
for(int i=0;i<id.height;++i){ | |
for(int j=0;j<id.width;++j){ | |
if( int(image[i][j][0]) == del_target ){ | |
++countx; | |
} | |
} | |
if(countx != id.width){ | |
break; | |
}else{ | |
++del_line; | |
countx = 0; | |
} | |
} | |
image = sliceSubMatrix3D(image,id,del_line,id.height,-1,-1); | |
id.height -= del_line; | |
// scan on bottom | |
countx=0; | |
del_line=0; | |
for(int i=id.height-1;i>=0;i--){ | |
for(int j=0;j<id.width;++j){ | |
if( int(image[i][j][0]) == del_target ){ | |
++countx; | |
} | |
} | |
if(countx != id.width){ | |
break; | |
}else{ | |
++del_line; | |
countx = 0; | |
} | |
} | |
image = sliceSubMatrix3D(image,id,0,id.height-del_line,-1,-1); | |
id.height -= del_line; | |
// scan on the left | |
countx=0; | |
del_line=0; | |
for(int i=0;i<id.width;++i){ | |
for(int j=0;j<id.height;++j){ | |
if( int(image[j][i][0]) == del_target ){ | |
++countx; | |
} | |
} | |
if(countx != id.height){ | |
break; | |
}else{ | |
++del_line; | |
countx = 0; | |
} | |
} | |
image = sliceSubMatrix3D(image,id,-1,-1,del_line,id.width); | |
id.width -= del_line; | |
// scan on the right | |
countx=0; | |
del_line=0; | |
for(int i=id.width-1;i>=0;++i){ | |
for(int j=0;j<id.height;++j){ | |
if( int(image[j][i][0]) == del_target ){ | |
++countx; | |
} | |
} | |
if(countx != id.height){ | |
break; | |
}else{ | |
++del_line; | |
countx = 0; | |
} | |
} | |
image = sliceSubMatrix3D(image,id,-1,-1,0,id.width-del_line); | |
id.width -= del_line; | |
return image; | |
} | |
/* image feature extractor: 投影区块匹配 */ | |
Features feature_extractor_projectionmatch(IMAGE img,ImageData&id,std::string label){ | |
Features feature; | |
std::vector<int> xsum(id.width); | |
std::vector<int> ysum; | |
for(int i=0;i<id.height;++i){ | |
int ycount = 0; | |
for(int j=0;j<id.width;++j){ | |
if( int(img[i][j][0]) == 0 ){ // 0 for data , 1 for background | |
++ycount; | |
++(xsum[j]); | |
} | |
} | |
ysum.push_back(ycount); | |
} | |
feature.x_sum = xsum; | |
feature.y_sum = ysum; | |
feature.width = id.width; | |
feature.height = id.height; | |
feature.wh_ratio = double(id.width)/id.height; | |
feature.label = label; | |
return feature; | |
} | |
/* image feature extractor: nine special points sampling */ | |
Features feature_extractor_9Sampling(IMAGE img,ImageData&id,Features& fea){ | |
//we're going to select 9 special point to use as our third feature | |
// these 9 points are top-left, top-center, top-right, also with mid-height and bottom | |
// then, we're going to arrange them in the order of scaning | |
fea.ninesampling.push_back( img[ 0 ][ 0 ][0] ); // top-left | |
fea.ninesampling.push_back( img[ 0 ][ int(id.width/2) ][0] ); // top-center | |
fea.ninesampling.push_back( img[ 0 ][ id.width-1 ][0] ); //top-right | |
fea.ninesampling.push_back( img[ int(id.height/2) ][ 0 ][0] ); //mid-l | |
fea.ninesampling.push_back( img[ int(id.height/2) ][ int(id.width/2) ][0] ); //mid-c | |
fea.ninesampling.push_back( img[ int(id.height/2) ][ id.width-1 ][0] ); //mid-r | |
fea.ninesampling.push_back( img[ id.height-1 ][ 0 ][0] ); //bottom-l | |
fea.ninesampling.push_back( img[ id.height-1 ][ int(id.width/2) ][0] ); //bottom-c | |
fea.ninesampling.push_back( img[ id.height-1 ][ id.width-1 ][0] ); //bottom-r | |
return fea; | |
} | |
/* get the weight feature of the picture */ | |
void feature_circle(IMAGE img,ImageData&id,Features&fea){ | |
// scan from left to right | |
int lcount = 0; | |
for(int i=0;i<id.height;++i){ | |
int bufx = 0; | |
bool nof = false; | |
for(int j=0;j<id.width;++j){ | |
if(img[i][j][0] == 0 && !nof){ | |
++bufx; | |
nof = true; | |
}else if(img[i][j][0]== 1 && nof){ | |
nof = false; | |
} | |
} | |
if(bufx >= 2){ | |
++lcount; | |
} | |
} | |
//scan from top to bottom | |
int vcount = 0; | |
for(int i=0;i<id.width;++i){ | |
int bufx = 0; | |
bool nof = false; | |
for(int j=0;j<id.height;++j){ | |
if(img[j][i][0] == 0 && !nof){ | |
++bufx; | |
nof = true; | |
}else if(img[j][i][0]== 1 && nof){ | |
nof = false; | |
} | |
} | |
if(bufx >= 2){ | |
++vcount; | |
} | |
} | |
//make sure the point exist | |
if(lcount > 0 && vcount > 0){ | |
fea.circle = 1; | |
return; | |
} | |
fea.circle = 0; | |
} | |
/* Compute the cosine similarity of two sample vectors after inverting them
   (each element e becomes |e - 1|, i.e. 1 <-> 0 for binary input).
   v1, v2 : sample vectors; only the common prefix is compared when the lengths differ
            (previously v2 was read out of bounds when it was shorter)
   returns dot(v1', v2') / (|v1'| * |v2'|).
   The norms use the sum of squares, which equals the plain sum used before for 0/1
   input. When a norm is zero the ratio is undefined (it used to be NaN): two zero
   vectors are treated as identical (1.0), a zero vector against a non-zero one as
   unrelated (0.0). */
const double cosine(std::vector<int> v1,std::vector<int> v2){
    const std::size_t common = std::min(v1.size(),v2.size());
    long long dot = 0;
    long long norm1 = 0;
    long long norm2 = 0;
    for(std::size_t i=0;i<common;++i){
        // reverse value (1<>0)
        const long long a = std::abs(v1[i]-1);
        const long long b = std::abs(v2[i]-1);
        dot += a*b;
        norm1 += a*a;
        norm2 += b*b;
    }
    if(norm1 == 0 || norm2 == 0){
        return (norm1 == norm2) ? 1.0 : 0.0;
    }
    return dot*1.0/(std::sqrt(double(norm1))*std::sqrt(double(norm2)));
}
/* this function can resize an image to the size of template */ | |
const IMAGE resizeToTemplate(IMAGE& img, ImageData& id,const Features& temp){ | |
// we use this formula: | |
// dest[dx,dy] = src[dx*src_width/dest_width,dy*src_height/dest_height] | |
// dest[dx,dy] = src[dx*px,dy*py] | |
double px = id.width/temp.width; | |
double py = id.height/temp.height; | |
IMAGE newimg = new COLUMN[temp.height]; | |
for(int i=0;i<temp.height;++i){ | |
newimg[i] = new PIXEL[temp.width]; | |
for(int j=0;j<temp.width;++j){ | |
newimg[i][j] = new CHANNEL[1]; | |
newimg[i][j][0] = img[int(i*py)][int(j*px)][0]; | |
} | |
} | |
return newimg; | |
} | |
/* parse feature structure to string */ | |
/* feature format: x1,x2@y1,y2@scale@height,width@label@9points#next_feature_section@xweight,vertial_ratio */ | |
std::string feature2string(Features f){ | |
std::string buf = ""; | |
for(int i=0;i<f.x_sum.size();++i){ | |
buf += to_string(f.x_sum[i]) + ","; | |
} | |
buf = buf.substr(0,buf.length()-1) + "@"; | |
for(int i=0;i<f.y_sum.size();++i){ | |
buf += to_string(f.y_sum[i]) + ","; | |
} | |
buf = buf.substr(0,buf.length()-1) + "@"; | |
buf += to_string(f.wh_ratio) + "@"; | |
buf += to_string(f.height) + ","; | |
buf += to_string(f.width) + "@"; | |
buf += f.label + "@"; | |
for(int i=0;i<f.ninesampling.size();++i){ | |
buf += to_string(f.ninesampling[i]) + ","; | |
} | |
buf = buf.substr(0,buf.length()-1) + "@"; | |
buf += to_string(f.circle) ; | |
return buf; | |
} | |
#ifndef FEATURE_DEFENDED | |
/* converting string to features,s = x1,x2@y1,y2@scale@height,width@label */ | |
Features string2feature(std::string s){ | |
Features f; | |
// get x_sum_str,y_sum_str,tatio_str,height_str,width_str,label | |
std::vector<std::string> dataframe = split_string(s,"@"); | |
//parse x_sum | |
std::vector<std::string> values = split_string(dataframe[0],","); | |
std::vector<int> xsum; | |
for(int i=0;i<values.size();++i){ | |
xsum.push_back( atoi( values[i].c_str() ) ); | |
} | |
//parse y_sum | |
values = split_string(dataframe[1],","); | |
std::vector<int> ysum; | |
for(int i=0;i<values.size();++i){ | |
ysum.push_back( atoi( values[i].c_str() ) ); | |
} | |
f.x_sum = xsum; | |
f.y_sum = ysum; | |
//parse ratio | |
f.wh_ratio = atof(dataframe[2].c_str()); | |
// parse width and height | |
std::vector<std::string> hw = split_string(dataframe[3],","); | |
f.height = atoi(hw[0].c_str()); | |
f.width = atoi(hw[1].c_str()); | |
f.label = dataframe[4]; | |
// set 9 special feature | |
values = split_string(dataframe[5],","); | |
std::vector<int> nsps; | |
for(int i=0;i<values.size();++i){ | |
nsps.push_back(atoi(values[i].c_str())); | |
} | |
f.ninesampling = nsps; | |
// set ratio feature | |
f.circle = atoi(dataframe[6].c_str()); | |
return f; | |
} | |
#endif | |
/* function below is used for prediction */ | |
const std::vector<Features> parseTemplateData(std::string data){ | |
std::vector<Features> templatelist; | |
std::vector<std::string> datalist = split_string(data,"#"); | |
for(int i=0;i<datalist.size();++i){ | |
templatelist.push_back( string2feature(datalist[i]) ); | |
} | |
return templatelist; | |
} | |
/* minus two feature vectors with the consideration of different font size */ | |
const std::vector<double> minusWithScale(const std::vector<int> x,const std::vector<int> templatex,const double payoff){ | |
std::vector<double> result; | |
// compress longer vector to align with the shorter one | |
if( x.size() >= templatex.size() ){ | |
// compress x | |
const double mapping_size = x.size()/templatex.size(); | |
for(int i=0;i<templatex.size();++i){ | |
int base = int(i*mapping_size); | |
int upper = int(base+mapping_size); | |
int sumx = 0; | |
for(int j=base;j<upper;++j){ | |
sumx += x[j]; | |
} | |
sumx /= (upper-base); | |
result.push_back( sumx*payoff - templatex[i] ); | |
} | |
}else{ | |
// compress template | |
const double mapping_size =templatex.size()/x.size(); | |
for(int i=0;i<x.size();++i){ | |
int base = int(i*mapping_size); | |
int upper = int(base+mapping_size); | |
int sumx = 0; | |
for(int j=base;j<int(base+mapping_size);++j){ | |
sumx += templatex[j]; | |
} | |
sumx /= (upper-base); | |
result.push_back( sumx*payoff - x[i] ); | |
} | |
} | |
printVector(x,"x"); | |
printVector(templatex,"templatex"); | |
printVector(result,"minusWithScale"); | |
return result; | |
} | |
/* Mean of the squared, scaled differences: every element of `con` is divided by
   `csum`, squared, and the squares are averaged over con.size(). */
const double meanSquaredError(const std::vector<double> con,const double csum){
    double accumulated = 0.0;
    for(const double& difference : con){
        const double scaled = difference/csum;
        accumulated += scaled*scaled;
    }
    return accumulated/con.size();
}
/* Ratio of the smaller of the two values to the larger one. */
const double payoff(const int a,const int b){
    const int smaller = std::min(a,b);
    const int larger = std::max(a,b);
    return (smaller*1.0) / (larger*1.0) ;
}
/* compute similarity based on width and height ratio*/ | |
const double error_feature_WHR(const Features img,const Features temp){ | |
return std::abs( img.wh_ratio - temp.wh_ratio ); | |
} | |
/* compute similarity */ | |
inline const double similarity(const Features img,const Features temp){ | |
std::vector<double> x_diff = minusWithScale( img.x_sum , temp.x_sum , payoff( temp.height,img.height ) ); | |
std::vector<double> y_diff = minusWithScale( img.y_sum , temp.y_sum , payoff( temp.width,img.width ) ); | |
double x_error = meanSquaredError( x_diff , temp.height<=img.height?img.height:temp.height ); | |
double y_error = meanSquaredError( y_diff , temp.width<=img.width?img.width:temp.width ); | |
const double efwhr = error_feature_WHR(img,temp); | |
const double simNSPS = cosine(temp.ninesampling,img.ninesampling); | |
//std::cout<<"\n\t\tx_err="<<x_error<<"\ty_err="<<y_error<<"\tcos="<<simNSPS<<"\twh_ratio="<<efwhr<<"\n"; | |
return 0.65*(x_error + y_error) + 0.12*(1-simNSPS) + 0.23*efwhr ; | |
//return 0.65*(x_error + y_error) + 0.12*(1-simNSPS) + 0.13*efwhr + 0.1*std::abs(img.circle-temp.circle); | |
} | |
/* Store a one-channel image under "cache/<filename>" in the numpylize text format
   (the write itself only happens when debug file storage is enabled).
   imp2d    : image to export; not modified
   filename : file name inside the cache directory
   Images that do not have exactly one channel are skipped, and the temporary
   flattened copy is released afterwards (it used to be leaked). */
void exportImage(ImagePack & imp2d,std::string filename){
    ImagePack2D d2line = depixelize(imp2d.image,imp2d.properties);
    if(d2line.image == nullptr){
        return;
    }
    save_string( numpylize( nullptr ,d2line.properties, d2line.image ) ,"cache/" + filename);
    for(int r=0;r<d2line.properties.height;++r){
        delete[] d2line.image[r];
    }
    delete[] d2line.image;
}
/* predict which alphaberts it is */ | |
std::string predictAlphberts(ImagePack img,const std::vector<Features> &templatedata){ | |
// get feature | |
//feature_circle(img.image,img.properties,f); | |
double minv = std::numeric_limits<double>::max(); // stores max similarity | |
std::string current="*"; // stores char which matches to the max similarity | |
// loop computing cos with the template data | |
for(auto& temp: templatedata){ | |
IMAGE imagex =resizeToTemplate(img.image,img.properties,temp); | |
ImageData id = ImageData{temp.width,temp.height,1}; | |
ImagePack s= ImagePack{imagex,id}; | |
Features f = feature_extractor_projectionmatch(imagex,id,""); | |
f = feature_extractor_9Sampling(imagex,id,f); | |
double x = similarity(f,temp); | |
if( x <= minv ){ | |
minv =x; | |
current = temp.label; | |
} | |
} | |
// return data | |
return current; | |
} | |
/* ==============================================Extract text fucntion here================================== */ | |
DLISTIMAGEPACK extractText(uint8_t* img,const ImageData & id){ | |
const IMAGE image = to_Martix(img,id); | |
save_string( numpylize(image,id) ,"cache/raw_image.txt"); | |
ImagePack grayscaleimgpack = to_grayScale(image,id); | |
//Thresholding(image,id); | |
ImagePack2D a = depixelize(grayscaleimgpack.image ,grayscaleimgpack.properties); | |
save_string(numpylize(nullptr,a.properties,a.image),"cache/grayscale.txt"); | |
/* 1 for background, 0 for data */ | |
grayscaleimgpack.image = set_lessThan2Value(grayscaleimgpack.image,grayscaleimgpack.properties,1,180); //50 | |
grayscaleimgpack.image = set_largeThan2Value(grayscaleimgpack.image,grayscaleimgpack.properties,0,180); //185 | |
a = depixelize(grayscaleimgpack.image ,grayscaleimgpack.properties); | |
save_string(numpylize(nullptr,a.properties,a.image),"cache/thresholding.txt"); | |
int *ones_on_y = new int[grayscaleimgpack.properties.height]; | |
for(int i=0;i<grayscaleimgpack.properties.height;++i){ | |
int buf = 0; | |
for(int j=0;j<grayscaleimgpack.properties.width;++j){ | |
if(grayscaleimgpack.image[i][j][0] > 0){ | |
++buf; | |
} | |
} | |
ones_on_y[i] = buf; | |
} | |
LIST sentence_boundary; | |
int start,end; | |
for(int i=0;i<grayscaleimgpack.properties.height-1;++i){ | |
if( ( ones_on_y[i] == 0) && (ones_on_y[i+1] > 0) ){ //top boundary of sentence | |
start = i; | |
}else if((ones_on_y[i] > 0) && (ones_on_y[i+1] == 0)){ // bottom boundary of sentence | |
end = i+1; | |
sentence_boundary.push_back(start); | |
sentence_boundary.push_back(end); | |
} | |
} | |
int linecount = sentence_boundary.size()/2; | |
LISTIMAGEPACK sentences; | |
for(int i=0;i<linecount;++i){ | |
ImagePack im; | |
im.image = sliceSubMatrix3D(grayscaleimgpack.image,grayscaleimgpack.properties, | |
sentence_boundary[2*i],sentence_boundary[2*i+1],-1,-1); | |
im.properties.width = grayscaleimgpack.properties.width; | |
im.properties.height = sentence_boundary[2*i+1] - sentence_boundary[2*i]; | |
im.properties.channel = 1; | |
sentences.push_back(im); | |
ImagePack2D d2line = depixelize(im.image,im.properties); | |
//save_string( numpylize( nullptr ,d2line.properties, d2line.image ) ,"cache/line" + to_string(i+1) + ".txt"); | |
} | |
DLISTIMAGEPACK alphaberts; | |
for(int i =0;i<sentences.size();++i){ | |
LISTIMAGEPACK alpha; | |
ImagePack sen = sentences[i]; | |
LIST zeros_on_sentence; | |
for(int j=0;j<sen.properties.width;++j ){ | |
int buf=0; | |
for(int c=0;c<sen.properties.height;c++){ | |
if(sen.image[c][j][0] > 0){ | |
++buf; | |
} | |
} | |
zeros_on_sentence.push_back(buf); | |
} | |
start = end = 0; | |
for(int j=0;j<zeros_on_sentence.size();++j){ | |
if( ( zeros_on_sentence[j] == 0) && (zeros_on_sentence[j+1] > 0) ){ //left boundary of alphaberts | |
start = j; | |
}else if((zeros_on_sentence[j] > 0) && (zeros_on_sentence[j+1] == 0)){ // right boundary of alplaberts | |
end = j+1; | |
IMAGE imx = sliceSubMatrix3D(sen.image,sen.properties,-1,-1,start,end); | |
ImagePack imp = {imx,{end-start,sen.properties.height,1}}; | |
imp.image = reverseImageBit(imp.image,imp.properties); | |
ImagePack2D d2line = depixelize(imp.image,imp.properties); | |
//save_string( numpylize( nullptr ,d2line.properties, d2line.image ) ,"cache/alp_"+ to_string(i+1) + to_string(j+1) + ".txt"); | |
alpha.push_back(imp); | |
} | |
} | |
alphaberts.push_back(alpha); | |
} | |
//delete resource | |
delete[] ones_on_y; | |
deleteMatrix(image,id); | |
deleteMatrix(grayscaleimgpack.image,grayscaleimgpack.properties); | |
return alphaberts; | |
} | |
/* this function will extract all words out of the picture */ | |
/* words extracted can be used in extractText for future recognize */ | |
DLISTIMAGEPACK extractWord(uint8_t* img,const ImageData & id){ | |
const IMAGE image = to_Martix(img,id); | |
save_string( numpylize(image,id) ,"cache/raw_image.txt"); | |
ImagePack grayscaleimgpack = to_grayScale(image,id); | |
/* 1 for background, 0 for data */ | |
Thresholding(grayscaleimgpack.image,grayscaleimgpack.properties); | |
//grayscaleimgpack.image = set_lessThan2Value(grayscaleimgpack.image,grayscaleimgpack.properties,1,180); | |
//grayscaleimgpack.image = set_largeThan2Value(grayscaleimgpack.image,grayscaleimgpack.properties,0,180); | |
exportImage(grayscaleimgpack,"thresholding_img.txt" ); | |
int *ones_on_y = new int[grayscaleimgpack.properties.height]; | |
for(int i=0;i<grayscaleimgpack.properties.height;++i){ | |
int buf = 0; | |
for(int j=0;j<grayscaleimgpack.properties.width;++j){ | |
if(grayscaleimgpack.image[i][j][0] > 0){ | |
++buf; | |
} | |
} | |
ones_on_y[i] = buf; | |
} | |
LIST sentence_boundary; | |
int start,end; | |
for(int i=0;i<grayscaleimgpack.properties.height-1;++i){ | |
if( ( ones_on_y[i] == 0) && (ones_on_y[i+1] > 0) ){ //top boundary of sentence | |
start = i; | |
}else if((ones_on_y[i] > 0) && (ones_on_y[i+1] == 0)){ // bottom boundary of sentence | |
end = i+1; | |
sentence_boundary.push_back(start); | |
sentence_boundary.push_back(end); | |
} | |
} | |
int linecount = sentence_boundary.size()/2; | |
LISTIMAGEPACK sentences; | |
for(int i=0;i<linecount;++i){ | |
ImagePack im; | |
im.image = sliceSubMatrix3D(grayscaleimgpack.image,grayscaleimgpack.properties, | |
sentence_boundary[2*i],sentence_boundary[2*i+1],-1,-1); | |
im.properties.width = grayscaleimgpack.properties.width; | |
im.properties.height = sentence_boundary[2*i+1] - sentence_boundary[2*i]; | |
im.properties.channel = 1; | |
sentences.push_back(im); | |
ImagePack2D d2line = depixelize(im.image,im.properties); | |
//save_string( numpylize( nullptr ,d2line.properties, d2line.image ) ,"cache/line" + to_string(i+1) + ".txt"); | |
} | |
DLISTIMAGEPACK words; | |
for(int i =0;i<sentences.size();++i){ | |
LISTIMAGEPACK alpha; | |
ImagePack sen = sentences[i]; | |
LIST zeros_on_sentence; | |
for(int j=0;j<sen.properties.width;++j ){ | |
int buf=0; | |
for(int c=0;c<sen.properties.height;c++){ | |
if(sen.image[c][j][0] > 0){ | |
++buf; | |
} | |
} | |
zeros_on_sentence.push_back(buf); | |
} | |
// get avereage space length | |
//get left and right boundary first | |
int left_line_boundary = 0; | |
for(int j=0;j<zeros_on_sentence.size();--j){ | |
if(zeros_on_sentence[j] != 0){ | |
break; | |
} | |
++left_line_boundary; | |
} | |
int right_line_boundary = 0; | |
for(int j=zeros_on_sentence.size()-1;j >= 0;--j){ | |
if(zeros_on_sentence[j] != 0){ | |
break; | |
} | |
++right_line_boundary; | |
} | |
int average = 0; | |
int count_space = 0; | |
int lenxxx = 0; | |
bool loopzero = false; | |
for(int j=left_line_boundary;j<zeros_on_sentence.size()-right_line_boundary;++j){ | |
if(zeros_on_sentence[j] == 0){ | |
if(!loopzero){ | |
start = j; | |
lenxxx = 1; | |
loopzero = true; | |
++count_space; | |
}else{ | |
++lenxxx; | |
} | |
}else{ | |
if(loopzero){ | |
loopzero = false; | |
average += lenxxx; | |
} | |
} | |
} | |
average = (average/count_space) + 2; // change this number if it works badly | |
//std::cout<<"average space length="<<average<<std::endl; | |
// start extract words | |
start = end = 0; | |
bool no_split = true; | |
for(int j=0;j<zeros_on_sentence.size();++j){ | |
if( ( zeros_on_sentence[j] == 0) && (zeros_on_sentence[j+1] > 0) ){ //left boundary of words | |
if(no_split){ | |
start = j; | |
} | |
}else if((zeros_on_sentence[j] > 0) && (zeros_on_sentence[j+1] == 0)){ // right boundary of alplaberts | |
end = j+1; | |
no_split = true; | |
int ending_checking_pos = ((j+average)>(zeros_on_sentence.size()))?(zeros_on_sentence.size()):(j+average); | |
for(int k=j+1;k<ending_checking_pos;k++){ | |
if(zeros_on_sentence[k] > 0){ | |
no_split = false; | |
break; | |
} | |
} | |
if(no_split){ | |
IMAGE imx = sliceSubMatrix3D(sen.image,sen.properties,-1,-1,start,end); | |
ImagePack imp = {imx,{end-start,sen.properties.height,1}}; | |
//imp.image = reverseImageBit(imp.image,imp.properties); | |
alpha.push_back(imp); | |
no_split = true; | |
} | |
} | |
} | |
words.push_back(alpha); | |
} | |
//delete resource | |
delete[] ones_on_y; | |
deleteMatrix(image,id); | |
deleteMatrix(grayscaleimgpack.image,grayscaleimgpack.properties); | |
return words; | |
} | |
/* Splits one word image into its letters.                                      */
/*   word : image of a single word                                              */
/*   id   : width / height / channel description of word                        */
/* Returns a list holding exactly one entry: the list of letter images, each    */
/* passed through reverseImageBit.                                              */
DLISTIMAGEPACK extractTextFromWord(const IMAGE word,const ImageData & id){
    ImagePack wordpack = ImagePack{word,id};
    exportImage(wordpack,"extractTextFromWord_raw.txt" );

    // number of pixels > 0 on every column of the word
    LIST column_hits;
    for(int col=0;col<wordpack.properties.width;++col){
        int hits = 0;
        for(int row=0;row<wordpack.properties.height;++row){
            hits += (wordpack.image[row][col][0] > 0) ? 1 : 0;
        }
        column_hits.push_back(hits);
    }

    LISTIMAGEPACK letters;
    const int ncols = static_cast<int>(column_hits.size());
    int left = 0;
    int right = 0;
    bool inside_letter = false;
    for(int col=0;col<ncols;++col){
        const bool filled = column_hits[col] > 0;
        if(filled && !inside_letter){
            // left boundary of a letter
            left = col;
            inside_letter = true;
            continue;
        }
        const bool last_column = (col == ncols-1);
        if(inside_letter && (!filled || last_column)){
            // right boundary of a letter
            // NOTE(review): when the last column still belongs to the letter,
            // `right` is that column's index rather than one past it - confirm
            // against sliceSubMatrix3D whether the final column is meant to be left out.
            right = col;
            IMAGE cut = sliceSubMatrix3D(wordpack.image,wordpack.properties,-1,-1,left,right);
            ImagePack letter = {cut,{right-left,wordpack.properties.height,1}};
            letter.image = reverseImageBit(letter.image,letter.properties);
            inside_letter = false;
            letters.push_back(letter);
        }
    }

    DLISTIMAGEPACK result;
    result.push_back(letters);
    return result;
}
/* ==============================================Recognize text functions here================================== */
/* recognize a single image */ | |
std::string recognize(DLISTIMAGEPACK& data){ | |
//start recognize | |
std::string result = ""; | |
const std::vector<Features> templatedata = parseTemplateData(pcadata); | |
for(DLISTIMAGEPACKITERATOR i=data.begin();i<data.end();++i){ | |
LISTIMAGEPACK alpha = *i; | |
for(LISTIMAGEPACKITERATOR j=alpha.begin();j<alpha.end();++j){ | |
//delete empty line | |
(*j).image = delteEmptyline((*j).image,(*j).properties); | |
ImagePack2D d2line = depixelize((*j).image,(*j).properties); | |
std::string re = predictAlphberts(*j,templatedata); | |
result += re; | |
//printMatrix((*j).image,(*j).properties); | |
} | |
} | |
return result; | |
} | |
/* recognize a data with the original format, like words and space */ | |
/* input must be words list */ | |
std::string recognizeWithFormat(const DLISTIMAGEPACK& data){ | |
std::string result = ""; | |
initSpellChecker(); | |
for(auto& line: data){ | |
for(auto& wordimg: line){ | |
DLISTIMAGEPACK word = extractTextFromWord(wordimg.image,wordimg.properties); | |
std::string wordx = recognize(word); | |
wordx = suggest(wordx);// correct word here | |
result += wordx + " "; | |
} | |
result += "\r\n"; | |
} | |
return result; | |
} |