Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Add all files.

  • Loading branch information...
commit 8b147f957c67baee2e8c5b647202f020c751f5fd 1 parent 60a17a4
@hczhu authored
Showing with 15,434 additions and 0 deletions.
  1. +668 −0 Geometry-algorithms/Geometry.cpp
  2. +123 −0 ML-algorithms/logistic-MSE.py
  3. +573 −0 ML-algorithms/trainMAXENT.java
  4. +434 −0 Misc/Huffman.cpp
  5. +72 −0 Misc/LongestCommonSubsequence.cpp
  6. +106 −0 Misc/file_reader.cpp
  7. +310 −0 Misc/struct_print.cpp
  8. +402 −0 Misc/toolFunctions.cpp
  9. +94 −0 commons/common_header.h
  10. +187 −0 data-structures/FibHeap.cpp
  11. +52 −0 data-structures/LeftistTree.cpp
  12. +59 −0 data-structures/MinMaxQuery.h
  13. +697 −0 data-structures/RBTree.cpp
  14. +75 −0 data-structures/SMAWK.cpp
  15. +86 −0 data-structures/binaryHeap.cpp
  16. +77 −0 data-structures/common_header_zhc.h
  17. +89 −0 data-structures/data_structures_common.h
  18. +471 −0 data-structures/hash_table.h
  19. +104 −0 data-structures/hash_tables.cpp
  20. +67 −0 data-structures/im_vector.h
  21. +203 −0 data-structures/interval_tree.cpp
  22. +592 −0 data-structures/linked_list.h
  23. +671 −0 data-structures/mem_pool.h
  24. +153 −0 data-structures/segment_tree.h
  25. +445 −0 data-structures/static_RBTree.cpp
  26. +69 −0 data-structures/tester.cpp
  27. +157 −0 graph-algorithms/Bipartite_matching.cpp
  28. +280 −0 graph-algorithms/General_matching.cpp
  29. +140 −0 graph-algorithms/MaxFlowDinic.cpp
  30. +137 −0 graph-algorithms/Strongly_connected_component.cpp
  31. +122 −0 graph-algorithms/Undirected_graph_component.cpp
  32. +136 −0 graph-algorithms/chordalGraph.cpp
  33. +47 −0 graph-algorithms/graph-algorithms.cpp
  34. +231 −0 graph-algorithms/minCostFlow.cpp
  35. +35 −0 graph-algorithms/stableMarriage.cpp
  36. +76 −0 graph-algorithms/tree-algorithms.cpp
  37. +68 −0 language-features/11-21.cpp
  38. +91 −0 language-features/13-2.cpp
  39. +394 −0 language-features/5-9-6.cpp
  40. +439 −0 language-features/calculator.cpp
  41. +46 −0 language-features/class-definition.cpp
  42. +71 −0 language-features/new_syntax.cpp
  43. +55 −0 language-features/return_throw_performance.cpp
  44. +46 −0 language-features/var_argments.cpp
  45. +281 −0 numeric-algorithms/BigInteger.cpp
  46. +91 −0 numeric-algorithms/FFT.cpp
  47. +191 −0 numeric-algorithms/LPSimplex.cpp
  48. +634 −0 numeric-algorithms/Number.cpp
  49. +115 −0 numeric-algorithms/fraction.cpp
  50. +113 −0 numeric-algorithms/fractions.cpp
  51. +243 −0 numeric-algorithms/matrix_linear.cpp
  52. +30 −0 numeric-algorithms/numeric.h
  53. +139 −0 numeric-algorithms/prime_generator.cpp
  54. +136 −0 string-algorithms/2DMatcher.h
  55. +753 −0 string-algorithms/ACAuto.cpp
  56. +432 −0 string-algorithms/ACAuto_old.cpp
  57. +356 −0 string-algorithms/string-algorithms.cpp
  58. +124 −0 string-algorithms/suffixArray.cpp
  59. +99 −0 string-algorithms/suffixArray2.cpp
  60. +200 −0 string-algorithms/suffixTree.cpp
  61. +109 −0 testers/test_all.cpp
  62. +76 −0 testers/tester.cpp
  63. +82 −0 threads-lib/common_header_zhc.h
  64. +416 −0 threads-lib/parallel_framework.h
  65. +453 −0 threads-lib/parallel_framework_buffer.h
  66. +344 −0 threads-lib/parallel_framework_lock.h
  67. +525 −0 threads-lib/parallel_framework_semaphore.h
  68. +42 −0 topcoder-related/TopCoder_template.cpp
View
668 Geometry-algorithms/Geometry.cpp
@@ -0,0 +1,668 @@
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <algorithm>
#include <vector>
#include <iostream>
#define NDEBUG
#include <assert.h>
#include <complex>
using namespace std;
#ifndef NDEBUG
 #define debug(x) cerr<<#x<<"=\""<<x<<"\""<<" at line#"<<__LINE__<<endl;
 #define hline() cerr<<"-----------------------------------------"<<endl;
#else
 #define debug(x)
 #define hline()
#endif
// Tolerance used for all floating-point comparisons in this file.
const double EPS=1e-15;
// Pi, computed portably as acos(-1).
const double PI=acos(-1.0);
typedef long long int llint;
typedef unsigned long long int ullint;

// Square root that clamps tiny (or negative) arguments to zero, so small
// negative values produced by floating-point cancellation never yield NaN.
double mysqrt(double v)
{
  return (v < EPS) ? 0.0 : sqrt(v);
}
// Sign of the cross product (p1-p3) x (p2-p3): the orientation of the
// triangle (p1,p2,p3). Returns +1 (counter-clockwise), -1 (clockwise) or
// 0 (collinear). Exact as long as the products fit in signed 64 bits.
// (The parameter type "long long" is identical to the llint typedef.)
int sigcross(long long x1,long long y1,long long x2,long long y2,long long x3,long long y3)
{
  const long long lhs = (long long)(x1 - x3) * (y2 - y3);
  const long long rhs = (long long)(y1 - y3) * (x2 - x3);
  if (lhs == rhs) return 0;
  return lhs < rhs ? -1 : 1;
}
// 2D point that doubles as a 2D vector and as a complex number:
// operator*/operator*= implement complex multiplication and
// operator// operator/= complex division (circle_on_point3 below relies
// on multiplication by (0,1) to rotate a vector by 90 degrees).
template<typename T>
class point2D
{
 public:
  T _x,_y;  // public so the free helper functions can reach them directly
  // Read-only and mutable coordinate accessors.
  T x()const{return _x;};
  T& x(){return _x;};
  T y()const{return _y;};
  T& y(){return _y;};
  point2D(const T& x=T(),const T& y=T()):_x(x),_y(y){};
  point2D(const point2D& other):_x(other._x),_y(other._y){};
  const point2D& operator=(const point2D<T>& other){_x=other._x,_y=other._y;return *this;};
  bool operator==(const point2D<T>& other)const{return _x==other._x&&_y==other._y;};
  // Euclidean length / squared length, computed in double even for integer T.
  double length()const {return sqrt(1.0*_x*_x+1.0*_y*_y);};
  double dnorm()const {return 1.0*_x*_x+1.0*_y*_y;};
  // Squared length in the coordinate type (may overflow for large integers).
  T norm()const
  {
   return _x*_x+_y*_y;
  };
  // Lexicographic order: by x, ties broken by y (used by convexHull's sort).
  bool operator<(const point2D& other)const
  {
   return x()<other.x()||(x()==other.x()&&y()<other.y());
  }
  // NOTE(review): the compound operators return *this BY VALUE (a copy),
  // not by reference as is conventional; callers here never chain them.
  point2D operator+=(const point2D& other)
  {
   _x+=other._x;
   _y+=other._y;
   return *this;
  };
  point2D operator+(const point2D& other)const
  {
   point2D res=*this;
   res+=other;
   return res;
  };

  point2D operator-=(const point2D& other)
  {
   _x-=other._x;
   _y-=other._y;
   return *this;
  };
  point2D operator-(const point2D& other)const
  {
   point2D res=*this;
   res-=other;
   return res;
  };

  point2D operator*=(const point2D& other)
  {
   point2D res=(*this)*other;
   *this=res;
   return *this;
  };
  // Complex multiplication: (x1 + i*y1) * (x2 + i*y2).
  point2D operator*(const point2D& other)const
  {
   point2D res;
   res._x=_x*other._x-_y*other._y;
   res._y=_x*other._y+_y*other._x;
   return res;
  };
  point2D conjugate()const
  {
   return point2D(_x,-_y);
  }
  point2D negate()const
  {
   return point2D(-_x,-_y);
  }
  // Complex division via multiplication by the conjugate.
  // NOTE(review): "point2D<double> res=*this" only compiles when T is
  // double (there is no cross-type converting constructor), so the
  // division members are effectively usable for point2D<double> only;
  // they are never instantiated for other T. The assert is a no-op
  // because NDEBUG is defined at the top of this file.
  point2D<double> operator/(const point2D<T>& other)const
  {
   assert(other.dnorm()>0.0);
   point2D<double> res=*this;
   res*=other.conjugate();
   res._x/=other.dnorm();
   res._y/=other.dnorm();
   return res;
  }
  point2D<double> operator/=(const point2D<T>& other)
  {
   *this=(*this)/other;
   return *this;
  }
  // Division by a scalar (same T-must-be-double caveat as above).
  point2D<double> operator/(const T& other)
  {
   assert(fabs(1.0*other)>0.0);
   point2D res=*this;
   res._x/=other;
   res._y/=other;
   return res;
  }
  point2D<double> operator/=(T& other)
  {
   *this=(*this)/other;
   return *this;
  }
};
+template<typename T>
+ostream& operator<<(ostream& out,const point2D<T>& other)
+{
+ out<<"("<<other.x()<<","<<other.y()<<")";
+ return out;
+}
+template<typename T>
+istream& operator>>(istream& in,point2D<T>& other)
+{
+ in>>other.x()>>other.y();
+ return in;
+}
+
+
+typedef point2D<double> dpoint2D ;
+
// Cross product (z component) of two 2D vectors, in the coordinate type.
template<typename T>
T cross(const point2D<T>& p1,const point2D<T>& p2)
{
  return p1.x()*p2.y()-p1.y()*p2.x();
}

// Cross product evaluated in 64-bit integers (exact for integer points
// whose products fit in llint).
template<typename T>
llint llcross(const point2D<T>& p1,const point2D<T>& p2)
{
  return (llint)p1.x()*(llint)p2.y()-(llint)p2.x()*(llint)p1.y();
};

// Sign of the 64-bit cross product: +1, -1, or 0 for parallel vectors.
template<typename T>
int sigcross(point2D<T> p1,point2D<T> p2)
{
  llint re=llcross(p1,p2);
  if(re<0)return -1;
  if(re>0)return 1;
  return 0;
}

// Cross product evaluated in double; mixed coordinate types allowed.
template<typename T1,typename T2>
double dcross(point2D<T1> p1,point2D<T2> p2)
{
  return 1.0*p1.x()*p2.y()-p2.x()*p1.y();
}

// Dot product evaluated in double.
template<typename T1,typename T2>
double ddot(point2D<T1> p1,point2D<T2> p2)
{
  return 1.0*p1.x()*p2.x()+1.0*p1.y()*p2.y();
}
// Dot product in the FIRST argument's coordinate type.
template<typename T1,typename T2>
T1 dot(point2D<T1> p1,point2D<T2> p2)
{
  return p1.x()*p2.x()+p1.y()*p2.y();
}
// Free-function forms of squared length and length.
template<typename T>
T norm(const point2D<T>& p)
{
  return p.norm();
}

template<typename T>
double dnorm(const point2D<T>& p)
{
  return 1.0*p._x*p._x+1.0*p._y*p._y;
}
template<typename T>
double length(const point2D<T>& p)
{
  return sqrt(dnorm(p));
}
//*********************************************************
// 3D point / vector. operator* is overloaded both as scalar scaling and
// as the vector cross product; dot products are free functions below.
template<typename T>
class point3D
{
  public:
  T _x,_y,_z;  // public so free helper functions can touch them directly
  T x()const{return _x;};
  T y()const{return _y;};
  T z()const{return _z;};
  T& x(){return _x;};
  T& y(){return _y;};
  T& z(){return _z;};

  point3D(const T& xx=T(),const T& yy=T(),const T& zz=T()):_x(xx),_y(yy),_z(zz){};
  // FIX: the original assignment operator had no return statement
  // (undefined behavior for any caller that used its result).
  const point3D& operator=(const point3D& other)
  {
    _x=other._x;
    _y=other._y;
    _z=other._z;
    return *this;
  }
  // FIX: the binary operators below used to return a const reference to a
  // local variable (a dangling reference). They now return by value.
  point3D operator+(const point3D& other)const
  {
    point3D res=*this;
    res+=other;
    return res;
  };
  const point3D& operator+=(const point3D& other)
  {
    _x+=other._x;
    _y+=other._y;
    _z+=other._z;
    return *this;
  }
  point3D operator-(const point3D& other)const
  {
    point3D res=*this;
    res-=other;
    return res;
  };
  const point3D& operator-=(const point3D& other)
  {
    _x-=other._x;
    _y-=other._y;
    _z-=other._z;
    return *this;
  };
  // Scaling by a scalar k.
  point3D operator*(T k)const
  {
    point3D res=*this;
    res*=k;
    return res;
  };
  const point3D& operator*=(T k)
  {
    _x*=k;
    _y*=k;
    _z*=k;
    return *this;
  };
  // Vector cross product.
  point3D operator*(const point3D<T>& other)const
  {
    point3D res;
    res._x=_y*(other._z)-_z*(other._y);
    res._y=-_x*(other._z)+_z*(other._x);
    res._z=_x*(other._y)-_y*(other._x);
    return res;
  };

  const point3D& operator*=(const point3D& other)
  {
    point3D res=(*this)*other;
    *this=res;
    return *this;
  };
  // Squared length in the coordinate type (may overflow for big integers).
  T norm()const
  {
    return _x*_x+_y*_y+_z*_z;
  };
  // Squared length evaluated in double.
  double dnorm()const
  {
    return 1.0*_x*_x+1.0*_y*_y+1.0*_z*_z;
  }
  // Euclidean length.
  double length()const
  {
    return sqrt(dnorm());
  };

};
// Dot product of 3D vectors, evaluated in double.
template<typename T1,typename T2>
double ddot(point3D<T1> p1,point3D<T2> p2)
{
  return 1.0*p1.x()*p2.x()+1.0*p1.y()*p2.y()+1.0*p1.z()*p2.z();
}

// Dot product in the FIRST argument's coordinate type.
template<typename T1,typename T2>
T1 dot(point3D<T1> p1,point3D<T2> p2)
{
  return p1.x()*p2.x()+p1.y()*p2.y()+p1.z()*p2.z();
}

// Prints "(x,y,z)".
template<typename T>
ostream& operator<<(ostream& out,const point3D<T>& other)
{
  out<<"("<<other.x()<<","<<other.y()<<","<<other.z()<<")";
  return out;
}
// Reads x, y, z.
template<typename T>
istream& operator>>(istream& in,point3D<T>& other)
{
  in>>other.x()>>other.y()>>other.z();
  return in;
}

// Free-function forms of the point3D members.
template<typename T>
T norm(const point3D<T>& p)
{
  return p.norm();
}

template<typename T>
double dnorm(const point3D<T>& p)
{
  return p.dnorm();
}
template<typename T>
double length(const point3D<T>& p)
{
  return p.length();
}
//sort vectors clockwise
// Half-plane classifier for angular sorting: +1 for the upper half plane
// (y>0, or y==0 && x>0), -1 otherwise. The zero vector is invalid (the
// assert is a no-op here because NDEBUG is defined at the top of the file).
template<typename T>
int get_type(const point2D<T>& a)
{
  if(a.y())return a.y()>0?1:-1;
  assert(a.x());
  return a.x()>0?1:-1;
}
// Angular comparator: upper half plane sorts first, and within a half
// plane vectors are ordered clockwise via the integer cross-product sign
// (coordinates are therefore assumed integral — TODO confirm for floats).
template<typename T>
bool cmp_vector(const point2D<T>& a,const point2D<T>& b)
{
  int ta=get_type(a);
  int tb=get_type(b);
  if(ta!=tb)return ta==1;
  return sigcross(b,a)>0;
}
+//****************************************************************************
+//End of base classes and functions
+
+//return the foot of a perpendicular
+//The plane is represented by (X-px)pv=0
+template<typename T>
+point3D<double> point_plane_intersection(point3D<T> p0,point3D<T> px,point3D<T> pv)
+{
+ double t=ddot(p0-px,pv)/dnorm(pv);
+ return point3D<double>(p0.x()+t*pv.x(),p0.y()+t*pv.y(),p0.z()+t*pv.z());
+}
+
+
// Some functions about lines and segments

// Intersection of the infinite lines (p1,p2) and (p3,p4), written to p.
// Returns false when the lines are parallel or coincide (no unique point).
template<typename T>
bool linesIntersection(point2D<T> p1,point2D<T> p2,point2D<T> p3,
  point2D<T> p4,point2D<double>& p)
{
  // Parallel (or identical) direction vectors: no unique intersection.
  if(fabs(dcross(p1-p2,p3-p4))<EPS)return false;
  // Solve p1 + t*(p2-p1) on line (p3,p4); both cross products carry the
  // same sign flip relative to the textbook formula, so the ratio is exact.
  double t=dcross(p3-p1,p3-p4)/dcross(p2-p1,p3-p4);
  p.x()=1.0*p1.x()+1.0*(p2.x()-p1.x())*t;
  p.y()=1.0*p1.y()+1.0*(p2.y()-p1.y())*t;
  return true;
}
// Intersection of two lines given in implicit form kx*x + ky*y + k = 0.
// Returns false if they are parallel or coincide (zero determinant).
bool linesIntersection
(double kx1,double ky1,double k1,double kx2,double ky2,double k2,point2D<double>& p)
{
  double t;
  // t is the 2x2 determinant of the coefficient matrix (Cramer's rule).
  if(fabs(t=kx1*ky2-kx2*ky1)<EPS)return false;
  p=point2D<double>(-(k1*ky2-k2*ky1)/t,(k1*kx2-k2*kx1)/t);
  return true;
}
+//The two segments must have only one intersection
+//Find the intersection of two segments (p1,p2) and (p3,p4)
+//Return 0: no intersection.
+//Return 1: one intersection.
+//Return 2: infinite intersection. overlap.
+template<typename T>
+int segmentsIntersection
+(point2D<T> p1,point2D<T> p2,point2D<T> p3,point2D<T> p4,point2D<double>& inter)
+{
+ if(sigcross(p3-p1,p2-p1)*sigcross(p2-p1,p4-p1)<0)return 0;
+ if(sigcross(p1-p3,p4-p3)*sigcross(p4-p3,p2-p3)<0)return 0;
+ if(sigcross(p2-p1,p3-p1)==0&&sigcross(p2-p1,p4-p1)==0)
+ {
+ if(p1.x()==p2.x())
+ {
+ if(p1.y()>p2.y())swap(p1,p2);
+ if(p3.y()>p4.y())swap(p3,p4);
+ T ly=max(p1.y(),p3.y());
+ T hy=min(p2.y(),p4.y());
+ if(ly>hy)return 0;
+ if(ly<hy)return 2;
+ inter=point2D<double>(p2.x(),p2.y());
+ return 1;
+ }
+ else
+ {
+ if(p1.x()>p2.x())swap(p1,p2);
+ if(p3.x()>p4.x())swap(p3,p4);
+ T lx=max(p1.x(),p3.x());
+ T hx=min(p2.x(),p4.x());
+ if(lx>hx)return 0;
+ if(lx<hx)return 2;
+ inter=point2D<double>(p2.x(),p2.y());
+ return 1;
+ }
+ }
+ if(!linesIntersection(p1,p2,p3,p4,inter))*((int*)0)=0;
+ return 1;
+}
//Return the distance between point p1 and segment bp--ep.
// The squared distance to bp + t*(ep-bp) is the quadratic a*t^2 + b*t + c;
// it is minimized over t in [0,1] (endpoints vs. the unconstrained vertex).
template<typename T>
double pointSegmentDistance(point2D<T> p1,point2D<T> bp,point2D<T> ep)
{
  dpoint2D dp1(p1.x(),p1.y()),dbp(bp.x(),bp.y()),dep(ep.x(),ep.y());
  dpoint2D dp=dep-dbp;
  double a=dp.dnorm();
// double b=2.0*(bp.x()-p1.x())*dp.x()+2.0*(bp.y()-p1.y())*dp.y();
  double b=2.0*ddot(dbp-dp1,dp);
// double c=(bx-x1)*(bx-x1)+(by-y1)*(by-y1);
  double c=(dbp-dp1).dnorm();
  // Best of the two endpoints: t=0 gives c, t=1 gives a+b+c.
  double ans=min(c,a+b+c);
  if(fabs(a)<EPS)return mysqrt(ans);  // degenerate segment (bp == ep)
  double t=-b/2.0/a;                  // vertex of the quadratic
  if(t<0.0||t>1.0)return mysqrt(ans); // vertex outside the segment
  return mysqrt(t*t*a+t*b+c);
}
//Return the distance of a point p and a line p0+dp*t.
//The closest point on the line is p0+dp*tt (tt is an output parameter).
template<typename T>
double pointLineDistance(point2D<T> p,point2D<T> p0,point2D<T> dp,double& tt)
{
  tt=0.0;
  double a=dp.dnorm();
  double b=2.0*ddot(p0-p,dp);//(x0-xx)*dx+2.0*(y0-yy)*dy;
  double c=(p0-p).dnorm();//(x0-xx)*(x0-xx)+(y0-yy)*(y0-yy);
  if(fabs(a)<EPS)return mysqrt(c);  // dp is (near) zero: distance to p0
  tt=-b/2.0/a;
  // NOTE(review): EPS is added under the root, presumably to absorb tiny
  // negative rounding; it inflates the result by at most sqrt(EPS).
  return mysqrt(a*tt*tt+b*tt+c+EPS);
}
//Return the intersection of segment (p1,p2) and the line a*x+b*y+c=0.
//0--> no intersection; 1--> written to inter; 2--> segment lies on the line.
template<typename T>
int lineSegmentIntersection(point2D<T> p1,point2D<T> p2,
  double a,double b,double c,dpoint2D& inter)
{
  // Solve a*(p1+t*(p2-p1)).x + b*(...).y + c = 0 for t: de*t = no.
  double de=1.0*(p2.x()-p1.x())*a+1.0*(p2.y()-p1.y())*b;
  double no=-c-a*p1.x()-b*p1.y();
  if(fabs(de)<EPS)return fabs(no)<EPS?2:0;  // segment parallel to the line
  double t=no/de;
  if(t<-EPS||t>1.0+EPS)return 0;  // intersection outside the segment
  inter.x()=t*(p2.x()-p1.x())+p1.x();
  inter.y()=t*(p2.y()-p1.y())+p1.y();
  return 1;
}
//Convert line form from two points (p1,p2) to implicit a*x+b*y+c=0.
//A is the output coefficient type; inputs are cast to A before arithmetic.
template<typename T,typename A>
void convert(point2D<T> p1,point2D<T> p2,A& a,A& b,A& c)
{
  a=((A)p2.y()-(A)p1.y());
  b=((A)p1.x()-(A)p2.x());
  c=(A)p1.y()*((A)p2.x()-p1.x())-(A)p1.x()*((A)p2.y()-p1.y());
}
+
// End of lines and segments functions
//****************************************************************

//**********************************************************
//Functions of circles

//Find one intersection of two circles (p1,r1) and (p2,r2).
//Returns false when the circles are separated, one strictly contains the
//other, or the centers coincide (concentric). Which of the two
//intersection points is returned depends on the center ordering
//(the comment in the original claims the upper one when x1<x2).
template<typename T>
bool circlesIntersection
  ( point2D<T> p1,T r1,point2D<T> p2,T r2,point2D<double>& inter)
{
  double dx = p2.x() - p1.x();
  double dy = p2.y() - p1.y();
  double d = mysqrt(dx*dx + dy*dy);  // center distance
  if(d>r1+r2+EPS||d+EPS<fabs(r1-r2))return false;
  //circle overlap
  if(d<EPS)return false;
  // e: distance from p1 to the chord along the center line;
  // f: half chord length (height of the intersection above that line).
  double e = (r1*r1 - r2*r2 + d*d)/(2*d);
  double f = mysqrt(r1*r1 - e*e);
  inter=point2D<double>(1.0*p1.x() + (e*dx - f*dy)/d,1.0*p1.y() + (f*dx + e*dy)/d);
  return true;
}

//Intersections of the line p0+dp*t with the circle (center,r).
//On success the two intersection parameters are t1 <= t2 (equal for a
//tangent up to the EPS padding); the points are p0+dp*t1 and p0+dp*t2.
template<typename T>
bool lineCircleIntersection
  (point2D<T> p0,point2D<T> dp,point2D<T> center,T r,double& t1,double& t2)
{
  if(dp.dnorm()<EPS)
  {
    // Degenerate direction: the "line" is the single point p0.
    t1=t2=0.0;
    return fabs((center-p0).dnorm()-r*r)<EPS;
  }
  double tt;
  double d=pointLineDistance(center,p0,dp,tt);  // tt: closest point param
  if(d>r+EPS)return false;
  // Half-chord length in parameter units (EPS absorbs tiny negatives).
  double dt=mysqrt(EPS+(r*r-d*d)/(dp.dnorm()));
  t1=tt-dt;t2=tt+dt;
  return true;
}

// Find the circle through three points as the intersection of the
// perpendicular bisectors of (a,b) and (a,c); multiplying by the complex
// unit (0,1) rotates a chord by 90 degrees to get a bisector direction.
// Returns false when the points are collinear (bisectors parallel).
bool circle_on_point3(dpoint2D a,dpoint2D b,dpoint2D c,dpoint2D& center,double& radius)
{
  dpoint2D ab0=(a+b)/2.0;
  dpoint2D ab1=(a-b)*dpoint2D(0.0,1)+ab0;

  dpoint2D ac0=(a+c)/2.0;
  dpoint2D ac1=(a-c)*dpoint2D(0.0,1)+ac0;
  if(!linesIntersection(ab0,ab1,ac0,ac1,center))return false;
  radius=(center-a).length();
  return true;
}
+// Overlaped area of two circles.
+template<typename T>
+double circleOverlap(point2D<T> c1,double r1,point2D<T> c2,double r2)
+{
+ if(length(c1-c2)+EPS>r1+r2)return 0.0;
+ if(r1>r2)swap(c1,c2),swap(r1,r2);
+ if(length(c1-c2)<r2-r1+EPS)return PI*r1*r1;
+ point2D<double> p1,p2;
+ circlesIntersection(c1,r1,c2,r2,p1);
+ circlesIntersection(c2,r2,c1,r1,p2);
+ double th1=acos(dot(p1-c1,p2-c1)/length(p1-c1)/length(p2-c1));
+ double th2=acos(dot(p1-c2,p2-c2)/length(p1-c2)/length(p2-c2));
+ double sum=th1*r1*r1/2+th2*r2*r2/2;
+ double d=length(p1-p2);
+ sum-=sqrt(r1*r1-d*d/4)*d/2+sqrt(r2*r2-d*d/4)*d/2;
+ return sum;
+}
+
// Convex hull by a two-pass monotone-chain scan over the sorted points.
// nocolinear=true removes points lying on the interior of hull edges.
// Be careful with inputs that contain duplicate coordinates.
template<typename T>
vector<point2D<T> > convexHull(vector<point2D<T> > all,bool nocolinear=true)
{
  if(all.size()<2)return all;
  sort(all.begin(),all.end());  // lexicographic by (x, y)
  vector<int> qq;               // index stack of the current chain
  qq.push_back(0);
  // cut==0 pops collinear points as well; cut==1 keeps them.
  int cut=1;
  if(nocolinear)cut=0;
  // First pass (left to right): build one side of the hull.
  for(int i=1;i<all.size();i++)
  {
    // If all[qq.back()]==all[qq[qq.size()-2]], then all point2D<T> s will be pushed to the stack
    while(qq.size()>=2&&llcross(all[i]-all[qq.back()],
        all[qq.back()]-all[qq[qq.size()-2]])>=cut)
      qq.pop_back();
    qq.push_back(i);
  }
  vector<point2D<T> > res;
  // Remember which points the first chain used so the second pass skips
  // them (endpoints excluded: both chains share the two extreme points).
  vector<bool> used(all.size(),false);
  for(int i=0;i<qq.size();i++)
    res.push_back(all[qq[i]]),used[qq[i]]=true;
  used[0]=used[all.size()-1]=false;
  qq.clear();
  // Second pass (right to left): build the other side of the hull.
  qq.push_back(all.size()-1);
  for(int i=all.size()-2;i>=0;i--)
  {
    if(used[i])continue;
    while(qq.size()>=2&&llcross(all[i]-all[qq.back()],
        all[qq.back()]-all[qq[qq.size()-2]])>=cut)
      qq.pop_back();
    qq.push_back(i);
  }
  // Append the second chain without repeating the shared extreme points.
  for(int i=1;i+1<qq.size();i++)res.push_back(all[qq[i]]);
  return res;
}
//Use the directed line start-->end to cut a convex polygon:
//the half plane to the LEFT of start-->end is kept.
//Returns an empty polygon when the input has fewer than 3 vertices.
vector<point2D<double> > cut_convex(const vector<point2D<double> >& convex,
  point2D<double> start,point2D<double> end)
{
  vector<point2D<double> > ret;
  const int n=convex.size();
  if(n<3)return ret;
  // inside[i]==1 iff vertex i lies strictly left of the cutting line.
  vector<int> inside(n,0);
  for(int idx=0;idx<n;idx++)
    inside[idx]=(dcross(end-start,convex[idx]-start)>EPS)?1:0;
// if(*max_element(inside.begin(),inside.end())==0)return ret;
// if(*min_element(inside.begin(),inside.end())==1)return convex;
  point2D<double> inter;
  // Walk the edges; emit crossing points and every kept vertex
  // (Sutherland-Hodgman clipping against a single half plane).
  for(int i=0,pre=0,next=0;i<n;i++)
  {
    next=(pre+1)%n;
    if(inside[pre]!=inside[next])
    {
      bool cnt=linesIntersection(start,end,convex[pre],convex[next],inter);
      assert(cnt);
      ret.push_back(inter);
    }
    if(inside[next])ret.push_back(convex[next]);
    pre=next;
  }
  return ret;
}
+template<typename T>
+T convex_area(const vector<point2D<T> >& convex)
+{
+ const int n=convex.size();
+ T ret=0;
+ for(int i=0;i<n;i++)ret+=cross(convex[i],convex[(i+1)%n]);
+ if(ret<0)ret=-ret;
+ ret/=2;
+ return ret;
+}
// Axis-aligned rectangle spanning [x1,x2] x [y1,y2].
struct rect_t
{
  int x1,x2,y1,y2;
  rect_t(){};
  rect_t(int a,int b,int c,int d):x1(a),x2(b),y1(c),y2(d){};
  // Orders rectangles by their lower y edge.
  bool operator<(const rect_t& other)const
  {
    return y1<other.y1;
  }
  const rect_t& operator=(const rect_t& other)
  {
    x1=other.x1;
    y1=other.y1;
    x2=other.x2;
    y2=other.y2;
    return *this;
  };
  // Intersection with another rectangle; an empty overlap collapses to a
  // zero-width and/or zero-height rectangle at the boundary.
  rect_t intersaction(const rect_t& other) {
    int lo_x=std::max(x1,other.x1);
    int hi_x=std::min(x2,other.x2);
    int lo_y=std::max(y1,other.y1);
    int hi_y=std::min(y2,other.y2);
    if(hi_x<lo_x)lo_x=hi_x;
    if(hi_y<lo_y)lo_y=hi_y;
    return rect_t(lo_x,hi_x,lo_y,hi_y);
  }
  // Width and height.
  int Xdim() {
    return x2-x1;
  }
  int Ydim() {
    return y2-y1;
  }
};
// Complex-backed 2D point adapter exposing mutable x()/y() accessors.
// FIX: std::complex<T>::real()/imag() return T BY VALUE, so the original
// "return complex<T>::real();" tried to bind a non-const reference to a
// temporary and does not compile under C++11 and later. The standard
// guarantees a complex<T> is array-compatible with T[2] ({real, imag}),
// so exposing references through that reinterpretation is well defined.
template<typename T>
class CPoint : public complex<T> {
  public:
  CPoint(const T& x=T(),const T& y=T()):complex<T>(x,y){};
  T& x() {
    complex<T>& base = *this;
    return reinterpret_cast<T(&)[2]>(base)[0];
  }
  T& y() {
    complex<T>& base = *this;
    return reinterpret_cast<T(&)[2]>(base)[1];
  }
  // Read-only accessors for const objects (backward-compatible addition).
  T x() const {
    return complex<T>::real();
  }
  T y() const {
    return complex<T>::imag();
  }
};
+
View
123 ML-algorithms/logistic-MSE.py
@@ -0,0 +1,123 @@
+#!/usr/bin/python2.4
+#
+# Copyright 2011 Google Inc. All Rights Reserved.
+
+"""One-line documentation for logistic-MSE module.
+
+A detailed description of logistic-MSE.
+"""
+
+__author__ = 'hczhu@google.com (Hongcheng Zhu)'
+
+import sys
+import math
+import getopt
+
def readData(filename):
  """Loads a comma-separated numeric data file.

  Each line holds the feature values followed by the target as the last
  field. Returns (X, Y): X[i] is the feature list with the target removed
  and a constant 1.0 bias feature appended; Y[i] is the target value.
  Lines whose field count differs from the first line's are skipped.

  NOTE: Python 2 code (file(), list-returning map) per the python2.4
  shebang at the top of this script.
  """
  sys.stderr.write('Loading data file:'+filename+'\n')
  X,Y=[],[]
  Dim=-1  # field count of the first well-formed line
  for line in file(filename):
    #print line.strip().split(',')
    t=map(float,line.strip().split(','))
    if Dim!=-1 and Dim != len(t):
      sys.stderr.write('Bad line:'+line)
      continue
    Dim=len(t)
    Y.append(t[len(t)-1])
    t.pop()        # drop the target from the feature vector
    t.append(1.0)  # bias term
    X.append(t)
  return X,Y
def calculate_sigmod(X,Para):
  """Logistic sigmoid of the inner product <X, Para>.

  The (misspelled) name is kept for compatibility with existing callers.
  """
  activation = sum(X[i] * Para[i] for i in range(len(X)))
  return 1.0 / (1.0 + math.exp(-activation))
+
def prediction(X,Para):
  """Applies the weights Para to every row of X, returning the sigmoid outputs."""
  return [calculate_sigmod(X[i], Para) for i in range(len(X))]

def squaredSum(Y1,Y2):
  """Sum of squared element-wise differences between Y1 and Y2 (always a float)."""
  return sum(((Y1[i] - Y2[i]) ** 2 for i in range(len(Y1))), 0.0)
def doTraining(X,Y,itr_times):
  """Fits logistic-regression weights by minimizing the squared error.

  Full-batch gradient descent with an adaptive step size: a step is kept
  only if it lowers the error; after 10 consecutive improvements eps is
  grown by 10%, and any failed step shrinks eps (divide by 1.2). Training
  stops when eps drops below 1e-6 or itr_times iterations have run.
  Returns the learned weight vector P (one weight per feature, the last
  being the bias added by readData).
  """
  eps=10.0
  dim=len(X[0])
  P=[0]*dim
  Y1=prediction(X,P)
  pre_sum=squaredSum(Y1,Y)  # current squared error
  itr=-1
  good=0  # consecutive improving iterations
  while eps>1e-6:
    itr+=1
    if itr>itr_times: break
    # Z[i] = d(error_i)/d(activation_i) = 2*(pred-y)*sigmoid'(activation).
    Z=[]
    for i in range(len(X)):
      Z.append(2.0*(Y1[i]-Y[i])*Y1[i]*(1-Y1[i]))
    new_P=list(P)
    new_Y1=list(Y1)
    # Candidate step: move every coordinate down the batch gradient.
    for k in range(dim):
      for i in range(len(Y)):
        new_P[k]-=eps*X[i][k]*Z[i]
    new_Y1=prediction(X,new_P)
    new_sum=squaredSum(new_Y1,Y)
    #print P,new_P
    if new_sum<pre_sum:
      # Improvement: accept the step; speed up after 10 wins in a row.
      pre_sum,P,Y1=new_sum,new_P,new_Y1
      good+=1
      if good==10:
        eps*=1.1
        good=0
    else:
      # No improvement: reject the step and cool the learning rate down.
      eps/=1.2
      good=0
    sys.stderr.write('Iteration #'+str(itr)+': error='+str(pre_sum)+' eps='+str(eps)+'\n')
  return P
+
def main(argv):
  """Command-line entry point.

  Flags:
    -d <file>  CSV data file (required).
    -p <file>  parameter file (one weight per line); enables prediction mode.
    -t <n>     maximum number of training iterations.

  Training mode prints the learned weights to stdout, one per line;
  prediction mode prints one "label<TAB>prediction" line per sample.
  NOTE: Python 2-only syntax ("except E, err", print statements, file()).
  """
  try:
    #p: predict. d: data file(csv). t: iteration times
    opts,args=getopt.getopt(argv,'p:d:t:')
  except getopt.GetoptError,err:
    sys.stderr.write(str(err))
    sys.exit(2)
  do_pred=False
  datafile=''
  parafile=''
  itr_times=1000000000
  for o,a in opts:
    # NOTE(review): ('-p') is a plain string, not a tuple; "o in '-p'"
    # happens to work here because it is a substring test.
    if o in ('-p'):
      parafile=a
      do_pred=True
    elif o in ('-d'): datafile=a
    elif o in ('-t'): itr_times=int(a)
  if datafile=='':
    sys.stderr.write('no data file\n')
    sys.exit(1)
  if do_pred and parafile=='':
    sys.stderr.write('no para file for prediction\n')
    sys.exit(1)
  X,Y=readData(datafile)
  sys.stderr.write('training sample size='+str(len(Y))+'\n')
  sys.stderr.write('feature dimention='+str(len(X[0]))+'\n')
  sys.stderr.write('itr times='+str(itr_times)+'\n')
  if not do_pred:
    P=doTraining(X,Y,itr_times)
    for item in P: print '%f'%(item)
  elif do_pred:
    # Load the weight vector and verify it matches the data's dimension.
    P=[]
    for line in file(parafile):
      P.append(float(line.strip()))
    if len(P)!=len(X[0]):
      sys.stderr.write('Dimension mismatched in parameter file.\n')
      sys.exit(1)
    Y1=prediction(X,P)
    for i in range(len(Y1)):
      print '%f\t%f'%(Y[i],Y1[i])

if __name__ == '__main__':
  main(sys.argv[1:])
View
573 ML-algorithms/trainMAXENT.java
@@ -0,0 +1,573 @@
+/**
+ */
+package outfox.MAXENT_train;
+
+import outfox.MAXENT_train.Data;
+import odis.conf.ConfigUtils;
+import odis.mapred.ext.IMergeMapper;
+import odis.serialize.lib.MD5Writable;
+import odis.serialize.comparator.ByteBinaryComparator;
+import odis.mapred.lib.IdentityMapper;
+import odis.mapred.lib.IdentityReducer;
+import odis.mapred.lib.ReuseWalker;
+import java.util.logging.Level;
+import odis.serialize.lib.LongWritable;
+import odis.serialize.lib.DoubleWritable;
+import odis.mapred.IWritablePairWalker;
+import odis.serialize.IWritable;
+import odis.mapred.lib.SeqFileHashPartitioner;
+import java.io.IOException;
+import odis.serialize.IWritableComparable;
+import java.text.ParseException;
+import toolbox.misc.MD5;
+import java.util.Properties;
+import java.util.Random;
+import java.util.logging.Logger;
+import odis.tools.ToolContext;
+import odis.mapred.lib.SkipBadRecordSeqFileInputFormat;
+import odis.serialize.comparator.ByteBinaryComparator;
+import java.util.*;
+
+import odis.cowork.CounterMap.Counter;
+import odis.app.job.AbstractCoWorkToolWithArg;
+import odis.app.serialize.AbstractBeanWritable;
+import odis.cowork.JobDef;
+import odis.cowork.JobResult;
+import odis.cowork.TaskRunnable;
+import odis.io.FSDataInputStream;
+import odis.io.FileInfo;
+import odis.io.FileSystem;
+import odis.io.Path;
+import odis.mapred.AbstractMapper;
+import odis.mapred.AbstractReducer;
+import odis.mapred.ICollector;
+import odis.mapred.IWritablePairWalker;
+import odis.mapred.MapReduceJobDef;
+import odis.mapred.lib.GenericFileOutputFormat;
+import odis.mapred.lib.ReuseWalker;
+import odis.mapred.lib.StringMd5Partitioner;
+import odis.serialize.lib.StringWritable;
+import odis.serialize.lib.TimeWritable;
+import odis.serialize.lib.UTF8Writable;
+import odis.tools.MapReduceHelper;
+import toolbox.misc.LogFormatter;
+import toolbox.misc.cli.Options;
+import toolbox.wordsegment.CSegmentor;
+import toolbox.wordsegment.TextTermInfo;
+import toolbox.wordsegment.termstat.TermStatReader;
+import odis.app.job.AbstractCoWorkToolWithArg;
+import odis.cowork.JobResult;
+import odis.io.Path;
+import odis.mapred.AbstractMapper;
+import odis.mapred.AbstractReducer;
+import odis.mapred.ICollector;
+import odis.mapred.IWritablePairWalker;
+import odis.mapred.MapReduceJobDef;
+import odis.mapred.lib.ReuseWalker;
+import odis.serialize.comparator.IntBinaryComparator;
+import odis.serialize.lib.IntWritable;
+import odis.tools.MapReduceHelper;
+import odis.serialize.lib.Url;
+import odis.serialize.toolkit.AbstractMultiDynamicWritable;
+import odis.serialize.toolkit.AbstractOpObject;
+import odis.serialize.toolkit.AbstractWritableList;
+import odis.serialize.toolkit.AbstractDynamicWritable.AbstractClassIdMap;
+import odis.serialize.toolkit.AbstractDynamicWritable.IClassIdMap;
+import odis.tools.AbstractCoWorkTool;
+import odis.tools.MapReduceHelper;
+import odis.tools.ToolContext;
+import outfox.MAXENT_train.Data;
+
+public class trainMAXENT extends AbstractCoWorkToolWithArg {
+ static final int shift=8;
+ static final int mask=(1<<shift)-1;
+ private static long total_sample=0;
+ private static int max_sample_per_feature=0;
+ private double equation_precision=1e-9;
+ private String inDir;
+ private String outDir;
+ private String tmpDir;
+ private String f2sDir;
+ private String s2fDir;
+ private int itr_times;
+ private double delta_upper;
+ static final private long error_factor=1000000000;
+ private double error_upper;
+ private double sigma_upper,sigma_denom,sigma_adder;
+
+ private int reducer_num;
    /**
     * Register the command-line options this tool accepts.
     * NOTE(review): the help text for "itr" says "reducer number" — looks like
     * a copy-paste from the "rn" option; it actually holds the iteration count
     * (see processOptions).  Confirm before relying on the help output.
     */
    protected void prepareOptions(Options options) {
        options.withOption("i", "input dir",
                "request-log [TimeWritable, UTF8Writbale]");
        // options.withOption("o", "output dir",
        // "queries and their frequencies [StringWritable, IntWritable]")
        // .hasDefault();
        options.withOption("rn", "reducer number","reducer number");
        options.withOption("f2s", "f2s output dir","");
        options.withOption("s2f", "s2f output dir","");
        options.withOption("start", "start phrase","");
        options.withOption("itr", "reducer number","");
        //options.withOption("error", "Equation error upper","");
    }
+ int start;
    /**
     * Parse the registered options into fields.
     * Rejects a start phase greater than 2 and requires the three directories
     * (input, s2f, f2s) to be pairwise distinct, since the iteration
     * alternately overwrites s2f and f2s.
     *
     * @return false (abort the tool) when directories collide.
     */
    public boolean processOptions(Options options) throws Exception {
        itr_times=Integer.parseInt(options.getStringOpt("itr","default"));
        //error_upper=Double.parseDouble(options.getStringOpt("error","default"));

        inDir = options.getStringOpt("i","default");
        s2fDir = options.getStringOpt("s2f", "default");
        f2sDir = options.getStringOpt("f2s", "default");
        reducer_num=Integer.parseInt(options.getStringOpt("rn","default"));
        start=Integer.parseInt(options.getStringOpt("start","default"));
        // Only phases 0, 1, 2 exist; anything larger is a usage error.
        if(start>2)Data.throw_exception();
        if(inDir.compareTo(s2fDir)==0||inDir.compareTo(f2sDir)==0||f2sDir.compareTo(s2fDir)==0)
        {
            System.out.println("The same inDir and outDir");
            return false;
        }
        return true;
    }
+
+ @Override
+ public String comment() {
+ return "MAXENT traning";
+ }
+ private Counter itr_counter;
    /**
     * Drive the whole MAXENT training pipeline.
     *
     * Phase 0 (start<=0): read raw samples and build the initial s2f
     * (sample-to-feature) dataset plus per-feature expectations.
     * Then, for each of itr_times iterations:
     *   Phase 1 (start<=1): from s2f, recompute each sample's positive
     *   probability and emit the f2s (feature-to-sample) dataset.
     *   Phase 2 (start<=2): from f2s, update the per-feature lambda
     *   parameters (IIS/Newton step) and emit a fresh s2f dataset.
     * Iteration stops early when the average equation error or average
     * parameter delta falls below the configured thresholds.
     *
     * @param nWorker number of cluster workers available to each job
     * @return true when all jobs succeed (or the stop condition is reached)
     */
    @Override
    public boolean exec(int nWorker) throws Exception {

        Path input = new Path(inDir);
        Path s2f_output = new Path(s2fDir);
        Path f2s_output = new Path(f2sDir);
        assert(reducer_num>0);
        // All numeric hyper-parameters come from the shared config file.
        String conf_file = ToolContext.getAppHome() + "/conf/NLP_classifier.conf";
        HashMap<String,String> config=Data.readConfig(conf_file);
        max_sample_per_feature=Integer.parseInt((String)config.get("max_sample_per_feature"));
        total_sample=Long.parseLong((String)config.get("total_sample"));
        equation_precision=Double.parseDouble((String)config.get("equation_precision"));
        error_upper=Double.parseDouble((String)config.get("error_upper"));
        delta_upper=Double.parseDouble((String)config.get("delta_upper"));
        sigma_upper=Double.parseDouble((String)config.get("sigma_upper"));
        sigma_denom=Double.parseDouble((String)config.get("sigma_denom"));
        sigma_adder=Double.parseDouble((String)config.get("sigma_adder"));
        System.out.println("reducer_num="+reducer_num);
        System.out.println("max_sample_per_fea="+max_sample_per_feature);
        System.out.println("total_sample="+total_sample);
        System.out.println("equation_precision="+equation_precision);
        System.out.println("start="+start);
        System.out.println("itr_times="+itr_times);
        System.out.println("error upper="+error_upper);
        System.out.println("delta upper="+delta_upper);
        // ---- Phase 0: initial map-reduce over the raw input ----
        if(start<=0)
        {
            long start_time=new Date().getTime()/1000;
            System.out.println("This is phrase #0");
            FileSystem fs = context.getFileSystem();
            MapReduceJobDef job = context.createMapReduceJob(this.getToolName(),nWorker);
            MapReduceHelper helper = new MapReduceHelper(context, job);
            helper.addReadInputDir(input,SkipBadRecordSeqFileInputFormat.class);
            job.setCheckReduceProgress(false);
            job.setMapper(Mapper.class);
            job.setMapNumber(nWorker * mParallel);
            job.setMergeKeyValClass(Data.feature_t.class, Data.sample_t.class);
            job.setPartitionerClass(Data.feature_partitioner.class);
            job.setReducer(Reducer.class);
            job.setWalkerClass(ReuseWalker.class);
            job.setReduceNumber(reducer_num);
            helper.addDirectOutputDir(0, s2f_output, Data.sample_t.class,Data.feature_t.class, null);
            job.getConfig().setLong("total_sample",total_sample);
            job.getConfig().setLong("max_sample_per_feature",max_sample_per_feature);
            job.getConfig().setDouble("equation_precision",equation_precision);
            JobResult result = helper.runJob(context.getCoWork());
            if (!result.isSuccess()) return false;
            if (result.isSuccess()) {
                helper.printMessages(out, result.getMsg());
                helper.printCounters(out, result.getCounters());
            }
            System.out.println("Phrase #0 took "+(new Date().getTime()/1000-start_time)+" seconds");
        }
        int max_sample_per_fea=max_sample_per_feature;
        System.out.println("reducer_num="+reducer_num);
        // ---- Iterative phases 1 and 2 ----
        for(int i=0;i<itr_times;i++)
        {
            F2S_Reducer.itr_num=i+1;
            System.out.println("This is iteration #"+i);
            // Phase 1: s2f -> f2s, recomputing sample probabilities.
            if(start<=1)
            {
                long start_time=new Date().getTime()/1000;
                System.out.println("This is phrase #1");
                System.out.println("Start calculating sample probability from s2f file and will generate f2s file");
                System.out.println("max_sample_per_fea="+max_sample_per_feature);
                FileSystem fs = context.getFileSystem();
                MapReduceJobDef job = context.createMapReduceJob(this.getToolName(),nWorker);
                job.getConfig().setLong("total_sample",total_sample);
                job.getConfig().setLong("max_sample_per_feature",max_sample_per_fea);
                job.getConfig().setDouble("equation_precision",equation_precision);
                MapReduceHelper helper = new MapReduceHelper(context, job);
                helper.addReadInputDir(s2f_output,SkipBadRecordSeqFileInputFormat.class);
                job.setCheckReduceProgress(false);
                job.setMapper(IdentityMapper.class);
                job.setMapNumber(nWorker * mParallel);
                job.setMergeKeyValClass(Data.sample_t.class, Data.feature_t.class);
                job.setPartitionerClass(Data.sample_partitioner.class);
                job.setReducer(S2F_Reducer.class);
                job.setWalkerClass(ReuseWalker.class);
                job.setReduceNumber(reducer_num);
                helper.addDirectOutputDir(0, f2s_output, Data.feature_t.class,Data.sample_t.class, null);
                JobResult result = helper.runJob(context.getCoWork());
                if (!result.isSuccess()) return false;
                if (result.isSuccess()) {
                    helper.printMessages(out, result.getMsg());
                    helper.printCounters(out, result.getCounters());
                }
                System.out.println("Finished calculating sample probability from s2f file.");
                System.out.println("Phrase #1 took "+(new Date().getTime()/1000-start_time)+" seconds");
            }
            boolean end=false;
            // Phase 2: f2s -> s2f, updating feature parameters; collect
            // convergence statistics from the job counters.
            if(start<=2)
            {
                long start_time=new Date().getTime()/1000;
                System.out.println("This is phrase #2");
                System.out.println("Start updating parameters from f2s file and will generate s2f file.");
                System.out.println("max_sample_per_fea="+max_sample_per_fea);
                FileSystem fs = context.getFileSystem();
                MapReduceJobDef job = context.createMapReduceJob(this.getToolName(),nWorker);
                job.getConfig().setLong("total_sample",total_sample);
                job.getConfig().setLong("max_sample_per_feature",max_sample_per_fea);
                job.getConfig().setDouble("equation_precision",equation_precision);
                job.getConfig().setDouble("sigma_upper",sigma_upper);
                job.getConfig().setDouble("sigma_denom",sigma_denom);
                job.getConfig().setDouble("sigma_adder",sigma_adder);
                MapReduceHelper helper = new MapReduceHelper(context, job);
                helper.addReadInputDir(f2s_output,SkipBadRecordSeqFileInputFormat.class);
                job.setCheckReduceProgress(false);
                job.setMapper(IdentityMapper.class);
                job.setMapNumber(nWorker * mParallel);
                job.setMergeKeyValClass(Data.feature_t.class, Data.sample_t.class);
                job.setPartitionerClass(Data.feature_partitioner.class);
                job.setReducer(F2S_Reducer.class);
                job.setWalkerClass(ReuseWalker.class);
                job.setReduceNumber(reducer_num);
                helper.addDirectOutputDir(0, s2f_output, Data.sample_t.class,Data.feature_t.class, null);
                JobResult result = helper.runJob(context.getCoWork());
                if (!result.isSuccess()) return false;
                if (result.isSuccess()) {
                    helper.printMessages(out, result.getMsg());
                    helper.printCounters(out, result.getCounters());
                }
                // Counters are integer-scaled floats: delta_sum by 1000,
                // equation errors by error_factor.
                long delta_sum= result.getCounters()[1].get("delta_sum").get();
                long Newton_times= result.getCounters()[1].get("Newton_times").get();
                System.out.println("Newton_times="+Newton_times+" at Iteration#"+(i+1));
                long feature_counter= result.getCounters()[1].get("feature_counter").get();
                System.out.println("feature_counter="+feature_counter+" at Iteration#"+(i+1));
                double max_equation_error=0.0;
                for(int idx=0;idx<reducer_num;idx++)
                {
                    long error=result.getCounters()[1].get("equation_error_"+idx).get();
                    max_equation_error=Math.max(1.0*error/error_factor,max_equation_error);
                }
                System.out.println("max equation error="+max_equation_error);
                // Each feature contributes two equations (pos and neg), hence /2.
                double average_delta=1.0*delta_sum/1000/feature_counter/2;
                System.out.println("Average delta="+average_delta);
                long error_sum=result.getCounters()[1].get("equation_error_sum").get();
                double average_error=1.0*error_sum/error_factor/feature_counter/2;
                System.out.println("Average error="+average_error);
                end=average_error<error_upper||average_delta<delta_upper;
                System.out.println("Phrase #2 took "+(new Date().getTime()/1000-start_time)+" seconds");
            }
            System.out.println("Finished updating parameters from f2s file.");
            System.out.println("Finished iteration #"+(i+1));
            // After the first pass, always run all phases of the loop body.
            start=-1;
            if(end)
            {
                System.out.println("Succeeded to reach ending condition with delta_upper="+delta_upper);
                break;
            }
        }
        return true;
    }
+
    /**
     * Phase-1 reducer: input is keyed by sample with one feature_t per active
     * feature.  Sums the feature lambdas to recompute the sample's positive
     * probability (logistic of the score difference), then re-emits the pairs
     * keyed by feature so phase 2 can group by feature.
     */
    public static class S2F_Reducer extends AbstractReducer<Data.sample_t, Data.feature_t> {
        Data.sample_t sam;
        Data.feature_t fea;
        Data.feature_list flist=new Data.feature_list();
        Counter sample_counter,line_counter;
        public void configure(JobDef job, TaskRunnable task) {
            sam=new Data.sample_t();
            fea=new Data.feature_t();
            sample_counter=task.getCounter("sample_counter");
            line_counter=task.getCounter("line_counter");
        }
        public void reduce(Data.sample_t sample,IWritablePairWalker<Data.sample_t, Data.feature_t> values,
                ICollector collector)
        {
            flist.clear();
            double pos=0,neg=0;
            sample_counter.inc(sample.portion.get());
            line_counter.inc();
            // Accumulate the positive and negative lambda scores over all
            // features hitting this sample.
            while(values.moreValue())
            {
                Data.feature_t f=values.getValue();
                pos+=f.lam_pos.get();
                neg+=f.lam_neg.get();
                flist.add(new Data.feature_t(f));
            }
            // Logistic model: P(positive) = 1 / (1 + e^(neg - pos)).
            sample.positive_prob.set(1.0/(1.0+Math.exp(neg-pos)));
            // A value that is neither >0 nor <1 can only be NaN — abort loudly.
            if((!(sample.positive_prob.get()>0.0)) && (!(sample.positive_prob.get()<1.0)))
            {
                System.out.println(neg+" "+pos);
                Data.throw_exception();
            }
            // Re-emit keyed by feature for the phase-2 parameter update.
            for(int i=0;i<flist.size();i++)
                collector.collect(flist.get(i),sample);
        }
        public void reduceEnd(ICollector collector) {}
    }
+ public static class F2S_Reducer extends AbstractReducer<Data.feature_t, Data.sample_t>
+ {
+ static public int itr_num;
+ static boolean update_para;
+ Counter delta_sum,Newton_times,reducer_num_counter;
+ Data.feature_t feature;
+ Data.sample_t sample;
+ double positive_prob[];
+ int hit_portion[];
+ int sid[];
+ private Counter itr_counter,binary_counter;
+ Counter feature_counter;
+ int max_sample_per_fea;
+ long total_sample;
+ double equation_precision;
+ private Counter equation_error_counter;
+ private Counter equation_error_sum_counter;
+ double max_equation_error;
+ double sigma_upper,sigma_denom,sigma_adder;
+ public void configure(JobDef job, TaskRunnable task)
+ {
+ max_sample_per_fea=(int)job.getConfig().getLong("max_sample_per_feature");
+ equation_precision=job.getConfig().getDouble("equation_precision");
+ sigma_upper=job.getConfig().getDouble("sigma_upper");
+ sigma_denom=job.getConfig().getDouble("sigma_denom");
+ sigma_adder=job.getConfig().getDouble("sigma_adder");
+ total_sample=job.getConfig().getLong("total_sample");
+ delta_sum=task.getCounter("delta_sum");
+ Newton_times=task.getCounter("Newton_times");
+ reducer_num_counter=task.getCounter("reducer_num_counter");
+ feature_counter=task.getCounter("feature_counter");
+ binary_counter=task.getCounter("binary_counter");
+ feature=new Data.feature_t();
+ sample=new Data.sample_t();
+ positive_prob=new double[max_sample_per_fea];
+ hit_portion=new int[max_sample_per_fea];
+ sid=new int[max_sample_per_fea];
+ reducer_num_counter.inc();
+ long partIdx=task.getPartIdx();
+ equation_error_counter=task.getCounter("equation_error_"+partIdx);
+ equation_error_sum_counter=task.getCounter("equation_error_sum");
+ max_equation_error=0.0;
+ System.out.println("max_sample_per_fea="+max_sample_per_fea);
+ System.out.println("equation_precision="+equation_precision);
+ System.out.println("sigma_upper="+sigma_upper);
+ System.out.println("sigma_denom="+sigma_denom);
+ System.out.println("sigma_adder="+sigma_adder);
+ }
+ private double solve_equation(double aa[],int bb[],double target,int n,double extra_linear_cof)
+ {
+ if(n<=0)Data.throw_exception();
+ //直接以初值0作Newdon迭代
+ double x=0.0,y;
+ int times=0;
+ for(;;)
+ {
+ double dy=0.0;
+ y=0.0;
+ for(int i=0;i<n;i++)
+ {
+ double add=aa[i]*Math.exp(x*(bb[i]&mask));
+ if(Data.isNaN(add))
+ {
+ System.out.println(x+" "+(bb[i]&mask));
+ Data.checkNaN(add);
+ }
+ y+=add;
+ dy+=(bb[i]&mask)*add;
+ //System.out.println("aa[i]="+aa[i]+" bb[i]="+(bb[i]&mask)+" x="+x+" add="+add+" dy_add="+((bb[i]&mask)*add));
+ }
+ y+=extra_linear_cof*x-target;
+ dy+=extra_linear_cof;
+ x-=y/dy;
+ Newton_times.inc();
+ times++;
+ if(Math.abs(y)/target<equation_precision)break;
+ //System.out.println("x="+x+" y="+y+" dy="+dy);
+ }
+// LOG.log(Level.WARNING, "Iteration #"+itr_num+" times="+times+" y="+y+" target="+target);
+ System.out.println("target="+target+" x="+x+" y="+y+" precision="+Math.abs(y)/target);
+ if((!(x<1.0))&&(!(x>0.0)))
+ {
+ System.out.println("NaN x="+x+" times="+times);
+ System.out.println("target="+target+" x="+x+" y="+y);
+ for(int i=0;i<n;i++)
+ {
+ System.out.print(aa[i]+","+(bb[i]&mask)+" ");
+ }
+ System.out.println("");
+ Data.throw_exception();
+ }
+ return x;
+ }
+ public void reduce(Data.feature_t feature,IWritablePairWalker<Data.feature_t, Data.sample_t> values,
+ ICollector collector)
+ {
+ int n=0;
+ feature_counter.inc();
+ while(values.moreValue())
+ {
+ Data.sample_t sam=values.getValue();
+ if(n>=hit_portion.length)Data.throw_exception();
+ positive_prob[n]=(double)sam.positive_prob.get();
+ sid[n]=sam.sid.get();
+ if(sam.portion.get()>(1<<(32-shift))||sam.hit.get()>mask)Data.throw_exception();
+ hit_portion[n++]=(sam.portion.get()<<shift)^sam.hit.get();
+ }
+ final double sigma=sigma_upper/(1+Math.exp(-Math.sqrt(feature.coverage.get())/sigma_denom+sigma_adder));
+ System.out.println("feature #"+feature.fid.toString()+" sigma="+sigma);
+ double pos_error=0.0,neg_error=0.0;
+ for(int i=0;i<n;i++)
+ {
+ positive_prob[i]=(double)(1-positive_prob[i])*(hit_portion[i]>>shift)/total_sample;
+ neg_error+=positive_prob[i];
+ }
+ //加上正规化项
+ double dneg=solve_equation(positive_prob,hit_portion,
+ feature.exp_neg.get()-feature.lam_neg.get()/sigma/sigma,n,1.0/sigma/sigma);
+ for(int i=0;i<n;i++)
+ {
+ positive_prob[i]=(double)(1.0*(hit_portion[i]>>shift)/total_sample-positive_prob[i]);
+ pos_error+=positive_prob[i];
+ }
+ //加上正规化项
+ double dpos=solve_equation(positive_prob,hit_portion,
+ feature.exp_pos.get()-feature.lam_pos.get()/sigma/sigma,n,1.0/sigma/sigma);
+ System.out.println("feature #"+feature.fid.toString()+": "+dpos+","+dneg);
+ if(Math.abs(dneg)>1e20||Math.abs(dpos)>1e20)
+ {
+ System.out.println(dpos+","+dneg);
+ Data.throw_exception();
+ }
+ long add=(long)((Math.abs(dpos)+Math.abs(dneg))*1000);
+ delta_sum.inc(add);
+ feature.lam_pos.set(feature.lam_pos.get()+dpos);
+ feature.lam_neg.set(feature.lam_neg.get()+dneg);
+ for(int i=0;i<n;i++)
+ {
+ sample.sid.set(sid[i]);
+ sample.portion.set(hit_portion[i]>>shift);
+ sample.positive_prob.set(positive_prob[i]*total_sample/(hit_portion[i]>>shift));
+ sample.hit.set(hit_portion[i]&mask);
+ collector.collect(sample,feature);
+ }
+ pos_error=Math.abs(pos_error-feature.exp_pos.get()+feature.lam_pos.get()/sigma/sigma)
+ /feature.exp_pos.get();
+ neg_error=Math.abs(neg_error-feature.exp_neg.get()+feature.lam_pos.get()/sigma/sigma)
+ /feature.exp_neg.get();
+ max_equation_error=Math.max(pos_error,max_equation_error);
+ max_equation_error=Math.max(neg_error,max_equation_error);
+ equation_error_sum_counter.inc((long)((pos_error+neg_error)*error_factor));
+ }
+ public void reduceEnd(ICollector collector)
+ {
+ equation_error_counter.inc((long)(error_factor*max_equation_error));
+ }
+ }
+
+
+
    /**
     * Phase-0 mapper: the input is (sample, feature); swap the pair so the
     * shuffle groups records by feature for the initial Reducer.
     */
    public static class Mapper extends
            AbstractMapper<Data.sample_t, Data.feature_t> {

        public void configure(JobDef job, TaskRunnable task) {
        }

        public void map(Data.sample_t key, Data.feature_t feature,ICollector collector)
        {
            collector.collect(feature,key);
        }
    }
+
    /**
     * Phase-0 reducer: for each feature, compute its empirical positive and
     * negative expectations (normalized by total_sample), initialize both
     * lambdas to zero, and emit (sample, feature) pairs with every sample's
     * probability reset to 0.5 to seed the first phase-1 pass.
     */
    public static class Reducer extends AbstractReducer<Data.feature_t, Data.sample_t> {

        private TaskRunnable p_task;
        int sid[];
        int hit_portion[];
        double positive_prob[];
        Counter feature_counter,sample_counter,line_counter;
        int max_sample_per_feature,max_sample_per_fea;
        long total_sample;
        public void configure(JobDef job, TaskRunnable task) {
            max_sample_per_fea=max_sample_per_feature=
                    (int)(job.getConfig().getLong("max_sample_per_feature"));
            total_sample=(job.getConfig().getLong("total_sample"));
            if(total_sample<=0)Data.throw_exception();
            feature_counter=task.getCounter("feature_counter");
            sample_counter=task.getCounter("sample_counter");
            line_counter=task.getCounter("line_counter");
            sid=new int[max_sample_per_feature];
            hit_portion=new int[max_sample_per_feature];
            positive_prob=new double[max_sample_per_feature];
            System.out.println("max_sample_per_fea="+max_sample_per_fea);
            System.out.println("total_sample="+total_sample);
        }
        public void reduce(Data.feature_t feature,IWritablePairWalker<Data.feature_t, Data.sample_t> samples,
                ICollector collector)
        {
            double pos=0.0,neg=0.0;
            int top=0;
            feature_counter.inc();
            while(samples.moreValue())
            {
                Data.sample_t sam=samples.getValue();
                if(top>=max_sample_per_fea)Data.throw_exception();
                // portion must fit above the shift and hit below it, since
                // both are packed into one int.
                if(sam.portion.get()>=((1<<(32-shift)))||sam.hit.get()>mask)Data.throw_exception();
                sid[top]=(int)sam.sid.get();
                hit_portion[top]=((int)sam.hit.get())^((int)sam.portion.get()<<shift);
                top++;
                // Here positive_prob is not a probability but the smoothed
                // count of positive examples for this sample.
                pos+=sam.positive_prob.get();
                neg+=sam.portion.get()-sam.positive_prob.get();
                if(sam.positive_prob.get()<0.0||sam.portion.get()+1e-10<sam.positive_prob.get())
                {
                    System.out.println(sam.toString());
                    Data.throw_exception();
                }
            }
            pos/=total_sample;
            neg/=total_sample;
            feature.exp_pos.set(pos);
            feature.exp_neg.set(neg);
            Data.checkNaN(pos);
            Data.checkNaN(neg);
            // A feature with (near-)zero expectation would break the later
            // relative-error computations.
            if(pos<1e-30||neg<1e-30)
            {
                System.out.println("pos="+pos+" neg="+neg);
                Data.throw_exception();
            }
            feature.lam_pos.set(0.0);
            feature.lam_neg.set(0.0);
            for(int i=0;i<top;i++)
            {
                Data.sample_t sample=new Data.sample_t();
                sample.sid.set(sid[i]);
                sample.hit.set(hit_portion[i]&mask);
                sample.positive_prob.set(0.5);
                sample.portion.set(hit_portion[i]>>shift);
                collector.collect(sample,feature);
            }
        }
        public void reduceEnd(ICollector collector) {}
    }
    // Smoke-test entry point only: prints Math.exp(-100000) (expected 0.0).
    // The real tool is launched through the AbstractCoWorkToolWithArg framework.
    public static void main(String args[])
    {
        System.out.println(Math.exp(-100000));
    }
+}
View
434 Misc/Huffman.cpp
@@ -0,0 +1,434 @@
+/***************************************************************************
+ *
+ * Copyright (c) 2009 Baidu.com, Inc. All Rights Reserved
+ * $Id: Huffman.cpp,v 1.1.2.1 2009/09/10 06:29:33 zhuhongcheng Exp $
+ *
+ **************************************************************************/
+
+
+
+/**
+ * @file Huffman.cpp
+ * @author Zhu Hongcheng
+ * @date 2009/08/31 10:31
+ * @version $Revision: 1.1.2.1 $
+ **/
+
+#include <algorithm>
+#include <cstdio>
+#include <cstring>
+#include <cstdlib>
+#include <stdint.h>
+#include <new>
+//#include <iostream>
+
+
+#include <cstdio>
+#include <cstring>
+#include <cstdlib>
+#include <stdint.h>
+#include <new>
+//#define NDEBUG
+#include <assert.h>
+
+
+
+
namespace Huffman
{
// Lightweight logging shims: route everything to stderr via fprintf.
#define com_writelog fprintf
#define COMLOG_FATAL stderr
#define COMLOG_WARNING stderr
#define COMLOG_DEBUG stderr
// TWO(x) == 2^x, MASK(x) == 2^x - 1 (low x bits set).
#define TWO(x) (((uint32_t)1)<<(x))

#define MASK(x) ((TWO(x))-1)

typedef uint32_t uint;

typedef unsigned long long int uint64;

const int MAX_HUFFMAN_SIZE=300000;
// Here normalized Huffman encoding is used
// (canonical Huffman: codes are assigned in order of code length, so the
// decode table only needs first_code/offset per length).
class Huffman_encode_table_t
{
    int capacity;        // allocated entries in the arrays below
public:
    int n;               // number of symbols
    uint *code;          // code[i]  = canonical code of symbol i
    uint *length;        // length[i] = code length (in bits) of symbol i
    int *map_table;      // symbol id -> caller-supplied external id
    int resize(int);
    Huffman_encode_table_t():capacity(0),n(0),code(NULL),length(NULL),map_table(NULL){};
    int create(const uint frequency[],int n,const int map_t[]=NULL);
    int serialization(FILE* out_file)const;
    int deserialization(FILE* in_file);
    int get_code(int idx,uint& out_code,uint& out_len)const;
    ~Huffman_encode_table_t();
    friend class Huffman_decode_table_t;
};

class Huffman_decode_table_t
{
    int capacity;        // allocated entries in the arrays below
public:
    int n;               // number of symbols
    uint *offset;        // offset[len]: index of first symbol with this code length
    int *map_table;      // decode slot -> caller-supplied external id
    uint *first_code;    // first_code[len]: smallest canonical code of length len
    uint min_code_length;// shortest code length in the table
    int resize(int);
    int serialization(FILE* out_file)const;
    int deserialization(FILE* in_file);
    Huffman_decode_table_t():capacity(0),n(0),offset(NULL),map_table(NULL),first_code(NULL),min_code_length(0){};
    int create(const Huffman_encode_table_t& encode_table);
    int Huffman_decode_nitem(const uint32_t[],const uint64,uint64&,int [],int )const;
    ~Huffman_decode_table_t();
};
};
+
+
+namespace Huffman
+{
+
// delete[] a pointer only when non-NULL (delete[] NULL is legal, but this
// keeps the intent explicit at every call site).
#define Delete_array(x) if(NULL!=(x))delete [] (x);
// Release the three parallel arrays owned by the encode table.
Huffman_encode_table_t::~Huffman_encode_table_t()
{
    Delete_array(code);
    Delete_array(length);
    Delete_array(map_table);
}
+int Huffman_encode_table_t::resize(int size)
+{
+ if(capacity>=size) return 0;
+ Delete_array(code);
+ Delete_array(length);
+ Delete_array(map_table);
+ try
+ {
+ capacity=2*size;
+ n=size;
+ code=new uint[capacity];
+ length=new uint[capacity];
+ map_table=new int[capacity];
+ }
+ catch(std::bad_alloc)
+ {
+ Delete_array(code);
+ Delete_array(length);
+ Delete_array(map_table);
+ Huffman_encode_table_t();
+ com_writelog(COMLOG_FATAL,"Alolcating memory for Huffman_encode_table_t failed");
+ return -1;
+ }
+ return 0;
+}
/*
 * Look up the canonical code and bit length of symbol `idx`.
 * Returns 0 and fills out_code/out_len; returns -1 when idx is out of [0, n).
 */
int Huffman_encode_table_t::get_code(int idx,uint& out_code,uint& out_len)const
{
    if(idx<0||idx>=n) {
        com_writelog(COMLOG_FATAL,"[idx:%d] does not in the range [%d,%d] in Huffman_encode_table_t::get_code",idx,0,n-1);
        return -1;
    }
    out_code=code[idx];
    out_len=length[idx];
    return 0;
}
// Release the three parallel arrays owned by the decode table.
Huffman_decode_table_t::~Huffman_decode_table_t()
{
    Delete_array(offset);
    Delete_array(first_code);
    Delete_array(map_table);
}
+int Huffman_decode_table_t::resize(int size)
+{
+ if(capacity>=size) return 0;
+ Delete_array(offset);
+ Delete_array(map_table);
+ Delete_array(first_code);
+ try
+ {
+ capacity=2*size;
+ n=size;
+ offset=new uint[capacity];
+ first_code=new uint[capacity];
+ map_table=new int[capacity];
+ }
+ catch(std::bad_alloc)
+ {
+ Delete_array(offset);
+ Delete_array(first_code);
+ Delete_array(map_table);
+ Huffman_decode_table_t();
+ com_writelog(COMLOG_FATAL,"Alolcating memory for Huffman_decode_table_t failed");
+ return -1;
+ }
+ return 0;
+}
+
+
+
/*
 * Stable counting sort of `item[0..n)` by the key `value[item[i]]`.
 * count_array must hold count_size entries and every key must be < count_size;
 * buffer is scratch space of at least n entries.  Runs in O(n + count_size).
 */
void counting_sort(uint count_array[],uint32_t count_size,uint item[],uint32_t value[],int n,uint buffer[])
{
    // Histogram of the keys.
    for(uint32_t k=0;k<count_size;k++) {
        count_array[k]=0;
    }
    for(int idx=0;idx<n;idx++) {
        assert(value[item[idx]]<count_size);
        ++count_array[value[item[idx]]];
    }
    // Prefix sums: count_array[k] becomes the end position of key k.
    for(uint32_t k=1;k<count_size;k++) {
        count_array[k]+=count_array[k-1];
    }
    // Place items back-to-front so equal keys keep their relative order.
    for(int idx=n-1;idx>=0;idx--) {
        uint32_t key=value[item[idx]];
        buffer[--count_array[key]]=item[idx];
    }
    // Copy the sorted order back into item.
    for(int idx=0;idx<n;idx++) {
        item[idx]=buffer[idx];
    }
}
+
+
+
/*
 * Build a canonical Huffman code for `fn` symbols with the given frequencies.
 * map_t (optional) supplies external ids stored in map_table.
 * Steps: (1) radix-sort symbols by frequency (two 16-bit counting-sort
 * passes), (2) build the Huffman tree with the classic two-queue linear-time
 * merge, (3) compute code lengths (depths), (4) assign canonical codes in
 * order of length.  Returns 0 on success, -1 on bad input or allocation
 * failure.
 */
int Huffman_encode_table_t::create(const uint frequency[],int fn,const int map_t[])
{
    if(fn<=0) {
        com_writelog(COMLOG_FATAL,"Illegal Huffman size [n:%d]",fn);
        return -1;
    }
    if(0>resize(fn)) {
        com_writelog(COMLOG_FATAL,"resize failed for Huffman_encode_table_t");
        return -1;
    }
    if(NULL!=map_t) {
        memcpy(map_table,map_t,sizeof(int)*n);
    }
    // Degenerate single-symbol alphabet: empty code.
    if(n==1)
    {
        length[0]=code[0]=0;
        return 0;
    }
    const int radix_length=16;
    uint *buf1=NULL,*buf2=NULL,*buf3=NULL,*buf4=NULL,*count_array=NULL;
    try
    {
        count_array=new uint[std::max(TWO(16),(uint)n)];
        // 2*n-1 slots: n leaves plus n-1 internal tree nodes.
        buf1=new uint[2*n-1];
        buf2=new uint[2*n-1];
        buf3=new uint[2*n-1];
        buf4=new uint[2*n-1];
    }
    catch(std::bad_alloc)
    {
        Delete_array(count_array);
        Delete_array(buf1);
        Delete_array(buf2);
        Delete_array(buf3);
        Delete_array(buf4);
        com_writelog(COMLOG_FATAL,"Allocating buffers for Huffman_encode_table_t::create failed");
        return -1;
    }
    uint* item=buf1;
    uint* value=buf2;

    // Radix sort by frequency: low 16 bits, then high 16 bits (stable).
    for(int i=0;i<n;i++) {
        item[i]=i;
        value[i]=frequency[i]&MASK(radix_length);
    }

    counting_sort(count_array,TWO(radix_length),item,value,n,buf3);

    for(int i=0;i<n;i++) {
        value[i]=frequency[i]>>radix_length;
    }

    counting_sort(count_array,TWO(radix_length),item,value,n,buf3);


    uint* const zero_branch=buf3;
    uint* const one_branch=buf4;

#ifndef NDEBUG
    for(int i=1;i<n;i++)
        assert(frequency[item[i]]>=frequency[item[i-1]]);
#endif
    // Leaves occupy node ids [n-1, 2n-2]; internal nodes fill [0, n-2]
    // top-down as they are created, so node 0 ends up as the root.
    for(int i=0;i<n;i++) {
        value[n-1+i]=frequency[i];
        item[i]+=n-1;
    }

    // Two-queue Huffman merge: item[head..n) are sorted leaves,
    // item[head1..tail1) are already-merged internal nodes (also sorted
    // because merge weights are produced in nondecreasing order).
    int top=n-2;
    int head1=0,tail1=0,head=0;
    while(tail1-head1+n-head>1) {
        int a,b;
        if(head1==tail1)a=item[head++];
        else if(head==n)a=item[head1++];
        else if(value[item[head]]<=value[item[head1]])a=item[head++];
        else a=item[head1++];

        if(head1==tail1)b=item[head++];
        else if(head==n)b=item[head1++];
        else if(value[item[head]]<=value[item[head1]])b=item[head++];
        else b=item[head1++];

        assert(top>=0);
        value[top]=value[a]+value[b];
        zero_branch[top]=a;
        one_branch[top]=b;
        assert(tail1<head);
        item[tail1++]=top;

        top--;
    }
    assert(top==-1);


    // Depth of each node = its code length; children are one deeper than
    // their parent.  Processing 0..n-2 in order works because parents have
    // smaller ids than their children.
    uint32_t *depth=buf2;

    depth[0]=0;
    for(int i=0;i<n-1;i++) {
        depth[zero_branch[i]]=depth[one_branch[i]]=depth[i]+1;
    }
    // sort by code length
    for(int i=0;i<n;i++) {
        item[i]=i;
    }
    counting_sort(count_array,n,item,depth+n-1,n,buf3);

    // Canonical code assignment, longest codes first: the deepest symbol gets
    // code 0; moving to shorter lengths increments and right-shifts.
    code[item[n-1]]=0;
    length[item[n-1]]=depth[item[n-1]+n-1];
//  std::cerr<<item[n-1]<<":"<<map_table[item[n-1]]<<" "<<code[item[n-1]]<<" "<<length[item[n-1]]<<std::endl;

    for(int i=n-2;i>=0;i--) {
        length[item[i]]=depth[item[i]+n-1];
        if(length[item[i]]==length[item[i+1]]) {
            code[item[i]]=code[item[i+1]]+1;
        }
        else {
            assert(length[item[i]]<length[item[i+1]]);
            code[item[i]]=(code[item[i+1]]+1)>>(length[item[i+1]]-length[item[i]]);
        }
        assert(code[item[i]]<n);
        assert(length[item[i]]<n);
        assert(length[item[i]]>=32||code[item[i]]<TWO(length[item[i]]));
//      std::cerr<<item[i]<<":"<<map_table[item[i]]<<" "<<code[item[i]]<<" "<<length[item[i]]<<std::endl;
    }
    delete [] count_array;
    delete [] buf1;
    delete [] buf2;
    delete [] buf3;
    delete [] buf4;
    com_writelog(COMLOG_DEBUG,"Huffman_encode_table_t::create succeeded:)");
    return 0;
}
+
+int Huffman_decode_table_t::create(const Huffman_encode_table_t& encode_table)
+{
+ const uint* length=encode_table.length;
+ const uint* code=encode_table.code;
+ if(0>resize(encode_table.n)) {
+ com_writelog(COMLOG_FATAL,"resize for Huffman_decode_table_t failed");
+ return -1;
+ }
+ min_code_length=*std::min_element(length,length+n);
+ //std::cerr<<"min_code_length="<<min_code_length<<std::endl;
+ assert(*std::max_element(length,length+n)<n);
+ uint* const count_array=new uint[n];
+
+ if(NULL==(count_array)) {
+ com_writelog(COMLOG_FATAL,"Allocating buffer for Huffman_decode_table_t::create");
+ return -1;
+ }
+ memset(count_array,0,sizeof(uint)*n);
+
+ for(int i=0;i<n;i++) {
+ count_array[length[i]]++;
+ }
+ offset[n-1]=first_code[n-1]=0;
+ for(int i=n-2;i>=0;i--) {
+ first_code[i]=(first_code[i+1]+count_array[i+1])>>1;
+ offset[i]=count_array[i+1]+offset[i+1];
+ }
+
+ for(int i=0;i<n;i++) {
+ uint len=length[i];
+ map_table[offset[len]+code[i]-first_code[len]]=encode_table.map_table[i];
+ }
+#ifndef NDEBUG
+// puts("map_table");
+// for(int i=0;i<n;i++)printf("%d ",map_table[i]);
+// puts("");
+// for(int i=n-1;i;i--)
+// printf("len=%d first_code=%u offset=%u\n",i,first_code[i],offset[i]);
+#endif
+ delete [] count_array;
+ return 0;
+}
/*
 * Write the decode table to out_file in the fixed order:
 * n, map_table[n], min_code_length, first_code[n], offset[n].
 * Returns 0 on success, -1 on any short write.
 */
int Huffman_decode_table_t::serialization(FILE* out_file)const
{
    if(1!=fwrite(&n,sizeof(n),1,out_file)||
            (uint)n!=fwrite(map_table,sizeof(map_table[0]),n,out_file)||
            1!=fwrite(&min_code_length,sizeof(min_code_length),1,out_file)||
            (uint)n!=fwrite(first_code,sizeof(first_code[0]),n,out_file)||
            (uint)n!=fwrite(offset,sizeof(offset[0]),n,out_file)) {
        com_writelog(COMLOG_FATAL,"Huffman_decode_table_t::serialization writing file failed");
        return -1;
    }
    return 0;
}
+int Huffman_decode_table_t::deserialization(FILE* in_file)
+{
+ if(1!=fread(&n,sizeof(n),1,in_file)) {
+ com_writelog(COMLOG_FATAL,"Huffman_decode_table_t::serialization writing file failed");
+ return -1;
+ }
+ if(0>resize(n)) {
+ com_writelog(COMLOG_FATAL,"resize in Huffman_decode_table_t::deserialization failed");
+ return -1;
+ }
+ if((uint)n!=fread(map_table,sizeof(map_table[0]),n,in_file)||
+ 1!=fread(&min_code_length,sizeof(min_code_length),1,in_file)||
+ (uint)n!=fread(first_code,sizeof(first_code[0]),n,in_file)||
+ (uint)n!=fread(offset,sizeof(offset[0]),n,in_file)) {
+ com_writelog(COMLOG_FATAL,"Huffman_decode_table_t::serialization writing file failed");
+ return -1;
+ }
+ //std::cerr<<"min_code_length="<<min_code_length<<std::endl;
+ return 0;
+
+}
+
+
/*
 * Decode up to maximum_items symbols from bit_stream starting at bit
 * position bit_offset (advanced in place); bit_length is the total number of
 * valid bits.  Bits are read LSB-first within each 32-bit word.  Returns the
 * number of decoded items (always maximum_items on success) or -1 on a
 * corrupt stream.
 *
 * NOTE(review): bits are consumed before the bit_offset>bit_length check, so
 * a truncated stream may be read slightly past its end before the error is
 * detected — confirm callers keep the backing buffer large enough.
 */
int Huffman_decode_table_t::Huffman_decode_nitem(const uint32_t bit_stream[],
        const uint64 bit_length,uint64& bit_offset,
        int decoded_items[],int maximum_items)const
{
    int res=0;
    while(res<maximum_items) {
        // Read the minimum code length unconditionally — no code is shorter.
        uint code=0;
        for(uint i=0;i<min_code_length;i++) {
            code<<=1;
            code^=((bit_stream[bit_offset>>5])>>(bit_offset&0x1f))&1;
            bit_offset++;
        }
        // Canonical decoding: extend the code one bit at a time until it
        // reaches the first-code threshold for its length.
        uint len;
        for(len=min_code_length;code<first_code[len];len++) {
            code<<=1;
            code^=((bit_stream[bit_offset>>5])>>(bit_offset&0x1f))&1;
            bit_offset++;
        }
        if(bit_offset>bit_length) {
            com_writelog(COMLOG_FATAL,"code boundary exceeds [bit_length:%llu] [offset%llu] [code%u]",bit_length,bit_offset,code);
            return -1;
        }
        if(code>=(uint)n) {
            com_writelog(COMLOG_FATAL, "Illegal Huffman code [code:%u] [n:%d]",code,n);
            return -1;
        }
        decoded_items[res++]=map_table[offset[len]+(code-first_code[len])];
    }
    return res;
}
+};
// Placeholder driver: the Huffman tables above are used as a library; no
// standalone test is wired up here.
int main()
{
    return 0;
}
View
72 Misc/LongestCommonSubsequence.cpp
@@ -0,0 +1,72 @@
+/*
+ * An implementation of the algorithm from the paper
+ * "A new flexible algorithm for the longest common subsequence problem".
+ * The time complexity is O(min(pm, p(n-p))), where m and n (m <= n) are the
+ * lengths of the two strings and p is the length of the LCS.
+ */
+#include <stdio.h>
+#include <assert.h>
+#include <memory.h>
+#include <algorithm>
+using namespace std;
+// Flexible LCS implementation (see the file-header comment for the paper).
+namespace LCS
+{
+// N: maximum supported string length; S: default alphabet-size bound.
+#define N 20010
+#define S 100
+// Threshold arrays of the algorithm: rowMatch[k] tracks the least end index
+// in str2 of a common subsequence of length k seen so far; colMatch is the
+// symmetric structure over str1.
+int rowMatch[N+2],colMatch[N+2];
+// nextPos1[i][c]: first index >= i at which symbol c occurs in str1 (n when
+// absent); nextPos2 likewise for str2.  Filled by preprocess().
+int nextPos1[N+2][S],nextPos2[N+2][S];
+// Builds both next-occurrence tables in O((n+m)*s), s = alphabet size.
+void preprocess(int str1[],int n,int str2[],int m,int s=S)
+{
+  // Sentinel rows: past the end, every symbol "occurs" at n (resp. m).
+  for(int i=0;i<s;i++)
+    nextPos1[n+1][i]=nextPos1[n][i]=n,nextPos2[m+1][i]=nextPos2[m][i]=m;
+  // Sweep right-to-left: inherit the row to the right, then override the
+  // entry for the symbol at the current position.
+  for(int i=n-1;i>=0;i--)
+  {
+    memcpy(nextPos1[i],nextPos1[i+1],sizeof(int)*s);
+    nextPos1[i][str1[i]]=i;
+  }
+  for(int i=m-1;i>=0;i--)
+  {
+    memcpy(nextPos2[i],nextPos2[i+1],sizeof(int)*s);
+    nextPos2[i][str2[i]]=i;
+  }
+}
+// Returns the length of the longest common subsequence of str1[0..n) and
+// str2[0..m).  Time O(min(pm, p(n-p))), p = LCS length.
+int longestCommonSubsequence(int str1[],int n,int str2[],int m)
+{
+  // Keep str1 as the shorter string.
+  if(n>m)swap(str1,str2),swap(n,m);
+  // Effective alphabet size: 1 + largest symbol actually used.
+  const int s=1+max(*max_element(str1,str1+n),*max_element(str2,str2+m));
+  preprocess(str1,n,str2,m,s);
+  colMatch[0]=rowMatch[0]=-1;
+  for(int i=1;i<=n+1;i++)
+    rowMatch[i]=m,colMatch[i]=n;
+  // low: lowest threshold index still to be improved; answer is low-1.
+  int low=1;
+  for(int i=0;i<n;i++)
+  {
+    int r,c;
+    // Update the row thresholds with occurrences of str1[i] in str2.
+    if(colMatch[low]==i)c=nextPos2[rowMatch[low++]+1][str1[i]];
+    else c=nextPos2[i][str1[i]];
+    for(int k=low;c<m;k++)
+    {
+      int tmp=rowMatch[k];
+      if(tmp>=c)
+      {
+        rowMatch[k]=c;
+        c=nextPos2[tmp+1][str1[i]];
+      }
+    }
+    // Symmetric update of the column thresholds with str2[i] in str1.
+    if(rowMatch[low]==i)r=nextPos1[colMatch[low++]+1][str2[i]];
+    else r=nextPos1[i+1][str2[i]];
+    for(int k=low;r<n;k++)
+    {
+      int tmp=colMatch[k];
+      if(tmp>=r)
+      {
+        colMatch[k]=r;
+        r=nextPos1[tmp+1][str2[i]];
+      }
+    }
+  }
+  // Advance low past every realized threshold; the LCS length is low-1.
+  while(rowMatch[low]<m)low++;
+  return low-1;
+}
+};
View
106 Misc/file_reader.cpp
@@ -0,0 +1,106 @@
+#include "common_header_zhc.h"
+#include <algorithm>
+#include <new>
+#include <stdint.h>
+
+
+// Buffered, sequential line reader over an already-open FILE*.  create()
+// sizes an internal buffer, reads the file's *leading partial block* into
+// the END of the buffer (so every later pread() is a full block), and
+// plants a '\n' sentinel one past the buffer so the scan loops need no
+// bounds check.  get_line() then returns one '\n'-terminated line per call.
+// NOTE(review): copy construction/assignment are not disabled; copying an
+// instance would double-delete m_current_buffer — confirm instances are
+// never copied, or add private (deleted) copy members.
+class file_line_reader_t
+{
+    char *m_current_buffer;   // heap buffer of m_buffer_size+1 bytes (+1 for sentinel)
+    uint32_t m_buffer_size;   // bytes per pread() block
+    FILE* m_file;             // borrowed; never closed by this class
+    uint32_t m_pos;           // current scan position inside m_current_buffer
+    int m_fileno;             // cached fileno(m_file) for pread()
+    static const char split='\n';
+    long long m_file_size,m_rem_block,m_offset; // total size / full blocks left / next file offset
+public:
+    file_line_reader_t():m_current_buffer(NULL),m_file(NULL),m_pos(0),m_file_size(0),m_rem_block(0){};
+    // Allocates the buffer and primes it with the file's leading partial
+    // block.  Returns 0 on success, -1 on failure (logged).
+    // NOTE(review): an empty file makes m_buffer_size 0, so the division
+    // m_file_size/m_buffer_size below would be undefined — TODO confirm
+    // callers never pass an empty file.
+    int create(uint32_t buffer_size,FILE* in_file)
+    {
+        if(in_file==NULL) {
+            WRITE_LOG(UL_LOG_FATAL,"NULL file pointer");
+            return -1;
+        }
+        m_file=in_file;
+        m_offset=0;
+
+        struct stat st;
+        if(fstat(fileno(m_file),&st)) {
+            WRITE_LOG(UL_LOG_FATAL,"fstat failed");
+            return -1;
+        }
+        m_fileno=fileno(in_file);
+        m_file_size = st.st_size;
+        // Never allocate more than the file actually holds.
+        m_buffer_size=std::min(m_file_size,(long long)buffer_size);
+        try
+        {
+            m_current_buffer=new char[m_buffer_size+1];
+        }
+        catch(std::bad_alloc)
+        {
+            WRITE_LOG(UL_LOG_FATAL,"new buffer failed");
+            return -1;
+        }
+        m_rem_block=m_file_size/m_buffer_size;
+        // Start "empty": m_pos at the sentinel forces the first refill path.
+        m_pos=m_buffer_size;
+        m_current_buffer[m_pos]=split;
+        if(m_file_size%m_buffer_size) {
+            // Read the leading partial block into the tail of the buffer so
+            // all subsequent preads are exactly m_buffer_size bytes.
+            uint32_t read_byte=(m_file_size%m_buffer_size);
+            if(read_byte!=pread(m_fileno,m_current_buffer+m_buffer_size-read_byte,read_byte,0)) {
+                WRITE_LOG(UL_LOG_FATAL,"pread failed");
+                return -1;
+            }
+            m_offset+=read_byte;
+            m_pos-=read_byte;
+        }
+        return 0;
+    }
+    ~file_line_reader_t()
+    {
+        if(NULL!=m_current_buffer) {
+            delete [] m_current_buffer;
+        }
+
+    }
+    // Copies the next line (without its '\n') into out_buffer, always
+    // NUL-terminated.  Returns the line's length as copied (long lines are
+    // truncated to out_buffer_size-1 bytes), -1 on EOF with nothing left,
+    // or -2 on a read error / a line longer than the internal buffer.
+    int get_line(char* out_buffer,uint32_t out_buffer_size) {
+        const int start=m_pos;
+        // The sentinel at m_current_buffer[m_buffer_size] guarantees this
+        // scan terminates without a bounds check.
+        while(split!=m_current_buffer[m_pos]) {
+            m_pos++;
+        }
+        if(m_pos<m_buffer_size) {
+            // Whole line lies inside the current block.
+            int ret=m_pos-start;
+            int len=std::min((int)out_buffer_size-1,ret);
+            memcpy(out_buffer,m_current_buffer+start,len);
+            out_buffer[len]=0;
+            m_pos++;
+            return ret;
+        }
+        // Hit the sentinel: copy the tail fragment, then refill the buffer.
+        int ret=std::min(out_buffer_size-1,m_pos-start);
+        memcpy(out_buffer,m_current_buffer+start,ret);
+        m_pos=0;
+        if(m_rem_block==0) {
+            // No blocks left: the fragment (possibly empty) is the last line.
+            out_buffer[ret]=0;
+            return ret==0?-1:ret;
+        }
+        if(m_buffer_size!=pread(m_fileno,m_current_buffer,m_buffer_size,m_offset)) {
+            WRITE_LOG(UL_LOG_FATAL,"pread failed");
+            return -2;
+        }
+        m_rem_block--;
+        m_offset+=m_buffer_size;
+        m_current_buffer[m_buffer_size]=split;
+        while(split!=m_current_buffer[m_pos]) {
+            m_pos++;
+        }
+        if(m_pos==m_buffer_size) {
+            WRITE_LOG(UL_LOG_FATAL,"The inner buffer size [%u] is shorter than current line size.",m_buffer_size);
+            return -2;
+        }
+        // Append the remainder of the line from the fresh block.
+        // NOTE(review): out_buffer_size-1-ret is an unsigned expression; if
+        // the first fragment already filled out_buffer it wraps around —
+        // TODO confirm out_buffer_size always exceeds the longest line.
+        int len=std::min(m_pos,out_buffer_size-1-ret);
+        memcpy(out_buffer+ret,m_current_buffer,len);
+        ret+=len;
+        out_buffer[ret]=0;
+        m_pos++;
+        return ret;
+    }
+};
View
310 Misc/struct_print.cpp
@@ -0,0 +1,310 @@
+/*
+ * zhuhongcheng@baidu.com
+*/
+/*
+This program reads, from standard input, a header file that defines structs,
+and writes, to standard output, the source code of functions that print
+every struct.  It may fail an assert on unexpected input formatting.
+*/
+#include <cstdio>
+#include <cstdlib>
+#include <algorithm>
+#include <string>
+#include <iostream>
+#include <cmath>
+#include <vector>
+#include <queue>
+#include <set>
+#include <sstream>
+#include <map>
+#include <cstring>
+#include <complex>
+#include <numeric>
+#include <functional>
+#include <cassert>
+using namespace std;
+// Uncomment to strip the debug helpers below.
+//#define NDEBUG
+// debug(x): dump an expression and its source line to stderr;
+// hline(): print a visual separator.  Both are no-ops under NDEBUG.
+#ifndef NDEBUG
+    #define debug(x) cerr<<#x<<"=\""<<x<<"\""<<" at line#"<<__LINE__<<endl;
+    #define hline() cerr<<"-----------------------------------------"<<endl;
+#else
+    #define debug(x)
+    #define hline()
+#endif
+
+// Shorthand for 64-bit signed integers.
+typedef long long int llint;
+//************************************************
// Streams a pair as "(first,second)".
template<typename A,typename B>
std::ostream& operator<<(std::ostream& out,const std::pair<A,B>& pp)
{
    return out<<"("<<pp.first<<","<<pp.second<<")";
}
// Streams a vector as a "length = N" header followed by the elements,
// each trailed by a single space, on one line.
template<typename T>
std::ostream& operator<<(std::ostream& out,const std::vector<T>& vect)
{
    out<<"length = "<<vect.size()<<std::endl;
    for(size_t idx=0;idx<vect.size();++idx)
        out<<vect[idx]<<" ";
    out<<std::endl;
    return out;
}
// Streams a vector of strings as "ROWS X WIDTH" (WIDTH = length of the
// first string) followed by one string per line.
// Fix: the original read vect[0] unconditionally, which is undefined
// behavior on an empty vector; an empty vector now prints "0 X 0".
std::ostream& operator<<(std::ostream& out,const std::vector<std::string>& vect)
{
    const size_t first_len = vect.empty() ? 0 : vect[0].length();
    out<<vect.size()<<" X "<<first_len<<std::endl;
    for(size_t i=0;i<vect.size();++i)
        out<<vect[i]<<std::endl;
    return out;
}
+
// Streams a matrix: a "row number=N" header, then each row printed as
// "row #i:" followed by that row's elements, space-separated.
template<typename T>
std::ostream& operator<<(std::ostream& out,const std::vector<std::vector<T> >& vect)
{
    out<<"row number="<<vect.size()<<std::endl;
    for(size_t r=0;r<vect.size();++r)
    {
        out<<"row #"<<r<<":";
        const std::vector<T>& row=vect[r];
        for(size_t c=0;c<row.size();++c)
            out<<" "<<row[c];
        out<<std::endl;
    }
    return out;
}
+
+// Be careful when parsing into strings: the cut characters still split the input!
// Splits ss on any character in `cut` and converts every non-empty token
// to T via stream extraction.  Note that when T is std::string the cut
// characters still act as separators.
template<class T>
std::vector<T> parse(const std::string& ss,const char* cut=" ")
{
    std::vector<T> out;
    size_t pos=0;
    const size_t len=ss.size();
    while(pos<len)
    {
        std::string token;
        // Accumulate characters up to the next delimiter (or end of input).
        while(pos<len&&strchr(cut,ss[pos])==NULL)
            token+=ss[pos++];
        if(!token.empty())
        {
            std::istringstream reader(token);
            T value;
            reader>>value;
            out.push_back(value);
        }
        ++pos; // skip the delimiter itself
    }
    return out;
}
// Renders any streamable value as its textual representation.
template<class T>
std::string convert(T vv)
{
    std::ostringstream buf;
    buf<<vv;
    return buf.str();
}
// Parses a string into T via stream extraction; on failure the returned
// value is whatever the stream left in it (matching the original contract).
template<typename T>
T convert(const std::string& ss)
{
    std::istringstream reader(ss);
    T value;
    reader>>value;
    return value;
}
+
// Tokenizes `token` into seq: each of the characters { } ; : [ ] becomes
// its own element, and every run of other characters between them is
// appended as a single element.
void split(const std::string& token,std::vector<std::string>& seq)
{
    static const std::string specials="{};:[]";
    std::string pending;
    for(size_t i=0;i<token.length();++i)
    {
        const char c=token[i];
        if(specials.find(c)!=std::string::npos)
        {
            if(!pending.empty())seq.push_back(pending);
            pending.clear();
            seq.push_back(std::string(1,c));
        }
        else pending.push_back(c);
    }
    if(!pending.empty())seq.push_back(pending);
}
// One field of a parsed struct: its type, its (possibly dotted) name, and —
// for array fields — the element count.
// Fix: `size` was left indeterminate for non-array fields, so copying or
// reading it was undefined behavior; it is now zero-initialized.
struct item_t
{
    std::string type,name;
    bool array;   // true when the field is declared as an array
    int size;     // element count; meaningful only when array==true
    item_t(std::string a,std::string b):type(a),name(b),array(false),size(0){};
};
+
+// Parsed structs: struct name -> flattened field list (filled by main()).
+map<string,vector<item_t> > root;
+// #define substitutions from the input header: macro name -> value text.
+map<string,string> macro;
+void parse_line(const string& line,vector<string>& seq)
+{
+ vector<string> tokens=parse<string>(line," \t\r\n");
+ if(tokens.empty())return;
+ if(tokens[0][0]=='/'||tokens[0][0]=='*')return;
+ if(tokens[0]=="#define")
+ {
+ string key=tokens[1];
+ string value="";
+ for(int i=2;i<tokens.size();i++)
+ value+=tokens[i];
+ cerr<<"macro "<<key<<" = "<<value<<endl;
+ macro[key]=value;
+ return;
+ }
+
+ vector<string> tmp_seq;
+ for(int i=0;i<tokens.size();i++)
+ split(tokens[i],tmp_seq);
+
+ for(int i=0;i<tmp_seq.size();i++)
+ {
+ if(tmp_seq[i][0]=='/')break;
+ if(tmp_seq[i]=="union")tmp_seq[i]="struct";
+ seq.push_back(tmp_seq[i]);
+ }
+}
+
// Returns the longest prefix of `name` that is a valid C identifier
// ([A-Za-z_][A-Za-z0-9_]*), or "" when name is empty or starts with an
// invalid character.
std::string valid_prefix(std::string& name)
{
    if(name.empty())return "";
    const char head=name[0];
    if(head!='_'&&!isalpha(head))return "";
    size_t end=1;
    while(end<name.length())
    {
        const char c=name[end];
        if(c!='_'&&!isalpha(c)&&!isdigit(c))break;
        ++end;
    }
    return name.substr(0,end);
}
+// Consumes a REVERSED token stream (tokens are popped from the back) whose
+// back is the "{" opening a struct body, and returns the flattened field
+// list.  Nested struct/union members recurse; their fields are emitted with
+// dotted "member.field" names.  Array sizes are resolved through the global
+// `macro` map.  Asserts on tokens that do not fit the expected grammar.
+vector<item_t> parse_struct(vector<string>& token)
+{
+    assert(token.size()&&token.back()=="{");
+    vector<item_t> res;
+    token.pop_back();
+    while(token.size()&&token.back()!="}")
+    {
+        // NOTE(review): "typdef" looks like a typo for "typedef"; as
+        // written a leading typedef token never matches this branch —
+        // confirm the intended input grammar.
+        if(token.back()=="typdef"||token.back()=="struct")
+        {
+            while(token.size()&&token.back()!="{")token.pop_back();
+            assert(token.size()&&token.back()=="{");
+            vector<item_t> sub=parse_struct(token);
+            // The nested member's name follows its closing "}".
+            assert(token.size()&&token.back()!=";");
+            string sub_name=token.back();token.pop_back();
+            for(int i=0;i<sub.size();i++)
+            {
+                sub[i].name=sub_name+"."+sub[i].name;
+                res.push_back(sub[i]);
+            }
+            assert(token.size()&&token.back()==";");
+            token.pop_back();
+            continue;
+        }
+        string type=token.back();token.pop_back();
+        assert(token.size());
+        string name=token.back();token.pop_back();
+        debug(type);
+        assert(valid_prefix(type)==type);
+        item_t item(type,name);
+        assert(token.size());
+        // Optional "[ size ]" suffix; size may be a macro name.
+        if(token.back()=="[")
+        {
+            token.pop_back();
+            item.array=true;
+            assert(token.size());
+            string size;
+            if(macro.count(token.back()))size=macro[token.back()];
+            else size=token.back();
+            item.size=convert<int>(size);
+            token.pop_back();
+            assert(token.back()=="]");
+            token.pop_back();
+        }
+        debug(item.name);
+        assert(item.name==valid_prefix(item.name));
+        // Skip anything remaining up to the terminating ";".
+        while(token.size()&&token.back()!=";")token.pop_back();
+        assert(token.size()&&token.back()==";");
+        token.pop_back();
+        res.push_back(item);
+    }
+    assert(token.size()&&token.back()=="}");
+    token.pop_back();
+    return res;
+}
+void print_item(const string& t,const item_t&);
+void print_struct(const string& prefix,string sname)
+{
+ //printf("\tout<<\"struct %s : \";\n",sname.c_str());
+ assert(root.count(sname));
+ const vector<item_t>& list=root[sname];
+ for(int i=0;i<list.size();i++)
+ {
+ string type=list[i].type;
+ string name=list[i].name;
+ if(root.count(type))
+ {
+ print_struct(prefix+name+".",type);
+ }
+ else
+ {
+ print_item(prefix,list[i]);
+ }
+ }
+ //printf("\tout<<endl;\n");
+}
+void print_item(const string& prefix,const item_t& item)
+{
+ string name=prefix+item.name;
+ if(item.array&&item.type!="char")
+ {
+ for(int i=0;i<item.size;i++)
+ {
+ //printf("\tout<<\"%s[%d]=\"<<%s[%d]<<\"\\t\";\n",name.c_str(),i,("obj."+name).c_str(),i);
+ printf("\tout<<%s[%d]<<\"\\t\";\n",("obj."+name).c_str(),i);
+ }
+ }
+ else
+ {
+ //printf("\tout<<\"%s=\"<<%s<<\"\\t\";\n",name.c_str(),("obj."+name).c_str());
+ printf("\tout<<%s<<\"\\t\";\n",("obj."+name).c_str());
+ }
+}
+int main()
+{
+ vector<string> token;
+ char line[10000];
+ while(gets(line))
+ {
+ parse_line(string(line),token);
+ }
+ cerr<<token<<endl;
+ reverse(token.begin(),token.end());
+ while(token.size())
+ {
+ //if(token.back()!="typdef"&&token.back()!="struct")
+ if(token.back()!="struct")
+ {
+ token.pop_back();
+ continue;
+ }
+ //while(token.size()&&token.back()!="struct")token.pop_back();
+ assert(token.size());
+ token.pop_back();
+ assert(token.size());
+ string sname=token.back();token.pop_back();
+ assert(token.size()&&token.back()=="{");
+ vector<item_t> list=parse_struct(token);
+ assert(token.size());
+ if(token.back()!=";")sname=token.back(),token.pop_back();
+ assert(token.size()&&token.back()==";");
+ root[sname]=list;
+ hline();
+ debug(sname);
+ for(int i=0;i<list.size();i++)
+ {
+ cerr<<list[i].type<<" "<<list[i].name;
+ if(list[i].array)cerr<<"["<<list[i].size<<"]";
+ cerr<<";"<<endl;
+ }
+ }
+ puts("//This is an auto-generated code!");
+ puts("#include <iostream>");
+ puts("using namespace std;");
+ for(map<string,vector<item_t> >::iterator itr=root.begin();itr!=root.end();itr++)
+ {
+ printf("void print_%s(const %s& obj,ostream& out)\n",itr->first.c_str(),itr->first.c_str());
+ puts("{");
+ print_struct("",itr->first);
+ puts("}");
+ }
+ return 0;
+}
View
402 Misc/toolFunctions.cpp
@@ -0,0 +1,402 @@
+#include <cstdarg>
+#include <cstdio>
+#include <cstdlib>
+#include <algorithm>