Permalink
Browse files

Merge branch 'master' of github.com:cathywu/Sentiment-Analysis

  • Loading branch information...
2 parents e5814ec + bf01a7d commit 1c7933809f37c72a25a3dd13576d3cace0e3f3d1 @cathywu committed Feb 6, 2012
Showing with 3,186 additions and 0 deletions.
  1. +3 −0 egpaper_final/WARNINGS
  2. +33 −0 egpaper_final/egpaper_final.css
  3. +126 −0 egpaper_final/egpaper_final.html
  4. +50 −0 egpaper_final/images.bbl
  5. +210 −0 egpaper_final/images.pl
  6. +477 −0 egpaper_final/images.tex
  7. BIN egpaper_final/img1.png
  8. BIN egpaper_final/img10.png
  9. BIN egpaper_final/img11.png
  10. BIN egpaper_final/img12.png
  11. BIN egpaper_final/img13.png
  12. BIN egpaper_final/img14.png
  13. BIN egpaper_final/img15.png
  14. BIN egpaper_final/img16.png
  15. BIN egpaper_final/img17.png
  16. BIN egpaper_final/img18.png
  17. BIN egpaper_final/img19.png
  18. BIN egpaper_final/img2.png
  19. BIN egpaper_final/img20.png
  20. BIN egpaper_final/img21.png
  21. BIN egpaper_final/img22.png
  22. BIN egpaper_final/img23.png
  23. BIN egpaper_final/img24.png
  24. BIN egpaper_final/img25.png
  25. BIN egpaper_final/img26.png
  26. BIN egpaper_final/img27.png
  27. BIN egpaper_final/img28.png
  28. BIN egpaper_final/img29.png
  29. BIN egpaper_final/img3.png
  30. BIN egpaper_final/img4.png
  31. BIN egpaper_final/img5.png
  32. BIN egpaper_final/img6.png
  33. BIN egpaper_final/img7.png
  34. BIN egpaper_final/img8.png
  35. BIN egpaper_final/img9.png
  36. +126 −0 egpaper_final/index.html
  37. +42 −0 egpaper_final/internals.pl
  38. +49 −0 egpaper_final/labels.pl
  39. +70 −0 egpaper_final/node1.html
  40. +93 −0 egpaper_final/node10.html
  41. +71 −0 egpaper_final/node11.html
  42. +98 −0 egpaper_final/node12.html
  43. +95 −0 egpaper_final/node13.html
  44. +68 −0 egpaper_final/node14.html
  45. +71 −0 egpaper_final/node15.html
  46. +65 −0 egpaper_final/node16.html
  47. +65 −0 egpaper_final/node17.html
  48. +66 −0 egpaper_final/node18.html
  49. +175 −0 egpaper_final/node19.html
  50. +94 −0 egpaper_final/node2.html
  51. +119 −0 egpaper_final/node20.html
  52. +73 −0 egpaper_final/node21.html
  53. +93 −0 egpaper_final/node3.html
  54. +75 −0 egpaper_final/node4.html
  55. +269 −0 egpaper_final/node5.html
  56. +111 −0 egpaper_final/node6.html
  57. +142 −0 egpaper_final/node7.html
  58. +68 −0 egpaper_final/node8.html
  59. +89 −0 egpaper_final/node9.html
View
3 egpaper_final/WARNINGS
@@ -0,0 +1,3 @@
+No implementation found for style `cvpr'
+No implementation found for style `graphicx'
+No implementation found for style `url'
View
33 egpaper_final/egpaper_final.css
@@ -0,0 +1,33 @@
+/* Century Schoolbook font is very similar to Computer Modern Math: cmmi */
+.MATH { font-family: "Century Schoolbook", serif; }
+.MATH I { font-family: "Century Schoolbook", serif; font-style: italic }
+.BOLDMATH { font-family: "Century Schoolbook", serif; font-weight: bold }
+
+/* implement both fixed-size and relative sizes */
+SMALL.XTINY { font-size : xx-small }
+SMALL.TINY { font-size : x-small }
+SMALL.SCRIPTSIZE { font-size : smaller }
+SMALL.FOOTNOTESIZE { font-size : small }
+SMALL.SMALL { }
+BIG.LARGE { }
+BIG.XLARGE { font-size : large }
+BIG.XXLARGE { font-size : x-large }
+BIG.HUGE { font-size : larger }
+BIG.XHUGE { font-size : xx-large }
+
+/* heading styles */
+H1 { }
+H2 { }
+H3 { }
+H4 { }
+H5 { }
+
+/* mathematics styles */
+DIV.displaymath { } /* math displays */
+TD.eqno { } /* equation-number cells */
+
+
+/* document-specific styles come next */
+DIV.navigation { }
+SPAN.tt { }
+SPAN.arabic { }
View
126 egpaper_final/egpaper_final.html
@@ -0,0 +1,126 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+
+<!--Converted with LaTeX2HTML 2008 (1.71)
+original version by: Nikos Drakos, CBLU, University of Leeds
+* revised and updated by: Marcus Hennecke, Ross Moore, Herb Swan
+* with significant contributions from:
+ Jens Lippmann, Marek Rouchal, Martin Wilck and others -->
+<HTML>
+<HEAD>
+<TITLE>Sentiment Classification using Machine Learning Techniques</TITLE>
+<META NAME="description" CONTENT="Sentiment Classification using Machine Learning Techniques">
+<META NAME="keywords" CONTENT="egpaper_final">
+<META NAME="resource-type" CONTENT="document">
+<META NAME="distribution" CONTENT="global">
+
+<META NAME="Generator" CONTENT="LaTeX2HTML v2008">
+<META HTTP-EQUIV="Content-Style-Type" CONTENT="text/css">
+
+<LINK REL="STYLESHEET" HREF="egpaper_final.css">
+
+<LINK REL="next" HREF="node1.html">
+</HEAD>
+
+<BODY >
+
+<DIV CLASS="navigation"><!--Navigation Panel-->
+<A NAME="tex2html6"
+ HREF="node1.html">
+<IMG WIDTH="37" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="next"
+ SRC="/usr/share/latex2html/icons/next.png"></A>
+<IMG WIDTH="26" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="up"
+ SRC="/usr/share/latex2html/icons/up_g.png">
+<IMG WIDTH="63" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="previous"
+ SRC="/usr/share/latex2html/icons/prev_g.png">
+<BR>
+<B> Next:</B> <A NAME="tex2html7"
+ HREF="node1.html">Introduction</A>
+<BR>
+<BR></DIV>
+<!--End of Navigation Panel-->
+
+<P>
+
+<P>
+
+<P>
+<H1 ALIGN=CENTER>Sentiment Classification using Machine Learning Techniques</H1>
+<P ALIGN=CENTER><STRONG>Pranjal Vachaspati</STRONG>
+<BR><I><TT><SMALL CLASS="SMALL">pranjal@mit.edu</SMALL></TT></I>
+</P><P ALIGN=CENTER><STRONG>Cathy Wu</STRONG>
+<BR><I><TT><SMALL CLASS="SMALL">cathywu@mit.edu</SMALL></TT> </I>
+</P>
+<HR>
+
+<P>
+
+<H3>Abstract:</H3>
+<DIV CLASS="ABSTRACT">
+We implement a series of classifiers (Naive Bayes, Maximum Entropy, and SVM) to distinguish positive and negative sentiment in critic and user reviews. We apply various processing methods, including negation tagging, part-of-speech tagging, and position tagging to achieve maximum accuracy. We test our classifiers on an external dataset to see how well they generalize. Finally, we use a majority-voting technique to combine classifiers and achieve accuracy of close to 90% in 3-fold cross-validation, far outperforming Pang's 2002 work [<A
+ HREF="node20.html#Pang">7</A>].
+</DIV>
+<P>
+
+<P>
+<BR><HR>
+<!--Table of Child-Links-->
+<A NAME="CHILD_LINKS"></A>
+
+<UL CLASS="ChildLinks">
+<LI><A NAME="tex2html8"
+ HREF="node1.html">Introduction</A>
+<LI><A NAME="tex2html9"
+ HREF="node2.html">Previous Work</A>
+<LI><A NAME="tex2html10"
+ HREF="node3.html">The User Review Domain</A>
+<LI><A NAME="tex2html11"
+ HREF="node4.html">Machine Learning Methods</A>
+<UL>
+<LI><A NAME="tex2html12"
+ HREF="node5.html">The Naive Bayes Classifier</A>
+<LI><A NAME="tex2html13"
+ HREF="node6.html">The Maximum Entropy Classifier</A>
+<LI><A NAME="tex2html14"
+ HREF="node7.html">The Support Vector Machine Classifier</A>
+</UL>
+<BR>
+<LI><A NAME="tex2html15"
+ HREF="node8.html">Experimental Setup</A>
+<LI><A NAME="tex2html16"
+ HREF="node9.html">Results</A>
+<UL>
+<LI><A NAME="tex2html17"
+ HREF="node10.html">Feature Counting Method</A>
+<LI><A NAME="tex2html18"
+ HREF="node11.html">Conditional Independence Assumption</A>
+<LI><A NAME="tex2html19"
+ HREF="node12.html">Number of Features</A>
+<LI><A NAME="tex2html20"
+ HREF="node13.html">Negation Tagging</A>
+<LI><A NAME="tex2html21"
+ HREF="node14.html">Position Tagging</A>
+<LI><A NAME="tex2html22"
+ HREF="node15.html">Part of Speech Tagging</A>
+<LI><A NAME="tex2html23"
+ HREF="node16.html">Adjectives</A>
+<LI><A NAME="tex2html24"
+ HREF="node17.html">Verbs</A>
+<LI><A NAME="tex2html25"
+ HREF="node18.html">Majority Voting</A>
+<LI><A NAME="tex2html26"
+ HREF="node19.html">Neighboring Domain Data</A>
+</UL>
+<BR>
+<LI><A NAME="tex2html27"
+ HREF="node20.html">Bibliography</A>
+<LI><A NAME="tex2html28"
+ HREF="node21.html">About this document ...</A>
+</UL>
+<!--End of Table of Child-Links-->
+<BR><HR>
+<ADDRESS>
+Pranjal Vachaspati
+2012-02-05
+</ADDRESS>
+</BODY>
+</HTML>
View
50 egpaper_final/images.bbl
@@ -0,0 +1,50 @@
+\begin{thebibliography}{1}\itemsep=-1pt
+
+\bibitem{PyML}
+A.~Ben-Hur.
+\newblock {PyML} -- machine learning in {Python}.
+\newblock http://pyml.sourceforge.net/, 2011.
+
+\bibitem{Manning}
+C.~D. Manning, P.~Raghavan, and H.~Schütze.
+\newblock {\em Introduction to Information Retrieval}.
+\newblock Cambridge University Press, 2008.
+
+\bibitem{Jaynes}
+E.~Jaynes.
+\newblock Information theory and statistical mechanics.
+\newblock In {\em The Physical Review}, volume 106, 1957.
+
+\bibitem{Le}
+Z.~Le.
+\newblock Maximum entropy modeling toolkit for {Python} and {C++}.
+\newblock http://homepages.inf.ed.ac.uk/lzhang10/maxent\_toolkit.html, 2011.
+
+\bibitem{Liu}
+D.~C. Liu and J.~Nocedal.
+\newblock On the limited memory {BFGS} method for large scale optimization.
+\newblock In {\em Mathematical Programming 45}, pages 503--528, 1989.
+
+\bibitem{qtag}
+O.~Mason.
+\newblock Qtag.
+\newblock http://phrasys.net/uob/om/software.
+
+\bibitem{Pang}
+B.~Pang, L.~Lee, and S.~Vaithyanathan.
+\newblock Thumbs up? {Sentiment} classification using machine learning
+ techniques.
+\newblock In {\em Proceedings of the 2002 Conference on Empirical Methods in
+ Natural Language Processing (EMNLP)}, pages 79--86, 2002.
+
+\bibitem{yelp}
+Yelp.
+\newblock Yelp academic dataset.
+\newblock http://www.yelp.com/academic\_dataset.
+
+\bibitem{Zhang}
+H.~Zhang.
+\newblock The optimality of naive {Bayes}.
+\newblock In {\em American Association for Artificial Intelligence}, 2004.
+
+\end{thebibliography}
View
210 egpaper_final/images.pl
@@ -0,0 +1,210 @@
+# LaTeX2HTML 2008 (1.71)
+# Associate images original text with physical files.
+
+
+$key = q/displaystyleP(C|F_1,F_2,ldots,F_n)=frac{P(C)P(F_1,F_2,ldots,F_n|C)}{P(F_1,F_2,ldots,F_n)};MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="342" HEIGHT="55" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img1.png"
+ ALT="$\displaystyle P(C \vert F_1, F_2, \ldots, F_n)
+= \frac{P(C)P(F_1, F_2, \ldots, F_n \vert C)}{P(F_1, F_2, \ldots, F_n)} \\\\
+$">|;
+
+$key = q/displaystyle-lg(sum_jcountC_j,F_i))]);MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="166" HEIGHT="53" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img16.png"
+ ALT="$\displaystyle - \lg (\sum_j count C_j, F_i))])$">|;
+
+$key = q/displaystyleP(C|F_1ldotsF_n)=P(C)[prod_{i=0}^nP(F_i|C)];MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="254" HEIGHT="62" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img8.png"
+ ALT="$\displaystyle P(C \vert F_1\ldots F_n) = P(C) [\prod_{i=0}^n P(F_i \vert C) ]$">|;
+
+$key = q/displaystyleforalli,zeta_ige0;MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="67" HEIGHT="30" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img24.png"
+ ALT="$\displaystyle \forall i, \zeta_i \ge 0$">|;
+
+$key = q/displaystyleforalli,y_i(vec{x}_i^Tcdotvec{B}+B0)ge1-zeta_i;MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="195" HEIGHT="39" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img25.png"
+ ALT="$\displaystyle \forall i, y_i (\vec{x}_i^T \cdot \vec{B} + B0) \ge 1 - \zeta_i $">|;
+
+$key = q/displaystylef(vec{B},B_0)={vec{x}|vec{x}^Tcdotvec{B}+B_0=0};MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="227" HEIGHT="39" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img22.png"
+ ALT="$\displaystyle f(\vec{B}, B_0) = \{\vec{x} \vert \vec{x}^T \cdot \vec{B} + B_0 = 0\}$">|;
+
+$key = q/{figure*}tabular{{{|l}*{20}{|c}|r|}hlinemulticolumn{4}{|c|}{Testconfigurations}&ximum&Presence&0.43&0.41&0.38&0.34&0.30&0.56hlinetabular{{{{figure*};FSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="671" HEIGHT="270" BORDER="0"
+ SRC="|."$dir".q|img28.png"
+ ALT="\begin{figure*}
+\begin{tabular}{{\vert l}*{20}{\vert c}\vert r\vert}
+\hline
+\...
+...3 &amp; 0.41 &amp; 0.38 &amp; 0.34 &amp; 0.30 &amp; 0.56 \\\\
+\hline
+\end{tabular}
+\end{figure*}">|;
+
+$key = q/displaystyleP(F_i|C,F_jldotsF_k)=P(F_i|C);MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="205" HEIGHT="32" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img7.png"
+ ALT="$\displaystyle P(F_i \vert C, F_j\ldots F_k) = P(F_i \vert C)$">|;
+
+$key = q/displaystyleldots;MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="22" HEIGHT="31" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img5.png"
+ ALT="$\displaystyle \ldots$">|;
+
+$key = q/C;MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="16" HEIGHT="15" ALIGN="BOTTOM" BORDER="0"
+ SRC="|."$dir".q|img13.png"
+ ALT="$ C$">|;
+
+$key = q/displaystylefrac{1}{2}|vec{B}|^2+Csum_izeta_i;MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="117" HEIGHT="50" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img23.png"
+ ALT="$\displaystyle \frac{1}{2} \vert\vec{B}\vert^2 + C\sum_i \zeta_i$">|;
+
+$key = q/displaystyle=P(C)P(F_1|C)P(F_2|C,F_1)P(F_3,F_4,ldots,F_n|C,F_1,F_2)dollar;MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="386" HEIGHT="32" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img4.png"
+ ALT="$\displaystyle = P(C)P(F_1 \vert C)P(F_2 \vert C, F_1)P(F_3, F_4, \ldots, F_n \vert C, F_1, F_2) \ $">|;
+
+$key = q/displaystyleC^*=operatorname*{arg,max}_CP(C|F_1...F_n);MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="194" HEIGHT="38" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img14.png"
+ ALT="$\displaystyle C^* = \operatorname*{arg max}_C P(C \vert F_1...F_n)$">|;
+
+$key = q/count(C,F_j);MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="91" HEIGHT="32" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img11.png"
+ ALT="$ count(C, F_j)$">|;
+
+$key = q/F_j;MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="21" HEIGHT="30" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img12.png"
+ ALT="$ F_j$">|;
+
+$key = q/displaystyleP(C)P(F_1,F_2,ldots,F_n|C)dollar;MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="179" HEIGHT="32" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img2.png"
+ ALT="$\displaystyle P(C)P(F_1, F_2, \ldots, F_n \vert C)\ $">|;
+
+$key = q/displaystylefrac{(1+count(C,F_i))}{sum_icount(C_j,F_i))};MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="133" HEIGHT="55" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img10.png"
+ ALT="$\displaystyle \frac{(1+count(C, F_i))}{\sum_i count(C_j, F_i))}$">|;
+
+$key = q/displaystyleP(F_i|F_jldotsF_k)=F(F_i);MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="169" HEIGHT="32" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img6.png"
+ ALT="$\displaystyle P(F_i \vert F_j\ldots F_k) = F(F_i)$">|;
+
+$key = q/displaystyle=P(C)P(F_1|C)P(F_2,F_3,ldots,F_n|C,F_1)dollar;MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="279" HEIGHT="32" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img3.png"
+ ALT="$\displaystyle = P(C)P(F_1 \vert C)P( F_2, F_3, \ldots, F_n\vert C, F_1) \ $">|;
+
+$key = q/{figure*}tabular{{{|l}*{11}{|c}|r|}hlinemulticolumn{4}{|c|}{Testconfigurations}&&0.64&0.57&0.60&-&-&-&0.62&0.66&0.64hlinetabular{{{{figure*};MSF=1.6;FSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="798" HEIGHT="747" BORDER="0"
+ SRC="|."$dir".q|img26.png"
+ ALT="\begin{figure*}
+\begin{tabular}{{\vert l}*{11}{\vert c}\vert r\vert}
+\hline
+\...
+....60 &amp; - &amp; - &amp; - &amp; 0.62 &amp; 0.66 &amp; 0.64 \\\\
+\hline
+\end{tabular}
+\end{figure*}">|;
+
+$key = q/c;MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="11" HEIGHT="17" ALIGN="BOTTOM" BORDER="0"
+ SRC="|."$dir".q|img21.png"
+ ALT="$ c$">|;
+
+$key = q/P(c|d);MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="48" HEIGHT="32" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img17.png"
+ ALT="$ P(c\vert d)$">|;
+
+$key = q/displaystyleP(c|d)=frac{1}{Z(d)}exp(sum_i(lambda_{i,c}F_{i,c}(d,c)));MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="264" HEIGHT="50" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img18.png"
+ ALT="$\displaystyle P(c\vert d) = \frac{1}{Z(d)} \exp(\sum_i(\lambda_{i,c} F_{i,c}(d,c)))$">|;
+
+$key = q/f_i;MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="17" HEIGHT="30" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img20.png"
+ ALT="$ f_i$">|;
+
+$key = q/{figure*}tabular{{{|l}*{8}{|c}|r|}hlinemulticolumn{4}{|c|}{Testconfigurations}&mximum&Presence&0.44&0.43&0.41&0.37&0.32&0.56hlinetabular{{{{figure*};FSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="671" HEIGHT="270" BORDER="0"
+ SRC="|."$dir".q|img27.png"
+ ALT="\begin{figure*}
+\begin{tabular}{{\vert l}*{8}{\vert c}\vert r\vert}
+\hline
+\m...
+...4 &amp; 0.43 &amp; 0.41 &amp; 0.37 &amp; 0.32 &amp; 0.56 \\\\
+\hline
+\end{tabular}
+\end{figure*}">|;
+
+$key = q/displaystyleC^*=operatorname*{arg,max}_C(P(C)+sum_i[F_i(lgcount(C,F_i);MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="313" HEIGHT="49" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img15.png"
+ ALT="$\displaystyle C^* = \operatorname*{arg max}_C (P(C) + \sum_i [F_i (\lg count (C, F_i)$">|;
+
+$key = q/P(Fi|C);MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="64" HEIGHT="32" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img9.png"
+ ALT="$ P(Fi \vert C)$">|;
+
+$key = q/{figure*}tabular{{{|l}*{20}{|c}|r|}hlinemulticolumn{4}{|c|}{Testconfigurations}&ximum&Presence&0.45&0.45&0.42&0.38&0.32&0.57hlinetabular{{{{figure*};FSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="671" HEIGHT="270" BORDER="0"
+ SRC="|."$dir".q|img29.png"
+ ALT="\begin{figure*}
+\begin{tabular}{{\vert l}*{20}{\vert c}\vert r\vert}
+\hline
+\...
+...5 &amp; 0.45 &amp; 0.42 &amp; 0.38 &amp; 0.32 &amp; 0.57 \\\\
+\hline
+\end{tabular}
+\end{figure*}">|;
+
+$key = q/lambda_{i,c};MSF=1.6;AAT/;
+$cached_env_img{$key} = q|<IMG
+ WIDTH="28" HEIGHT="30" ALIGN="MIDDLE" BORDER="0"
+ SRC="|."$dir".q|img19.png"
+ ALT="$ \lambda_{i,c}$">|;
+
+1;
+
View
477 egpaper_final/images.tex
@@ -0,0 +1,477 @@
+\batchmode
+\documentclass[10pt,twocolumn,letterpaper]{article}
+\RequirePackage{ifthen}
+
+
+
+
+\usepackage{cvpr}
+\usepackage{times}
+\usepackage{epsfig}
+\usepackage{graphicx}
+\usepackage{amsmath}
+\usepackage{amssymb}
+\usepackage{url}
+
+
+\cvprfinalcopy % *** Uncomment this line for the final submission
+
+
+% *** Enter the CVPR Paper ID here
+
+
+
+\ifcvprfinal\pagestyle{empty}\fi
+
+
+\usepackage[dvips]{color}
+
+
+\pagecolor[gray]{.7}
+
+\usepackage[]{inputenc}
+
+
+
+\makeatletter
+
+\makeatletter
+\count@=\the\catcode`\_ \catcode`\_=8
+\newenvironment{tex2html_wrap}{}{}%
+\catcode`\<=12\catcode`\_=\count@
+\newcommand{\providedcommand}[1]{\expandafter\providecommand\csname #1\endcsname}%
+\newcommand{\renewedcommand}[1]{\expandafter\providecommand\csname #1\endcsname{}%
+ \expandafter\renewcommand\csname #1\endcsname}%
+\newcommand{\newedenvironment}[1]{\newenvironment{#1}{}{}\renewenvironment{#1}}%
+\let\newedcommand\renewedcommand
+\let\renewedenvironment\newedenvironment
+\makeatother
+\let\mathon=$
+\let\mathoff=$
+\ifx\AtBeginDocument\undefined \newcommand{\AtBeginDocument}[1]{}\fi
+\newbox\sizebox
+\setlength{\hoffset}{0pt}\setlength{\voffset}{0pt}
+\addtolength{\textheight}{\footskip}\setlength{\footskip}{0pt}
+\addtolength{\textheight}{\topmargin}\setlength{\topmargin}{0pt}
+\addtolength{\textheight}{\headheight}\setlength{\headheight}{0pt}
+\addtolength{\textheight}{\headsep}\setlength{\headsep}{0pt}
+\setlength{\textwidth}{349pt}
+\newwrite\lthtmlwrite
+\makeatletter
+\let\realnormalsize=\normalsize
+\global\topskip=2sp
+\def\preveqno{}\let\real@float=\@float \let\realend@float=\end@float
+\def\@float{\let\@savefreelist\@freelist\real@float}
+\def\liih@math{\ifmmode$\else\bad@math\fi}
+\def\end@float{\realend@float\global\let\@freelist\@savefreelist}
+\let\real@dbflt=\@dbflt \let\end@dblfloat=\end@float
+\let\@largefloatcheck=\relax
+\let\if@boxedmulticols=\iftrue
+\def\@dbflt{\let\@savefreelist\@freelist\real@dbflt}
+\def\adjustnormalsize{\def\normalsize{\mathsurround=0pt \realnormalsize
+ \parindent=0pt\abovedisplayskip=0pt\belowdisplayskip=0pt}%
+ \def\phantompar{\csname par\endcsname}\normalsize}%
+\def\lthtmltypeout#1{{\let\protect\string \immediate\write\lthtmlwrite{#1}}}%
+\newcommand\lthtmlhboxmathA{\adjustnormalsize\setbox\sizebox=\hbox\bgroup\kern.05em }%
+\newcommand\lthtmlhboxmathB{\adjustnormalsize\setbox\sizebox=\hbox to\hsize\bgroup\hfill }%
+\newcommand\lthtmlvboxmathA{\adjustnormalsize\setbox\sizebox=\vbox\bgroup %
+ \let\ifinner=\iffalse \let\)\liih@math }%
+\newcommand\lthtmlboxmathZ{\@next\next\@currlist{}{\def\next{\voidb@x}}%
+ \expandafter\box\next\egroup}%
+\newcommand\lthtmlmathtype[1]{\gdef\lthtmlmathenv{#1}}%
+\newcommand\lthtmllogmath{\dimen0\ht\sizebox \advance\dimen0\dp\sizebox
+ \ifdim\dimen0>.95\vsize
+ \lthtmltypeout{%
+*** image for \lthtmlmathenv\space is too tall at \the\dimen0, reducing to .95 vsize ***}%
+ \ht\sizebox.95\vsize \dp\sizebox\z@ \fi
+ \lthtmltypeout{l2hSize %
+:\lthtmlmathenv:\the\ht\sizebox::\the\dp\sizebox::\the\wd\sizebox.\preveqno}}%
+\newcommand\lthtmlfigureA[1]{\let\@savefreelist\@freelist
+ \lthtmlmathtype{#1}\lthtmlvboxmathA}%
+\newcommand\lthtmlpictureA{\bgroup\catcode`\_=8 \lthtmlpictureB}%
+\newcommand\lthtmlpictureB[1]{\lthtmlmathtype{#1}\egroup
+ \let\@savefreelist\@freelist \lthtmlhboxmathB}%
+\newcommand\lthtmlpictureZ[1]{\hfill\lthtmlfigureZ}%
+\newcommand\lthtmlfigureZ{\lthtmlboxmathZ\lthtmllogmath\copy\sizebox
+ \global\let\@freelist\@savefreelist}%
+\newcommand\lthtmldisplayA{\bgroup\catcode`\_=8 \lthtmldisplayAi}%
+\newcommand\lthtmldisplayAi[1]{\lthtmlmathtype{#1}\egroup\lthtmlvboxmathA}%
+\newcommand\lthtmldisplayB[1]{\edef\preveqno{(\theequation)}%
+ \lthtmldisplayA{#1}\let\@eqnnum\relax}%
+\newcommand\lthtmldisplayZ{\lthtmlboxmathZ\lthtmllogmath\lthtmlsetmath}%
+\newcommand\lthtmlinlinemathA{\bgroup\catcode`\_=8 \lthtmlinlinemathB}
+\newcommand\lthtmlinlinemathB[1]{\lthtmlmathtype{#1}\egroup\lthtmlhboxmathA
+ \vrule height1.5ex width0pt }%
+\newcommand\lthtmlinlineA{\bgroup\catcode`\_=8 \lthtmlinlineB}%
+\newcommand\lthtmlinlineB[1]{\lthtmlmathtype{#1}\egroup\lthtmlhboxmathA}%
+\newcommand\lthtmlinlineZ{\egroup\expandafter\ifdim\dp\sizebox>0pt %
+ \expandafter\centerinlinemath\fi\lthtmllogmath\lthtmlsetinline}
+\newcommand\lthtmlinlinemathZ{\egroup\expandafter\ifdim\dp\sizebox>0pt %
+ \expandafter\centerinlinemath\fi\lthtmllogmath\lthtmlsetmath}
+\newcommand\lthtmlindisplaymathZ{\egroup %
+ \centerinlinemath\lthtmllogmath\lthtmlsetmath}
+\def\lthtmlsetinline{\hbox{\vrule width.1em \vtop{\vbox{%
+ \kern.1em\copy\sizebox}\ifdim\dp\sizebox>0pt\kern.1em\else\kern.3pt\fi
+ \ifdim\hsize>\wd\sizebox \hrule depth1pt\fi}}}
+\def\lthtmlsetmath{\hbox{\vrule width.1em\kern-.05em\vtop{\vbox{%
+ \kern.1em\kern0.8 pt\hbox{\hglue.17em\copy\sizebox\hglue0.8 pt}}\kern.3pt%
+ \ifdim\dp\sizebox>0pt\kern.1em\fi \kern0.8 pt%
+ \ifdim\hsize>\wd\sizebox \hrule depth1pt\fi}}}
+\def\centerinlinemath{%
+ \dimen1=\ifdim\ht\sizebox<\dp\sizebox \dp\sizebox\else\ht\sizebox\fi
+ \advance\dimen1by.5pt \vrule width0pt height\dimen1 depth\dimen1
+ \dp\sizebox=\dimen1\ht\sizebox=\dimen1\relax}
+
+\def\lthtmlcheckvsize{\ifdim\ht\sizebox<\vsize
+ \ifdim\wd\sizebox<\hsize\expandafter\hfill\fi \expandafter\vfill
+ \else\expandafter\vss\fi}%
+\providecommand{\selectlanguage}[1]{}%
+\makeatletter \tracingstats = 1
+\providecommand{\Beta}{\textrm{B}}
+\providecommand{\Mu}{\textrm{M}}
+\providecommand{\Kappa}{\textrm{K}}
+\providecommand{\Rho}{\textrm{R}}
+\providecommand{\Epsilon}{\textrm{E}}
+\providecommand{\Chi}{\textrm{X}}
+\providecommand{\Iota}{\textrm{J}}
+\providecommand{\omicron}{\textrm{o}}
+\providecommand{\Zeta}{\textrm{Z}}
+\providecommand{\Eta}{\textrm{H}}
+\providecommand{\Nu}{\textrm{N}}
+\providecommand{\Omicron}{\textrm{O}}
+\providecommand{\Tau}{\textrm{T}}
+\providecommand{\Alpha}{\textrm{A}}
+
+
+\begin{document}
+\pagestyle{empty}\thispagestyle{empty}\lthtmltypeout{}%
+\lthtmltypeout{latex2htmlLength hsize=\the\hsize}\lthtmltypeout{}%
+\lthtmltypeout{latex2htmlLength vsize=\the\vsize}\lthtmltypeout{}%
+\lthtmltypeout{latex2htmlLength hoffset=\the\hoffset}\lthtmltypeout{}%
+\lthtmltypeout{latex2htmlLength voffset=\the\voffset}\lthtmltypeout{}%
+\lthtmltypeout{latex2htmlLength topmargin=\the\topmargin}\lthtmltypeout{}%
+\lthtmltypeout{latex2htmlLength topskip=\the\topskip}\lthtmltypeout{}%
+\lthtmltypeout{latex2htmlLength headheight=\the\headheight}\lthtmltypeout{}%
+\lthtmltypeout{latex2htmlLength headsep=\the\headsep}\lthtmltypeout{}%
+\lthtmltypeout{latex2htmlLength parskip=\the\parskip}\lthtmltypeout{}%
+\lthtmltypeout{latex2htmlLength oddsidemargin=\the\oddsidemargin}\lthtmltypeout{}%
+\makeatletter
+\if@twoside\lthtmltypeout{latex2htmlLength evensidemargin=\the\evensidemargin}%
+\else\lthtmltypeout{latex2htmlLength evensidemargin=\the\oddsidemargin}\fi%
+\lthtmltypeout{}%
+\makeatother
+\setcounter{page}{1}
+\onecolumn
+
+% !!! IMAGES START HERE !!!
+
+\stepcounter{section}
+\stepcounter{section}
+\stepcounter{section}
+\stepcounter{section}
+\stepcounter{subsection}
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_indisplay545}%
+$\displaystyle P(C | F_1, F_2, \ldots, F_n)
+= \frac{P(C)P(F_1, F_2, \ldots, F_n | C)}{P(F_1, F_2, \ldots, F_n)} \\
+$%
+\lthtmlindisplaymathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_indisplay547}%
+$\displaystyle P(C)P(F_1, F_2, \ldots, F_n | C)\\$%
+\lthtmlindisplaymathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_indisplay549}%
+$\displaystyle = P(C)P(F_1 | C)P( F_2, F_3, \ldots, F_n| C, F_1) \\$%
+\lthtmlindisplaymathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_indisplay551}%
+$\displaystyle = P(C)P(F_1 | C)P(F_2 | C, F_1)P(F_3, F_4, \ldots, F_n | C, F_1, F_2) \\$%
+\lthtmlindisplaymathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_indisplay553}%
+$\displaystyle \ldots$%
+\lthtmlindisplaymathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_indisplay555}%
+$\displaystyle P(F_i | F_j\ldots F_k) = F(F_i)$%
+\lthtmlindisplaymathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_indisplay557}%
+$\displaystyle P(F_i | C, F_j\ldots F_k) = P(F_i | C)$%
+\lthtmlindisplaymathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_indisplay559}%
+$\displaystyle P(C | F_1\ldots F_n) = P(C) [\prod_{i=0}^n P(F_i | C) ]$%
+\lthtmlindisplaymathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_inline561}%
+$ P(Fi | C)$%
+\lthtmlinlinemathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_indisplay563}%
+$\displaystyle \frac{(1+count(C, F_i))}{\sum_i count(C_j, F_i))}$%
+\lthtmlindisplaymathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_inline565}%
+$ count(C, F_j)$%
+\lthtmlinlinemathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_inline567}%
+$ F_j$%
+\lthtmlinlinemathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_inline569}%
+$ C$%
+\lthtmlinlinemathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_indisplay571}%
+$\displaystyle C^* = \operatorname*{arg\,max}_C P(C | F_1...F_n)$%
+\lthtmlindisplaymathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_indisplay573}%
+$\displaystyle C^* = \operatorname*{arg\,max}_C (P(C) + \sum_i [F_i (\lg count (C, F_i)$%
+\lthtmlindisplaymathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_indisplay575}%
+$\displaystyle - \lg (\sum_j count C_j, F_i))])$%
+\lthtmlindisplaymathZ
+\lthtmlcheckvsize\clearpage}
+
+\stepcounter{subsection}
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_inline578}%
+$ P(c|d)$%
+\lthtmlinlinemathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_indisplay580}%
+$\displaystyle P(c|d) = \frac{1}{Z(d)} \exp(\sum_i(\lambda_{i,c} F_{i,c}(d,c)))$%
+\lthtmlindisplaymathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_inline582}%
+$ \lambda_{i,c}$%
+\lthtmlinlinemathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_inline586}%
+$ f_i$%
+\lthtmlinlinemathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_inline588}%
+$ c$%
+\lthtmlinlinemathZ
+\lthtmlcheckvsize\clearpage}
+
+\stepcounter{subsection}
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_indisplay591}%
+$\displaystyle f(\vec{B}, B_0) = \{\vec{x} | \vec{x}^T \cdot \vec{B} + B_0 = 0\}$%
+\lthtmlindisplaymathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_indisplay593}%
+$\displaystyle \frac{1}{2} |\vec{B}|^2 + C\sum_i \zeta_i$%
+\lthtmlindisplaymathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_indisplay595}%
+$\displaystyle \forall i, \zeta_i \ge 0$%
+\lthtmlindisplaymathZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlinlinemathA{tex2html_wrap_indisplay597}%
+$\displaystyle \forall i, y_i (\vec{x}_i^T \cdot \vec{B} + B0) \ge 1 - \zeta_i $%
+\lthtmlindisplaymathZ
+\lthtmlcheckvsize\clearpage}
+
+\stepcounter{section}
+\stepcounter{section}
+\stepcounter{subsection}
+\stepcounter{subsection}
+\stepcounter{subsection}
+\stepcounter{subsection}
+\stepcounter{subsection}
+\stepcounter{subsection}
+\stepcounter{subsection}
+\stepcounter{subsection}
+\stepcounter{subsection}
+\stepcounter{subsection}
+{\newpage\clearpage
+\lthtmlfigureA{figurestar71}%
+\begin{figure*}
+\begin{tabular}{{|l}*{11}{|c}|r|}
+\hline
+\multicolumn{4}{|c|}{Test configurations} & \multicolumn{3}{|c|}{Naive Bayes} & \multicolumn{3}{|c|}{MaxEnt} & \multicolumn{3}{|c|}{SVM}\\
+\hline
+Domain & Features & \# of features & Frequency & + & - & $\pm$& + & - & $\pm$& + & - & $\pm$\\
+\hline
+No-negation & Unigrams & 16165 & Frequency & 0.94 & 0.62 & 0.78 & - & - & - & 0.82 & 0.82 & 0.82 \\
+No-negation & Unigrams & 16165 & Presence & 0.87 & 0.72 & 0.82 & 0.85 & 0.87 & 0.86 & 0.85 & 0.84 & 0.84 \\
+No-negation & Bigrams & 16165 & Frequency & 0.92 & 0.64 & 0.78 & - & - & - & 0.77 & 0.81 & 0.79 \\
+No-negation & Bigrams & 16165 & Presence & 0.89 & 0.73 & 0.81 & 0.79 & 0.82 & 0.81 & 0.8 & 0.81 & 0.80 \\
+adjectives & Unigrams & 16165 & Frequency & 0.95 & 0.52 & 0.73 & - & - & - & 0.75 & 0.77 & 0.76 \\
+default & Bigrams & 2633 & Frequency & 0.91 & 0.46 & 0.69 & - & - & - & 0.74 & 0.75 & 0.75 \\
+default & Bigrams & 16165 & Frequency & 0.92 & 0.64 & 0.78 & - & - & - & 0.78 & 0.79 & 0.78 \\
+default & Unigrams & 2633 & Frequency & 0.96 & 0.5 & 0.74 & - & - & - & 0.81 & 0.79 & 0.80 \\
+default & Unigrams & 16165 & Frequency & 0.93 & 0.59 & 0.76 & - & - & - & 0.82 & 0.81 & 0.82 \\
+default & Unigrams & maximum & Frequency & 0.95 & 0.49 & 0.72 & - & - & - & 0.82 & 0.81 & 0.82 \\
+partofspeech & Bigrams & 16165 & Frequency & 0.96 & 0.47 & 0.71 & - & - & - & 0.82 & 0.82 & 0.82 \\
+partofspeech & Unigrams & 16165 & Frequency & 0.96 & 0.54 & 0.75 & - & - & - & 0.82 & 0.81 & 0.81 \\
+position & Bigrams & 16165 & Frequency & 0.96 & 0.49 & 0.73 & - & - & - & 0.77 & 0.78 & 0.78 \\
+position & Unigrams & 16165 & Frequency & 0.93 & 0.58 & 0.76 & - & - & - & 0.81 & 0.82 & 0.82 \\
+verbs & Unigrams & maximum & Frequency & 0.8 & 0.55 & 0.67 & - & - & - & 0.61 & 0.65 & 0.63 \\
+adjectives & Unigrams & 16165 & Presence & 0.93 & 0.59 & 0.76 & 0.79 & 0.77 & 0.78 & 0.75 & 0.73 & 0.74 \\
+default & Bigrams & 2633 & Presence & 0.86 & 0.64 & 0.75 & 0.75 & 0.75 & 0.75 & 0.73 & 0.75 & 0.74 \\
+default & Bigrams & 16165 & Presence & 0.89 & 0.74 & 0.81 & 0.81 & 0.82 & 0.81 & 0.78 & 0.79 & 0.78 \\
+default & Unigrams & 2633 & Presence & 0.84 & 0.8 & 0.82 & 0.84 & 0.82 & 0.83 & 0.78 & 0.82 & 0.8 \\
+default & Unigrams & 16165 & Presence & 0.87 & 0.77 & 0.82 & 0.84 & 0.85 & 0.85 & 0.83 & 0.82 & 0.83 \\
+default & Unigrams & maximum & Presence & 0.91 & 0.7 & 0.81 & 0.84 & 0.86 & 0.85 & 0.83 & 0.85 & 0.84 \\
+partofspeech & Bigrams & 16165 & Presence & 0.89 & 0.73 & 0.81 & 0.84 & 0.84 & 0.84 & 0.79 & 0.82 & 0.8 \\
+partofspeech & Unigrams & 16165 & Presence & 0.86 & 0.76 & 0.81 & 0.85 & 0.85 & 0.85 & 0.84 & 0.83 & 0.84 \\
+position & Bigrams & 16165 & Presence & 0.87 & 0.66 & 0.76 & 0.82 & 0.83 & 0.82 & 0.73 & 0.76 & 0.74 \\
+position & Unigrams & 16165 & Presence & 0.86 & 0.78 & 0.82 & 0.84 & 0.85 & 0.85 & 0.80 & 0.80 & 0.80 \\
+verbs & Unigrams & maximum & Presence & 0.80 & 0.54 & 0.67 & 0.65 & 0.65 & 0.65 & 0.64 & 0.63 & 0.635 \\
+adjectives & Unigrams & 16165 & TF-IDF & 0.82 & 0.60 & 0.71 & - & - & - & 0.79 & 0.76 & 0.77 \\
+default & Bigrams & 2633 & TF-IDF & 0.92 & 0.46 & 0.69 & - & - & - & 0.76 & 0.71 & 0.74 \\
+default & Bigrams & 16165 & TF-IDF & 0.90 & 0.68 & 0.79 & - & - & - & 0.83 & 0.74 & 0.79 \\
+default & Unigrams & 2633 & TF-IDF & 0.85 & 0.52 & 0.74 & - & - & - & 0.81 & 0.79 & 0.80 \\
+default & Unigrams & 16165 & TF-IDF & 0.88 & 0.68 & 0.78 & - & - & - & 0.83 & 0.77 & 0.80 \\
+default & Unigrams & maximum & TF-IDF & 0.86 & 0.65 & 0.76 & - & - & - & 0.83 & 0.78 & 0.81 \\
+partofspeech & Bigrams & 16165 & TF-IDF & 0.89 & 0.67 & 0.78 & - & - & - & 0.79 & 0.74 & 0.76 \\
+partofspeech & Unigrams & 16165 & TF-IDF & 0.89 & 0.63 & 0.76 & - & - & - & 0.81 & 0.78 & 0.79 \\
+position & Bigrams & 16165 & TF-IDF & 0.89 & 0.59 & 0.74 & - & - & - & 0.79 & 0.69 & 0.74 \\
+position & Unigrams & 16165 & TF-IDF & 0.91 & 0.61 & 0.76 & - & - & - & 0.81 & 0.71 & 0.76 \\
+verbs & Unigrams & maximum & TF-IDF & 0.64 & 0.57 & 0.60 & - & - & - & 0.62 & 0.66 & 0.64 \\
+\hline
+\end{tabular}
+
+\end{figure*}%
+\lthtmlfigureZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlfigureA{figurestar91}%
+\begin{figure*}
+\begin{tabular}{{|l}*{8}{|c}|r|}
+\hline
+\multicolumn{4}{|c|}{Test configurations} & \multicolumn{6}{|c|}{Naive Bayes} \\
+\hline
+Domain & Features & \# of features & Frequency & ***** & **** & *** & ** & * & score \\
+\hline
+default & Unigrams & 16165 & Frequency & 0.72 & 0.68 & 0.53 & 0.34 & 0.24 & 0.74 \\
+default & Unigrams & 16165 & Presence & 0.49 & 0.41 & 0.24 & 0.14 & 0.08 & 0.71 \\
+default & Bigrams & 16165 & Presence & 0.50 & 0.42 & 0.26 & 0.13 & 0.10 & 0.70 \\
+position & Unigrams & 16165 & Presence & 0.35 & 0.29 & 0.14 & 0.08 & 0.04 & 0.65 \\
+partofspeech & Unigrams & 16165 & Presence & 0.45 & 0.37 & 0.20 & 0.11 & 0.06 & 0.69 \\
+adjectives & Unigrams & 16165 & Presence & 0.76 & 0.73 & 0.61 & 0.45 & 0.36 & 0.70 \\
+verbs & Unigrams & 16165 & Presence & 0.44 & 0.43 & 0.41 & 0.37 & 0.32 & 0.56 \\
+default & Unigrams & maximum & Presence & 0.59 & 0.55 & 0.36 & 0.23 & 0.15 & 0.72 \\
+position & Unigrams & maximum & Presence & 0.54 & 0.50 & 0.33 & 0.22 & 0.14 & 0.70 \\
+partofspeech & Unigrams & maximum & Presence & 0.56 & 0.52 & 0.35 & 0.22 & 0.14 & 0.71 \\
+adjectives & Unigrams & maximum & Presence & 0.76 & 0.73 & 0.61 & 0.45 & 0.36 & 0.70 \\
+verbs & Unigrams & maximum & Presence & 0.44 & 0.43 & 0.41 & 0.37 & 0.32 & 0.56 \\
+\hline
+\end{tabular}
+
+\end{figure*}%
+\lthtmlfigureZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlfigureA{figurestar105}%
+\begin{figure*}
+\begin{tabular}{{|l}*{20}{|c}|r|}
+\hline
+\multicolumn{4}{|c|}{Test configurations} & \multicolumn{6}{|c|}{MaxEnt}\\
+\hline
+Domain & Features & \# of features & Frequency & ***** & **** & *** & ** & * & score \\
+\hline
+default & Unigrams & 16165 & Frequency & - & - & - & - & - & - \\
+default & Unigrams & 16165 & Presence & 0.61 & 0.57 & 0.39 & 0.23 & 0.11 & 0.75 \\
+default & Bigrams & 16165 & Presence & 0.63 & 0.59 & 0.45 & 0.28 & 0.26 & 0.68 \\
+position & Unigrams & 16165 & Presence & 0.46 & 0.43 & 0.28 & 0.17 & 0.11 & 0.67 \\
+partofspeech & Unigrams & 16165 & Presence & 0.55 & 0.50 & 0.32 & 0.20 & 0.10 & 0.72 \\
+adjectives & Unigrams & 16165 & Presence & 0.75 & 0.72 & 0.62 & 0.45 & 0.37 & 0.69 \\
+verbs & Unigrams & 16165 & Presence & 0.43 & 0.41 & 0.38 & 0.34 & 0.30 & 0.56 \\
+default & Unigrams & maximum & Presence & 0.59 & 0.54 & 0.36 & 0.20 & 0.11 & 0.74 \\
+position & Unigrams & maximum & Presence & 0.44 & 0.40 & 0.26 & 0.15 & 0.09 & 0.68 \\
+partofspeech & Unigrams & maximum & Presence & 0.52 & 0.47 & 0.30 & 0.18 & 0.09 & 0.72 \\
+adjectives & Unigrams & maximum & Presence & 0.75 & 0.72 & 0.62 & 0.45 & 0.37 & 0.69 \\
+verbs & Unigrams & maximum & Presence & 0.43 & 0.41 & 0.38 & 0.34 & 0.30 & 0.56 \\
+\hline
+\end{tabular}
+
+\end{figure*}%
+\lthtmlfigureZ
+\lthtmlcheckvsize\clearpage}
+
+{\newpage\clearpage
+\lthtmlfigureA{figurestar119}%
+\begin{figure*}
+\begin{tabular}{{|l}*{20}{|c}|r|}
+\hline
+\multicolumn{4}{|c|}{Test configurations} & \multicolumn{6}{|c|}{SVM}\\
+\hline
+Domain & Features & \# of features & Frequency & ***** & **** & *** & ** & * & score \\
+\hline
+default & Unigrams & 16165 & Frequency & 0.78 & 0.76 & 0.62 & 0.42 & 0.30 & 0.74 \\
+default & Unigrams & 16165 & Presence & 0.58 & 0.54 & 0.38 & 0.25 & 0.14 & 0.72 \\
+default & Bigrams & 16165 & Presence & 0.62 & 0.58 & 0.48 & 0.30 & 0.29 & 0.67 \\
+position & Unigrams & 16165 & Presence & 0.42 & 0.39 & 0.27 & 0.39 & 0.42 & 0.50 \\
+partofspeech & Unigrams & 16165 & Presence & 0.52 & 0.48 & 0.31 & 0.21 & 0.01 & 0.75 \\
+adjectives & Unigrams & 16165 & Presence & 0.71 & 0.71 & 0.61 & 0.46 & 0.37 & 0.67 \\
+verbs & Unigrams & 16165 & Presence & 0.45 & 0.45 & 0.42 & 0.38 & 0.32 & 0.57 \\
+default & Unigrams & maximum & Presence & - & - & - & - & - & - \\
+position & Unigrams & maximum & Presence & - & - & - & - & - & - \\
+partofspeech & Unigrams & maximum & Presence & - & - & - & - & - & - \\
+adjectives & Unigrams & maximum & Presence & 0.71 & 0.71 & 0.61 & 0.46 & 0.37 & 0.67 \\
+verbs & Unigrams & maximum & Presence & 0.45 & 0.45 & 0.42 & 0.38 & 0.32 & 0.57 \\
+\hline
+\end{tabular}
+
+\end{figure*}%
+\lthtmlfigureZ
+\lthtmlcheckvsize\clearpage}
+
+
+\end{document}
View
BIN egpaper_final/img1.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img10.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img11.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img12.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img13.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img14.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img15.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img16.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img17.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img18.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img19.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img2.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img20.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img21.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img22.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img23.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img24.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img25.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img26.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img27.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img28.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img29.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img3.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img4.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img5.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View
BIN egpaper_final/img6.png
Diff not rendered.
View
BIN egpaper_final/img7.png
Diff not rendered.
View
BIN egpaper_final/img8.png
Diff not rendered.
View
BIN egpaper_final/img9.png
Diff not rendered.
View
126 egpaper_final/index.html
@@ -0,0 +1,126 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+
+<!--Converted with LaTeX2HTML 2008 (1.71)
+original version by: Nikos Drakos, CBLU, University of Leeds
+* revised and updated by: Marcus Hennecke, Ross Moore, Herb Swan
+* with significant contributions from:
+ Jens Lippmann, Marek Rouchal, Martin Wilck and others -->
+<HTML>
+<HEAD>
+<TITLE>Sentiment Classification using Machine Learning Techniques</TITLE>
+<META NAME="description" CONTENT="Sentiment Classification using Machine Learning Techniques">
+<META NAME="keywords" CONTENT="egpaper_final">
+<META NAME="resource-type" CONTENT="document">
+<META NAME="distribution" CONTENT="global">
+
+<META NAME="Generator" CONTENT="LaTeX2HTML v2008">
+<META HTTP-EQUIV="Content-Style-Type" CONTENT="text/css">
+
+<LINK REL="STYLESHEET" HREF="egpaper_final.css">
+
+<LINK REL="next" HREF="node1.html">
+</HEAD>
+
+<BODY >
+
+<DIV CLASS="navigation"><!--Navigation Panel-->
+<A NAME="tex2html6"
+ HREF="node1.html">
+<IMG WIDTH="37" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="next"
+ SRC="/usr/share/latex2html/icons/next.png"></A>
+<IMG WIDTH="26" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="up"
+ SRC="/usr/share/latex2html/icons/up_g.png">
+<IMG WIDTH="63" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="previous"
+ SRC="/usr/share/latex2html/icons/prev_g.png">
+<BR>
+<B> Next:</B> <A NAME="tex2html7"
+ HREF="node1.html">Introduction</A>
+<BR>
+<BR></DIV>
+<!--End of Navigation Panel-->
+
+<P>
+
+<P>
+
+<P>
+<H1 ALIGN=CENTER>Sentiment Classification using Machine Learning Techniques</H1>
+<P ALIGN=CENTER><STRONG>Pranjal Vachaspati</STRONG>
+<BR><I><TT><SMALL CLASS="SMALL">pranjal@mit.edu</SMALL></TT></I>
+</P><P ALIGN=CENTER><STRONG>Cathy Wu</STRONG>
+<BR><I><TT><SMALL CLASS="SMALL">cathywu@mit.edu</SMALL></TT> </I>
+</P>
+<HR>
+
+<P>
+
+<H3>Abstract:</H3>
+<DIV CLASS="ABSTRACT">
+We implement a series of classifiers (Naive Bayes, Maximum Entropy, and SVM) to distinguish positive and negative sentiment in critic and user reviews. We apply various processing methods, including negation tagging, part-of-speech tagging, and position tagging to achieve maximum accuracy. We test our classifiers on an external dataset to see how well they generalize. Finally, we use a majority-voting technique to combine classifiers and achieve accuracy of close to 90% in 3-fold cross-validation, far outperforming Pang's 2002 work [<A
+ HREF="node20.html#Pang">7</A>].
+</DIV>
+<P>
+
+<P>
+<BR><HR>
+<!--Table of Child-Links-->
+<A NAME="CHILD_LINKS"></A>
+
+<UL CLASS="ChildLinks">
+<LI><A NAME="tex2html8"
+ HREF="node1.html">Introduction</A>
+<LI><A NAME="tex2html9"
+ HREF="node2.html">Previous Work</A>
+<LI><A NAME="tex2html10"
+ HREF="node3.html">The User Review Domain</A>
+<LI><A NAME="tex2html11"
+ HREF="node4.html">Machine Learning Methods</A>
+<UL>
+<LI><A NAME="tex2html12"
+ HREF="node5.html">The Naive Bayes Classifier</A>
+<LI><A NAME="tex2html13"
+ HREF="node6.html">The Maximum Entropy Classifier</A>
+<LI><A NAME="tex2html14"
+ HREF="node7.html">The Support Vector Machine Classifier</A>
+</UL>
+<BR>
+<LI><A NAME="tex2html15"
+ HREF="node8.html">Experimental Setup</A>
+<LI><A NAME="tex2html16"
+ HREF="node9.html">Results</A>
+<UL>
+<LI><A NAME="tex2html17"
+ HREF="node10.html">Feature Counting Method</A>
+<LI><A NAME="tex2html18"
+ HREF="node11.html">Conditional Independence Assumption</A>
+<LI><A NAME="tex2html19"
+ HREF="node12.html">Number of Features</A>
+<LI><A NAME="tex2html20"
+ HREF="node13.html">Negation Tagging</A>
+<LI><A NAME="tex2html21"
+ HREF="node14.html">Position Tagging</A>
+<LI><A NAME="tex2html22"
+ HREF="node15.html">Part of Speech Tagging</A>
+<LI><A NAME="tex2html23"
+ HREF="node16.html">Adjectives</A>
+<LI><A NAME="tex2html24"
+ HREF="node17.html">Verbs</A>
+<LI><A NAME="tex2html25"
+ HREF="node18.html">Majority Voting</A>
+<LI><A NAME="tex2html26"
+ HREF="node19.html">Neighboring Domain Data</A>
+</UL>
+<BR>
+<LI><A NAME="tex2html27"
+ HREF="node20.html">Bibliography</A>
+<LI><A NAME="tex2html28"
+ HREF="node21.html">About this document ...</A>
+</UL>
+<!--End of Table of Child-Links-->
+<BR><HR>
+<ADDRESS>
+Pranjal Vachaspati
+2012-02-05
+</ADDRESS>
+</BODY>
+</HTML>
View
42 egpaper_final/internals.pl
@@ -0,0 +1,42 @@
+# LaTeX2HTML 2008 (1.71)
+# Associate internals original text with physical files.
+
+
+$key = q/cite_PyML/;
+$ref_files{$key} = "$dir".q|node20.html|;
+$noresave{$key} = "$nosave";
+
+$key = q/cite_Le/;
+$ref_files{$key} = "$dir".q|node20.html|;
+$noresave{$key} = "$nosave";
+
+$key = q/cite_Liu/;
+$ref_files{$key} = "$dir".q|node20.html|;
+$noresave{$key} = "$nosave";
+
+$key = q/cite_Pang/;
+$ref_files{$key} = "$dir".q|node20.html|;
+$noresave{$key} = "$nosave";
+
+$key = q/cite_Jaynes/;
+$ref_files{$key} = "$dir".q|node20.html|;
+$noresave{$key} = "$nosave";
+
+$key = q/cite_Zhang/;
+$ref_files{$key} = "$dir".q|node20.html|;
+$noresave{$key} = "$nosave";
+
+$key = q/cite_Manning/;
+$ref_files{$key} = "$dir".q|node20.html|;
+$noresave{$key} = "$nosave";
+
+$key = q/cite_qtag/;
+$ref_files{$key} = "$dir".q|node20.html|;
+$noresave{$key} = "$nosave";
+
+$key = q/cite_yelp/;
+$ref_files{$key} = "$dir".q|node20.html|;
+$noresave{$key} = "$nosave";
+
+1;
+
View
49 egpaper_final/labels.pl
@@ -0,0 +1,49 @@
+# LaTeX2HTML 2008 (1.71)
+# Associate labels original text with physical files.
+
+
+$key = q/cite_PyML/;
+$external_labels{$key} = "$URL/" . q|node20.html|;
+$noresave{$key} = "$nosave";
+
+$key = q/cite_Le/;
+$external_labels{$key} = "$URL/" . q|node20.html|;
+$noresave{$key} = "$nosave";
+
+$key = q/cite_Liu/;
+$external_labels{$key} = "$URL/" . q|node20.html|;
+$noresave{$key} = "$nosave";
+
+$key = q/cite_Pang/;
+$external_labels{$key} = "$URL/" . q|node20.html|;
+$noresave{$key} = "$nosave";
+
+$key = q/cite_Jaynes/;
+$external_labels{$key} = "$URL/" . q|node20.html|;
+$noresave{$key} = "$nosave";
+
+$key = q/cite_Zhang/;
+$external_labels{$key} = "$URL/" . q|node20.html|;
+$noresave{$key} = "$nosave";
+
+$key = q/cite_Manning/;
+$external_labels{$key} = "$URL/" . q|node20.html|;
+$noresave{$key} = "$nosave";
+
+$key = q/cite_qtag/;
+$external_labels{$key} = "$URL/" . q|node20.html|;
+$noresave{$key} = "$nosave";
+
+$key = q/cite_yelp/;
+$external_labels{$key} = "$URL/" . q|node20.html|;
+$noresave{$key} = "$nosave";
+
+1;
+
+
+# LaTeX2HTML 2008 (1.71)
+# labels from external_latex_labels array.
+
+
+1;
+
View
70 egpaper_final/node1.html
@@ -0,0 +1,70 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+
+<!--Converted with LaTeX2HTML 2008 (1.71)
+original version by: Nikos Drakos, CBLU, University of Leeds
+* revised and updated by: Marcus Hennecke, Ross Moore, Herb Swan
+* with significant contributions from:
+ Jens Lippmann, Marek Rouchal, Martin Wilck and others -->
+<HTML>
+<HEAD>
+<TITLE>Introduction</TITLE>
+<META NAME="description" CONTENT="Introduction">
+<META NAME="keywords" CONTENT="egpaper_final">
+<META NAME="resource-type" CONTENT="document">
+<META NAME="distribution" CONTENT="global">
+
+<META NAME="Generator" CONTENT="LaTeX2HTML v2008">
+<META HTTP-EQUIV="Content-Style-Type" CONTENT="text/css">
+
+<LINK REL="STYLESHEET" HREF="egpaper_final.css">
+
+<LINK REL="next" HREF="node2.html">
+<LINK REL="previous" HREF="egpaper_final.html">
+<LINK REL="up" HREF="egpaper_final.html">
+<LINK REL="next" HREF="node2.html">
+</HEAD>
+
+<BODY >
+
+<DIV CLASS="navigation"><!--Navigation Panel-->
+<A NAME="tex2html37"
+ HREF="node2.html">
+<IMG WIDTH="37" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="next"
+ SRC="/usr/share/latex2html/icons/next.png"></A>
+<A NAME="tex2html35"
+ HREF="egpaper_final.html">
+<IMG WIDTH="26" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="up"
+ SRC="/usr/share/latex2html/icons/up.png"></A>
+<A NAME="tex2html29"
+ HREF="egpaper_final.html">
+<IMG WIDTH="63" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="previous"
+ SRC="/usr/share/latex2html/icons/prev.png"></A>
+<BR>
+<B> Next:</B> <A NAME="tex2html38"
+ HREF="node2.html">Previous Work</A>
+<B> Up:</B> <A NAME="tex2html36"
+ HREF="egpaper_final.html">Sentiment Classification using Machine</A>
+<B> Previous:</B> <A NAME="tex2html30"
+ HREF="egpaper_final.html">Sentiment Classification using Machine</A>
+<BR>
+<BR></DIV>
+<!--End of Navigation Panel-->
+
+<H1><A NAME="SECTION00010000000000000000">
+Introduction</A>
+</H1>
+
+<P>
+Sentiment analysis, broadly speaking, is the set of techniques that allows detection of emotional content in text. This has a variety of applications: it is commonly used by trading algorithms to process news articles, as well as by corporations to better respond to consumer service needs. Similar techniques can also be applied to other text analysis problems, like spam filtering.
+
+<P>
+The source code described in this paper is available at https://github.com/cathywu/Sentiment-Analysis.
+
+<P>
+<BR><HR>
+<ADDRESS>
+Pranjal Vachaspati
+2012-02-05
+</ADDRESS>
+</BODY>
+</HTML>
View
93 egpaper_final/node10.html
@@ -0,0 +1,93 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+
+<!--Converted with LaTeX2HTML 2008 (1.71)
+original version by: Nikos Drakos, CBLU, University of Leeds
+* revised and updated by: Marcus Hennecke, Ross Moore, Herb Swan
+* with significant contributions from:
+ Jens Lippmann, Marek Rouchal, Martin Wilck and others -->
+<HTML>
+<HEAD>
+<TITLE>Feature Counting Method</TITLE>
+<META NAME="description" CONTENT="Feature Counting Method">
+<META NAME="keywords" CONTENT="egpaper_final">
+<META NAME="resource-type" CONTENT="document">
+<META NAME="distribution" CONTENT="global">
+
+<META NAME="Generator" CONTENT="LaTeX2HTML v2008">
+<META HTTP-EQUIV="Content-Style-Type" CONTENT="text/css">
+
+<LINK REL="STYLESHEET" HREF="egpaper_final.css">
+
+<LINK REL="next" HREF="node11.html">
+<LINK REL="previous" HREF="node9.html">
+<LINK REL="up" HREF="node9.html">
+<LINK REL="next" HREF="node11.html">
+</HEAD>
+
+<BODY >
+
+<DIV CLASS="navigation"><!--Navigation Panel-->
+<A NAME="tex2html138"
+ HREF="node11.html">
+<IMG WIDTH="37" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="next"
+ SRC="/usr/share/latex2html/icons/next.png"></A>
+<A NAME="tex2html136"
+ HREF="node9.html">
+<IMG WIDTH="26" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="up"
+ SRC="/usr/share/latex2html/icons/up.png"></A>
+<A NAME="tex2html130"
+ HREF="node9.html">
+<IMG WIDTH="63" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="previous"
+ SRC="/usr/share/latex2html/icons/prev.png"></A>
+<BR>
+<B> Next:</B> <A NAME="tex2html139"
+ HREF="node11.html">Conditional Independence Assumption</A>
+<B> Up:</B> <A NAME="tex2html137"
+ HREF="node9.html">Results</A>
+<B> Previous:</B> <A NAME="tex2html131"
+ HREF="node9.html">Results</A>
+<BR>
+<BR></DIV>
+<!--End of Navigation Panel-->
+
+<H2><A NAME="SECTION00061000000000000000">
+Feature Counting Method</A>
+</H2>
+There are several ways to construct a probability model for a set of document n-grams. The most obvious is to use feature frequency. The value of a feature in a given document is simply the number of times it appears in that document. Presence, on the other hand, attributes a value of 1 if a feature exists in a document and 0 otherwise.
+
+<P>
+As a whole (across all other parameters), training on presence rather than frequency performed on average 5.5% better for Naive Bayes (from 73.1% accuracy with frequency to 78.5% accuracy with presence), with improvements ranging from 0% to 10% and no particular outliers in other test configurations. There was no significant difference for SVMs, and applying TF-IDF did not provide any improvement over using frequency for either. Neither of these comparisons applies to Maximum Entropy.
+
+<P>
+Interestingly, for Naive Bayes, the positive and negative tests performed very differently between presence and frequency tests. Excluding verb tests, which did not exhibit this disparity, positive tests averaged 6.5% worse (up to 12% worse in the worst case) on presence tests while negative tests averaged 18.9% better (up to 30% better). There was an average aggregate difference of 25.4% between positive and negative results. By comparison, SVMs exhibited an average aggregate difference of 0.7%. These results provide evidence that training on presence rather than frequency yields models with less bias.
+
+<P>
+
+<DIV CLASS="navigation"><HR>
+<!--Navigation Panel-->
+<A NAME="tex2html138"
+ HREF="node11.html">
+<IMG WIDTH="37" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="next"
+ SRC="/usr/share/latex2html/icons/next.png"></A>
+<A NAME="tex2html136"
+ HREF="node9.html">
+<IMG WIDTH="26" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="up"
+ SRC="/usr/share/latex2html/icons/up.png"></A>
+<A NAME="tex2html130"
+ HREF="node9.html">
+<IMG WIDTH="63" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="previous"
+ SRC="/usr/share/latex2html/icons/prev.png"></A>
+<BR>
+<B> Next:</B> <A NAME="tex2html139"
+ HREF="node11.html">Conditional Independence Assumption</A>
+<B> Up:</B> <A NAME="tex2html137"
+ HREF="node9.html">Results</A>
+<B> Previous:</B> <A NAME="tex2html131"
+ HREF="node9.html">Results</A></DIV>
+<!--End of Navigation Panel-->
+<ADDRESS>
+Pranjal Vachaspati
+2012-02-05
+</ADDRESS>
+</BODY>
+</HTML>
View
71 egpaper_final/node11.html
@@ -0,0 +1,71 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+
+<!--Converted with LaTeX2HTML 2008 (1.71)
+original version by: Nikos Drakos, CBLU, University of Leeds
+* revised and updated by: Marcus Hennecke, Ross Moore, Herb Swan
+* with significant contributions from:
+ Jens Lippmann, Marek Rouchal, Martin Wilck and others -->
+<HTML>
+<HEAD>
+<TITLE>Conditional Independence Assumption</TITLE>
+<META NAME="description" CONTENT="Conditional Independence Assumption">
+<META NAME="keywords" CONTENT="egpaper_final">
+<META NAME="resource-type" CONTENT="document">
+<META NAME="distribution" CONTENT="global">
+
+<META NAME="Generator" CONTENT="LaTeX2HTML v2008">
+<META HTTP-EQUIV="Content-Style-Type" CONTENT="text/css">
+
+<LINK REL="STYLESHEET" HREF="egpaper_final.css">
+
+<LINK REL="next" HREF="node12.html">
+<LINK REL="previous" HREF="node10.html">
+<LINK REL="up" HREF="node9.html">
+<LINK REL="next" HREF="node12.html">
+</HEAD>
+
+<BODY >
+
+<DIV CLASS="navigation"><!--Navigation Panel-->
+<A NAME="tex2html148"
+ HREF="node12.html">
+<IMG WIDTH="37" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="next"
+ SRC="/usr/share/latex2html/icons/next.png"></A>
+<A NAME="tex2html146"
+ HREF="node9.html">
+<IMG WIDTH="26" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="up"
+ SRC="/usr/share/latex2html/icons/up.png"></A>
+<A NAME="tex2html140"
+ HREF="node10.html">
+<IMG WIDTH="63" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="previous"
+ SRC="/usr/share/latex2html/icons/prev.png"></A>
+<BR>
+<B> Next:</B> <A NAME="tex2html149"
+ HREF="node12.html">Number of Features</A>
+<B> Up:</B> <A NAME="tex2html147"
+ HREF="node9.html">Results</A>
+<B> Previous:</B> <A NAME="tex2html141"
+ HREF="node10.html">Feature Counting Method</A>
+<BR>
+<BR></DIV>
+<!--End of Navigation Panel-->
+
+<H2><A NAME="SECTION00062000000000000000">
+Conditional Independence Assumption</A>
+</H2>
+
+<P>
+The Bayes classifier depends on a conditional independence assumption, meaning that the model assumes that, given the class, the probability of a given word is independent of the other words. Clearly, this assumption does not hold. Nevertheless, the Bayes classifier functions well, in part because the positive and negative correlations between features tend to cancel each other out [<A
+ HREF="node20.html#Zhang">9</A>].
+
+<P>
+We found a huge difference between the results of Naive Bayes and Maximum Entropy for positive testing accuracy and negative testing accuracy. Maximum Entropy, which makes no unfounded assumptions about the data, gave very similar results for positive tests and negative tests, with a 0.2% difference on average. On the other hand, positive and negative results from Naive Bayes, which assumes conditional independence, vary by 27.5% on average, with the worst cases occurring on test configurations using frequency, which averaged a 40% difference. These disparities suggest that the movie dataset does not satisfy the conditional independence assumption.
+
+<P>
+<BR><HR>
+<ADDRESS>
+Pranjal Vachaspati
+2012-02-05
+</ADDRESS>
+</BODY>
+</HTML>
View
98 egpaper_final/node12.html
@@ -0,0 +1,98 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+
+<!--Converted with LaTeX2HTML 2008 (1.71)
+original version by: Nikos Drakos, CBLU, University of Leeds
+* revised and updated by: Marcus Hennecke, Ross Moore, Herb Swan
+* with significant contributions from:
+ Jens Lippmann, Marek Rouchal, Martin Wilck and others -->
+<HTML>
+<HEAD>
+<TITLE>Number of Features</TITLE>
+<META NAME="description" CONTENT="Number of Features">
+<META NAME="keywords" CONTENT="egpaper_final">
+<META NAME="resource-type" CONTENT="document">
+<META NAME="distribution" CONTENT="global">
+
+<META NAME="Generator" CONTENT="LaTeX2HTML v2008">
+<META HTTP-EQUIV="Content-Style-Type" CONTENT="text/css">
+
+<LINK REL="STYLESHEET" HREF="egpaper_final.css">
+
+<LINK REL="next" HREF="node13.html">
+<LINK REL="previous" HREF="node11.html">
+<LINK REL="up" HREF="node9.html">
+<LINK REL="next" HREF="node13.html">
+</HEAD>
+
+<BODY >
+
+<DIV CLASS="navigation"><!--Navigation Panel-->
+<A NAME="tex2html158"
+ HREF="node13.html">
+<IMG WIDTH="37" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="next"
+ SRC="/usr/share/latex2html/icons/next.png"></A>
+<A NAME="tex2html156"
+ HREF="node9.html">
+<IMG WIDTH="26" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="up"
+ SRC="/usr/share/latex2html/icons/up.png"></A>
+<A NAME="tex2html150"
+ HREF="node11.html">
+<IMG WIDTH="63" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="previous"
+ SRC="/usr/share/latex2html/icons/prev.png"></A>
+<BR>
+<B> Next:</B> <A NAME="tex2html159"
+ HREF="node13.html">Negation Tagging</A>
+<B> Up:</B> <A NAME="tex2html157"
+ HREF="node9.html">Results</A>
+<B> Previous:</B> <A NAME="tex2html151"
+ HREF="node11.html">Conditional Independence Assumption</A>
+<BR>
+<BR></DIV>
+<!--End of Navigation Panel-->
+
+<H2><A NAME="SECTION00063000000000000000">
+Number of Features</A>
+</H2>
+
+<P>
+One key decision in a bag-of-words feature set is which words to include. Using more words provides more information, but harms the performance of the classifiers, and words that appear only infrequently in the training data may not present accurate information due to the law of small numbers. We examine results with the entire training data, as well as with only the top 16165 and 2633 unigrams and bigrams.
+
+<P>
+Using the most frequent unigrams is an extremely simple method of feature selection, and in this case, not a particularly robust one, since feature selection should look for words that identify a given class. Choosing frequent words does not discriminate between the two classes and will select common words like ``the'' and ``it'', which likely are weak sentiment indicators. On the other hand, uncommon words that appear in only a handful of reviews or fewer will not contribute much to sentiment indication. Pang's motivation for limiting the number of features was to improve testing performance, but our classifiers and processors were fast enough that this was not particularly noticeable.
+
+<P>
+On average, limiting the number of features from 16165 to 2633, as in the original Pang paper, caused accuracy to drop by 5.2%, 4.0%, and 2.8% for Naive Bayes, Maximum Entropy, and SVM, respectively. These results indicate that valuable sentiment information was lost in the restriction of features.
+
+<P>
+However, when restricting from all features down to 16165, the results were a wash. Naive Bayes did slightly worse, Maximum Entropy remained unchanged, and SVMs did slightly better. These results suggest that uncommon features do not carry much sentiment information. Additionally, this validated Pang's use of limited features, as they did not significantly impact the results but satisfied their performance constraints.
+
+<P>
+
+<DIV CLASS="navigation"><HR>
+<!--Navigation Panel-->
+<A NAME="tex2html158"
+ HREF="node13.html">
+<IMG WIDTH="37" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="next"
+ SRC="/usr/share/latex2html/icons/next.png"></A>
+<A NAME="tex2html156"
+ HREF="node9.html">
+<IMG WIDTH="26" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="up"
+ SRC="/usr/share/latex2html/icons/up.png"></A>
+<A NAME="tex2html150"
+ HREF="node11.html">
+<IMG WIDTH="63" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="previous"
+ SRC="/usr/share/latex2html/icons/prev.png"></A>
+<BR>
+<B> Next:</B> <A NAME="tex2html159"
+ HREF="node13.html">Negation Tagging</A>
+<B> Up:</B> <A NAME="tex2html157"
+ HREF="node9.html">Results</A>
+<B> Previous:</B> <A NAME="tex2html151"
+ HREF="node11.html">Conditional Independence Assumption</A></DIV>
+<!--End of Navigation Panel-->
+<ADDRESS>
+Pranjal Vachaspati
+2012-02-05
+</ADDRESS>
+</BODY>
+</HTML>
View
95 egpaper_final/node13.html
@@ -0,0 +1,95 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+
+<!--Converted with LaTeX2HTML 2008 (1.71)
+original version by: Nikos Drakos, CBLU, University of Leeds
+* revised and updated by: Marcus Hennecke, Ross Moore, Herb Swan
+* with significant contributions from:
+ Jens Lippmann, Marek Rouchal, Martin Wilck and others -->
+<HTML>
+<HEAD>
+<TITLE>Negation Tagging</TITLE>
+<META NAME="description" CONTENT="Negation Tagging">
+<META NAME="keywords" CONTENT="egpaper_final">
+<META NAME="resource-type" CONTENT="document">
+<META NAME="distribution" CONTENT="global">
+
+<META NAME="Generator" CONTENT="LaTeX2HTML v2008">
+<META HTTP-EQUIV="Content-Style-Type" CONTENT="text/css">
+
+<LINK REL="STYLESHEET" HREF="egpaper_final.css">
+
+<LINK REL="next" HREF="node14.html">
+<LINK REL="previous" HREF="node12.html">
+<LINK REL="up" HREF="node9.html">
+<LINK REL="next" HREF="node14.html">
+</HEAD>
+
+<BODY >
+
+<DIV CLASS="navigation"><!--Navigation Panel-->
+<A NAME="tex2html168"
+ HREF="node14.html">
+<IMG WIDTH="37" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="next"
+ SRC="/usr/share/latex2html/icons/next.png"></A>
+<A NAME="tex2html166"
+ HREF="node9.html">
+<IMG WIDTH="26" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="up"
+ SRC="/usr/share/latex2html/icons/up.png"></A>
+<A NAME="tex2html160"
+ HREF="node12.html">
+<IMG WIDTH="63" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="previous"
+ SRC="/usr/share/latex2html/icons/prev.png"></A>
+<BR>
+<B> Next:</B> <A NAME="tex2html169"
+ HREF="node14.html">Position Tagging</A>
+<B> Up:</B> <A NAME="tex2html167"
+ HREF="node9.html">Results</A>
+<B> Previous:</B> <A NAME="tex2html161"
+ HREF="node12.html">Number of Features</A>
+<BR>
+<BR></DIV>
+<!--End of Navigation Panel-->
+
+<H2><A NAME="SECTION00064000000000000000">
+Negation Tagging</A>
+</H2>
+
+<P>
+In an effort to preserve the potential value of negation information while using dead-simple features, we tagged words between those expressing negation and the next punctuation mark with the postfix ``_NOT''. This distinguishes sentences like ``That movie was very good'' and ``That movie was not very good.'' Diverging from Pang, we also added negation tags to bigrams.
+
+<P>
+Negation tagging did not appear to have a significant effect on the data. For all the classifiers, the results from negation tagged data were almost the same as the results from the raw data. Nevertheless, we used negation tagging for the remainder of the tests, as it did not seem to hurt performance or accuracy.
+
+<P>
+The ineffectiveness of negation tagging probably comes from a few sources. First, it increases the number of uncommon features, which, as discussed previously, harms effectiveness and cancels out the increase in semantic awareness. Second, the presence of a ``not'' does not always indicate negation. Rather, it is often used idiomatically, as in the example fragment ``with his distinctive, more often than not ingenious dialogue''. Finally, the method of tagging all words up to the next punctuation mark is suspect. Only a few words after the ``not'' are actually semantically negated, and these often occur after a comma or other punctuation mark.
+
+<P>
+
+<DIV CLASS="navigation"><HR>
+<!--Navigation Panel-->
+<A NAME="tex2html168"
+ HREF="node14.html">
+<IMG WIDTH="37" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="next"
+ SRC="/usr/share/latex2html/icons/next.png"></A>
+<A NAME="tex2html166"
+ HREF="node9.html">
+<IMG WIDTH="26" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="up"
+ SRC="/usr/share/latex2html/icons/up.png"></A>
+<A NAME="tex2html160"
+ HREF="node12.html">
+<IMG WIDTH="63" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="previous"
+ SRC="/usr/share/latex2html/icons/prev.png"></A>
+<BR>
+<B> Next:</B> <A NAME="tex2html169"
+ HREF="node14.html">Position Tagging</A>
+<B> Up:</B> <A NAME="tex2html167"
+ HREF="node9.html">Results</A>
+<B> Previous:</B> <A NAME="tex2html161"
+ HREF="node12.html">Number of Features</A></DIV>
+<!--End of Navigation Panel-->
+<ADDRESS>
+Pranjal Vachaspati
+2012-02-05
+</ADDRESS>
+</BODY>
+</HTML>
View
68 egpaper_final/node14.html
@@ -0,0 +1,68 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+
+<!--Converted with LaTeX2HTML 2008 (1.71)
+original version by: Nikos Drakos, CBLU, University of Leeds
+* revised and updated by: Marcus Hennecke, Ross Moore, Herb Swan
+* with significant contributions from:
+ Jens Lippmann, Marek Rouchal, Martin Wilck and others -->
+<HTML>
+<HEAD>
+<TITLE>Position Tagging</TITLE>
+<META NAME="description" CONTENT="Position Tagging">
+<META NAME="keywords" CONTENT="egpaper_final">
+<META NAME="resource-type" CONTENT="document">
+<META NAME="distribution" CONTENT="global">
+
+<META NAME="Generator" CONTENT="LaTeX2HTML v2008">
+<META HTTP-EQUIV="Content-Style-Type" CONTENT="text/css">
+
+<LINK REL="STYLESHEET" HREF="egpaper_final.css">
+
+<LINK REL="next" HREF="node15.html">
+<LINK REL="previous" HREF="node13.html">
+<LINK REL="up" HREF="node9.html">
+<LINK REL="next" HREF="node15.html">
+</HEAD>
+
+<BODY >
+
+<DIV CLASS="navigation"><!--Navigation Panel-->
+<A NAME="tex2html178"
+ HREF="node15.html">
+<IMG WIDTH="37" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="next"
+ SRC="/usr/share/latex2html/icons/next.png"></A>
+<A NAME="tex2html176"
+ HREF="node9.html">
+<IMG WIDTH="26" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="up"
+ SRC="/usr/share/latex2html/icons/up.png"></A>
+<A NAME="tex2html170"
+ HREF="node13.html">
+<IMG WIDTH="63" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="previous"
+ SRC="/usr/share/latex2html/icons/prev.png"></A>
+<BR>
+<B> Next:</B> <A NAME="tex2html179"
+ HREF="node15.html">Part of Speech Tagging</A>
+<B> Up:</B> <A NAME="tex2html177"
+ HREF="node9.html">Results</A>
+<B> Previous:</B> <A NAME="tex2html171"
+ HREF="node13.html">Negation Tagging</A>
+<BR>
+<BR></DIV>
+<!--End of Navigation Panel-->
+
+<H2><A NAME="SECTION00065000000000000000">
+Position Tagging</A>
+</H2>
+Reviews typically consist of a beginning, middle, and end, so to see whether one section carries more sentiment than another, we divided each review into a first quarter, a middle half, and a last quarter and tagged the words in each section.
+
+<P>
+Position tagging was not helpful. For bigrams, it harmed performance by around 5% in most cases, and for unigrams, it was not helpful. If reviews end up not actually following the model specified or if the model has no bearing on where the relevant data is, position tagging will be harmful because it increases the dimensionality of the input without increasing the information content. We suspect that is the case here.
+
+<P>
+<BR><HR>
+<ADDRESS>
+Pranjal Vachaspati
+2012-02-05
+</ADDRESS>
+</BODY>
+</HTML>
View
71 egpaper_final/node15.html
@@ -0,0 +1,71 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+
+<!--Converted with LaTeX2HTML 2008 (1.71)
+original version by: Nikos Drakos, CBLU, University of Leeds
+* revised and updated by: Marcus Hennecke, Ross Moore, Herb Swan
+* with significant contributions from:
+ Jens Lippmann, Marek Rouchal, Martin Wilck and others -->
+<HTML>
+<HEAD>
+<TITLE>Part of Speech Tagging</TITLE>
+<META NAME="description" CONTENT="Part of Speech Tagging">
+<META NAME="keywords" CONTENT="egpaper_final">
+<META NAME="resource-type" CONTENT="document">
+<META NAME="distribution" CONTENT="global">
+
+<META NAME="Generator" CONTENT="LaTeX2HTML v2008">
+<META HTTP-EQUIV="Content-Style-Type" CONTENT="text/css">
+
+<LINK REL="STYLESHEET" HREF="egpaper_final.css">
+
+<LINK REL="next" HREF="node16.html">
+<LINK REL="previous" HREF="node14.html">
+<LINK REL="up" HREF="node9.html">
+<LINK REL="next" HREF="node16.html">
+</HEAD>
+
+<BODY >
+
+<DIV CLASS="navigation"><!--Navigation Panel-->
+<A NAME="tex2html188"
+ HREF="node16.html">
+<IMG WIDTH="37" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="next"
+ SRC="/usr/share/latex2html/icons/next.png"></A>
+<A NAME="tex2html186"
+ HREF="node9.html">
+<IMG WIDTH="26" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="up"
+ SRC="/usr/share/latex2html/icons/up.png"></A>
+<A NAME="tex2html180"
+ HREF="node14.html">
+<IMG WIDTH="63" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="previous"
+ SRC="/usr/share/latex2html/icons/prev.png"></A>
+<BR>
+<B> Next:</B> <A NAME="tex2html189"
+ HREF="node16.html">Adjectives</A>
+<B> Up:</B> <A NAME="tex2html187"
+ HREF="node9.html">Results</A>
+<B> Previous:</B> <A NAME="tex2html181"
+ HREF="node14.html">Position Tagging</A>
+<BR>
+<BR></DIV>
+<!--End of Navigation Panel-->
+
+<H2><A NAME="SECTION00066000000000000000">
+Part of Speech Tagging</A>
+</H2>
+
+<P>
+We appended POS tags to every word using Oliver Mason's Qtag program [<A
+ HREF="node20.html#qtag">6</A>]. This serves as a rough way to disambiguate words that may hold different meanings in different contexts. For example, it would distinguish the different uses of ``love'' in ``I love this movie'' versus ``This is a love story.'' However, it turns out that word disambiguation is a much more complicated problem, as POS says nothing to distinguish between the meanings of ``cold'' in ``I was a bit cold during the movie'' and ``The cold murderer chilled my heart.''
+
+<P>
+Part of speech tagging was not very helpful for unigram results; in fact, the NB classifier did slightly worse with parts of speech tagged when using unigrams. However, when using bigrams, the MaxEnt and SVM classifiers did significantly better, achieving 3-4% better accuracy with part of speech tagging when measuring frequency and presence information.
+
+<P>
+<BR><HR>
+<ADDRESS>
+Pranjal Vachaspati
+2012-02-05
+</ADDRESS>
+</BODY>
+</HTML>
View
65 egpaper_final/node16.html
@@ -0,0 +1,65 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+
+<!--Converted with LaTeX2HTML 2008 (1.71)
+original version by: Nikos Drakos, CBLU, University of Leeds
+* revised and updated by: Marcus Hennecke, Ross Moore, Herb Swan
+* with significant contributions from:
+ Jens Lippmann, Marek Rouchal, Martin Wilck and others -->
+<HTML>
+<HEAD>
+<TITLE>Adjectives</TITLE>
+<META NAME="description" CONTENT="Adjectives">
+<META NAME="keywords" CONTENT="egpaper_final">
+<META NAME="resource-type" CONTENT="document">
+<META NAME="distribution" CONTENT="global">
+
+<META NAME="Generator" CONTENT="LaTeX2HTML v2008">
+<META HTTP-EQUIV="Content-Style-Type" CONTENT="text/css">
+
+<LINK REL="STYLESHEET" HREF="egpaper_final.css">
+
+<LINK REL="next" HREF="node17.html">
+<LINK REL="previous" HREF="node15.html">
+<LINK REL="up" HREF="node9.html">
+<LINK REL="next" HREF="node17.html">
+</HEAD>
+
+<BODY >
+
+<DIV CLASS="navigation"><!--Navigation Panel-->
+<A NAME="tex2html198"
+ HREF="node17.html">
+<IMG WIDTH="37" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="next"
+ SRC="/usr/share/latex2html/icons/next.png"></A>
+<A NAME="tex2html196"
+ HREF="node9.html">
+<IMG WIDTH="26" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="up"
+ SRC="/usr/share/latex2html/icons/up.png"></A>
+<A NAME="tex2html190"
+ HREF="node15.html">
+<IMG WIDTH="63" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="previous"
+ SRC="/usr/share/latex2html/icons/prev.png"></A>
+<BR>
+<B> Next:</B> <A NAME="tex2html199"
+ HREF="node17.html">Verbs</A>
+<B> Up:</B> <A NAME="tex2html197"
+ HREF="node9.html">Results</A>
+<B> Previous:</B> <A NAME="tex2html191"
+ HREF="node15.html">Part of Speech Tagging</A>
+<BR>
+<BR></DIV>
+<!--End of Navigation Panel-->
+
+<H2><A NAME="SECTION00067000000000000000">
+Adjectives</A>
+</H2>
+Intuitively, adjectives like ``beautiful'', ``wonderful'', and ``great'' hold valuable sentiment information, so we trained our classifiers after filtering the reviews to keep only the adjectives. On average, adjective tests performed about 6% worse than their unfiltered negation-tagged counterparts, with no notable difference between the three classifiers. These results suggest that the limited information conveyed in adjectives is not representative of the full review itself.
+
+<P>
+<BR><HR>
+<ADDRESS>
+Pranjal Vachaspati
+2012-02-05
+</ADDRESS>
+</BODY>
+</HTML>
View
65 egpaper_final/node17.html
@@ -0,0 +1,65 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+
+<!--Converted with LaTeX2HTML 2008 (1.71)
+original version by: Nikos Drakos, CBLU, University of Leeds
+* revised and updated by: Marcus Hennecke, Ross Moore, Herb Swan
+* with significant contributions from:
+ Jens Lippmann, Marek Rouchal, Martin Wilck and others -->
+<HTML>
+<HEAD>
+<TITLE>Verbs</TITLE>
+<META NAME="description" CONTENT="Verbs">
+<META NAME="keywords" CONTENT="egpaper_final">
+<META NAME="resource-type" CONTENT="document">
+<META NAME="distribution" CONTENT="global">
+
+<META NAME="Generator" CONTENT="LaTeX2HTML v2008">
+<META HTTP-EQUIV="Content-Style-Type" CONTENT="text/css">
+
+<LINK REL="STYLESHEET" HREF="egpaper_final.css">
+
+<LINK REL="next" HREF="node18.html">
+<LINK REL="previous" HREF="node16.html">
+<LINK REL="up" HREF="node9.html">
+<LINK REL="next" HREF="node18.html">
+</HEAD>
+
+<BODY >
+
+<DIV CLASS="navigation"><!--Navigation Panel-->
+<A NAME="tex2html208"
+ HREF="node18.html">
+<IMG WIDTH="37" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="next"
+ SRC="/usr/share/latex2html/icons/next.png"></A>
+<A NAME="tex2html206"
+ HREF="node9.html">
+<IMG WIDTH="26" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="up"
+ SRC="/usr/share/latex2html/icons/up.png"></A>
+<A NAME="tex2html200"
+ HREF="node16.html">
+<IMG WIDTH="63" HEIGHT="24" ALIGN="BOTTOM" BORDER="0" ALT="previous"
+ SRC="/usr/share/latex2html/icons/prev.png"></A>
+<BR>
+<B> Next:</B> <A NAME="tex2html209"
+ HREF="node18.html">Majority Voting</A>
+<B> Up:</B> <A NAME="tex2html207"
+ HREF="node9.html">Results</A>
+<B> Previous:</B> <A NAME="tex2html201"
+ HREF="node16.html">Adjectives</A>
+<BR>
+<BR></DIV>
+<!--End of Navigation Panel-->
+