From aed3eb71927d28b2023bce79ef95976f0f12ad23 Mon Sep 17 00:00:00 2001
From: Oleg Tarasenko
Date: Sat, 7 Dec 2019 22:09:17 +0100
Subject: [PATCH] Migrate the static documentation to ex_doc

---
 docs/README.md                      |   2 +-
 documentation/assets/logo.png       | Bin 0 -> 32038 bytes
 documentation/basic_concepts.md     | 154 ++++++++++++
 documentation/ethical_aspects.md    |  12 +
 documentation/http_api.md           |  34 +++
 documentation/installation_guide.md |  14 ++
 documentation/introduction.md       | 152 ++++++++++++
 documentation/quickstart.md         |  79 ++++++
 documentation/settings.md           | 214 ++++++++++++++++
 documentation/tutorial.md           | 371 ++++++++++++++++++++++++++++
 mix.exs                             |  35 ++-
 11 files changed, 1065 insertions(+), 2 deletions(-)
 create mode 100644 documentation/assets/logo.png
 create mode 100644 documentation/basic_concepts.md
 create mode 100644 documentation/ethical_aspects.md
 create mode 100644 documentation/http_api.md
 create mode 100644 documentation/installation_guide.md
 create mode 100644 documentation/introduction.md
 create mode 100644 documentation/quickstart.md
 create mode 100644 documentation/settings.md
 create mode 100644 documentation/tutorial.md

diff --git a/docs/README.md b/docs/README.md
index 164831d7..07a52990 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,4 +1,4 @@
-# Crawly into
+# Crawly intro
 ---
 
 Crawly is an application framework for crawling web sites and
diff --git a/documentation/assets/logo.png b/documentation/assets/logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..68992e3a095e25b0573d7b63ca545737afbd7f23
GIT binary patch
literal 32038
[binary PNG data for documentation/assets/logo.png (32038 bytes) and the diffs for documentation/basic_concepts.md, documentation/ethical_aspects.md and the beginning of documentation/http_api.md omitted]

+```
+curl -v localhost:4001/spiders/<spider_name>/schedule
+```
+
+## Stopping a spider
+
+The following command will stop a given Crawly spider:
+
+```
+curl -v localhost:4001/spiders/<spider_name>/stop
+```
+
+## Getting currently running spiders
+
+```
+curl -v localhost:4001/spiders
+```
+
+## Getting spider stats
+
+```
+curl -v localhost:4001/spiders/<spider_name>/scheduled-requests
+curl -v localhost:4001/spiders/<spider_name>/scraped-items
+```
diff --git a/documentation/installation_guide.md b/documentation/installation_guide.md
new file mode 100644
index 00000000..b5b1abc3
--- /dev/null
+++ b/documentation/installation_guide.md
@@ -0,0 +1,14 @@
+# Installation guide
+---
+
+Crawly requires Elixir v1.7 or higher. In order to create a Crawly
+project, execute the following steps:
+
+1. Generate a new Elixir project: `mix new <project_name> --sup`
+2. Add Crawly to your mix.exs file
+   ```elixir
+   def deps do
+     [{:crawly, "~> 0.6.0"}]
+   end
+   ```
+3.
Fetch crawly: `mix deps.get` \ No newline at end of file diff --git a/documentation/introduction.md b/documentation/introduction.md new file mode 100644 index 00000000..500e1fee --- /dev/null +++ b/documentation/introduction.md @@ -0,0 +1,152 @@ +# Crawly intro +--- + +Crawly is an application framework for crawling web sites and +extracting structured data which can be used for a wide range of +useful applications, like data mining, information processing or +historical archival. + +## Walk-through of an example spider + +In order to show you what Crawly brings to the table, we’ll walk you +through an example of a Crawly spider using the simplest way to run a spider. + +Here’s the code for a spider that scrapes blog posts from the Erlang +Solutions blog: https://www.erlang-solutions.com/blog.html, +following the pagination: + +```elixir +defmodule Esl do +@behaviour Crawly.Spider + + @impl Crawly.Spider + def base_url(), do: "https://www.erlang-solutions.com" + + def init() do + [ + start_urls: ["https://www.erlang-solutions.com/blog.html"] + ] + end + + @impl Crawly.Spider + def parse_item(response) do + # Getting new urls to follow + urls = + response.body + |> Floki.find("a.more") + |> Floki.attribute("href") + |> Enum.uniq() + + # Convert URLs into requests + requests = + Enum.map(urls, fn url -> + url + |> build_absolute_url(response.request_url) + |> Crawly.Utils.request_from_url() + end) + + # Extract item from a page, e.g. + # https://www.erlang-solutions.com/blog/introducing-telemetry.html + title = + response.body + |> Floki.find("article.blog_post h1:first-child") + |> Floki.text() + + author = + response.body + |> Floki.find("article.blog_post p.subheading") + |> Floki.text(deep: false, sep: "") + |> String.trim_leading() + |> String.trim_trailing() + + time = + response.body + |> Floki.find("article.blog_post p.subheading time") + |> Floki.text() + + url = response.request_url + + %Crawly.ParsedItem{ + :requests => requests, + :items => [%{title: title, author: author, time: time, url: url}] + } + end + + def build_absolute_url(url, request_url) do + URI.merge(request_url, url) |> to_string() + end +end +``` + +Put this code into your project and run it using the Crawly REST API: +`curl -v localhost:4001/spiders/Esl/schedule` + +When it finishes you will get the ESL.jl file stored on your +filesystem containing the following information about blog posts: + +```json +{"url":"https://www.erlang-solutions.com/blog/erlang-trace-files-in-wireshark.html","title":"Erlang trace files in Wireshark","time":"2018-06-07","author":"by Magnus Henoch"} +{"url":"https://www.erlang-solutions.com/blog/railway-oriented-development-with-erlang.html","title":"Railway oriented development with Erlang","time":"2018-06-13","author":"by Oleg Tarasenko"} +{"url":"https://www.erlang-solutions.com/blog/scaling-reliably-during-the-world-s-biggest-sports-events.html","title":"Scaling reliably during the World’s biggest sports events","time":"2018-06-21","author":"by Erlang Solutions"} +{"url":"https://www.erlang-solutions.com/blog/escalus-4-0-0-faster-and-more-extensive-xmpp-testing.html","title":"Escalus 4.0.0: faster and more extensive XMPP testing","time":"2018-05-22","author":"by Konrad Zemek"} +{"url":"https://www.erlang-solutions.com/blog/mongooseim-3-1-inbox-got-better-testing-got-easier.html","title":"MongooseIM 3.1 - Inbox got better, testing got easier","time":"2018-07-25","author":"by Piotr Nosek"} +.... +``` + +## What just happened? 
+
+When you ran the curl command:
+```curl -v localhost:4001/spiders/Esl/schedule```
+
+Crawly scheduled the Esl spider: it looked up the spider definition and
+ran it through its crawler engine.
+
+The crawl started by making requests to the URLs defined in the
+start_urls attribute of the spider's init, calling the default
+callback `parse_item` with the response object as an
+argument. In the parse callback we:
+1. Look through all the pagination elements using a Floki selector and
+extract absolute URLs to follow. The URLs are converted into requests
+using the `Crawly.Utils.request_from_url()` function
+2. Extract item(s) (items are defined in separate modules; this part
+will be covered later on)
+3. Return a Crawly.ParsedItem structure which contains the new
+requests to follow and the items extracted from the given page. All
+following requests are processed by the same `parse_item` function.
+
+Crawly is fully asynchronous. Once the requests are scheduled, they
+are picked up by separate workers and are executed in parallel. This
+also means that other requests can keep going even if some request
+fails or an error happens while handling it.
+
+While this enables you to do very fast crawls (sending multiple
+concurrent requests at the same time, in a fault-tolerant way), Crawly
+also gives you control over the politeness of the crawl through a few
+settings. You can do things like setting a download delay between each
+request, limiting the amount of concurrent requests per domain or
+respecting robots.txt rules.
+
+This example uses the JSON export to generate the JSON lines file, but
+you can easily extend it to change the export format (XML or CSV, for
+example).
+
+## What else?
+
+You’ve seen how to extract and store items from a website using
+Crawly, but this is just a basic example. Crawly provides a lot of
+powerful features for making scraping easy and efficient, such as:
+
+1. Flexible request spoofing (for example, user-agent rotation; cookie
+management is planned)
+2. Item validation, using a pipelines approach.
+3. Filtering already seen requests and items.
+4. Filtering out all requests targeted at other domains.
+5. Robots.txt enforcement.
+6. Concurrency control.
+7. HTTP API for controlling crawlers.
+8. Interactive console, which allows you to create and debug spiders more easily.
diff --git a/documentation/quickstart.md b/documentation/quickstart.md
new file mode 100644
index 00000000..2d40ff71
--- /dev/null
+++ b/documentation/quickstart.md
@@ -0,0 +1,79 @@
+# Quickstart
+
+In this section we will show how to bootstrap a small project and how
+to set up Crawly for proper data extraction.
+
+1. Create a new Elixir project: `mix new crawly_example --sup`
+2. Add Crawly to the dependencies (mix.exs file):
+```elixir
+defp deps do
+  [
+    {:crawly, "~> 0.6.0"}
+  ]
+end
+```
+3. Fetch dependencies: `mix deps.get`
+4.
Define Crawling rules (Spider) +```elixir +cat > lib/crawly_example/esl_spider.ex << EOF +defmodule EslSpider do + @behaviour Crawly.Spider + alias Crawly.Utils + + @impl Crawly.Spider + def base_url(), do: "https://www.erlang-solutions.com" + + @impl Crawly.Spider + def init(), do: [start_urls: ["https://www.erlang-solutions.com/blog.html"]] + + @impl Crawly.Spider + def parse_item(response) do + hrefs = response.body |> Floki.find("a.more") |> Floki.attribute("href") + + requests = + Utils.build_absolute_urls(hrefs, base_url()) + |> Utils.requests_from_urls() + + title = response.body |> Floki.find("article.blog_post h1") |> Floki.text() + + %{ + :requests => requests, + :items => [%{title: title, url: response.request_url}] + } + end +end +EOF +``` + +5. Configure Crawly: +By default Crawly does not require any configuration. But obviously you will need +a configuration for fine tuning the Crawls: + +```elixir +config :crawly, + closespider_timeout: 10, + concurrent_requests_per_domain: 8, + follow_redirects: true, + closespider_itemcount: 1000, + output_format: "csv", + item: [:title, :url], + item_id: :title, + middlewares: [ + Crawly.Middlewares.DomainFilter, + Crawly.Middlewares.UniqueRequest, + Crawly.Middlewares.UserAgent + ], + pipelines: [ + Crawly.Pipelines.Validate, + Crawly.Pipelines.DuplicatesFilter, + Crawly.Pipelines.CSVEncoder, + Crawly.Pipelines.WriteToFile + ] +``` + + +6. Start the Crawl: + - `iex -S mix` + - `Crawly.Engine.start_spider(EslSpider)` + +7. Results can be seen in: `cat /tmp/EslSpider.csv` diff --git a/documentation/settings.md b/documentation/settings.md new file mode 100644 index 00000000..58d1c523 --- /dev/null +++ b/documentation/settings.md @@ -0,0 +1,214 @@ +# Crawly settings + +The Crawly settings allows you to customize the behaviour of all +Crawly components, including crawling speed, used pipelines and middlewares. + +Here’s a list of all available Crawly settings, along with their +default values and the scope where they apply. + +The scope, where available, shows where the setting is being used, if +it’s tied to any particular component. In that case the module of that +component will be shown, typically an extension, middleware or +pipeline. It also means that the component must be enabled in order +for the setting to have any effect. + +The settings are defined in the Elixir config style. For example: + +```elixir +config :crawly, + # The path where items are stored + base_store_path: "/tmp/", + # Item definition + item: [:title, :author, :time, :url], + # Identifier which is used to filter out duplicates + item_id: :title +``` + +### base_store_path :: binary() [DEPRECATED in 0.6.0] + +default: "/tmp" + +Defines the path where items are stored in the filesystem. This setting +is used by the Crawly.DataStorageWorker process. + +### user_agents :: list() + +default: ["Crawly Bot 1.0"] + +Defines a user agent string for Crawly requests. This setting is used +by the `Crawly.Middlewares.UserAgent` middleware. When the list has more than one +item, all requests will be executed, each with a user agent string chosen +randomly from the supplied list. + +### item :: [atom()] + +default: [] + +Defines a list of required fields for the item. When none of the default +fields are added to the following item (or if the values of +required fields are "" or nil), the item will be dropped. 
This setting is used by the `Crawly.Pipelines.Validate` pipeline.
+
+### item_id :: atom()
+
+default: nil
+
+Defines a field which will be used in order to identify if an item is
+a duplicate or not. On most ecommerce websites the desired id
+field is the SKU. This setting is used in
+the `Crawly.Pipelines.DuplicatesFilter` pipeline. If unset, the related
+pipeline is effectively disabled.
+
+### pipelines :: [module()]
+
+default: []
+
+Defines a list of pipelines responsible for pre-processing all the scraped
+items. All items not passing any of the pipelines are dropped. If
+unset, all items are stored without any modifications.
+
+Example configuration of item pipelines:
+```
+config :crawly,
+  pipelines: [
+    Crawly.Pipelines.Validate,
+    Crawly.Pipelines.DuplicatesFilter,
+    Crawly.Pipelines.JSONEncoder,
+    Crawly.Pipelines.WriteToFile [NEW IN 0.6.0]
+  ]
+```
+
+#### CSVEncoder pipeline
+
+It's possible to export data in CSV format if the pipelines are
+defined in the following way:
+```
+config :crawly,
+  pipelines: [
+    Crawly.Pipelines.Validate,
+    Crawly.Pipelines.DuplicatesFilter,
+    Crawly.Pipelines.CSVEncoder,
+    Crawly.Pipelines.WriteToFile [NEW IN 0.6.0]
+  ]
+```
+
+**NOTE**: Set the file extension config for `WriteToFile` to "csv"
+
+#### JSONEncoder pipeline
+
+It's possible to export data in JSON lines format if the pipelines are
+defined in the following way:
+```
+config :crawly,
+  pipelines: [
+    Crawly.Pipelines.Validate,
+    Crawly.Pipelines.DuplicatesFilter,
+    Crawly.Pipelines.JSONEncoder,
+    Crawly.Pipelines.WriteToFile [NEW IN 0.6.0]
+  ]
+```
+
+**NOTE**: Set the file extension config for `WriteToFile` to "jl"
+
+#### WriteToFile pipeline
+
+Writes a given item to a file.
+```
+config :crawly,
+  pipelines: [
+    ...
+    Crawly.Pipelines.JSONEncoder,
+    Crawly.Pipelines.WriteToFile
+  ]
+
+config :crawly, Crawly.Pipelines.WriteToFile,
+  folder: "/tmp",
+  extension: "jl"
+```
+
+**NOTE**: Set the file extension config for `WriteToFile` to "jl"
+
+### middlewares :: [module()]
+
+default: [
+    Crawly.Middlewares.DomainFilter,
+    Crawly.Middlewares.UniqueRequest,
+    Crawly.Middlewares.RobotsTxt,
+    Crawly.Middlewares.UserAgent
+  ]
+
+Defines a list of middlewares responsible for pre-processing
+requests. Any request coming from a `Crawly.Spider` that does not
+pass one of the middlewares is dropped.
+
+### closespider_itemcount :: pos_integer()
+
+default: 5000
+
+An integer which specifies a number of items. If the spider scrapes
+more than that amount, and those items are passed by the item pipeline,
+the spider will be closed. If set to nil the spider will not be
+stopped.
+
+### closespider_timeout :: pos_integer()
+
+default: nil
+
+Defines a minimal number of items which needs to be scraped by the
+spider within the given timeframe (30s). If the limit is not reached,
+the spider will be stopped.
+
+### follow_redirects :: boolean()
+
+default: false
+
+Defines whether the Crawly spider is supposed to follow HTTP redirects or not.
+
+### concurrent_requests_per_domain :: pos_integer()
+
+default: 4
+
+The maximum number of concurrent (i.e. simultaneous) requests that will
+be performed by the Crawly workers.
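The settings above are normally combined into a single `config :crawly` block in `config/config.exs`. The sketch below is illustrative only: the keys are the ones documented in this section, while the concrete values (counts, timeouts, required fields, output folder) are placeholder choices you would tune for your own crawl, not recommended defaults.

```elixir
# config/config.exs -- illustrative combination of the settings described
# above; the values are placeholders, not defaults.
config :crawly,
  # Politeness / speed control
  concurrent_requests_per_domain: 2,
  follow_redirects: true,
  # Stop conditions
  closespider_itemcount: 500,
  closespider_timeout: 10,
  # Request pre-processing
  middlewares: [
    Crawly.Middlewares.DomainFilter,
    Crawly.Middlewares.UniqueRequest,
    Crawly.Middlewares.RobotsTxt,
    Crawly.Middlewares.UserAgent
  ],
  # Item definition and post-processing
  item: [:title, :url],
  item_id: :title,
  pipelines: [
    Crawly.Pipelines.Validate,
    Crawly.Pipelines.DuplicatesFilter,
    Crawly.Pipelines.JSONEncoder,
    Crawly.Pipelines.WriteToFile
  ]

# Pipeline-specific options for WriteToFile, as described above
config :crawly, Crawly.Pipelines.WriteToFile,
  folder: "/tmp",
  extension: "jl"
```

With a configuration along these lines, Validate and DuplicatesFilter use the `item` and `item_id` values, and WriteToFile should write one JSON line per scraped item into a file named after the spider under the configured folder.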
+ +### using crawly with a proxy + +Now it's possible to direct all Crawly's requests through a proxy, +it's possible to set proxy using the proxy value of Crawly config, for example: +``` +config :crawly, + proxy: ":", +``` + +Example usage: +``` +iex(3)> Crawly.fetch("http://httpbin.org/ip") +{:ok, + %HTTPoison.Response{ + body: "{\n \"origin\": \"101.4.136.34, 101.4.136.34\"\n}\n", + headers: [ + {"Server", "nginx/1.7.10"}, + {"Date", "Sat, 03 Aug 2019 18:57:20 GMT"}, + {"Content-Type", "application/json"}, + {"Content-Length", "45"}, + {"Connection", "keep-alive"}, + {"Access-Control-Allow-Credentials", "true"}, + {"Access-Control-Allow-Origin", "*"}, + {"Referrer-Policy", "no-referrer-when-downgrade"}, + {"X-Content-Type-Options", "nosniff"}, + {"X-Frame-Options", "DENY"}, + {"X-XSS-Protection", "1; mode=block"} + ], + request: %HTTPoison.Request{ + body: "", + headers: [], + method: :get, + options: [false, {:proxy, "101.4.136.34:81"}], + params: %{}, + url: "http://httpbin.org/ip" + }, + request_url: "http://httpbin.org/ip", + status_code: 200 + }} +``` diff --git a/documentation/tutorial.md b/documentation/tutorial.md new file mode 100644 index 00000000..d7c650fe --- /dev/null +++ b/documentation/tutorial.md @@ -0,0 +1,371 @@ +# Crawly tutorial +--- + +In this tutorial, we’ll assume that Elixir is already installed on +your system. If that’s not the case, see Installation guide: +https://elixir-lang.org/install.html + +We are going to scrape `https://www.homebase.co.uk`, a website that +contains products of different types. + +This tutorial will walk you through these tasks: +1. Creating a new Crawly project. +2. Writing a spider to crawl a site and extract data. +3. Exporting the scraped data. + +Crawly is written in Elixir. If you’re new to the language you might +want to start by getting an idea of what the language is like, to get +the most out of Crawly. + +If you’re already familiar with other languages, and want to learn +Elixir quickly, the Elixir website +https://elixir-lang.org/learning.html is a good resource. + +## Creating a project + +Before you start crawling, you will have to set up a new Crawly +project. Enter a directory where you’d like to store your code and +run: + +```mix new tutorial --sup``` + +This will create a tutorial directory with the following contents: +```bash +tutorial +├── README.md +├── config +│   └── config.exs +├── lib +│   ├── tutorial +│   │   └── application.ex +│   └── tutorial.ex +├── mix.exs +└── test + ├── test_helper.exs + └── tutorial_test.exs + +``` + +Switch to the project folder: `cd ./tutorial` and update the mix.exs +file with the following code: +```elixir + def deps do + [{:crawly, "~> 0.5.0"}] + end +``` +Now run `mix deps.get` + + +## Our first spider + +Spiders are behaviours which you defined and that Crawly uses to +extract information from a given website. The spider must implement +the spider behaviour (it's required to implement `parse_item/1`, `init/0`, +`base_url/0` callbacks) + +This is the code for our first spider. Save it in a file name +homebase.ex under the lib/tutorial/spiders directory of your project. 
+ +```elixir +defmodule Homebase do + @behaviour Crawly.Spider + + @impl Crawly.Spider + def base_url(), do: "https://www.homebase.co.uk" + + @impl Crawly.Spider + def init() do + [ + start_urls: [ + "https://www.homebase.co.uk/our-range/tools" + ] + ] + end + + @impl Crawly.Spider + def parse_item(_response) do + %Crawly.ParsedItem{:items => [], :requests => []} + end +end +``` + +As you can see, our Spider implements the Spider behaviour and defines +some functions: + +1. base_url: method which returns base_urls for the given Spider, used in +order to filter out all irrelevant requests. In our case we don't want +our crawler to follow links going to social media sites and other +partner sites (which are not related to the homebase website themselves) + +2. init(): must return a KW list which contains start_urls list which +Crawler will begin to crawl from. Subsequent requests will be +generated from these initial urls. + +3. parse_item(): function which will be called to handle response +downloaded by Crawly. It must return the `Crawly.ParsedItem` structure. + + +## How to run our spider + +To put our spider to work, go to the project’s top level directory and +run: + +1. iex -S mix - It will start the Elixir application which we have +created, and will open interactive console +2. Execute the following command in the opened Elixir console: +```Crawly.Engine.start_spider(Homebase)``` + +You will get an output similar to this: + + ```elixir +iex(2)> Crawly.Engine.start_spider(Homebase) + +15:03:47.134 [info] Starting the manager for Elixir.Homebase + +=PROGRESS REPORT==== 23-May-2019::15:03:47 === + supervisor: {<0.415.0>,'Elixir.Crawly.ManagerSup'} + started: [{pid,<0.416.0>}, + {id,'Elixir.Homebase'}, + {mfargs, + {'Elixir.DynamicSupervisor',start_link, + [[{strategy,one_for_one}, + {name,'Elixir.Homebase'}]]}}, + {restart_type,permanent}, + {shutdown,infinity}, + {child_type,supervisor}] + +15:03:47.137 [debug] Starting requests storage worker for +Elixir.Homebase.. + +15:04:06.698 [debug] No work, increase backoff to 2400 +15:04:06.699 [debug] No work, increase backoff to 4800 +15:04:06.699 [debug] No work, increase backoff to 9600 +15:04:07.973 [debug] No work, increase backoff to 19200 +15:04:17.787 [info] Stopping Homebase, itemcount timeout achieved +``` + +## What just happened under the hood? + +Crawly schedules the Request objects returned by the init function of +the Spider. Upon receiving a response for each one, it instantiates +Response objects and calls the callback function associated with the +request passing the response as argument. + +In our case we have not defined any data to be returned by the +`parse_item` callback. And in our the Crawly worker processes +(processes responsible for downloading requests) did not have work +to do. And in the cases like that, they will slow down progressively, +until the switch off (which happened because the Spider was not +extracting items fast enough). + +And if you're wondering how to extract the data from the response, +please hold on. We're going to cover it in the next section. + +## Extracting data + +The best way to learn how to extract data with Crawly is trying the +selectors in Crawly shell. + +1. Start the Elixir shell using `iex -S mix` command +2. 
Now you can fetch a given HTTP response using the following + command: + `{:ok, response} = Crawly.fetch("https://www.homebase.co.uk/our-range/tools")` + +You will see something like: + +``` +{:ok, + %HTTPoison.Response{ + body: "[response body here...]" + headers: [ + {"Date", "Fri, 24 May 2019 02:37:26 GMT"}, + {"Content-Type", "text/html; charset=utf-8"}, + {"Transfer-Encoding", "chunked"}, + {"Connection", "keep-alive"}, + {"Cache-Control", "no-cache, no-store"}, + {"Pragma", "no-cache"}, + {"Expires", "-1"}, + {"Vary", "Accept-Encoding"}, + {"Set-Cookie", "Bunnings.Device=default; path=/"}, + {"Set-Cookie", + "ASP.NET_SessionId=bcb2deqlapednir0lysulo1h; path=/; HttpOnly"}, + {"Set-Cookie", "Bunnings.Device=default; path=/"}, + {"Set-Cookie", + "ASP.NET_SessionId=bcb2deqlapednir0lysulo1h; path=/; HttpOnly"}, + {"Set-Cookie", "Bunnings.UserType=RetailUser; path=/"}, + ...., + {"Set-Cookie", + "__AntiXsrfToken=fd198cd78d1b4826ba00c24c3af1ec56; path=/; HttpOnly"}, + {"Server", "cloudflare"}, + {"CF-RAY", "4dbbe33fae7e8b20-KBP"} + ], + request: %HTTPoison.Request{ + body: "", + headers: [], + method: :get, + options: [], + params: %{}, + url: "https://www.homebase.co.uk/our-range/tools" + }, + request_url: "https://www.homebase.co.uk/our-range/tools", + status_code: 200 + }} +``` + +Using the shell, you can try selecting elements using Floki with the +response. Lets say that we want to extract all product categories links from the +page above: + +``` +response.body |> Floki.find("div.product-list-footer a") |> +Floki.attribute("href") + +"/our-range/tools/power-tools/drills", "/our-range/tools/power-tools/saws", + "/our-range/tools/power-tools/sanders", + "/our-range/tools/power-tools/electric-screwdrivers", + "/our-range/tools/power-tools/tools-accessories", + "/our-range/tools/power-tools/routers-and-planers", + "/our-range/tools/power-tools/multi-tools", + "/our-range/tools/power-tools/impact-drivers-and-wrenches", + "/our-range/tools/power-tools/air-compressors", + "/our-range/tools/power-tools/angle-grinders", + "/our-range/tools/power-tools/heat-guns", + "/our-range/tools/power-tools/heavy-duty-construction-tools", + "/our-range/tools/power-tools/welding" ...] +``` + +The result of running the command above is a list of elements which +contain href attribute of links selected with +`a.category-block-heading__title` css selector. These URLs will be +used in order to feed Crawly with requests to follow. + +In order to find the proper CSS selectors to use, you might find +useful opening the target page from the shell in your web browser. You +can use your browser developer tools to inspect the HTML and come up +with a selector. + +Now let's navigate to one of the Homebase product pages and extract +data from it. + +``` +{:ok, response} = +Crawly.fetch("https://www.homebase.co.uk/4-tier-heavy-duty-shelving-unit_p375180") + +``` + +Extract the `title` with: +``` +response.body |> Floki.find(".page-title h1") |> Floki.text() +"4 Tier Heavy Duty Shelving Unit" +``` + +Extract the `SKU` with: + +``` +response.body |> Floki.find(".product-header-heading span") |> Floki.text +"SKU: 375180" +``` + +Extract the `price` with: +``` +response.body |> Floki.find(".price-value [itemprop=priceCurrency]") |> Floki.text +"£75" +``` + +## Extracting data in our spider + +Let’s get back to our spider. Until now, it doesn’t extract any data, +just makes an `empty run`. Let’s integrate the extraction logic above +into our spider. 
+ +```elixir +defmodule Homebase do + @behaviour Crawly.Spider + + @impl Crawly.Spider + def base_url(), do: "https://www.homebase.co.uk" + + @impl Crawly.Spider + def init() do + [ + start_urls: [ + "https://www.homebase.co.uk/our-range/tools" + ] + ] + end + + @impl Crawly.Spider + def parse_item(response) do + # Extract product category URLs + product_categories = + response.body + |> Floki.find("div.product-list-footer a") + |> Floki.attribute("href") + + # Extract individual product page URLs + product_pages = + response.body + |> Floki.find("a.product-tile ") + |> Floki.attribute("href") + + urls = product_pages ++ product_categories + + # Convert URLs into Requests + requests = + urls + |> Enum.uniq() + |> Enum.map(&build_absolute_url/1) + |> Enum.map(&Crawly.Utils.request_from_url/1) + + # Create item (for pages where items exists) + item = %{ + title: response.body |> Floki.find(".page-title h1") |> Floki.text(), + sku: + response.body + |> Floki.find(".product-header-heading span") + |> Floki.text(), + price: + response.body + |> Floki.find(".price-value [itemprop=priceCurrency]") + |> Floki.text() + } + + %Crawly.ParsedItem{:items => [item], :requests => requests} + end + + defp build_absolute_url(url), do: URI.merge(base_url(), url) |> to_string() +end + +``` + +If you run this spider, it will output the extracted data with the log: +``` +17:23:42.536 [debug] Scraped %{price: "£3.99", sku: "SKU: 486386", title: "Bon Safety EN20471 Hi viz Yellow Vest, size XL"} +17:23:43.432 [debug] Scraped %{price: "£3.99", sku: "SKU: 486384", title: "Bon Safety EN20471 Hi viz Yellow Vest, size L"} +17:23:42.389 [debug] Scraped %{price: "£5.25", sku: "SKU: 414464", title: "Toughbuilt 24in Wall Organizer"} +``` + +Also you will see messages like: +``` +17:23:42.435 [debug] Dropping request: https://www.homebase.co.uk/bon-safety-rain-de-pro-superlight-weight-rainsuit-xxl_p275608, as it's already processed +17:23:42.435 [debug] Dropping request: https://www.homebase.co.uk/bon-safety-rain-de-pro-superlight-weight-rainsuit-l_p275605, as it's already processed +17:23:42.435 [debug] Dropping request: https://www.homebase.co.uk/bon-safety-rain-de-pro-superlight-weight-rainsuit-xl_p275607, as it's already processed +``` +That's because Crawly filters out requests which it has already +visited during the current run. + +## Where the data is stored afterwords? + +You might wonder where is the resulting data is located by default? +Well the default location of the scraped data is under the /tmp +folder. This can be controlled by the `base_store_path` variable in +the Crawly configuration (`:crawly`, `:base_store_path`). + + +## Next steps + +This tutorial covered only the basics of Crawly, but there’s a lot of +other features not mentioned here. + +You can continue from the section Basic concepts to know more about +the basic Crawly features. 
diff --git a/mix.exs b/mix.exs
index 3eb8d052..5d53db49 100644
--- a/mix.exs
+++ b/mix.exs
@@ -1,10 +1,12 @@
 defmodule Crawly.Mixfile do
   use Mix.Project
 
+  @version "0.7.0-dev"
+
   def project do
     [
       app: :crawly,
-      version: "0.6.0",
+      version: @version,
       name: "Crawly",
       source_url: "https://github.com/oltarasenko/crawly",
       elixir: "~> 1.7",
@@ -13,6 +15,7 @@ defmodule Crawly.Mixfile do
       test_coverage: [tool: ExCoveralls],
       start_permanent: Mix.env() == :prod,
       elixirc_paths: elixirc_paths(Mix.env()),
+      docs: docs(),
       elixirc_options: [warnings_as_errors: true],
       deps: deps()
     ]
@@ -60,4 +63,34 @@ defmodule Crawly.Mixfile do
       {:excoveralls, "~> 0.10", only: :test}
     ]
   end
+
+  defp docs do
+    [
+      source_ref: "v#{@version}",
+      logo: "documentation/assets/logo.png",
+      extra_section: "documentation",
+      main: "quickstart",
+# assets: "guides/assets",
+      formatters: ["html", "epub"],
+# groups_for_modules: groups_for_modules(),
+      extras: extras()
+# groups_for_extras: groups_for_extras()
+    ]
+  end
+
+  defp extras do
+    [
+      "documentation/quickstart.md",
+      "documentation/introduction.md",
+      "documentation/ethical_aspects.md",
+      "documentation/installation_guide.md",
+      "documentation/tutorial.md",
+      "documentation/basic_concepts.md",
+      "documentation/settings.md",
+      "documentation/http_api.md"
+
+    ]
+  end
+
+
 end
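A quick way to check the result of the new `docs` configuration locally (assuming `ex_doc` is present among the project's dev dependencies) is to build the documentation with the standard ex_doc Mix task:

```
mix deps.get
mix docs
```

By default ExDoc writes the HTML output to the doc/ directory (open doc/index.html to review the guides listed in extras()); with the "epub" formatter enabled it should also produce an EPUB file in the same directory.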