Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 312 lines (240 sloc) 7.168 kb
8634b74 initial import into fresh git repo.
pad authored
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2010 Facebook
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public License
7 * version 2.1 as published by the Free Software Foundation, with the
8 * special exception on linking described in file license.txt.
9 *
10 * This library is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
13 * license.txt for more details.
14 *)
d49b917 @aryx * commons/file_type.ml: misc
aryx authored
15 open Common
8634b74 initial import into fresh git repo.
pad authored
16
17 (*****************************************************************************)
18 (* Prelude *)
19 (*****************************************************************************)
20
21 (*****************************************************************************)
22 (* Types *)
23 (*****************************************************************************)
24
25 (* see also dircolors.el and LFS *)
(* Top-level classification of a file. Most constructors carry a string;
 * in this file it is usually the extension that triggered the
 * classification.
 *
 * The order of the constructors is kept as is on purpose: it is
 * observable through polymorphic comparison and marshalling.
 *)
type file_type =
  (* source code in some programming language *)
  | PL of pl_type
  (* .o, .a, .aux, .bak, etc *)
  | Obj of string
  | Binary of string
  (* tex, txt, readme, noweb, org, etc *)
  | Text of string
  (* ps, pdf *)
  | Doc of string
  | Media of media_type
  (* tgz, rpm, etc *)
  | Archive of string
  | Other of string

(* Programming languages. *)
and pl_type =
  (* mli, ml, mly, mll *)
  | ML of string
  | Haskell of string
  | Lisp of lisp_type
  | Prolog of string
  | Makefile
  (* sh, csh, awk, sed, etc *)
  | Script of string
  | C of string
  | Cplusplus of string
  | Java
  | Csharp
  | Perl
  | Python
  | Ruby
  | Erlang
  | Beta
  | Pascal
  | Haxe
  | Opa
  | Web of webpl_type
  | R of string
  | Asm
  | Thrift
  (* any other language, told apart by the carried string *)
  | MiscPL of string

(* Lisp dialects. *)
and lisp_type =
  | CommonLisp
  | Elisp
  | Scheme

(* Languages and data formats grouped under Web. *)
and webpl_type =
  (* php or phpt or script *)
  | Php of string
  | Js
  | Css
  | Html
  | Xml
  | Json
  | Sql

(* Kinds of media files. *)
and media_type =
  | Sound of string
  | Picture of string
  | Video of string
68
69 (*****************************************************************************)
70 (* Main entry point *)
71 (*****************************************************************************)
72
(* Classify [file] from the triple returned by
 * Common.dbe_of_filename_noext_ok (presumably directory, basename and
 * extension -- confirm against Common). The extension [e] is looked up
 * first; when it is not a known one, the executable bit, a few well-known
 * basenames [b] and finally the file size decide.
 *
 * todo (expensive ?):
 *  | _ when b =~ ".md5sum_.*" -> Obj ("syncweb")
 *  | _ when b =~ "Makefile.*" -> PL Makefile
 *)
let file_type_of_file2 file =
  let (_, b, e) = Common.dbe_of_filename_noext_ok file in
  match e with
  (* ML family: ocaml sources, mlburg (mlb), mlp (used in some source),
   * ocsigen (eliom), sml, fsharp (fsi, fsx, fs) and linear ML (lml) *)
  | "ml" | "mli" | "mly" | "mll"
  | "mlb" | "mlp" | "eliom"
  | "sml"
  | "fsi" | "fsx" | "fs"
  | "lml" ->
      PL (ML e)

  | "hs" | "lhs" -> PL (Haskell e)
  | "erl" | "hrl" -> PL Erlang
  | "hx" | "hxp" | "hxml" -> PL Haxe
  | "opa" -> PL Opa
  | "bet" -> PL Beta

  (* todo detect false C file, look for "Mode: Objective-C++" string in
   * file ? can also be a c++, use
   * Parser_cplusplus.is_problably_cplusplus_file
   * todo? have a PL of xxx_kind * pl_kind ? (for the y and l files)
   *)
  | "c" | "h" | "y" | "l" -> PL (C e)

  (* tcc is used in libstdc++ *)
  | "hpp" | "hxx" | "hh"
  | "cpp" | "C" | "cc" | "cxx"
  | "tcc" ->
      PL (Cplusplus e)

  | "java" -> PL Java
  | "cs" -> PL Csharp
  | "p" -> PL Pascal
  | "thrift" -> PL Thrift

  | "scm" | "rkt" | "ss" -> PL (Lisp Scheme)
  | "lisp" -> PL (Lisp CommonLisp)
  | "el" -> PL (Lisp Elisp)

  (* pl could be Perl or Prolog; Prolog wins here *)
  | "pl" | "clp" -> PL (Prolog e)
  | "perl" -> PL Perl
  | "py" -> PL Python
  | "rb" -> PL Ruby

  | "s" | "S" | "asm" -> PL Asm
  | "R" | "Rd" -> PL (R e)
  | "sh" -> PL (Script e)

  (* Languages without a dedicated constructor. tig is the Tiger toy
   * language of Andrew Appel, me is merd, he and bc are noted as
   * from hex to bcc in the original, fbobj is a facebook format. *)
  | "c--" | "oz" | "scala"
  | "m4" | "conf"
  | "tig" | "me"
  | "vim" | "nanorc"
  | "he" | "bc"
  | "fbobj" ->
      PL (MiscPL e)

  | "php" | "phpt" -> PL (Web (Php e))
  | "css" -> PL (Web Css)
  | "js" -> PL (Web Js)
  | "html" | "htm" -> PL (Web Html)
  (* xib: apple stuff ? *)
  | "xml" | "xib" -> PL (Web Xml)
  | "json" -> PL (Web Json)
  (* sql3: facebook sqlshim files *)
  | "sql" | "sqlite" | "sql3" -> PL (Web Sql)

  (* fonts and swf are filed under pictures too *)
  | "png" | "jpg" | "JPG" | "gif" | "tiff"
  | "xcf" | "xpm"
  | "icns" | "icon" | "ico"
  | "ppm" | "tga"
  | "ttf" | "font"
  | "swf" ->
      Media (Picture e)
  | "wav" -> Media (Sound e)

  | "ps" | "pdf" | "ppt" -> Doc e

  | "tex" | "texi"
  | "txt" | "doc"
  | "nw" | "web"
  | "org" | "md" | "rest" | "textile" | "wiki"
  | "rtf" ->
      Text e

  (* Generated or derived files. db: pad uses it to store marshalled
   * data; apcarc ... train: facebook fbcode stuff; git_annot: pad
   * specific, cached git blame info. *)
  | "cmi" | "cmo" | "cmx" | "cma" | "cmxa"
  | "annot"
  | "o" | "a"
  | "pyc"
  | "log"
  | "toc" | "brf"
  | "out" | "output"
  | "hi"
  | "db"
  | "msi"
  | "po" | "pot" | "gmo"
  | "apcarc" | "serialized" | "wsdl" | "dat" | "train"
  | "git_annot" ->
      Obj e

  | "byte" | "top" | "exe" -> Binary e

  | "tar" | "tgz" | "jar"
  | "bz2" | "gz" | "rar" | "zip" ->
      Archive e

  | "mk" -> PL Makefile

  (* Unknown extension. The checks below run in this exact order: the
   * executable test comes before any basename is looked at, and the
   * size is only consulted when nothing else matched. *)
  | _ ->
      if Common.is_executable file
      then Binary e
      else begin
        match b with
        | "Makefile" | "mkfile" | "Imakefile" | "TARGETS" -> PL Makefile
        | "README" -> Text "txt"
        | "TAGS" -> Binary e
        | ".depend" -> Obj "depend"
        | ".emacs" -> PL (Lisp Elisp)
        | _ ->
            if Common.filesize file > 300_000
            then Obj e
            else Other e
      end
264
(* Entry point for callers: same result as file_type_of_file2 on [a], with
 * the call run through Common.profile_code under the key
 * "file_type_of_file". *)
let file_type_of_file a =
  let classify () = file_type_of_file2 a in
  Common.profile_code "file_type_of_file" classify
267
268
269
270 (*****************************************************************************)
271 (* Misc *)
272 (*****************************************************************************)
273
(* True when file_type_of_file classifies [file] as source code or text.
 *
 * SQL files are reported as non-textual on purpose: the original note
 * says that if they contain weird code then pfff_visual crashes. That
 * case has to stay before the generic PL one.
 *)
let is_textual_file file =
  match file_type_of_file file with
  | PL (Web Sql) -> false
  | PL _ | Text _ -> true
  | Obj _ | Binary _ | Doc _ | Media _ | Archive _ | Other _ -> false
282
(* Some web language when file_type_of_file classifies [file] as
 * PL (Web _), None for every other kind of file. *)
let webpl_type_of_file file =
  match file_type_of_file file with
  | PL (Web webpl) -> Some webpl
  | PL _
  | Obj _ | Binary _ | Text _ | Doc _ | Media _ | Archive _ | Other _ ->
      None
287
288
d49b917 @aryx * commons/file_type.ml: misc
aryx authored
289 (*
8634b74 initial import into fresh git repo.
pad authored
290 let detect_pl_of_file file =
291 raise Todo
292
293 let string_of_pl x =
294 raise Todo
295 | C -> "c"
296 | Cplusplus -> "c++"
297 | Java -> "java"
298
299 | Web _ -> raise Todo
300 *)
301
150f469 codemap: added a -ocaml_mli_filter, useful when work on c-- source
pad authored
(* True when [file] matches the regexp ".*md5sum_" according to the =~
 * operator (presumably the one brought in by open Common; its exact
 * anchoring rules are not visible here -- TODO confirm). *)
let is_syncweb_obj_file file =
  let syncweb_marker = ".*md5sum_" in
  file =~ syncweb_marker
eda1ead @aryx database_code: make the json vs marshall load/save automatic based on…
aryx authored
304
(* True when [filename] ends with the ".json" suffix.
 *
 * Only the name is inspected; the file is never opened. This used to be
 * written as a regexp match on ".*\\.json$", which rebuilt the regexp on
 * every call and (assuming =~ is Str-based -- TODO confirm) treated '.'
 * and '$' as line-oriented, so names containing a newline were
 * misclassified. A plain suffix test says what is meant.
 * NOTE(review): Filename.check_suffix may ignore case on Windows.
 *
 * Alternative relying on the classifier of this module:
 *   match File_type.file_type_of_file filename with
 *   | File_type.PL (File_type.Web (File_type.Json)) -> true
 *   | _ -> false
 *)
let is_json_filename filename =
  Filename.check_suffix filename ".json"
Something went wrong with that request. Please try again.