Skip to content

Commit

Permalink
[infer/python][2/X] Introducing "local" vs "global" name tracking
Browse files Browse the repository at this point in the history
Summary:
Python has a two-level namespace:
- anything in the "current" scope is managed using `LOAD/STORE_NAME`.
  If the current scope is the toplevel one, `_NAME` is used instead of
  `_GLOBAL`.
- anything in the "global" (toplevel of a module) scope is managed using
  `LOAD/STORE_GLOBAL` if not in the toplevel scope

This diff introduces the correct tracking of symbols depending on their
scope.

Reviewed By: davidpichardie

Differential Revision: D46646267

fbshipit-source-id: 10ecbf4ef0442aafbdc8b76d5ade41b4e361b353
  • Loading branch information
Vincent Siles authored and facebook-github-bot committed Jun 16, 2023
1 parent 954e807 commit ca90751
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 60 deletions.
49 changes: 34 additions & 15 deletions infer/src/python/PyEnv.ml
Original file line number Diff line number Diff line change
Expand Up @@ -239,9 +239,10 @@ module SMap = Caml.Map.Make (String)

type info = {is_code: bool; is_class: bool; typ: T.Typ.t}

type global_name = {value: string; loc: T.Location.t}
(* TODO: check how much structure we need once we start investigating nested classes/functions *)
type qualified_name = {value: string; loc: T.Location.t}

type global_info = {global_name: global_name; is_builtin: bool; info: info}
type symbol_info = {qualified_name: qualified_name; is_builtin: bool; info: info}

type label_info =
{ label_name: string
Expand All @@ -254,7 +255,8 @@ type label_info =
and shared =
{ idents: T.Ident.Set.t
; idents_info: info T.Ident.Map.t
; globals: global_info SMap.t
; globals: symbol_info SMap.t
; names: symbol_info SMap.t
; builtins: BuiltinSet.t
; classes: string list
; toplevel_signatures: Signature.t SMap.t
Expand Down Expand Up @@ -350,9 +352,9 @@ let initial_globals =
List.fold ~init:SMap.empty
~f:(fun acc (b, _) ->
let value = Builtin.python_to_string b in
let global_name = {value; loc= T.Location.Unknown} in
let qualified_name = {value; loc= T.Location.Unknown} in
let info = {is_code= true; is_class= false; typ= PyCommon.pyCode} in
let global_info = {global_name; is_builtin= true; info} in
let global_info = {qualified_name; is_builtin= true; info} in
SMap.add value global_info acc )
BuiltinSet.python_builtins

Expand All @@ -361,6 +363,7 @@ let empty =
{ idents= T.Ident.Set.empty
; idents_info= T.Ident.Map.empty
; globals= initial_globals
; names= SMap.empty
; builtins= BuiltinSet.empty
; classes= []
; toplevel_signatures= SMap.empty
Expand All @@ -375,7 +378,9 @@ let empty = {shared= empty; node= empty_node}
(** Return the [stack] field stored in the [node] part of the environment. *)
let stack {node= {stack}} = stack

let enter_proc ~is_toplevel {shared} =
let shared = {shared with is_toplevel; idents= T.Ident.Set.empty; next_label= 0} in
let shared =
{shared with is_toplevel; idents= T.Ident.Set.empty; next_label= 0; names= SMap.empty}
in
{shared; node= empty_node}


Expand Down Expand Up @@ -444,12 +449,26 @@ let label_of_offset {shared} offset =

(** Return the [instructions] list of the environment's [node], reversed with [List.rev].
    NOTE(review): presumably instructions are accumulated newest-first and this restores program
    order -- confirm against the code that pushes instructions. *)
let instructions {node= {instructions}} = List.rev instructions

let register_global ({shared} as env) name global_name info =
let register_global ({globals} as env) name global_name info =
let global_info = {global_name; is_builtin= false; info} in
{env with globals= SMap.add name global_info globals}
let register_symbol ({shared} as env) ~global name qualified_name info =
let register map name qualified_name info =
let symbol_info = {qualified_name; is_builtin= false; info} in
SMap.add name symbol_info map
in
{env with shared= register_global shared name global_name info}
let {globals; names} = shared in
if global then
let globals = register globals name qualified_name info in
let shared = {shared with globals} in
{env with shared}
else
let names = register names name qualified_name info in
let shared = {shared with names} in
{env with shared}


(** [lookup_symbol env ~global name] searches for [name] in the shared [globals] map when [global]
    is [true], and in the shared [names] map otherwise. Returns [None] when [name] is not bound in
    the selected map. *)
let lookup_symbol {shared= {globals; names}} ~global name =
  let symbols = if global then globals else names in
  SMap.find_opt name symbols


(** Return the [globals] map held in the [shared] part of the environment. *)
let globals {shared= {globals}} = globals
Expand Down Expand Up @@ -498,14 +517,14 @@ let register_call env fname =
env


let register_toplevel ({shared} as env) name loc annotations =
let register_function ({shared} as env) name loc annotations =
let value = PyCommon.global name in
let global_name = {value; loc} in
let qualified_name = {value; loc} in
let info = {is_code= true; is_class= false; typ= PyCommon.pyObject} in
let global_info = {global_name; is_builtin= false; info} in
let symbol_info = {qualified_name; is_builtin= false; info} in
let {toplevel_signatures; globals} = shared in
let toplevel_signatures = SMap.add value annotations toplevel_signatures in
let globals = SMap.add name global_info globals in
let globals = SMap.add name symbol_info globals in
let shared = {shared with toplevel_signatures; globals} in
{env with shared}

Expand Down
27 changes: 14 additions & 13 deletions infer/src/python/PyEnv.mli
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ type info =

module SMap : Caml.Map.S with type key = string

(** Fully expanded name of a global symbol *)
type global_name = {value: string; loc: T.Location.t}
(** Fully expanded name of a symbol *)
type qualified_name = {value: string; loc: T.Location.t}

(** Global information for global symbols *)
type global_info = {global_name: global_name; is_builtin: bool; info: info}
(** Information about symbols, needed to correctly translate them to Textual's qualified names *)
type symbol_info = {qualified_name: qualified_name; is_builtin: bool; info: info}

(** Global environment used during bytecode processing. Stores common global information like the
toplevel symbols processed so far, or more local ones like the set of labels or variable ids
Expand Down Expand Up @@ -107,7 +107,7 @@ val loc : t -> T.Location.t
val stack : t -> DataStack.t
(** Returns the [DataStack.t] for the current declaration *)

val globals : t -> global_info SMap.t
val globals : t -> symbol_info SMap.t
(** Return the [globals] map *)

val builtins : t -> BuiltinSet.t
Expand Down Expand Up @@ -160,11 +160,12 @@ val register_label : offset:int -> Label.info -> t -> t
val process_label : offset:int -> Label.info -> t -> t
(** Mark the label [info] at [offset] as processed *)

val register_global : t -> string -> global_name -> info -> t
(** Register a global name (function, variable, ...). Since Python allows "toplevel" code, they are
encoded within a specially named function that behaves as a toplevel scope, and global
identifiers are scope accordingly. That way, there is no mixing them with locals with the same
name. *)
val register_symbol : t -> global:bool -> string -> qualified_name -> info -> t
(** Register a name (function, variable, ...). It might be a [global] symbol at the module level or
in a local object. *)

val lookup_symbol : t -> global:bool -> string -> symbol_info option
(** Look up information about a global/local symbol previously registered via [register_symbol] *)

val register_call : t -> string -> t
(** Register a function call. It enables us to deal correctly with builtin declaration. *)
Expand All @@ -173,9 +174,9 @@ val mk_builtin_call : t -> Builtin.textual -> T.Exp.t list -> t * T.Ident.t * T.
(** Wrapper to compute the Textual version of a call to a "textual" builtin * function (a builtin we
introduced for modeling purpose) *)

val register_toplevel : t -> string -> T.Location.t -> PyCommon.annotated_name list -> t
(** Register a top level function declaration. We keep track of them since they might shadow Python
builtins *)
val register_function : t -> string -> T.Location.t -> PyCommon.annotated_name list -> t
(** Register a function declaration. We keep track of them since they might shadow Python builtins
or previous definitions *)

val register_method :
t -> enclosing_class:string -> method_name:string -> PyCommon.annotated_name list -> t
Expand Down
62 changes: 30 additions & 32 deletions infer/src/python/PyTrans.ml
Original file line number Diff line number Diff line change
Expand Up @@ -82,29 +82,28 @@ let load_cell env {FFI.Code.co_consts; co_names; co_varnames} cell =
let info = info ty in
(env, Ok exp, info)
| Name ndx ->
if not (Env.is_toplevel env) then
L.die InternalError "TODO: load_cell inside a class declaration"
let global = Env.is_toplevel env in
let name = co_names.(ndx) in
let ({Env.typ; is_code} as info), qualified_name =
Env.lookup_symbol ~global env name
|> Option.value_map
~default:(default, PyCommon.global name)
~f:(fun {Env.qualified_name= {value}; info} -> (info, value))
in
let var_name = var_name ~loc qualified_name in
(* If we are trying to load some code, use the dedicated builtin *)
if is_code then
let env, exp, typ = code_to_exp ~fun_or_class:true env qualified_name in
let info = {info with Env.typ} in
(env, Ok exp, info)
else
let name = co_names.(ndx) in
let gname = PyCommon.global name in
let var_name = var_name ~loc gname in
let ({Env.typ; is_code} as info) =
Env.SMap.find_opt name (Env.globals env)
|> Option.value_map ~default ~f:(fun {Env.info} -> info)
in
(* If we are trying to load some code, use the dedicated builtin *)
if is_code then
let env, exp, typ = code_to_exp ~fun_or_class:true env gname in
let info = {info with Env.typ} in
(env, Ok exp, info)
else
let exp = T.Exp.Lvar var_name in
let loc = Env.loc env in
let env, id = Env.mk_fresh_ident env info in
let instr = T.Instr.Load {id; exp; typ; loc} in
let env = Env.push_instr env instr in
(* TODO: try to trace the type of names, not only global ones ? *)
(env, Ok (T.Exp.Var id), info)
let exp = T.Exp.Lvar var_name in
let loc = Env.loc env in
let env, id = Env.mk_fresh_ident env info in
let instr = T.Instr.Load {id; exp; typ; loc} in
let env = Env.push_instr env instr in
(* TODO: try to trace the type of names, not only global ones ? *)
(env, Ok (T.Exp.Var id), info)
| VarName ndx ->
let name = co_varnames.(ndx) in
let exp = T.Exp.Lvar (var_name ~loc name) in
Expand Down Expand Up @@ -321,23 +320,22 @@ module STORE = struct
Works as [STORE_NAME], but stores the name as a global.
Since there is a special namespace for global varialbes, this is in fact the same as
Since there is a special namespace for global variables, this is in fact the same as
[STORE_NAME], but only called from within a function/method. *)

let run kind env ({FFI.Code.co_names; co_varnames} as code) {FFI.Instruction.opname; arg} =
let name, is_global, is_attr =
let name, global, is_attr =
match kind with
| FAST ->
(co_varnames.(arg), false, false)
| NAME ->
if Env.is_toplevel env then (co_names.(arg), true, false)
else L.die InternalError "[%s] TODO in class declartaion" opname
(co_names.(arg), Env.is_toplevel env, false)
| ATTR ->
(co_names.(arg), false, true)
| GLOBAL ->
(co_names.(arg), true, false)
in
let gname = if is_global then PyCommon.global name else name in
let gname = if global then PyCommon.global name else name in
Debug.p "[%s] name = %s\n" opname gname ;
let loc = Env.loc env in
let var_name = var_name ~loc gname in
Expand All @@ -347,7 +345,7 @@ module STORE = struct
match exp with
| Ok exp ->
let {Env.typ; is_code; is_class} = info in
let env = if is_global then Env.register_global env name global_name info else env in
let env = Env.register_symbol ~global env name global_name info in
if is_attr then
let env, cell = pop_datastack opname env in
let env, value, {Env.typ} = load_cell env code cell in
Expand All @@ -368,7 +366,7 @@ module STORE = struct
Debug.p " top-level class declaration initialized\n" ;
(env, None) )
else if is_code then
if is_global then (
if global then (
Debug.p " top-level function defined\n" ;
(env, None) )
else L.die InternalError "[%s] no support for closure at the moment: %s" opname name
Expand Down Expand Up @@ -425,7 +423,7 @@ module FUNCTION = struct
let known_globals = Env.globals env in
let proc =
match Env.SMap.find_opt fname known_globals with
| Some {Env.global_name= {value; loc}; is_builtin} ->
| Some {Env.qualified_name= {value; loc}; is_builtin} ->
if is_builtin then PyCommon.builtin_name value
else qualified_procname @@ proc_name ~loc value
| None ->
Expand Down Expand Up @@ -584,7 +582,7 @@ module FUNCTION = struct
(FFI.Constant.show const) )
in
let loc = Env.loc env in
let env = Env.register_toplevel env qualified_name loc annotations in
let env = Env.register_function env qualified_name loc annotations in
let env = Env.push env (DataStack.Code {fun_or_class= true; qualified_name; code}) in
(env, None)
end
Expand Down Expand Up @@ -1310,7 +1308,7 @@ let to_module ~sourcefile ({FFI.Code.co_consts; co_name; instructions} as code)
(* Translate globals to Textual *)
let globals =
Env.SMap.fold
(fun _name {Env.global_name= {value; loc}; info= {is_code}} acc ->
(fun _name {Env.qualified_name= {value; loc}; info= {is_code}} acc ->
let varname = var_name ~loc value in
if is_code then
(* don't generate a global variable name, it will be declared as a toplevel decl *)
Expand Down

0 comments on commit ca90751

Please sign in to comment.