/
service_admin_extra_gdpr.erl
145 lines (121 loc) · 5.58 KB
/
service_admin_extra_gdpr.erl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
-module(service_admin_extra_gdpr).
-include("ejabberd_commands.hrl").
-export([commands/0,
retrieve_all/3]).
% Exported for RPC call
-export([retrieve_logs/2]).
-define(CMD_TIMEOUT, 300000).
%% @doc Admin commands contributed by this module.
%% Currently a single `retrieve_personal_data' command (tagged `gdpr') that
%% dispatches to retrieve_all/3 with the user name, domain and output path,
%% all passed as binaries.
-spec commands() -> [ejabberd_commands:cmd()].
commands() -> [
    #ejabberd_commands{name = retrieve_personal_data, tags = [gdpr],
                       desc = "Retrieve user's personal data.",
                       %% The usage example has to include the command name,
                       %% otherwise it is not a runnable invocation.
                       longdesc = "Retrieves all personal data from MongooseIM for a given user. Example:\n"
                                  " mongooseimctl retrieve_personal_data alice localhost /home/mim/alice.smith.zip ",
                       module = ?MODULE,
                       function = retrieve_all,
                       args = [{username, binary}, {domain, binary}, {path, binary}],
                       result = {res, rescode}}
].
%% @doc Export the personal data of `Username'@`Domain' into a zip archive
%% written to `ResultFilePath'.
%%
%% Data returned by get_data_from_modules/2 is written as one CSV file per
%% data group, the log files returned by get_all_logs/3 are added, and
%% everything is zipped from a temporary directory.
%%
%% Returns `ok' on success and `{error, "User does not exist"}' when
%% user_exists/2 is false. Any failure after the user check crashes the
%% caller, but the temporary directory is removed first.
-spec retrieve_all(jid:user(), jid:server(), Path :: binary()) ->
    ok | {error, Reason :: any()}.
retrieve_all(Username, Domain, ResultFilePath) ->
    case user_exists(Username, Domain) of
        true ->
            export_personal_data(Username, Domain, ResultFilePath);
        false ->
            {error, "User does not exist"}
    end.

%% Stage CSV and log files in a fresh temporary directory and zip them.
export_personal_data(Username, Domain, ResultFilePath) ->
    DataFromModules = get_data_from_modules(Username, Domain),
    %% Assert that a directory name (a non-empty string) came back, so a failed
    %% make_tmp_dir/0 surfaces here as a clear badmatch on its {error, _} value.
    TmpDir = [_ | _] = make_tmp_dir(),
    %% try/after: the staged personal data must not outlive a failed export.
    try
        CsvFiles = write_csv_files(DataFromModules, TmpDir),
        LogFiles = get_all_logs(Username, Domain, TmpDir),
        ZipFile = binary_to_list(ResultFilePath),
        {ok, ZipFile} = zip:create(ZipFile, CsvFiles ++ LogFiles, [{cwd, TmpDir}]),
        ok
    after
        remove_tmp_dir(TmpDir)
    end.

%% Write one "<data group>.csv" file per data group into TmpDir and return
%% the file names (relative to TmpDir) as strings.
write_csv_files(DataFromModules, TmpDir) ->
    lists:map(
      fun({DataGroup, Schema, Entries}) ->
              BinDataGroup = atom_to_binary(DataGroup, utf8),
              FileName = <<BinDataGroup/binary, ".csv">>,
              to_csv_file(FileName, Schema, Entries, TmpDir),
              binary_to_list(FileName)
      end,
      DataFromModules).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Private funs
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Modules reported by mongoose_lib:find_behaviour_implementations/1 for the
%% `gdpr' behaviour.
-spec modules_with_personal_data() -> [module()].
modules_with_personal_data() ->
    Behaviour = gdpr,
    mongoose_lib:find_behaviour_implementations(Behaviour).
%% Query every module from modules_with_personal_data/0 and concatenate the
%% per-module results into one flat list of {DataGroup, Schema, Entries}.
-spec get_data_from_modules(jid:user(), jid:server()) ->
    [{gdpr:data_group(), gdpr:schema(), gdpr:entries()}].
get_data_from_modules(Username, Domain) ->
    PerModule = [try_get_data_from_module(Mod, Username, Domain)
                 || Mod <- modules_with_personal_data()],
    lists:append(PerModule).
%% @doc Ask `Module' for the personal data of `Username'@`Domain'.
%% A result made of a single data group with no entries is reduced to `[]'.
%% The export is best-effort: when the callback raises, the module is skipped
%% and `[]' is returned, but a warning is logged so that data missing from the
%% export is visible to the operator instead of being silently dropped.
-spec try_get_data_from_module(module(), jid:user(), jid:server()) ->
    [{gdpr:data_group(), gdpr:schema(), gdpr:entries()}].
try_get_data_from_module(Module, Username, Domain) ->
    try Module:get_personal_data(Username, Domain) of
        [{_, _, []}] -> [];
        Val -> Val
    catch
        Class:Reason ->
            error_logger:warning_msg(
              "event=gdpr_get_personal_data_failed module=~p class=~p reason=~p~n",
              [Module, Class, Reason]),
            []
    end.
%% @doc Write `DataSchema' as the first row, followed by every row of
%% `DataRows', to the file `Filename' inside `TmpDir' using csv_gen:row/2.
%% Crashes if the file cannot be opened. The file handle is closed even when
%% writing a row fails.
-spec to_csv_file(CsvFilename :: binary(), gdpr:schema(), gdpr:entries(), file:name()) -> ok.
to_csv_file(Filename, DataSchema, DataRows, TmpDir) ->
    FilePath = <<(list_to_binary(TmpDir))/binary, "/", Filename/binary>>,
    {ok, File} = file:open(FilePath, [write]),
    try
        csv_gen:row(File, DataSchema),
        lists:foreach(fun(Row) -> csv_gen:row(File, Row) end, DataRows)
    after
        %% Runs on success and on crash, so the descriptor is never leaked.
        file:close(File)
    end.
%% Thin wrapper: delegates the existence check to ejabberd_auth:is_user_exists/2.
-spec user_exists(gdpr:username(), gdpr:domain()) -> boolean().
user_exists(User, Server) ->
    ejabberd_auth:is_user_exists(User, Server).
%% @doc Create a directory named "/tmp/gdpr-XXXX", where XXXX is a random
%% number printed as four zero-padded base-36 digits, and return its path.
%% Retries with a new random name when the directory already exists. Any other
%% file:make_dir/1 failure is returned as `{error, Reason}', so callers must
%% check the result before using it as a path.
-spec make_tmp_dir() -> file:name() | {error, file:posix() | badarg}.
make_tmp_dir() ->
    Suffix = io_lib:format("~4.36.0b", [rand:uniform(36#zzzz)]),
    TmpDirName = lists:flatten(["/tmp/gdpr-", Suffix]),
    case file:make_dir(TmpDirName) of
        ok -> TmpDirName;
        {error, eexist} -> make_tmp_dir();
        {error, _} = Error -> Error
    end.
%% Delete every entry listed directly inside TmpDir, then the directory itself.
%% Results of the individual file:delete/1 calls are ignored; the value of
%% file:del_dir/1 is what gets returned. Crashes if TmpDir cannot be listed.
-spec remove_tmp_dir(file:name()) -> ok.
remove_tmp_dir(TmpDir) ->
    {ok, Entries} = file:list_dir(TmpDir),
    DeleteEntry = fun(Entry) -> file:delete(TmpDir ++ "/" ++ Entry) end,
    lists:foreach(DeleteEntry, Entries),
    file:del_dir(TmpDir).
-type cmd() :: string() | binary().

%% @doc Run the executable `Cmd' with arguments `Args' through a port and wait
%% for it to finish.
%% Returns the program's exit status, or `timeout' if no exit status arrived
%% within `Timeout' milliseconds. On timeout the port is closed, so it does
%% not stay open and linked to the calling process.
-spec run(cmd(), [cmd()], timeout()) -> non_neg_integer() | timeout.
run(Cmd, Args, Timeout) ->
    Port = erlang:open_port({spawn_executable, Cmd}, [exit_status, {args, Args}]),
    receive
        {Port, {exit_status, ExitStatus}} -> ExitStatus
    after Timeout ->
        close_timed_out_port(Port),
        timeout
    end.

%% Close a port whose program did not finish in time and drop an exit status
%% message that may have raced with the timeout.
close_timed_out_port(Port) ->
    try
        erlang:port_close(Port)
    catch
        %% The program exited right after the timeout fired: already closed.
        error:badarg -> true
    end,
    receive
        {Port, {exit_status, _}} -> ok
    after 0 ->
        ok
    end.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Logs retrieval
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% @doc Collect this node's log file for `Username'@`Domain' via get_logs/3
%% and return it as an in-memory zip archive. Exported so that other nodes can
%% invoke it over RPC.
%% Crashes if any step fails; the temporary directory is removed in every case.
-spec retrieve_logs(gdpr:username(), gdpr:domain()) -> {ok, ZippedLogs :: binary()}.
retrieve_logs(Username, Domain) ->
    %% Assert that a directory name (a non-empty string) came back, so a failed
    %% make_tmp_dir/0 surfaces here as a clear badmatch on its {error, _} value.
    TmpDir = [_ | _] = make_tmp_dir(),
    try
        LogFile = get_logs(Username, Domain, TmpDir),
        {ok, {_, ZippedLogs}} = zip:create("archive.zip", [LogFile], [memory, {cwd, TmpDir}]),
        {ok, ZippedLogs}
    after
        %% Runs on success and on crash: extracted log lines must not be left behind.
        remove_tmp_dir(TmpDir)
    end.
%% Collect the log file of this node plus one log file per node returned by
%% ejabberd_config:other_cluster_nodes/0. All files end up in TmpDir; the
%% returned names have the local file first.
-spec get_all_logs(gdpr:username(), gdpr:domain(), file:name()) -> [file:name()].
get_all_logs(Username, Domain, TmpDir) ->
    RemoteNodes = ejabberd_config:other_cluster_nodes(),
    LocalLog = get_logs(Username, Domain, TmpDir),
    [LocalLog | [get_logs_from_node(Remote, Username, Domain, TmpDir) || Remote <- RemoteNodes]].
%% Run priv/parse_logs.sh of the mongooseim application with the output path,
%% the user name, the domain and the absolute paths of the files returned by
%% ejabberd_loglevel:get_log_files/0. The output file is
%% "logs-<node name>.txt" inside TmpDir.
%% Crashes unless the script exits with status 0 within ?CMD_TIMEOUT.
%% Returns the output file name relative to TmpDir.
-spec get_logs(gdpr:username(), gdpr:domain(), file:name()) -> file:name().
get_logs(Username, Domain, TmpDir) ->
    LogPaths = lists:map(fun filename:absname/1, ejabberd_loglevel:get_log_files()),
    Script = code:priv_dir(mongooseim) ++ "/parse_logs.sh",
    OutName = lists:concat(["logs-", node(), ".txt"]),
    OutPath = TmpDir ++ "/" ++ OutName,
    0 = run(Script, [OutPath, Username, Domain | LogPaths], ?CMD_TIMEOUT),
    OutName.
%% Call retrieve_logs/2 on Node over RPC, unpack the returned zip archive into
%% TmpDir and return the base name of the single extracted file.
%% Crashes if the RPC does not return {ok, _} or if the archive does not
%% contain exactly one file.
-spec get_logs_from_node(node(), gdpr:username(), gdpr:domain(), file:name()) -> file:name().
get_logs_from_node(Node, Username, Domain, TmpDir) ->
    RpcResult = rpc:call(Node, ?MODULE, retrieve_logs, [Username, Domain]),
    {ok, Archive} = RpcResult,
    {ok, [Extracted]} = zip:unzip(Archive, [{cwd, TmpDir}]),
    filename:basename(Extracted).