Skip to content
2 changes: 2 additions & 0 deletions java/change-notes/2021-03-02-guava-io.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
lgtm,codescanning
* Increased coverage of the Guava framework by including classes in the `com.google.common.io` package.
3 changes: 3 additions & 0 deletions java/ql/src/experimental/Security/CWE/CWE-036/OpenStream.ql
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import java
import semmle.code.java.dataflow.TaintTracking
import semmle.code.java.dataflow.FlowSources
import semmle.code.java.dataflow.ExternalFlow
import DataFlow::PathGraph

class URLConstructor extends ClassInstanceExpr {
Expand Down Expand Up @@ -37,6 +38,8 @@ class RemoteURLToOpenStreamFlowConfig extends TaintTracking::Configuration {
exists(MethodAccess m |
sink.asExpr() = m.getQualifier() and m.getMethod() instanceof URLOpenStreamMethod
)
or
sinkNode(sink, "url-open-stream")
}

override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
Expand Down
1 change: 1 addition & 0 deletions java/ql/src/semmle/code/java/dataflow/ExternalFlow.qll
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ private import internal.DataFlowPrivate
private module Frameworks {
private import semmle.code.java.frameworks.ApacheHttp
private import semmle.code.java.frameworks.apache.Lang
private import semmle.code.java.frameworks.guava.Guava
}

private predicate sourceModelCsv(string row) {
Expand Down
1 change: 1 addition & 0 deletions java/ql/src/semmle/code/java/frameworks/guava/Guava.qll
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ import java
import StringUtils
import Collections
import Preconditions
import IO
98 changes: 98 additions & 0 deletions java/ql/src/semmle/code/java/frameworks/guava/IO.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/** Definitions of taint steps in the IO package of the Guava framework */

import java
private import semmle.code.java.dataflow.ExternalFlow

private class GuavaIoCsv extends SummaryModelCsv {
override predicate row(string row) {
row =
[
//"package;type;overrides;name;signature;ext;inputspec;outputspec;kind",
"com.google.common.io;BaseEncoding;true;decode;(CharSequence);;Argument[0];ReturnValue;taint",
"com.google.common.io;BaseEncoding;true;decodingStream;(Reader);;Argument[0];ReturnValue;taint",
"com.google.common.io;BaseEncoding;true;decodingSource;(CharSource);;Argument[0];ReturnValue;taint",
"com.google.common.io;BaseEncoding;true;encode;(byte[]);;Argument[0];ReturnValue;taint",
"com.google.common.io;BaseEncoding;true;encode;(byte[],int,int);;Argument[0];ReturnValue;taint",
"com.google.common.io;BaseEncoding;true;withSeparator;(String,int);;Argument[0];ReturnValue;taint",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can taint be modelled backwards?
In that case should this also match encodingSink and encodingStream where the return value taints the argument?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are a few special cases that model backwards taint steps, however there is no general mechanism for it, as it would likely lead to false positives from "time-travel flow", in cases like the following:

x = new StringWriter();
y = enc.encodingStream(x)
sink(x.toString());
y.write(taint());

If the writes to y are propegated backwards to x, then we get spurious flow to sink.
So, the question of whether to include things like encodingStream (in general, wrappers around an output stream - of which there are several in the standard library that we don't model for simlar reasons) amounts to whether the additional effort to implement a special-case backwards flow step plus the potential for these kinds of FPs is worth the few additional results it could get; which I don't think it is.

"com.google.common.io;BaseEncoding;true;decode;(CharSequence);;Argument[-1];ReturnValue;taint",
"com.google.common.io;BaseEncoding;true;decodingStream;(Reader);;Argument[-1];ReturnValue;taint",
"com.google.common.io;BaseEncoding;true;decodingSource;(CharSource);;Argument[-1];ReturnValue;taint",
"com.google.common.io;BaseEncoding;true;encode;(byte[]);;Argument[-1];ReturnValue;taint",
"com.google.common.io;BaseEncoding;true;upperCase;();;Argument[-1];ReturnValue;taint",
"com.google.common.io;BaseEncoding;true;lowerCase;();;Argument[-1];ReturnValue;taint",
"com.google.common.io;BaseEncoding;true;withPadChar;(char);;Argument[-1];ReturnValue;taint",
"com.google.common.io;BaseEncoding;true;omitPadding;();;Argument[-1];ReturnValue;taint",
"com.google.common.io;BaseEncoding;true;encode;(byte[],int,int);;Argument[-1];ReturnValue;taint",
Comment on lines +17 to +25
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be good to add a comment why BaseEncoding can be tainted. I was puzzled at first until I noticed that withSeparator (and only that method) can taint the encoding object.

"com.google.common.io;ByteSource;true;asCharSource;(Charset);;Argument[-1];ReturnValue;taint",
"com.google.common.io;ByteSource;true;concat;;;Argument[0];ReturnValue;taint",
"com.google.common.io;ByteSource;true;copyTo;(OutputStream);;Argument[-1];Argument[0];taint",
"com.google.common.io;ByteSource;true;openStream;();;Argument[-1];ReturnValue;taint",
"com.google.common.io;ByteSource;true;openBufferedStream;();;Argument[-1];ReturnValue;taint",
"com.google.common.io;ByteSource;true;read;();;Argument[-1];ReturnValue;taint",
"com.google.common.io;ByteSource;true;slice;(long,long);;Argument[-1];ReturnValue;taint",
"com.google.common.io;ByteSource;true;wrap;(byte[]);;Argument[0];ReturnValue;taint",
"com.google.common.io;ByteStreams;false;copy;(InputStream,OutputStream);;Argument[0];Argument[1];taint",
"com.google.common.io;ByteStreams;false;copy;(ReadablyByteChannel,WritableByteChannel);;Argument[0];Argument[1];taint",
"com.google.common.io;ByteStreams;false;limit;(InputStream,long);;Argument[0];ReturnValue;taint",
"com.google.common.io;ByteStreams;false;newDataInput;(byte[]);;Argument[0];ReturnValue;taint",
"com.google.common.io;ByteStreams;false;newDataInput;(byte[],int);;Argument[0];ReturnValue;taint",
"com.google.common.io;ByteStreams;false;newDataInput;(ByteArrayInputStream);;Argument[0];ReturnValue;taint",
"com.google.common.io;ByteStreams;false;newDataOutput;(ByteArrayOutputStream);;Argument[0];ReturnValue;taint",
"com.google.common.io;ByteStreams;false;read;(InputStream,byte[],int,int);;Argument[0];Argument[1];taint",
"com.google.common.io;ByteStreams;false;readFully;(InputStream,byte[]);;Argument[0];Argument[1];taint",
"com.google.common.io;ByteStreams;false;readFully;(InputStream,byte[],int,int);;Argument[0];Argument[1];taint",
"com.google.common.io;ByteStreams;false;toByteArray;(InputStream);;Argument[0];ReturnValue;taint",
"com.google.common.io;CharSource;true;asByteSource;(Charset);;Argument[-1];ReturnValue;taint",
"com.google.common.io;CharSource;true;concat;;;Argument[0];ReturnValue;taint",
"com.google.common.io;CharSource;true;copyTo;(Appendable);;Argument[-1];Argument[0];taint",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this also cover copyTo(CharSink)? Someone could write their own CharSink which acts like a StringWriter, though this might be unlikely since it does not fit the intended use case of CharSink very well.

"com.google.common.io;CharSource;true;openStream;();;Argument[-1];ReturnValue;taint",
"com.google.common.io;CharSource;true;openBufferedStream;();;Argument[-1];ReturnValue;taint",
"com.google.common.io;CharSource;true;read;();;Argument[-1];ReturnValue;taint",
"com.google.common.io;CharSource;true;readFirstLine;();;Argument[-1];ReturnValue;taint",
"com.google.common.io;CharSource;true;readLines;();;Argument[-1];ReturnValue;taint",
"com.google.common.io;CharSource;true;lines;();;Argument[-1];ReturnValue;taint",
"com.google.common.io;CharSource;true;wrap;(CharSequence);;Argument[0];ReturnValue;taint",
"com.google.common.io;CharStreams;false;copy;(Readable,Appendable);;Argument[0];Argument[1];taint",
"com.google.common.io;CharStreams;false;readLines;(Readable);;Argument[0];ReturnValue;taint",
"com.google.common.io;CharStreams;false;toString;(Readable);;Argument[0];ReturnValue;taint",
"com.google.common.io;Closer;true;register;;;Argument[0];ReturnValue;value",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should exception objects also be tracked for taint? E.g. when an exception message is created with a user-defined value and is then logged, it could allow log injection. Though in general any flow from user-defined data to an exception constructor is bad, so maybe it is not worth trying to check where the tainted exception flows afterwards?

"com.google.common.io;Files;false;getFileExtension;(String);;Argument[0];ReturnValue;taint",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should all the methods which read or transfer the file content to in-memory structures (e.g. Appendable) also be considered, when the user can chose the file (though this on its own is already bad)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is already a query for user controlled data flowing to a File, so additional taint steps from that point are unlikely to be helpful. Additionally, a user controlling a file path doesn't necassarily mean they can control the file contents.

"com.google.common.io;Files;false;getNameWithoutExtension;(String);;Argument[0];ReturnValue;taint",
"com.google.common.io;Files;false;simplifyPath;(String);;Argument[0];ReturnValue;taint",
"com.google.common.io;MoreFiles;false;getFileExtension;(Path);;Argument[0];ReturnValue;taint",
"com.google.common.io;MoreFiles;false;getNameWithoutExtension;(Path);;Argument[0];ReturnValue;taint",
"com.google.common.io;LineReader;false;LineReader;(Readable);;Argument[0];ReturnValue;taint",
"com.google.common.io;LineReader;true;readLine;();;Argument[-1];ReturnValue;taint",
"com.google.common.io;ByteArrayDataOutput;true;toByteArray;();;Argument[-1];ReturnValue;taint",
"com.google.common.io;ByteArrayDataOutput;true;write;(byte[]);;Argument[0];Argument[-1];taint",
"com.google.common.io;ByteArrayDataOutput;true;write;(byte[],int,int);;Argument[0];Argument[-1];taint",
"com.google.common.io;ByteArrayDataOutput;true;write;(int);;Argument[0];Argument[-1];taint",
"com.google.common.io;ByteArrayDataOutput;true;writeByte;(int);;Argument[0];Argument[-1];taint",
"com.google.common.io;ByteArrayDataOutput;true;writeBytes;(String);;Argument[0];Argument[-1];taint",
"com.google.common.io;ByteArrayDataOutput;true;writeChar;(int);;Argument[0];Argument[-1];taint",
"com.google.common.io;ByteArrayDataOutput;true;writeChars;(String);;Argument[0];Argument[-1];taint",
"com.google.common.io;ByteArrayDataOutput;true;writeDouble;(double);;Argument[0];Argument[-1];taint",
"com.google.common.io;ByteArrayDataOutput;true;writeFloat;(float);;Argument[0];Argument[-1];taint",
"com.google.common.io;ByteArrayDataOutput;true;writeInt;(int);;Argument[0];Argument[-1];taint",
"com.google.common.io;ByteArrayDataOutput;true;writeLong;(long);;Argument[0];Argument[-1];taint",
"com.google.common.io;ByteArrayDataOutput;true;writeShort;(int);;Argument[0];Argument[-1];taint",
"com.google.common.io;ByteArrayDataOutput;true;writeUTF;(String);;Argument[0];Argument[-1];taint"
]
}
}

private class GuavaIoSinkCsv extends SinkModelCsv {
override predicate row(string row) {
row =
[
//"package;type;overrides;name;signature;ext;inputspec;kind",
"com.google.common.io;Resources;false;asByteSource;(URL);;Argument[0];url-open-stream",
"com.google.common.io;Resources;false;asCharSource;(URL,Charset);;Argument[0];url-open-stream",
"com.google.common.io;Resources;false;copy;(URL,OutputStream);;Argument[0];url-open-stream",
"com.google.common.io;Resources;false;asByteSource;(URL);;Argument[0];url-open-stream",
"com.google.common.io;Resources;false;readLines;;;Argument[0];url-open-stream",
"com.google.common.io;Resources;false;toByteArray;(URL);;Argument[0];url-open-stream",
"com.google.common.io;Resources;false;toString;(URL,Charset);;Argument[0];url-open-stream"
]
}
}
64 changes: 32 additions & 32 deletions java/ql/test/library-tests/frameworks/guava/TestCollect.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,20 @@ void test1() {
String x = taint();

ImmutableSet<String> xs = ImmutableSet.of(x, "y", "z");
sink(xs.asList());
sink(xs.asList()); // $numTaintFlow=1

ImmutableSet<String> ys = ImmutableSet.of("a", "b", "c");

sink(Sets.filter(Sets.union(xs, ys), y -> true));
sink(Sets.filter(Sets.union(xs, ys), y -> true)); // $numTaintFlow=1

sink(Sets.newHashSet("a", "b", "c", "d", x));
sink(Sets.newHashSet("a", "b", "c", "d", x)); // $numTaintFlow=1
}

void test2() {
sink(ImmutableList.of(taint(), taint(), taint(), taint())); // expect 4 alerts
sink(ImmutableMap.of(taint(), taint(), taint(), taint())); // expect 2 alerts
sink(ImmutableMultimap.of(taint(), taint(), taint(), taint())); // expect 2 alerts
sink(ImmutableTable.of(taint(),taint(), taint())); // expect 1 alert
sink(ImmutableList.of(taint(), taint(), taint(), taint())); // $numTaintFlow=4
sink(ImmutableMap.of(taint(), taint(), taint(), taint())); // $numTaintFlow=2
sink(ImmutableMultimap.of(taint(), taint(), taint(), taint())); // $numTaintFlow=2
sink(ImmutableTable.of(taint(),taint(), taint())); // $numTaintFlow=1
}

void test3() {
Expand All @@ -38,20 +38,20 @@ void test3() {
b.add("a");
sink(b);
b.add(x);
sink(b.build());
sink(b.build()); // $numTaintFlow=1

b = ImmutableList.builder();

b.add("a").add(x);
sink(b.build());
sink(b.build()); // $numTaintFlow=1

sink(ImmutableList.builder().add("a").add(x).build());
sink(ImmutableList.builder().add("a").add(x).build()); // $numTaintFlow=1

ImmutableMap.Builder<String, String> b2 = ImmutableMap.builder();
b2.put(x,"v");
sink(b2);
b2.put("k",x);
sink(b2.build());
sink(b2.build()); // $numTaintFlow=1
}

void test4(Table<String, String, String> t1, Table<String, String, String> t2, Table<String, String, String> t3) {
Expand All @@ -61,62 +61,62 @@ void test4(Table<String, String, String> t1, Table<String, String, String> t2, T
t1.put("r", x, "v");
sink(t1);
t1.put("r", "c", x);
sink(t1);
sink(t1.row("r"));
sink(t1); // $numTaintFlow=1
sink(t1.row("r")); // $numTaintFlow=1

t2.putAll(t1);
for (Table.Cell<String,String,String> c : t2.cellSet()) {
sink(c.getValue());
sink(c.getValue()); // $numTaintFlow=1
}

sink(t1.remove("r", "c"));
sink(t1.remove("r", "c")); // $numTaintFlow=1

t3.row("r").put("c", x);
sink(t3); // Not detected
sink(t3); // $ MISSING:numTaintFlow=1
}

void test4(Multimap<String, String> m1, Multimap<String, String> m2, Multimap<String, String> m3,
Multimap<String, String> m4, Multimap<String, String> m5){
String x = taint();
m1.put("k", x);
sink(m1);
sink(m1.get("k"));
sink(m1); // $numTaintFlow=1
sink(m1.get("k")); // $numTaintFlow=1

m2.putAll("k", ImmutableList.of("a", x, "b"));
sink(m2);
sink(m2); // $numTaintFlow=1

m3.putAll(m1);
sink(m3);
sink(m3); // $numTaintFlow=1

m4.replaceValues("k", m1.replaceValues("k", ImmutableList.of("a")));
for (Map.Entry<String, String> e : m4.entries()) {
sink(e.getValue());
sink(e.getValue()); // $numTaintFlow=1
}

m5.asMap().get("k").add(x);
sink(m5); // Not detected
sink(m5); // $ MISSING:numTaintFlow=1
}

void test5(Comparator<String> comp, SortedSet<String> sorS, SortedMap<String, String> sorM) {
ImmutableSortedSet<String> s = ImmutableSortedSet.of(taint());

sink(s);
sink(ImmutableSortedSet.copyOf(s));
sink(ImmutableSortedSet.copyOf(comp, s));
sink(s); // $numTaintFlow=1
sink(ImmutableSortedSet.copyOf(s)); // $numTaintFlow=1
sink(ImmutableSortedSet.copyOf(comp, s)); // $numTaintFlow=1

sorS.add(taint());
sink(ImmutableSortedSet.copyOfSorted(sorS));
sink(ImmutableSortedSet.copyOfSorted(sorS)); // $numTaintFlow=1

sink(ImmutableList.sortedCopyOf(s));
sink(ImmutableList.sortedCopyOf(comp, s));
sink(ImmutableList.sortedCopyOf(s)); // $numTaintFlow=1
sink(ImmutableList.sortedCopyOf(comp, s)); // $numTaintFlow=1

ImmutableSortedMap<String, String> m = ImmutableSortedMap.of("k", taint());

sink(m);
sink(ImmutableSortedMap.copyOf(m));
sink(ImmutableSortedMap.copyOf(m, comp));
sink(m); // $numTaintFlow=1
sink(ImmutableSortedMap.copyOf(m)); // $numTaintFlow=1
sink(ImmutableSortedMap.copyOf(m, comp)); // $numTaintFlow=1

sorM.put("k", taint());
sink(ImmutableSortedMap.copyOfSorted(sorM));
sink(ImmutableSortedMap.copyOfSorted(sorM)); // $numTaintFlow=1
}
}
Loading