Skip to content

Commit

Permalink
SOLR-16949: Restrict certain file types from being uploaded to or dow…
Browse files Browse the repository at this point in the history
…nloaded from Config Sets
  • Loading branch information
janhoy committed Dec 13, 2023
1 parent 6e9ed20 commit 7e9a2e6
Show file tree
Hide file tree
Showing 20 changed files with 410 additions and 11 deletions.
1 change: 1 addition & 0 deletions lucene/ivy-versions.properties
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ com.fasterxml.jackson.core.version = 2.15.2
/com.healthmarketscience.jackcess/jackcess = 3.0.1
/com.healthmarketscience.jackcess/jackcess-encrypt = 3.0.0
/com.ibm.icu/icu4j = 62.1
/com.j256.simplemagic/simplemagic = 1.17
/com.jayway.jsonpath/json-path = 2.7.0
/com.lmax/disruptor = 3.4.2
/com.pff/java-libpst = 0.9.3
Expand Down
2 changes: 2 additions & 0 deletions solr/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ Other Changes
* SOLR-14853: Security: Converted enableRemoteStreaming and enableStreamBody solrconfig options into system properties and env vars.
Attempts to set them the old way are no-op and log a warning. (David Smiley, janhoy, Ishan Chattopadhyaya)

* SOLR-16949: Restrict certain file types from being uploaded to or downloaded from Config Sets (janhoy, Houston Putman)

================== 8.11.2 ==================

Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
Expand Down
1 change: 1 addition & 0 deletions solr/core/ivy.xml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
<dependency org="com.google.guava" name="failureaccess" rev="${/com.google.guava/failureaccess}" conf="compile"/>
<dependency org="com.google.guava" name="listenablefuture" rev="${/com.google.guava/listenablefuture}" conf="compile"/>
<dependency org="com.google.j2objc" name="j2objc-annotations" rev="${/com.google.j2objc/j2objc-annotations}" conf="compile"/>
<dependency org="com.j256.simplemagic" name="simplemagic" rev="${/com.j256.simplemagic/simplemagic}" conf="compile"/>
<dependency org="org.locationtech.spatial4j" name="spatial4j" rev="${/org.locationtech.spatial4j/spatial4j}" conf="compile"/>
<dependency org="org.antlr" name="antlr4-runtime" rev="${/org.antlr/antlr4-runtime}"/>
<dependency org="org.apache.commons" name="commons-math3" rev="${/org.apache.commons/commons-math3}" conf="compile"/>
Expand Down
23 changes: 20 additions & 3 deletions solr/core/src/java/org/apache/solr/core/backup/BackupManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import org.apache.solr.util.FileTypeMagicUtil;

import com.google.common.base.Preconditions;
import org.apache.lucene.store.IOContext;
Expand Down Expand Up @@ -302,8 +303,16 @@ private void downloadFromZK(SolrZkClient zkClient, String zkPath, URI dir) throw
if (children.size() == 0) {
log.debug("Writing file {}", file);
byte[] data = zkClient.getData(zkPath + "/" + file, null, null, true);
try (OutputStream os = repository.createOutput(repository.resolve(dir, file))) {
os.write(data);
if (!FileTypeMagicUtil.isFileForbiddenInConfigset(data)) {
try (OutputStream os = repository.createOutput(repository.resolve(dir, file))) {
os.write(data);
}
} else {
String mimeType = FileTypeMagicUtil.INSTANCE.guessMimeType(data);
log.warn(
"Not including zookeeper file {} in backup, as it matched the MAGIC signature of a forbidden mime type {}",
file,
mimeType);
}
} else {
URI uri = repository.resolve(dir, file);
Expand All @@ -329,7 +338,15 @@ private void uploadToZk(SolrZkClient zkClient, URI sourceDir, String destZkPath)
try (IndexInput is = repository.openInput(sourceDir, file, IOContext.DEFAULT)) {
byte[] arr = new byte[(int) is.length()]; // probably ok since the config file should be small.
is.readBytes(arr, 0, (int) is.length());
zkClient.makePath(zkNodePath, arr, true);
if (!FileTypeMagicUtil.isFileForbiddenInConfigset(arr)) {
zkClient.makePath(zkNodePath, arr, true);
} else {
String mimeType = FileTypeMagicUtil.INSTANCE.guessMimeType(arr);
log.warn(
"Not restoring configset file {} to zookeeper, as it matched the MAGIC signature of a forbidden mime type {}",
file,
mimeType);
}
} catch (KeeperException | InterruptedException e) {
throw new IOException(SolrZkClient.checkInterrupted(e));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
Expand Down Expand Up @@ -56,6 +57,7 @@
import org.apache.solr.security.AuthenticationPlugin;
import org.apache.solr.security.AuthorizationContext;
import org.apache.solr.security.PermissionNameProvider;
import org.apache.solr.util.FileTypeMagicUtil;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
Expand Down Expand Up @@ -203,9 +205,17 @@ private void handleConfigUploadRequest(SolrQueryRequest req, SolrQueryResponse r
try {
// Create a node for the configuration in zookeeper
// For creating the baseZnode, the cleanup parameter is only allowed to be true when singleFilePath is not passed.
createBaseZnode(zkClient, overwritesExisting, requestIsTrusted, configPathInZk);
String filePathInZk = configPathInZk + "/" + fixedSingleFilePath;
zkClient.makePath(filePathInZk, IOUtils.toByteArray(inputStream), CreateMode.PERSISTENT, null, !allowOverwrite, true);
byte[] bytes = IOUtils.toByteArray(inputStream);
if (!FileTypeMagicUtil.isFileForbiddenInConfigset(bytes)) {
createBaseZnode(zkClient, overwritesExisting, requestIsTrusted, configPathInZk);
String filePathInZk = configPathInZk + "/" + fixedSingleFilePath;
zkClient.makePath(filePathInZk, bytes, CreateMode.PERSISTENT, null, !allowOverwrite, true);
} else {
String mimeType = FileTypeMagicUtil.INSTANCE.guessMimeType(bytes);
throw new SolrException(ErrorCode.BAD_REQUEST,
String.format(Locale.ROOT, "Not uploading file %s to configset, as it matched the MAGIC signature of a forbidden mime type %s",
fixedSingleFilePath, mimeType));
}
} catch(KeeperException.NodeExistsException nodeExistsException) {
throw new SolrException(ErrorCode.BAD_REQUEST,
"The path " + singleFilePath + " for configSet " + configSetName + " already exists. In order to overwrite, provide overwrite=true or use an HTTP PUT with the V2 API.");
Expand Down Expand Up @@ -244,8 +254,15 @@ private void handleConfigUploadRequest(SolrQueryRequest req, SolrQueryResponse r
if (zipEntry.isDirectory()) {
zkClient.makePath(filePathInZk, false, true);
} else {
createZkNodeIfNotExistsAndSetData(zkClient, filePathInZk,
IOUtils.toByteArray(zis));
byte[] bytes = IOUtils.toByteArray(zis);
if (!FileTypeMagicUtil.isFileForbiddenInConfigset(bytes)) {
createZkNodeIfNotExistsAndSetData(zkClient, filePathInZk, bytes);
} else {
String mimeType = FileTypeMagicUtil.INSTANCE.guessMimeType(bytes);
throw new SolrException(ErrorCode.BAD_REQUEST,
String.format(Locale.ROOT, "Not uploading file %s to configset, as it matched the MAGIC signature of a forbidden mime type %s",
zipEntry.getName(), mimeType));
}
}
}
zis.close();
Expand Down
156 changes: 156 additions & 0 deletions solr/core/src/java/org/apache/solr/util/FileTypeMagicUtil.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.solr.util;

import com.j256.simplemagic.ContentInfo;
import com.j256.simplemagic.ContentInfoUtil;
import com.j256.simplemagic.ContentType;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import org.apache.solr.common.SolrException;

/**
 * Utility class to guess the mime type of a file based on its magic number.
 *
 * <p>Detection is delegated to a {@link ContentInfoUtil} that is loaded from the classpath
 * resource {@code /magic/executables}. Any error reported while parsing that resource is turned
 * into a {@link SolrException} by {@link #error(String, String, Exception)}.
 */
public class FileTypeMagicUtil implements ContentInfoUtil.ErrorCallBack {
  /**
   * Number of leading bytes sampled from each file on disk. The tar rules in the magic file probe
   * offsets 257, 500 and 508, so the sample must cover at least one 512 byte tar header block.
   */
  private static final int MAGIC_HEADER_BYTES = 1024;

  private final ContentInfoUtil util;
  private static final Set<String> SKIP_FOLDERS = new HashSet<>(Arrays.asList(".", ".."));

  // Left non-final on purpose: the field is public and existing callers may assign to it.
  public static FileTypeMagicUtil INSTANCE = new FileTypeMagicUtil();

  /**
   * Mime types that are rejected. Read once from the system property {@code
   * solr.configset.upload.mimetypes.forbidden} (comma separated), falling back to the built-in
   * default list.
   */
  private static final Set<String> forbiddenTypes =
      parseForbiddenTypes(
          System.getProperty(
              "solr.configset.upload.mimetypes.forbidden",
              "application/x-java-applet,application/zip,application/x-tar,text/x-shellscript"));

  FileTypeMagicUtil() {
    try {
      util = new ContentInfoUtil("/magic/executables", this);
    } catch (IOException e) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error parsing magic file", e);
    }
  }

  /**
   * Asserts that an entire configset folder is legal to upload.
   *
   * <p>Every regular file below {@code confPath} is sampled and checked with {@link
   * #isFileForbiddenInConfigset(byte[])}. The starting folder itself is always walked, even when
   * it is given as {@code .} or {@code ..}.
   *
   * @param confPath the path to the folder
   * @throws SolrException if an illegal file is found in the folder structure
   * @throws IOException if the folder structure cannot be read
   */
  public static void assertConfigSetFolderLegal(Path confPath) throws IOException {
    Files.walkFileTree(
        confPath,
        new SimpleFileVisitor<Path>() {
          @Override
          public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
              throws IOException {
            // Only the head of the file is needed to determine the mime type
            byte[] header = readHeader(file);
            if (FileTypeMagicUtil.isFileForbiddenInConfigset(header)) {
              throw new SolrException(
                  SolrException.ErrorCode.BAD_REQUEST,
                  String.format(
                      Locale.ROOT,
                      "Not uploading file %s to configset, as it matched the MAGIC signature of a forbidden mime type %s",
                      file,
                      FileTypeMagicUtil.INSTANCE.guessMimeType(header)));
            }
            return FileVisitResult.CONTINUE;
          }

          @Override
          public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs)
              throws IOException {
            // Never skip the folder we were asked to validate: a relative start path such as "."
            // would otherwise match SKIP_FOLDERS and silently bypass the whole check.
            if (dir.equals(confPath)) {
              return FileVisitResult.CONTINUE;
            }
            // getFileName() is null for a path with no name elements (e.g. a filesystem root)
            Path folderName = dir.getFileName();
            if (folderName != null && SKIP_FOLDERS.contains(folderName.toString())) {
              return FileVisitResult.SKIP_SUBTREE;
            }
            return FileVisitResult.CONTINUE;
          }
        });
  }

  /**
   * Reads up to {@link #MAGIC_HEADER_BYTES} bytes from the start of a file.
   *
   * <p>A single {@code read} call may return fewer bytes than requested, so this loops until the
   * buffer is full or end of stream is reached. The result is trimmed to the number of bytes
   * actually read, so that no zero padding is presented to the magic matcher.
   *
   * @param file the file to sample
   * @return the leading bytes of the file; empty for an empty file
   * @throws IOException if the file cannot be opened or read
   */
  private static byte[] readHeader(Path file) throws IOException {
    byte[] buffer = new byte[MAGIC_HEADER_BYTES];
    int filled = 0;
    try (InputStream in = Files.newInputStream(file)) {
      while (filled < buffer.length) {
        int count = in.read(buffer, filled, buffer.length - filled);
        if (count < 0) {
          break;
        }
        filled += count;
      }
    }
    return filled == buffer.length ? buffer : Arrays.copyOf(buffer, filled);
  }

  /**
   * Splits a comma separated list of mime types, trimming surrounding whitespace and dropping
   * empty entries so that a value such as {@code "a, b,"} yields {@code a} and {@code b}.
   *
   * @param commaSeparated the raw property value
   * @return the set of mime types
   */
  private static Set<String> parseForbiddenTypes(String commaSeparated) {
    Set<String> types = new HashSet<>();
    for (String token : commaSeparated.split(",")) {
      String mimeType = token.trim();
      if (!mimeType.isEmpty()) {
        types.add(mimeType);
      }
    }
    return types;
  }

  /**
   * Guess the mime type of file based on its magic number.
   *
   * @param stream input stream of the file
   * @return string with content-type or "application/octet-stream" if unknown
   * @throws SolrException if reading from the stream fails
   */
  public String guessMimeType(InputStream stream) {
    try {
      ContentInfo info = util.findMatch(stream);
      if (info == null) {
        return ContentType.OTHER.getMimeType();
      }
      return info.getContentType().getMimeType();
    } catch (IOException e) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
    }
  }

  /**
   * Guess the mime type of file bytes based on its magic number.
   *
   * @param bytes the first bytes at start of the file
   * @return string with content-type or "application/octet-stream" if unknown
   */
  public String guessMimeType(byte[] bytes) {
    return guessMimeType(new ByteArrayInputStream(bytes));
  }

  /**
   * Error callback handed to {@link ContentInfoUtil}; rethrows the reported problem as a {@link
   * SolrException} instead of letting it pass silently.
   */
  @Override
  public void error(String line, String details, Exception e) {
    throw new SolrException(
        SolrException.ErrorCode.SERVER_ERROR,
        String.format(Locale.ROOT, "%s: %s", line, details),
        e);
  }

  /**
   * Determine forbidden file type based on magic bytes matching of the file itself. Unless
   * overridden with the system property {@code solr.configset.upload.mimetypes.forbidden}, the
   * forbidden types are:
   *
   * <ul>
   *   <li><code>application/x-java-applet</code>: java class file
   *   <li><code>application/zip</code>: jar or zip archives
   *   <li><code>application/x-tar</code>: tar archives
   *   <li><code>text/x-shellscript</code>: shell or bash script
   * </ul>
   *
   * @param fileStream stream from the file content
   * @return true if file is among the forbidden mime-types
   */
  public static boolean isFileForbiddenInConfigset(InputStream fileStream) {
    return forbiddenTypes.contains(FileTypeMagicUtil.INSTANCE.guessMimeType(fileStream));
  }

  /**
   * Determine forbidden file type based on magic bytes matching of the first bytes of the file.
   *
   * @param bytes byte array of the file content
   * @return true if file is among the forbidden mime-types
   */
  public static boolean isFileForbiddenInConfigset(byte[] bytes) {
    return isFileForbiddenInConfigset(new ByteArrayInputStream(bytes));
  }
}
2 changes: 2 additions & 0 deletions solr/core/src/java/org/apache/solr/util/SolrCLI.java
Original file line number Diff line number Diff line change
Expand Up @@ -1971,6 +1971,7 @@ protected void runCloudTool(CloudSolrClient cloudSolrClient, CommandLine cli) th

echoIfVerbose("Uploading " + confPath.toAbsolutePath().toString() +
" for config " + confname + " to ZooKeeper at " + cloudSolrClient.getZkHost(), cli);
FileTypeMagicUtil.assertConfigSetFolderLegal(confPath);
((ZkClientClusterStateProvider) cloudSolrClient.getClusterStateProvider()).uploadConfig(confPath, confname);
}

Expand Down Expand Up @@ -2265,6 +2266,7 @@ protected void runImpl(CommandLine cli) throws Exception {
echo("Uploading " + confPath.toAbsolutePath().toString() +
" for config " + cli.getOptionValue("confname") + " to ZooKeeper at " + zkHost);

FileTypeMagicUtil.assertConfigSetFolderLegal(confPath);
zkClient.upConfig(confPath, confName);
} catch (Exception e) {
log.error("Could not complete upconfig operation for reason: ", e);
Expand Down
59 changes: 59 additions & 0 deletions solr/core/src/resources/magic/executables
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# POSIX tar archives
# URL: https://en.wikipedia.org/wiki/Tar_(computing)
# Reference: https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5&manpath=FreeBSD+8-current
# header mainly padded with nul bytes
500 quad 0
!:strength /2
# filename or extended attribute printable strings in range space null til umlaut ue
>0 ubeshort >0x1F00
>>0 ubeshort <0xFCFD
# last 4 header bytes often null but tar\0 in gtarfail2.tar gtarfail.tar-bad
# at https://sourceforge.net/projects/s-tar/files/testscripts/
>>>508 ubelong&0x8B9E8DFF 0
# nul, space or ascii digit 0-7 at start of mode
>>>>100 ubyte&0xC8 =0
>>>>>101 ubyte&0xC8 =0
# nul, space at end of check sum
>>>>>>155 ubyte&0xDF =0
# space or ascii digit 0 at start of check sum
>>>>>>>148 ubyte&0xEF =0x20
# check for specific 1st member name that indicates other mime type and file name suffix
>>>>>>>>0 string TpmEmuTpms/permall
!:mime application/x-tar
!:ext tar
# other stuff in padding
# some implementations add new fields to the blank area at the end of the header record
# created for example by DOS TAR 3.20g 1994 Tim V.Shapore with -j option
>>257 ulong !0 tar archive (old)
!:mime application/x-tar
!:ext tar
# magic in newer, GNU, posix variants
>257 string =ustar
# 2 last char of magic and UStar version because string expression does not work
# 2 space characters followed by a null for GNU variant
>>261 ubelong =0x72202000 POSIX tar archive (GNU)
!:mime application/x-gtar
!:ext tar/gtar


# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
0 string PK\005\006 Zip archive data (empty)
0 string PK\003\004 Zip archive data
!:strength +1
!:mime application/zip
!:ext zip/cbz


# JAVA
0 belong 0xcafebabe
>4 ubelong >30 compiled Java class data,
!:mime application/x-java-applet
#!:mime application/java-byte-code
!:ext class


# SHELL scripts
#0 string/w : shell archive or script for antique kernel text
0 regex \^#!\\s?(/bin/|/usr/) POSIX shell script text executable
!:mime text/x-shellscript
!:ext sh/bash
5 changes: 5 additions & 0 deletions solr/core/src/test-files/magic/HelloWorld.java.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
class HelloWorld {
public static void main(String[] args) {
System.out.println("Hellow world");
}
}
Binary file not shown.
12 changes: 12 additions & 0 deletions solr/core/src/test-files/magic/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
The two binary files were created by the following commands:

```bash
echo "Hello" > hello.txt && \
tar -cvf hello.tar.bin hello.txt && \
rm hello.txt

cp HelloWorld.java.txt HelloWorld.java && \
javac HelloWorld.java && \
mv HelloWorld.class HelloWorldJavaClass.class.bin && \
rm HelloWorld.java
```
Binary file added solr/core/src/test-files/magic/hello.tar.bin
Binary file not shown.
1 change: 1 addition & 0 deletions solr/core/src/test-files/magic/plain.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Hello world
2 changes: 2 additions & 0 deletions solr/core/src/test-files/magic/shell.sh.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#! /usr/bin/env bash
echo Hello
Loading

0 comments on commit 7e9a2e6

Please sign in to comment.