cmd/influx_inspect/tsm.go

package main

import (
	"encoding/binary"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"text/tabwriter"
	"time"

	"github.com/golang/snappy"
	"github.com/influxdb/influxdb/tsdb"
	"github.com/influxdb/influxdb/tsdb/engine/tsm1"
)

// these consts are for the old tsm format. They can be removed once we remove
// the inspection for the original tsm1 files.
const (
	//IDsFileExtension is the extension for the file that keeps the compressed map
	// of keys to uint64 IDs.
	IDsFileExtension = "ids"

	// FieldsFileExtension is the extension for the file that stores compressed field
	// encoding data for this db
	FieldsFileExtension = "fields"

	// SeriesFileExtension is the extension for the file that stores the compressed
	// series metadata for series in this db
	SeriesFileExtension = "series"
)

type tsdmDumpOpts struct {
	dumpIndex  bool
	dumpBlocks bool
	filterKey  string
	path       string
}

type tsmIndex struct {
	series  int
	offset  int64
	minTime time.Time
	maxTime time.Time
	blocks  []*block
}

type block struct {
	id     uint64
	offset int64
}

type blockStats struct {
	min, max int
	counts   [][]int
}

func (b *blockStats) inc(typ int, enc byte) {
	for len(b.counts) <= typ {
		b.counts = append(b.counts, []int{})
	}
	for len(b.counts[typ]) <= int(enc) {
		b.counts[typ] = append(b.counts[typ], 0)
	}
	b.counts[typ][enc]++
}

func (b *blockStats) size(sz int) {
	if b.min == 0 || sz < b.min {
		b.min = sz
	}
	if b.min == 0 || sz > b.max {
		b.max = sz
	}
}

var (
	fieldType = []string{
		"timestamp", "float", "int", "bool", "string",
	}
	blockTypes = []string{
		"float64", "int64", "bool", "string",
	}
	timeEnc = []string{
		"none", "s8b", "rle",
	}
	floatEnc = []string{
		"none", "gor",
	}
	intEnc = []string{
		"none", "s8b", "rle",
	}
	boolEnc = []string{
		"none", "bp",
	}
	stringEnc = []string{
		"none", "snpy",
	}
	encDescs = [][]string{
		timeEnc, floatEnc, intEnc, boolEnc, stringEnc,
	}
)

func readFields(path string) (map[string]*tsdb.MeasurementFields, error) {
	fields := make(map[string]*tsdb.MeasurementFields)

	f, err := os.OpenFile(filepath.Join(path, FieldsFileExtension), os.O_RDONLY, 0666)
	if os.IsNotExist(err) {
		return fields, nil
	} else if err != nil {
		return nil, err
	}
	b, err := ioutil.ReadAll(f)
	if err != nil {
		return nil, err
	}

	data, err := snappy.Decode(nil, b)
	if err != nil {
		return nil, err
	}

	if err := json.Unmarshal(data, &fields); err != nil {
		return nil, err
	}
	return fields, nil
}

func readSeries(path string) (map[string]*tsdb.Series, error) {
	series := make(map[string]*tsdb.Series)

	f, err := os.OpenFile(filepath.Join(path, SeriesFileExtension), os.O_RDONLY, 0666)
	if os.IsNotExist(err) {
		return series, nil
	} else if err != nil {
		return nil, err
	}
	defer f.Close()
	b, err := ioutil.ReadAll(f)
	if err != nil {
		return nil, err
	}

	data, err := snappy.Decode(nil, b)
	if err != nil {
		return nil, err
	}

	if err := json.Unmarshal(data, &series); err != nil {
		return nil, err
	}

	return series, nil
}

func readIds(path string) (map[string]uint64, error) {
	f, err := os.OpenFile(filepath.Join(path, IDsFileExtension), os.O_RDONLY, 0666)
	if os.IsNotExist(err) {
		return nil, nil
	} else if err != nil {
		return nil, err
	}
	b, err := ioutil.ReadAll(f)
	if err != nil {
		return nil, err
	}

	b, err = snappy.Decode(nil, b)
	if err != nil {
		return nil, err
	}

	ids := make(map[string]uint64)
	if b != nil {
		if err := json.Unmarshal(b, &ids); err != nil {
			return nil, err
		}
	}
	return ids, err
}
func readIndex(f *os.File) (*tsmIndex, error) {
	// Get the file size
	stat, err := f.Stat()
	if err != nil {
		return nil, err
	}

	// Seek to the series count
	f.Seek(-4, os.SEEK_END)
	b := make([]byte, 8)
	_, err = f.Read(b[:4])
	if err != nil {
		return nil, err
	}

	seriesCount := binary.BigEndian.Uint32(b)

	// Get the min time
	f.Seek(-20, os.SEEK_END)
	f.Read(b)
	minTime := time.Unix(0, int64(btou64(b)))

	// Get max time
	f.Seek(-12, os.SEEK_END)
	f.Read(b)
	maxTime := time.Unix(0, int64(btou64(b)))

	// Figure out where the index starts
	indexStart := stat.Size() - int64(seriesCount*12+20)

	// Seek to the start of the index
	f.Seek(indexStart, os.SEEK_SET)
	count := int(seriesCount)
	index := &tsmIndex{
		offset:  indexStart,
		minTime: minTime,
		maxTime: maxTime,
		series:  count,
	}

	if indexStart < 0 {
		return nil, fmt.Errorf("index corrupt: offset=%d", indexStart)
	}

	// Read the index entries
	for i := 0; i < count; i++ {
		f.Read(b)
		id := binary.BigEndian.Uint64(b)
		f.Read(b[:4])
		pos := binary.BigEndian.Uint32(b[:4])
		index.blocks = append(index.blocks, &block{id: id, offset: int64(pos)})
	}

	return index, nil
}

func cmdDumpTsm1(opts *tsdmDumpOpts) {
	var errors []error

	f, err := os.Open(opts.path)
	if err != nil {
		println(err.Error())
		os.Exit(1)
	}

	// Get the file size
	stat, err := f.Stat()
	if err != nil {
		println(err.Error())
		os.Exit(1)
	}

	b := make([]byte, 8)
	f.Read(b[:4])

	// Verify magic number
	if binary.BigEndian.Uint32(b[:4]) != 0x16D116D1 {
		println("Not a tsm1 file.")
		os.Exit(1)
	}

	ids, err := readIds(filepath.Dir(opts.path))
	if err != nil {
		println("Failed to read series:", err.Error())
		os.Exit(1)
	}

	invIds := map[uint64]string{}
	for k, v := range ids {
		invIds[v] = k
	}

	index, err := readIndex(f)
	if err != nil {
		println("Failed to readIndex:", err.Error())

		// Create a stubbed out index so we can still try and read the block data directly
		// w/o panicing ourselves.
		index = &tsmIndex{
			minTime: time.Unix(0, 0),
			maxTime: time.Unix(0, 0),
			offset:  stat.Size(),
		}
	}

	blockStats := &blockStats{}

	println("Summary:")
	fmt.Printf("  File: %s\n", opts.path)
	fmt.Printf("  Time Range: %s - %s\n",
		index.minTime.UTC().Format(time.RFC3339Nano),
		index.maxTime.UTC().Format(time.RFC3339Nano),
	)
	fmt.Printf("  Duration: %s ", index.maxTime.Sub(index.minTime))
	fmt.Printf("  Series: %d ", index.series)
	fmt.Printf("  File Size: %d\n", stat.Size())
	println()

	tw := tabwriter.NewWriter(os.Stdout, 8, 8, 1, '\t', 0)
	fmt.Fprintln(tw, "  "+strings.Join([]string{"Pos", "ID", "Ofs", "Key", "Field"}, "\t"))
	for i, block := range index.blocks {
		key := invIds[block.id]
		split := strings.Split(key, "#!~#")

		// We dont' know know if we have fields so use an informative default
		var measurement, field string = "UNKNOWN", "UNKNOWN"

		// We read some IDs from the ids file
		if len(invIds) > 0 {
			// Change the default to error until we know we have a valid key
			measurement = "ERR"
			field = "ERR"

			// Possible corruption? Try to read as much as we can and point to the problem.
			if key == "" {
				errors = append(errors, fmt.Errorf("index pos %d, field id: %d, missing key for id", i, block.id))
			} else if len(split) < 2 {
				errors = append(errors, fmt.Errorf("index pos %d, field id: %d, key corrupt: got '%v'", i, block.id, key))
			} else {
				measurement = split[0]
				field = split[1]
			}
		}

		if opts.filterKey != "" && !strings.Contains(key, opts.filterKey) {
			continue
		}
		fmt.Fprintln(tw, "  "+strings.Join([]string{
			strconv.FormatInt(int64(i), 10),
			strconv.FormatUint(block.id, 10),
			strconv.FormatInt(int64(block.offset), 10),
			measurement,
			field,
		}, "\t"))
	}

	if opts.dumpIndex {
		println("Index:")
		tw.Flush()
		println()
	}

	tw = tabwriter.NewWriter(os.Stdout, 8, 8, 1, '\t', 0)
	fmt.Fprintln(tw, "  "+strings.Join([]string{"Blk", "Ofs", "Len", "ID", "Type", "Min Time", "Points", "Enc [T/V]", "Len [T/V]"}, "\t"))

	// Staring at 4 because the magic number is 4 bytes
	i := int64(4)
	var blockCount, pointCount, blockSize int64
	indexSize := stat.Size() - index.offset

	// Start at the beginning and read every block
	for i < index.offset {
		f.Seek(int64(i), 0)

		f.Read(b)
		id := btou64(b)
		f.Read(b[:4])
		length := binary.BigEndian.Uint32(b[:4])
		buf := make([]byte, length)
		f.Read(buf)

		blockSize += int64(len(buf)) + 12

		startTime := time.Unix(0, int64(btou64(buf[:8])))
		blockType := buf[8]

		encoded := buf[9:]

		cnt := tsm1.BlockCount(buf)
		pointCount += int64(cnt)

		// Length of the timestamp block
		tsLen, j := binary.Uvarint(encoded)

		// Unpack the timestamp bytes
		ts := encoded[int(j) : int(j)+int(tsLen)]

		// Unpack the value bytes
		values := encoded[int(j)+int(tsLen):]

		tsEncoding := timeEnc[int(ts[0]>>4)]
		vEncoding := encDescs[int(blockType+1)][values[0]>>4]

		typeDesc := blockTypes[blockType]

		blockStats.inc(0, ts[0]>>4)
		blockStats.inc(int(blockType+1), values[0]>>4)
		blockStats.size(len(buf))

		if opts.filterKey != "" && !strings.Contains(invIds[id], opts.filterKey) {
			i += (12 + int64(length))
			blockCount++
			continue
		}

		fmt.Fprintln(tw, "  "+strings.Join([]string{
			strconv.FormatInt(blockCount, 10),
			strconv.FormatInt(i, 10),
			strconv.FormatInt(int64(len(buf)), 10),
			strconv.FormatUint(id, 10),
			typeDesc,
			startTime.UTC().Format(time.RFC3339Nano),
			strconv.FormatInt(int64(cnt), 10),
			fmt.Sprintf("%s/%s", tsEncoding, vEncoding),
			fmt.Sprintf("%d/%d", len(ts), len(values)),
		}, "\t"))

		i += (12 + int64(length))
		blockCount++
	}
	if opts.dumpBlocks {
		println("Blocks:")
		tw.Flush()
		println()
	}

	fmt.Printf("Statistics\n")
	fmt.Printf("  Blocks:\n")
	fmt.Printf("    Total: %d Size: %d Min: %d Max: %d Avg: %d\n",
		blockCount, blockSize, blockStats.min, blockStats.max, blockSize/blockCount)
	fmt.Printf("  Index:\n")
	fmt.Printf("    Total: %d Size: %d\n", len(index.blocks), indexSize)
	fmt.Printf("  Points:\n")
	fmt.Printf("    Total: %d", pointCount)
	println()

	println("  Encoding:")
	for i, counts := range blockStats.counts {
		if len(counts) == 0 {
			continue
		}
		fmt.Printf("    %s: ", strings.Title(fieldType[i]))
		for j, v := range counts {
			fmt.Printf("\t%s: %d (%d%%) ", encDescs[i][j], v, int(float64(v)/float64(blockCount)*100))
		}
		println()
	}
	fmt.Printf("  Compression:\n")
	fmt.Printf("    Per block: %0.2f bytes/point\n", float64(blockSize)/float64(pointCount))
	fmt.Printf("    Total: %0.2f bytes/point\n", float64(stat.Size())/float64(pointCount))

	if len(errors) > 0 {
		println()
		fmt.Printf("Errors (%d):\n", len(errors))
		for _, err := range errors {
			fmt.Printf("  * %v\n", err)
		}
		println()
	}
}

func cmdDumpTsm1dev(opts *tsdmDumpOpts) {
	var errors []error

	f, err := os.Open(opts.path)
	if err != nil {
		println(err.Error())
		os.Exit(1)
	}

	// Get the file size
	stat, err := f.Stat()
	if err != nil {
		println(err.Error())
		os.Exit(1)
	}
	b := make([]byte, 8)

	r, err := tsm1.NewTSMReaderWithOptions(tsm1.TSMReaderOptions{
		MMAPFile: f,
	})
	if err != nil {
		println("Error opening TSM files: ", err.Error())
	}
	defer r.Close()

	minTime, maxTime := r.TimeRange()
	keys := r.Keys()

	blockStats := &blockStats{}

	println("Summary:")
	fmt.Printf("  File: %s\n", opts.path)
	fmt.Printf("  Time Range: %s - %s\n",
		minTime.UTC().Format(time.RFC3339Nano),
		maxTime.UTC().Format(time.RFC3339Nano),
	)
	fmt.Printf("  Duration: %s ", maxTime.Sub(minTime))
	fmt.Printf("  Series: %d ", len(keys))
	fmt.Printf("  File Size: %d\n", stat.Size())
	println()

	tw := tabwriter.NewWriter(os.Stdout, 8, 8, 1, '\t', 0)
	fmt.Fprintln(tw, "  "+strings.Join([]string{"Pos", "Min Time", "Max Time", "Ofs", "Size", "Key", "Field"}, "\t"))
	var pos int
	for _, key := range keys {
		for _, e := range r.Entries(key) {
			pos++
			split := strings.Split(key, "#!~#")

			// We dont' know know if we have fields so use an informative default
			var measurement, field string = "UNKNOWN", "UNKNOWN"

			// Possible corruption? Try to read as much as we can and point to the problem.
			measurement = split[0]
			field = split[1]

			if opts.filterKey != "" && !strings.Contains(key, opts.filterKey) {
				continue
			}
			fmt.Fprintln(tw, "  "+strings.Join([]string{
				strconv.FormatInt(int64(pos), 10),
				e.MinTime.UTC().Format(time.RFC3339Nano),
				e.MaxTime.UTC().Format(time.RFC3339Nano),
				strconv.FormatInt(int64(e.Offset), 10),
				strconv.FormatInt(int64(e.Size), 10),
				measurement,
				field,
			}, "\t"))
		}
	}

	if opts.dumpIndex {
		println("Index:")
		tw.Flush()
		println()
	}

	tw = tabwriter.NewWriter(os.Stdout, 8, 8, 1, '\t', 0)
	fmt.Fprintln(tw, "  "+strings.Join([]string{"Blk", "Chk", "Ofs", "Len", "Type", "Min Time", "Points", "Enc [T/V]", "Len [T/V]"}, "\t"))

	// Starting at 5 because the magic number is 4 bytes + 1 byte version
	i := int64(5)
	var blockCount, pointCount, blockSize int64
	indexSize := r.IndexSize()

	// Start at the beginning and read every block
	for _, key := range keys {
		for _, e := range r.Entries(key) {

			f.Seek(int64(e.Offset), 0)
			f.Read(b[:4])

			chksum := btou32(b[:4])

			buf := make([]byte, e.Size-4)
			f.Read(buf)

			blockSize += int64(e.Size)

			blockType := buf[0]

			encoded := buf[1:]

			var v []tsm1.Value
			v, err := tsm1.DecodeBlock(buf, v)
			if err != nil {
				fmt.Printf("error: %v\n", err.Error())
				os.Exit(1)
			}
			startTime := v[0].Time()

			pointCount += int64(len(v))

			// Length of the timestamp block
			tsLen, j := binary.Uvarint(encoded)

			// Unpack the timestamp bytes
			ts := encoded[int(j) : int(j)+int(tsLen)]

			// Unpack the value bytes
			values := encoded[int(j)+int(tsLen):]

			tsEncoding := timeEnc[int(ts[0]>>4)]
			vEncoding := encDescs[int(blockType+1)][values[0]>>4]

			typeDesc := blockTypes[blockType]

			blockStats.inc(0, ts[0]>>4)
			blockStats.inc(int(blockType+1), values[0]>>4)
			blockStats.size(len(buf))

			if opts.filterKey != "" && !strings.Contains(key, opts.filterKey) {
				i += blockSize
				blockCount++
				continue
			}

			fmt.Fprintln(tw, "  "+strings.Join([]string{
				strconv.FormatInt(blockCount, 10),
				strconv.FormatUint(uint64(chksum), 10),
				strconv.FormatInt(i, 10),
				strconv.FormatInt(int64(len(buf)), 10),
				typeDesc,
				startTime.UTC().Format(time.RFC3339Nano),
				strconv.FormatInt(int64(len(v)), 10),
				fmt.Sprintf("%s/%s", tsEncoding, vEncoding),
				fmt.Sprintf("%d/%d", len(ts), len(values)),
			}, "\t"))

			i += blockSize
			blockCount++
		}
	}

	if opts.dumpBlocks {
		println("Blocks:")
		tw.Flush()
		println()
	}

	var blockSizeAvg int64
	if blockCount > 0 {
		blockSizeAvg = blockSize / blockCount
	}
	fmt.Printf("Statistics\n")
	fmt.Printf("  Blocks:\n")
	fmt.Printf("    Total: %d Size: %d Min: %d Max: %d Avg: %d\n",
		blockCount, blockSize, blockStats.min, blockStats.max, blockSizeAvg)
	fmt.Printf("  Index:\n")
	fmt.Printf("    Total: %d Size: %d\n", blockCount, indexSize)
	fmt.Printf("  Points:\n")
	fmt.Printf("    Total: %d", pointCount)
	println()

	println("  Encoding:")
	for i, counts := range blockStats.counts {
		if len(counts) == 0 {
			continue
		}
		fmt.Printf("    %s: ", strings.Title(fieldType[i]))
		for j, v := range counts {
			fmt.Printf("\t%s: %d (%d%%) ", encDescs[i][j], v, int(float64(v)/float64(blockCount)*100))
		}
		println()
	}
	fmt.Printf("  Compression:\n")
	fmt.Printf("    Per block: %0.2f bytes/point\n", float64(blockSize)/float64(pointCount))
	fmt.Printf("    Total: %0.2f bytes/point\n", float64(stat.Size())/float64(pointCount))

	if len(errors) > 0 {
		println()
		fmt.Printf("Errors (%d):\n", len(errors))
		for _, err := range errors {
			fmt.Printf("  * %v\n", err)
		}
		println()
	}
}