Skip to content

how to generate code for switch option

eddid edited this page Sep 23, 2014 · 4 revisions

Summary

The condition expression of switch should be integer type in C language, so dose the case expression, since it uses one or more branch tables to deal with switch in assemble/machine code.

But both condition and case expressions can be Object type in JavaScript language, even can be complex expressions.

Analysis

According to Reference, Java use hash code to deal with String type in switch. So could jslang to deal with null, true, false, number, string, and other constant expressions.

Break expression can be used in case expression, but if there isn't a break expression at the end of case expression, it should flow to next case expression. So, each case expression should has two BasicBlocks, one for check, another to hold real instructions of the case expression, and the latter should be in the same order of the code.

In one word, the pseudo-code is:

    //switch(expr)
    //{
    //caseA:
    //    stmtA
    //caseD:
    //    stmtD
    //default:
    //    stmtdefault
    //casea:
    //    stmta
    //caseb:
    //    stmtb
    //casec:
    //    stmtc
    //
    // note: caseA and casea have the same hashCode
    //
    //Output as:
    //    expr
    //    switch [caseA.checkBB, caseD.checkBB, caseb.checkBB, casec.checkBB]
    //caseA.checkBB:
    //    cond (expr==exprA) & br caseA.realBB, casea.checkBB
    //caseD.checkBB:
    //    cond (expr==exprD) & br caseD.realBB, default.realBB
    //casea.checkBB:
    //    cond (expr==expra) & br casea.realBB, default.realBB
    //caseb.checkBB:
    //    cond (expr==exprb) & br caseb.realBB, default.realBB
    //casec.checkBB:
    //    cond (expr==exprc) & br casec.realBB, default.realBB
    //caseA.realBB:
    //    stmtA
    //caseD.realBB:
    //    stmtD
    //default.realBB:
    //    stmtdefault
    //casea.realBB:
    //    stmta
    //caseb.realBB:
    //    stmtb
    //casec.realBB:
    //    stmtc
    //endBB:
    //    ...

But I need another way to deal with Objects and other complex expressions, if/else is a suitable solution. Deal with complex expressions with if/else first, then create SwitchInst to deal with other case expressions if needed.

Description

  • Create a struct to hold extra data:
typedef struct CaseInfo
{
    CaseClauseNode *caseNode;
    SwitchType      type;
    unsigned int    hashCode;
    llvm::BasicBlock *checkBB; //the check block of the case, maybe shared block if have the same hashCode
    llvm::BasicBlock *realBB; //the real block of the case
    struct CaseInfo *next;
}CaseInfo;
`caseNode` is the case expression node; 
`type` could be `SwitchDouble`, `SwitchString`, `SwitchUnkown`, `SwitchDefault`; 
`hashCode` is used when `type` is `SwitchDouble`, `SwitchString`, to store the hash code of expression;
`checkBB` and `realBB` are described as above;
`next` to deal with no break expression case;
  • Init std::vector with the ClauseListNode list as follow:
static SwitchType initCaseInfo(ClauseListNode *list, std::vector<CaseInfo> &caseInfos)
{
    CaseInfo caseInfo;
    JSValue jsValue;
    BooleanNode *booleanNode;
    NumberNode *numberNode;
    StringNode *stringNode;
    SwitchType switchType = SwitchInteger;

    for (; list; list = list->next())
    {
        ExpressionNode *exprNode = list->getClause()->expr();
        memset((void *)&caseInfo, 0x00, sizeof(CaseInfo));
        caseInfo.caseNode = list->getClause();
        switch (exprNode->opcodeID())
        {
        case op_const_undefined:
            caseInfo.type = SwitchDouble;
            caseInfo.hashCode = hash6432shift(((uint64_t)JSVAL_TAG_UNDEFINED)<<32 | (uint64_t)0x00000000);
            break;
        case op_const_null:
            caseInfo.type = SwitchDouble;
            caseInfo.hashCode = hash6432shift(((uint64_t)JSVAL_TAG_NULL)<<32 | (uint64_t)0x00000000);
            break;
        case op_const_boolean:
            booleanNode = (BooleanNode *)exprNode;
            caseInfo.type = SwitchDouble;
            caseInfo.hashCode = hash6432shift(((uint64_t)JSVAL_TAG_BOOLEAN)<<32 | (uint64_t)booleanNode->value());
            break;
        case op_const_number:
            numberNode = (NumberNode *)exprNode;
            jsValue.asDouble = numberNode->value();
            caseInfo.type = SwitchDouble;
            caseInfo.hashCode = hash6432shift(jsValue.asInt64);
            break;
        case op_const_string:
            stringNode = (StringNode *)exprNode;
            caseInfo.type = SwitchString;
#if defined(JSLANG_UTF8)
            caseInfo.hashCode = bkdr_hash(stringNode->value().name.c_str());
#else
            caseInfo.hashCode = bkdr_hash(stringNode->value().name.c_str());
            #err "Can't deal with charset !"
#endif
            break;
        default:
            caseInfo.type = SwitchUnkown;
            break;
        }
        switchType = (SwitchType)((int)switchType | (int)caseInfo.type);
        caseInfos.push_back(caseInfo);
    }

    return switchType;
}
  • Init all cases in caseInfos, in original order: m_list1, m_defaultClause, m_list2(to deal with no break case)
    std::vector<CaseInfo> caseInfos;
    initCaseInfo(m_block->list1(), caseInfos);
    if (NULL != m_block->defaultClause())
    {
        CaseInfo caseInfo;
        memset((void *)&caseInfo, 0x00, sizeof(CaseInfo));
        caseInfo.caseNode = m_block->defaultClause();
        caseInfo.type = SwitchDefault;
        caseInfos.push_back(caseInfo);
    }
    initCaseInfo( m_block->list2(), caseInfos);
  • merge case via check haseCode, and create block for each case
    std::map<unsigned int, llvm::BasicBlock*> hashBBs;
    for (int indexX = realCount - 1; indexX >= 0; indexX--)
    {
        if (SwitchDefault == caseInfos[indexX].type)
        {
            // default case has no checkBB
            defaultBB = BasicBlock::Create(getGlobalContext(), "sw.default");
            caseInfos[indexX].realBB = defaultBB;
            continue;
        }
        else if (SwitchUnkown == caseInfos[indexX].type)
        {
            //link all non-const expressions together with next point
            caseInfos[indexX].checkBB = BasicBlock::Create(getGlobalContext(), "sw.nonconst.case");
            caseInfos[indexX].realBB = BasicBlock::Create(getGlobalContext(), "sw.real.case");
            for (int indexY = indexX + 1; indexY < realCount; indexY++)
            {
                if (SwitchUnkown == caseInfos[indexY].type)
                {
                    caseInfos[indexX].next = &caseInfos[indexY];
                    break;
                }
            }
            continue;
        }
        hashCode = caseInfos[indexX].hashCode % realCount;
        checkBB = hashBBs[hashCode];
        if (NULL == checkBB)
        {
            checkCount++;
        }
        else
        {
            for (int indexY = indexX + 1; indexY < realCount; indexY++)
            {
                if (hashCode == caseInfos[indexY].hashCode)
                {
                    caseInfos[indexX].next = &caseInfos[indexY];
                    break;
                }
            }
            if (NULL == caseInfos[indexX].next)
            {
                std::cout <<  __FUNCTION__ << " hash conflict but can't find it !" << std::endl;
                abort();
            }
        }
        checkBB = BasicBlock::Create(getGlobalContext(), "sw.check.case");
        hashBBs[hashCode] = checkBB;
        //store the real hashCode, so we can get hashCode directly atfer the first loop
        caseInfos[indexX].hashCode = hashCode;
        caseInfos[indexX].checkBB = checkBB;
        caseInfos[indexX].realBB = BasicBlock::Create(getGlobalContext(), "sw.real.case");
    }
  • init and push all non-const checkBBs
    for (int indexX = 0; indexX < realCount; indexX++)
    {
        if (SwitchUnkown != caseInfos[indexX].type)
        {
            continue;
        }

        ExpressionNode *caseExpr = (ExpressionNode *)caseInfos[indexX].caseNode->expr();
        //in non-const check:
        // current non-const expression's checkBB is used for next non-const expression
        // since the first non-const expression use "currentBlock"
        // the last non-const expression's checkBB is used as new "currentBlock"
        llvm::Value *condV = CodeGenJSValue32FunctionCmp(context, exprRef, caseExpr->emitBytecode(context, dst), op_stricteq);
        BranchInst::Create(caseInfos[indexX].realBB, caseInfos[indexX].checkBB, condV, context.currentBlock());
        currentFunction->getBasicBlockList().push_back(caseInfos[indexX].checkBB);
        context.pushBlock(caseInfos[indexX].checkBB);
    }
  • create switch inst at currentBB if needed, if there aren't any non-complex expressions, SwitchInst isn't needed, just branch to default case or the end.
    if (checkCount> 0)
    {
        llvm::IntegerType *int32Type = llvm::Type::getInt32Ty(llvm::getGlobalContext());
        RegisterID *hashV = CodeGenJSValue32FunctionGetHashCode(context, exprRef);
        BinaryOperator* switchV = BinaryOperator::Create(Instruction::URem, hashV, llvm::ConstantInt::get(int32Type, realCount), "mod", context.currentBlock());
        llvm::SwitchInst *switchInst = llvm::SwitchInst::Create(switchV, defaultBB, realCount, context.currentBlock());
    
        //init and push all const checkBBs
        for (int indexX = 0; indexX < realCount; indexX++)
        {
            if (SwitchDefault == caseInfos[indexX].type)
            {
                continue;
            }
            if (SwitchUnkown == caseInfos[indexX].type)
            {
                continue;
            }
    
            hashCode = caseInfos[indexX].hashCode;
            if (caseInfos[indexX].checkBB == hashBBs[hashCode])
            {
                //the merged entry a same hashCode, add to switchInst
                switchInst->addCase(llvm::ConstantInt::get(int32Type, hashCode), caseInfos[indexX].checkBB);
            }
            
            ExpressionNode *caseExpr = (ExpressionNode *)caseInfos[indexX].caseNode->expr();
            llvm::BasicBlock* falseBB = defaultBB;
            if (NULL != caseInfos[indexX].next)
            {
                falseBB = caseInfos[indexX].next->checkBB;
            }
            
            currentFunction->getBasicBlockList().push_back(caseInfos[indexX].checkBB);
            context.pushBlock(caseInfos[indexX].checkBB);
            llvm::Value *condV = CodeGenJSValue32FunctionCmp(context, exprRef, caseExpr->emitBytecode(context, dst), op_stricteq);
            BranchInst::Create(caseInfos[indexX].realBB, falseBB, condV, context.currentBlock());
            context.popBlock(caseInfos[indexX].checkBB);
        }
    }
    else
    {
        //if all the cases are non-const cases, no need to create SwitchInst, just branch to defaultBB(endBB if there is no defult) of the statement
        BranchInst::Create(defaultBB, context.currentBlock());
    }
  • init and push all realBBs in the original order
    context.setBreakBlock(endBB);
    for (int indexX = 0; indexX < realCount; indexX++)
    {
        SourceElements *caseStmt = (SourceElements *)caseInfos[indexX].caseNode->statments();
        llvm::BasicBlock* continueBB = endBB;
        if (indexX + 1 < realCount)
        {
            continueBB = caseInfos[indexX + 1].realBB;
        }

        currentFunction->getBasicBlockList().push_back(caseInfos[indexX].realBB);
        context.pushBlock(caseInfos[indexX].realBB);
        caseStmt->emitBytecode(context, dst);
        if (NULL == context.currentBlock()->getTerminator())
        {
            BranchInst::Create(continueBB, context.currentBlock());
        }
        context.popBlock(caseInfos[indexX].realBB);
    }
  • push endBB
    currentFunction->getBasicBlockList().push_back(endBB);
    context.pushBlock(endBB);

Conclusion

Okay, let's test with it, IR code generated by jslang for JavaScript language:

var a = 0;
switch (a)
{
case null:
    console.log("null");
    break;
case undefined=1:
    console.log("undefined");
    break;
case a:
    console.log("number1");
    //break;
default:
    console.log("unknown");
    break;
case "0":
    console.log("string");
    break;
case false:
    console.log("boolean");
    break;
case 0:
    console.log("number");
    break;
}

define i32 @main(i32, i8**) {
entry:
  call void @jsextern_print_tick()
  call void @AllInOneInit()
  %asDouble = bitcast %union.JSValue* @a to double*
  store double 0.000000e+00, double* %asDouble, align 8
  %asDouble1 = bitcast %union.JSValue* @undefined to double*
  store double 1.000000e+00, double* %asDouble1, align 8
  %2 = call i1 @js_op_strict_eq(%union.JSValue* @a, %union.JSValue* @undefined)
  br i1 %2, label %sw.real.case13, label %sw.nonconst.case

sw.nonconst.case:                                 ; preds = %entry
  br i1 true, label %sw.real.case19, label %sw.nonconst.case2

sw.nonconst.case2:                                ; preds = %sw.nonconst.case
  %3 = call i32 @JSValueHash(%union.JSValue* @a)
  %mod = urem i32 %3, 7
  switch i32 %mod, label %sw.default [
    i32 5, label %sw.check.case
    i32 6, label %sw.check.case3
    i32 4, label %sw.check.case4
  ]

sw.check.case:                                    ; preds = %sw.nonconst.case2
  %4 = alloca %union.JSValue, align 8
  %asIntPtr = bitcast %union.JSValue* %4 to i32*
  %5 = load i32* @jsValue32TagPart
  %tagRef = getelementptr i32* %asIntPtr, i32 %5
  store i32 -121, i32* %tagRef, align 4
  %6 = load i32* @jsValue32PayloadPart
  %payloadRef = getelementptr i32* %asIntPtr, i32 %6
  store i32 0, i32* %payloadRef, align 4
  %7 = call i1 @js_op_strict_eq(%union.JSValue* @a, %union.JSValue* %4)
  br i1 %7, label %sw.real.case, label %sw.end

sw.check.case3:                                   ; preds = %sw.nonconst.case2
  %8 = call i64 @js_string_new_utf8_len(i8* getelementptr inbounds ([2 x i8]* @"0", i32 0, i32 0), i32 1)
  %jsValueRightRef = alloca %union.JSValue, align 8
  %asInt64 = bitcast %union.JSValue* %jsValueRightRef to i64*
  store i64 %8, i64* %asInt64, align 8
  %9 = call i1 @js_op_strict_eq(%union.JSValue* @a, %union.JSValue* %jsValueRightRef)
  br i1 %9, label %sw.real.case30, label %sw.check.case8

sw.check.case4:                                   ; preds = %sw.nonconst.case2
  %10 = alloca %union.JSValue, align 8
  %asIntPtr5 = bitcast %union.JSValue* %10 to i32*
  %11 = load i32* @jsValue32TagPart
  %tagRef6 = getelementptr i32* %asIntPtr5, i32 %11
  store i32 -123, i32* %tagRef6, align 4
  %12 = load i32* @jsValue32PayloadPart
  %payloadRef7 = getelementptr i32* %asIntPtr5, i32 %12
  store i32 0, i32* %payloadRef7, align 4
  %13 = call i1 @js_op_strict_eq(%union.JSValue* @a, %union.JSValue* %10)
  br i1 %13, label %sw.real.case36, label %sw.end

sw.check.case8:                                   ; preds = %sw.check.case3
  %jsValueRightRef9 = alloca %union.JSValue, align 8
  %asDouble10 = bitcast %union.JSValue* %jsValueRightRef9 to double*
  store double 0.000000e+00, double* %asDouble10, align 8
  %14 = call i1 @js_op_strict_eq(%union.JSValue* @a, %union.JSValue* %jsValueRightRef9)
  br i1 %14, label %sw.real.case42, label %sw.end

sw.real.case:                                     ; preds = %sw.check.case
  %15 = call i64 @js_object_getprop_utf8(%union.JSValue* @js_console, i8* getelementptr inbounds ([4 x i8]* @log3, i32 0, i32 0))
  %log = alloca %union.JSValue, align 8
  %asInt6411 = bitcast %union.JSValue* %log to i64*
  store i64 %15, i64* %asInt6411, align 8
  %16 = call i64 @js_string_new_utf8_len(i8* getelementptr inbounds ([5 x i8]* @null, i32 0, i32 0), i32 4)
  %array = alloca [1 x %union.JSValue]
  %17 = bitcast [1 x %union.JSValue]* %array to %union.JSValue*
  %argvIndex = getelementptr %union.JSValue* %17, i32 0
  %asInt6412 = bitcast %union.JSValue* %argvIndex to i64*
  store i64 %16, i64* %asInt6412, align 8
  %18 = call i64 @js_invoke_closure(%union.JSValue* %log, %union.JSValue* @js_console, i32 1, %union.JSValue* %17)
  br label %sw.end

sw.real.case13:                                   ; preds = %entry
  %19 = call i64 @js_object_getprop_utf8(%union.JSValue* @js_console, i8* getelementptr inbounds ([4 x i8]* @log4, i32 0, i32 0))
  %log14 = alloca %union.JSValue, align 8
  %asInt6415 = bitcast %union.JSValue* %log14 to i64*
  store i64 %19, i64* %asInt6415, align 8
  %20 = call i64 @js_string_new_utf8_len(i8* getelementptr inbounds ([10 x i8]* @undefined5, i32 0, i32 0), i32 9)
  %array16 = alloca [1 x %union.JSValue]
  %21 = bitcast [1 x %union.JSValue]* %array16 to %union.JSValue*
  %argvIndex17 = getelementptr %union.JSValue* %21, i32 0
  %asInt6418 = bitcast %union.JSValue* %argvIndex17 to i64*
  store i64 %20, i64* %asInt6418, align 8
  %22 = call i64 @js_invoke_closure(%union.JSValue* %log14, %union.JSValue* @js_console, i32 1, %union.JSValue* %21)
  br label %sw.end

sw.real.case19:                                   ; preds = %sw.nonconst.case
  %23 = call i64 @js_object_getprop_utf8(%union.JSValue* @js_console, i8* getelementptr inbounds ([4 x i8]* @log6, i32 0, i32 0))
  %log20 = alloca %union.JSValue, align 8
  %asInt6421 = bitcast %union.JSValue* %log20 to i64*
  store i64 %23, i64* %asInt6421, align 8
  %24 = call i64 @js_string_new_utf8_len(i8* getelementptr inbounds ([8 x i8]* @number1, i32 0, i32 0), i32 7)
  %array22 = alloca [1 x %union.JSValue]
  %25 = bitcast [1 x %union.JSValue]* %array22 to %union.JSValue*
  %argvIndex23 = getelementptr %union.JSValue* %25, i32 0
  %asInt6424 = bitcast %union.JSValue* %argvIndex23 to i64*
  store i64 %24, i64* %asInt6424, align 8
  %26 = call i64 @js_invoke_closure(%union.JSValue* %log20, %union.JSValue* @js_console, i32 1, %union.JSValue* %25)
  br label %sw.default

sw.default:                                       ; preds = %sw.real.case19, %sw.nonconst.case2
  %27 = call i64 @js_object_getprop_utf8(%union.JSValue* @js_console, i8* getelementptr inbounds ([4 x i8]* @log7, i32 0, i32 0))
  %log25 = alloca %union.JSValue, align 8
  %asInt6426 = bitcast %union.JSValue* %log25 to i64*
  store i64 %27, i64* %asInt6426, align 8
  %28 = call i64 @js_string_new_utf8_len(i8* getelementptr inbounds ([8 x i8]* @unknown, i32 0, i32 0), i32 7)
  %array27 = alloca [1 x %union.JSValue]
  %29 = bitcast [1 x %union.JSValue]* %array27 to %union.JSValue*
  %argvIndex28 = getelementptr %union.JSValue* %29, i32 0
  %asInt6429 = bitcast %union.JSValue* %argvIndex28 to i64*
  store i64 %28, i64* %asInt6429, align 8
  %30 = call i64 @js_invoke_closure(%union.JSValue* %log25, %union.JSValue* @js_console, i32 1, %union.JSValue* %29)
  br label %sw.end

sw.real.case30:                                   ; preds = %sw.check.case3
  %31 = call i64 @js_object_getprop_utf8(%union.JSValue* @js_console, i8* getelementptr inbounds ([4 x i8]* @log8, i32 0, i32 0))
  %log31 = alloca %union.JSValue, align 8
  %asInt6432 = bitcast %union.JSValue* %log31 to i64*
  store i64 %31, i64* %asInt6432, align 8
  %32 = call i64 @js_string_new_utf8_len(i8* getelementptr inbounds ([7 x i8]* @string, i32 0, i32 0), i32 6)
  %array33 = alloca [1 x %union.JSValue]
  %33 = bitcast [1 x %union.JSValue]* %array33 to %union.JSValue*
  %argvIndex34 = getelementptr %union.JSValue* %33, i32 0
  %asInt6435 = bitcast %union.JSValue* %argvIndex34 to i64*
  store i64 %32, i64* %asInt6435, align 8
  %34 = call i64 @js_invoke_closure(%union.JSValue* %log31, %union.JSValue* @js_console, i32 1, %union.JSValue* %33)
  br label %sw.end

sw.real.case36:                                   ; preds = %sw.check.case4
  %35 = call i64 @js_object_getprop_utf8(%union.JSValue* @js_console, i8* getelementptr inbounds ([4 x i8]* @log9, i32 0, i32 0))
  %log37 = alloca %union.JSValue, align 8
  %asInt6438 = bitcast %union.JSValue* %log37 to i64*
  store i64 %35, i64* %asInt6438, align 8
  %36 = call i64 @js_string_new_utf8_len(i8* getelementptr inbounds ([8 x i8]* @boolean, i32 0, i32 0), i32 7)
  %array39 = alloca [1 x %union.JSValue]
  %37 = bitcast [1 x %union.JSValue]* %array39 to %union.JSValue*
  %argvIndex40 = getelementptr %union.JSValue* %37, i32 0
  %asInt6441 = bitcast %union.JSValue* %argvIndex40 to i64*
  store i64 %36, i64* %asInt6441, align 8
  %38 = call i64 @js_invoke_closure(%union.JSValue* %log37, %union.JSValue* @js_console, i32 1, %union.JSValue* %37)
  br label %sw.end

sw.real.case42:                                   ; preds = %sw.check.case8
  %39 = call i64 @js_object_getprop_utf8(%union.JSValue* @js_console, i8* getelementptr inbounds ([4 x i8]* @log11, i32 0, i32 0))
  %log43 = alloca %union.JSValue, align 8
  %asInt6444 = bitcast %union.JSValue* %log43 to i64*
  store i64 %39, i64* %asInt6444, align 8
  %40 = call i64 @js_string_new_utf8_len(i8* getelementptr inbounds ([7 x i8]* @number, i32 0, i32 0), i32 6)
  %array45 = alloca [1 x %union.JSValue]
  %41 = bitcast [1 x %union.JSValue]* %array45 to %union.JSValue*
  %argvIndex46 = getelementptr %union.JSValue* %41, i32 0
  %asInt6447 = bitcast %union.JSValue* %argvIndex46 to i64*
  store i64 %40, i64* %asInt6447, align 8
  %42 = call i64 @js_invoke_closure(%union.JSValue* %log43, %union.JSValue* @js_console, i32 1, %union.JSValue* %41)
  br label %sw.end

sw.end:                                           ; preds = %sw.real.case42, %sw.real.case36, %sw.real.case30, %sw.default, %sw.real.case13, %sw.real.case, %sw.check.case8, %sw.check.case4, %sw.check.case
  call void @jsextern_print_tick()
  ret i32 0
}

The output is:

---0(ms)---
litterendian
AllInOneInit:115
number1
unknown
---11(ms)---

Reference