In [1]:
## from week 6 lab
def dcg_at_k(r, k, method=0):
    """Score is discounted cumulative gain (dcg)

    Relevance is positive real values.  Can use binary
    as the previous methods.

    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> dcg_at_k(r, 1)
    3.0
    >>> dcg_at_k(r, 1, method=1)
    3.0
    >>> dcg_at_k(r, 2)
    5.0
    >>> dcg_at_k(r, 2, method=1)
    4.2618595071429155
    >>> dcg_at_k(r, 10)
    9.6051177391888114
    >>> dcg_at_k(r, 11)
    9.6051177391888114

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]

    Returns:
        Discounted cumulative gain (0.0 when no scores remain after
        truncating to the first k entries)

    Raises:
        ValueError: if there is at least one score and method is not 0 or 1.
    """
    import numpy as np
    ## np.asfarray was removed in NumPy 2.0; asarray with a float dtype is the replacement
    r = np.asarray(r, dtype=float)[:k]
    ## r.size is 0 when r is empty or k == 0; there is nothing to accumulate then
    if not r.size:
        return 0.
    if method == 0:
        ## rank 1 is undiscounted; rank i >= 2 is divided by log2(i)
        return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
    if method == 1:
        ## every rank i is divided by log2(i + 1)
        return np.sum(r / np.log2(np.arange(2, r.size + 2)))
    raise ValueError('method must be 0 or 1.')

In [2]:
## from week 6 lab
def ndcg_at_k(r, k, method=0, verbose=True):
    """Score is normalized discounted cumulative gain (ndcg)

    Relevance is positive real values.  Can use binary
    as the previous methods.

    The ideal DCG (IDCG) is the DCG of the same scores sorted in
    descending order; NDCG is DCG / IDCG.

    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    (return values shown; with verbose=True the DCG and IDCG are also printed)
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> ndcg_at_k(r, 1, verbose=False)
    1.0
    >>> r = [2, 1, 2, 0]
    >>> ndcg_at_k(r, 4, verbose=False)
    0.9203032077642922
    >>> ndcg_at_k(r, 4, method=1, verbose=False)
    0.96519546960144276
    >>> ndcg_at_k([0], 1, verbose=False)
    0.0
    >>> ndcg_at_k([1], 2, verbose=False)
    1.0

    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
        verbose: If True (default) print the DCG and IDCG for this k.
            Nothing is printed when the IDCG is zero.

    Returns:
        Normalized discounted cumulative gain (0.0 when the IDCG is zero)
    """
    dcg_max = dcg_at_k(sorted(r, reverse=True), k, method)
    ## a zero IDCG means no relevant score in r at all; avoid dividing by zero
    if not dcg_max:
        return 0.
    ## compute the DCG once and reuse it for both the log line and the ratio
    dcg = dcg_at_k(r, k, method)
    if verbose:
        print('For k is {}, DCG scorce is {}'.format(k,dcg))
        print('For k is {}, IDCG scorce is {}'.format(k,dcg_max))
    return dcg / dcg_max

In [3]:
def assigningBM25ScoreToRelevantAndRetrieved(_bm25ScoreDf, relevantDocsList):
    """Zero the score of every retrieved document that is not relevant.

    Documents whose index label appears in relevantDocsList keep their
    'bm25Score'; every other row gets 'bm25Score' set to 0. Row order is
    preserved and the input frame is left untouched.

    Args:
        _bm25ScoreDf ([dataframe]): [frame indexed by module name with a 'bm25Score' column]
        relevantDocsList ([list]): [module names considered relevant (the gold standard);
            names that are not in the frame's index are ignored]

    Returns:
        [dataframe]: [a new frame with the same index and columns as the input]
    """
    ## deep copy: a shallow copy shares its data with the caller's frame, so
    ## writing the zeros could silently modify the input as well
    df = _bm25ScoreDf.copy()
    ## single .loc assignment instead of chained indexing (df.loc[row]['col'] = 0),
    ## which may write to a temporary object and leave df unchanged
    isIrrelevant = ~df.index.isin(set(relevantDocsList))
    df.loc[isIrrelevant, 'bm25Score'] = 0
    return(df)

In [4]:
def NDCGWithVariousK(retrievedDocsDf,listOfRelevantDocs, exportResults = 0, queryNum = '', fileName = 'test'):
    """Compute the NDCG of one ranked result list at every cut-off k.

    Scores of retrieved documents that are not in listOfRelevantDocs are
    zeroed first, then NDCG is evaluated for k = 1 .. number of retrieved
    documents. Each score is also printed.

    Args:
        retrievedDocsDf ([dataframe]): [retrieved documents (index) and their 'bm25Score', in rank order]
        listOfRelevantDocs ([list]): [relevant documents according to the gold standard]
        exportResults (int, optional): [1 writes the scores to csv, anything else does not]. Defaults to 0.
        queryNum (str, optional): [suffix appended to the score column name 'NDCGScore']. Defaults to ''.
        fileName (str, optional): [the csv is written as 'ndcg_score_<fileName>.csv' under ../results/ndcg_score/]. Defaults to 'test'.

    Returns:
        [dataframe]: [columns 'k' and 'NDCGScore<queryNum>', one row per k]
    """
    import pandas as pd

    ## keep the score of relevant documents, zero everything else
    maskedScoresDf = assigningBM25ScoreToRelevantAndRetrieved(retrievedDocsDf,listOfRelevantDocs)
    gainsInRankOrder = list(maskedScoresDf['bm25Score'])

    ## {k: NDCG at k}
    ndcgByK = {}
    for cutoff in range(1, len(gainsInRankOrder) + 1):
        scoreAtCutoff = ndcg_at_k(gainsInRankOrder, cutoff)
        print('For k is {}, NDCG scorce is {}\n'.format(cutoff, scoreAtCutoff))
        ndcgByK[cutoff] = scoreAtCutoff

    ## one row per k; move k out of the index into a column named 'k'
    scoreColumn = 'NDCGScore{}'.format(queryNum)
    NDCGDf = (
        pd.DataFrame.from_dict(ndcgByK, orient='index', columns=[scoreColumn])
        .reset_index()
        .rename(columns={"index": "k"})
    )

    ## optional csv export
    if exportResults == 1:
        csvName = 'ndcg_score_{}.csv'.format(fileName)
        NDCGDf.to_csv('../results/ndcg_score/{}'.format(csvName))
    return NDCGDf
    

# Toy Problem formulation

In [9]:
if __name__ == "__main__":
    import pandas as pd

    def _toyBm25Frame(docs, scores):
        """Build a frame shaped like the BM25 output: doc names as index, one 'bm25Score' column."""
        return pd.DataFrame({'bm25Score': scores}, index=docs)

    ## Test case 1: retrieved documents, deliberately not sorted by score
    retrievedDocs = ['D','C', 'B','A']
    retrievedDocsScore = [0.43, 0.26, 0.03, 0.37]
    retrievedDocsDf1 = _toyBm25Frame(retrievedDocs, retrievedDocsScore)
    print('BM25 output:')
    ## a bare expression inside an `if` body is never displayed, so print explicitly
    print(retrievedDocsDf1)

    ## Test case 2: retrieved documents already sorted by descending score
    retrievedDocs = ['C','D', 'B','A']
    retrievedDocsScore = [0.5, 0.3, 0.2, 0.1]
    retrievedDocsDf2 = _toyBm25Frame(retrievedDocs, retrievedDocsScore)
    print('BM25 output:')
    print(retrievedDocsDf2)

    ## Gold-standard relevant documents, one list per test case
    relevantDocs1 = ['B','D','E']
    print('List of relevant Docs: {}'.format(relevantDocs1))
    relevantDocs2 = ['A','C']
    print('List of relevant Docs: {}'.format(relevantDocs2))

    retrievedlist = [retrievedDocsDf1,retrievedDocsDf2]
    relevantlist =[relevantDocs1,relevantDocs2]

    ## Compute NDCG@k per query and collect the results side by side, joined on k.
    ## queryIndex doubles as the suffix of each query's score column.
    NDCGDf = None
    for queryIndex, (retrieved, relevant) in enumerate(zip(retrievedlist, relevantlist)):
        NDCGWithVariousKdf = NDCGWithVariousK(retrieved,relevant,0,queryIndex)
        if NDCGDf is None:
            ## the first query seeds the combined frame
            NDCGDf = NDCGWithVariousKdf
        else:
            NDCGDf = pd.merge(NDCGDf, NDCGWithVariousKdf, on=["k"])

For k is 1, DCG scorce is 0.43
For k is 1, IDCG scorce is 0.43
For k is 1, NDCG scorce is 1.0

For k is 2, DCG scorce is 0.43
For k is 2, IDCG scorce is 0.45999999999999996
For k is 2, NDCG scorce is 0.9347826086956522

For k is 3, DCG scorce is 0.4489278926071437
For k is 3, IDCG scorce is 0.45999999999999996
For k is 3, NDCG scorce is 0.9759302013198777

For k is 1, DCG scorce is 0.5
For k is 1, IDCG scorce is 0.5
For k is 1, NDCG scorce is 1.0

For k is 2, DCG scorce is 0.5
For k is 2, IDCG scorce is 0.6
For k is 2, NDCG scorce is 0.8333333333333334

For k is 3, DCG scorce is 0.5
For k is 3, IDCG scorce is 0.6
For k is 3, NDCG scorce is 0.8333333333333334



In [10]:
def clean_elective_names(relevant_results):
    """Normalise a collection of elective (module) names.

    Args:
        relevant_results: either a list of module names, or a string holding
            a printed list such as "['50.012 Networks', '50.007 Machine Learning']".
            A string is split on ',' and every quote and square bracket is
            removed; a list is used as given.

    Returns:
        A new list of names in which
        - known variant spellings are replaced by their canonical name, and
        - the combined entry '40.302 Advanced Optim/ 40.305 Advanced Stochastic'
          is removed and its two modules are appended at the end of the list.
        A list passed in is not modified.
    """
    ## explicit type check instead of a bare `except: pass` around str.split
    if isinstance(relevant_results, str):
        parsedNames = []
        for position, name in enumerate(relevant_results.split(',')):
            for unwanted in ("'", "[", "]"):
                name = name.replace(unwanted, '')
            ## every item after the first carries the separator space that followed
            ## the comma; drop exactly that one leading space. Spaces inside a name
            ## are kept, and items equal to the first one are no longer dropped.
            if position > 0 and name.startswith(' '):
                name = name[1:]
            parsedNames.append(name)
        relevant_results = parsedNames

    replacements = {
        ' 50.035 Computer Vision': '50.035 Computer Vision'
        ,'50.043 Database Systems / Database and Big Data Systems (for class 2021)': '50.043 Database Systems'
        }

    relevant_results = [replacements.get(x, x) for x in relevant_results]

    ## one combined entry stands for two separate modules
    combinedEntry = '40.302 Advanced Optim/ 40.305 Advanced Stochastic'
    if combinedEntry in relevant_results:
        relevant_results.remove(combinedEntry)
        relevant_results.append('40.302 Advanced Topics in Optimisation#')
        relevant_results.append('40.305 Advanced Topics in Stochastic Modelling#')
    return relevant_results

In [11]:
## function to compute the NDCG for cosine similarities for model 1
def get_NDCG_cosine_no_expan(query_val,tf):
    """Compute NDCG@k for every query using CosineSimilarity_no_query_expan.

    For each query the ranked modules and their scores are taken from
    CosineSimilarity_no_query_expan.rankedModuleOfCosineSim(query_val, tf),
    the gold-standard modules from query_val['expectedElectivesInOrder'],
    and the NDCG at every k from NDCGWithVariousK. The retrieved scores,
    the raw gold entry and the cleaned gold list are printed per query.

    Args:
        query_val: frame of validation queries; must have an
            'expectedElectivesInOrder' column that can be looked up with
            0, 1, 2, ... (assumed to line up with the ranking order -
            TODO confirm against the csv index)
        tf: passed through unchanged to rankedModuleOfCosineSim

    Returns:
        A frame with a 'k' column and one 'NDCGScore<n>' column per query,
        joined on k with pd.merge's default join; the integer 0 when there
        are no queries.
    """
    import CosineSimilarity_no_query_expan
    import pandas as pd

    ## after the transpose each row is expected to be one query with
    ## 'topModules' and 'topModulesScore' entries
    rankedByQuery = CosineSimilarity_no_query_expan.rankedModuleOfCosineSim(query_val,tf).T

    NDCGDf = 0
    for queryCount, (query, row) in enumerate(rankedByQuery.iterrows()):
        ## retrieved modules -> score, keyed by the cleaned module name
        cleanedElectives = clean_elective_names(row['topModules'])
        scoreByModule = {
            cleanedElectives[position]: row['topModulesScore'][position]
            for position in range(len(row['topModules']))
        }
        retrievedDocsDf = pd.DataFrame.from_dict(scoreByModule,orient='index',columns = ['bm25Score'])

        ## show the retrieved scores and the gold standard before and after cleaning
        firstColumn = list(retrievedDocsDf)[0]
        print(retrievedDocsDf[firstColumn])
        expectedRaw = query_val['expectedElectivesInOrder'][queryCount]
        print(expectedRaw)
        validModules = clean_elective_names(expectedRaw)
        print(validModules)

        ## NDCG at every k for this query, then widen the combined frame by one column
        perQueryDf = NDCGWithVariousK(retrievedDocsDf,validModules,0,queryCount)
        if queryCount == 0:
            NDCGDf = perQueryDf
        else:
            NDCGDf = pd.merge(NDCGDf, perQueryDf, on=["k"])

    return NDCGDf

In [12]:
## function to compute the NDCG for cosine similarities for models 2 and 3
def get_NDCG_cosine(query_val,tf):
    """Compute NDCG@k for every query using the CosineSimilarity module.

    For each query the ranked modules and their scores are taken from
    CosineSimilarity.rankedModuleOfCosineSim(query_val, tf), the
    gold-standard modules from query_val['expectedElectivesInOrder'],
    and the NDCG at every k from NDCGWithVariousK. The retrieved scores,
    the raw gold entry and the cleaned gold list are printed per query.

    Args:
        query_val: frame of validation queries; must have an
            'expectedElectivesInOrder' column that can be looked up with
            0, 1, 2, ... (assumed to line up with the ranking order -
            TODO confirm against the csv index)
        tf: passed through unchanged to rankedModuleOfCosineSim

    Returns:
        A frame with a 'k' column and one 'NDCGScore<n>' column per query,
        joined on k with pd.merge's default join; the integer 0 when there
        are no queries.
    """
    import CosineSimilarity
    import pandas as pd

    ## after the transpose each row is expected to be one query with
    ## 'topModules' and 'topModulesScore' entries
    similarityPerQuery = CosineSimilarity.rankedModuleOfCosineSim(query_val,tf).T

    NDCGDf = 0
    for queryCount, (query, row) in enumerate(similarityPerQuery.iterrows()):
        ## retrieved modules -> score, keyed by the cleaned module name
        cleanedElectives = clean_elective_names(row['topModules'])
        moduleScores = {}
        for rank in range(len(row['topModules'])):
            moduleScores[cleanedElectives[rank]] = row['topModulesScore'][rank]
        retrievedDocsDf = pd.DataFrame.from_dict(moduleScores,orient='index',columns = ['bm25Score'])

        ## show the retrieved scores and the gold standard before and after cleaning
        print(retrievedDocsDf[list(retrievedDocsDf)[0]])
        goldEntry = query_val['expectedElectivesInOrder'][queryCount]
        print(goldEntry)
        validModules = clean_elective_names(goldEntry)
        print(validModules)

        ## NDCG at every k for this query; the first query seeds the combined frame
        queryNDCGDf = NDCGWithVariousK(retrievedDocsDf,validModules,0,queryCount)
        NDCGDf = queryNDCGDf if queryCount == 0 else pd.merge(NDCGDf, queryNDCGDf, on=["k"])

    return NDCGDf

In [15]:
## for Cosine Similarity (without and with query expansion, course information + (50% survey))
import pandas as pd
## Model 1: cosine similarity without query expansion, course information only.
tf = pd.read_csv('../data/course_info_scores/course_info_tf.csv', index_col = 0)
query_val= pd.read_csv('../data/survey/vaildation_sample_query.csv',index_col = 0)
model1NDCG = get_NDCG_cosine_no_expan(query_val,tf)
## column 0 of the merged frame is 'k'; average only the per-query NDCG columns
model1NDCGAverage = model1NDCG.iloc[:, 1:].mean(axis=1)
## NOTE(review): 'mdoel' in the output file names looks like a typo of 'model'; it is
## left unchanged because other code may read these exact paths
model1NDCGAverage.to_csv('../results/ndcg_score/ndcg_score_mdoel1.csv')

## Model 2: cosine similarity via the CosineSimilarity module, course information only.
## The inputs are re-read so each model starts from freshly loaded frames.
tf = pd.read_csv('../data/course_info_scores/course_info_tf.csv', index_col = 0)
query_val= pd.read_csv('../data/survey/vaildation_sample_query.csv',index_col = 0)
model2NDCG = get_NDCG_cosine(query_val,tf)
model2NDCGAverage = model2NDCG.iloc[:, 1:].mean(axis=1)
model2NDCGAverage.to_csv('../results/ndcg_score/ndcg_score_mdoel2.csv')

## Model 3: same as model 2 but with the course-information-plus-survey term frequencies.
tf = pd.read_csv('../data/course_info_with_survey_scores/course_info_with_survey_tf.csv', index_col = 0)
query_val= pd.read_csv('../data/survey/vaildation_sample_query.csv',index_col = 0)
model3NDCG = get_NDCG_cosine(query_val,tf)
## skip the 'k' column here as well; averaging it in with the NDCG scores
## (as `.iloc[:, :]` did) inflates every row by its own k
model3NDCGAverage = model3NDCG.iloc[:, 1:].mean(axis=1)
model3NDCGAverage.to_csv('../results/ndcg_score/ndcg_score_mdoel3.csv')



Current computing Query: network, term, model, technology, probability
Number of terms in corpus: 8

Current computing Query: term, different, skill, mongodb, long


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: logistics, analysis, operation, basic, r


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 9

Current computing Query: esd, network, evaluate, program, r


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: operation, basic, evaluation, price, evaluate


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 13

Current computing Query: infrastructure, metric, pytorch, model, client


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 6

Current computing Query: approach, logistics, shag, infrastructure, equity


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 9

Current computing Query: data, sql, different, analytics, model


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 9

Current computing Query: computational, best, server, certain, ec


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 9

Current computing Query: sklearn, metric, demand, schedule, fundamental


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: digitalisation, skill, long, technology, aviation


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 8

Current computing Query: r, hidden, artificial, schedule, simulate


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 5

Current computing Query: optimisation, decision, risk, aws, jupyter


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 6

Current computing Query: decentralizedapp, equity, math, ethereum, technology


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 6

Current computing Query: financial, value, metric, optimize, analysis


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 10

Current computing Query: risk, certain, computational, approach, c


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 9

Current computing Query: markov, supply, hidden, computational, payoff


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 8

Current computing Query: finance, value, long, average, business


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 8

Current computing Query: financial, spark, science, focus, search


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: supply, notebook, fundamental, know, basic


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 10

Current computing Query: r, linear, ethereum, c, machine


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 4

Current computing Query: science, future, problem, demand, pytorch


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: r, ec, kera, opponent, model


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 3

Current computing Query: markov, descent, algebra, math, research


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: clojurescript, ai, analysis, background, science


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: optimisation, opencv, risk, chain, urban


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 8

Current computing Query: maing, descent, science, clojurescript, knowledge


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 4

Current computing Query: schedule, gradient, airport, analysis, system


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: logistics, business, strategy, analytics, math


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 8

Current computing Query: program, future, kera, ec, concept


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 6
50.012 Networks                                                  0.33558
01.104 Networked Life                                            0.27537
40.319 Statistical and Machine Learning                          0.26261
01.117 Brain-Inspired Computing and its Applications (Term 8)    0.25574
40.305 Advanced Topics in Stochastic Modelling#                  0.25503
50.020 Network Security                                          0.22059
40.232 Water Resources Management                                0.21678
50.035 Computer Vision                                           0.20015
01.107 Urban Transportation                                      0.18570
50.039 Theory and Practice of Deep Learning                      0.17916
Name: bm25Score, dtype: float64
['50.007 Machine Learning', '50.037 Blockchain Technology', '40.302 Advanced Optim/ 40.305 Advanced Stochastic', '50.039 Theory and Practice of Deep Learning', '50.021 Artificial Intelligence', ' 50.035 Comput

  dist = 1.0 - uv / np.sqrt(uu * vv)


40.320 Airport Systems Planning and Design                       0.19562
40.318 Supply Chain Digitalisation and Design                    0.14556
40.260 Supply Chain Management                                   0.13064
40.242 Derivative Pricing and Risk Management                    0.11203
50.045 Information Retrieval                                     0.10411
50.038 Computational Data Science                                0.09552
50.017 Graphics and Visualisation                                0.09412
50.040 Natural Language Processing                               0.09104
01.117 Brain-Inspired Computing and its Applications (Term 8)    0.08922
50.048 Computational Fabrication                                 0.08790
Name: bm25Score, dtype: float64
['40.260 Supply Chain Management', '40.240 Investment Science', '40.317 Financial Systems Design', '50.039 Theory and Practice of Deep Learning', '40.319 Statistical and Machine Learning', '50.007 Machine Learning', '40.242 Derivative Pri

  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: logistics, analysis, operation, basic, r


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 9

Current computing Query: esd, network, evaluate, program, r


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: operation, basic, evaluation, price, evaluate


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 13

Current computing Query: infrastructure, metric, pytorch, model, client


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 6

Current computing Query: approach, logistics, shag, infrastructure, equity


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 9

Current computing Query: data, sql, different, analytics, model


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 9

Current computing Query: computational, best, server, certain, ec


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 9

Current computing Query: sklearn, metric, demand, schedule, fundamental


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: digitalisation, skill, long, technology, aviation


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 8

Current computing Query: r, hidden, artificial, schedule, simulate


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 5

Current computing Query: optimisation, decision, risk, aws, jupyter


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 6

Current computing Query: decentralizedapp, equity, math, ethereum, technology


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 6

Current computing Query: financial, value, metric, optimize, analysis


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 10

Current computing Query: risk, certain, computational, approach, c


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 9

Current computing Query: markov, supply, hidden, computational, payoff


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 8

Current computing Query: finance, value, long, average, business


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 8

Current computing Query: financial, spark, science, focus, search


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: supply, notebook, fundamental, know, basic


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 10

Current computing Query: r, linear, ethereum, c, machine


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 4

Current computing Query: science, future, problem, demand, pytorch


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: r, ec, kera, opponent, model


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 3

Current computing Query: markov, descent, algebra, math, research


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: clojurescript, ai, analysis, background, science


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: optimisation, opencv, risk, chain, urban


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 8

Current computing Query: maing, descent, science, clojurescript, knowledge


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 4

Current computing Query: schedule, gradient, airport, analysis, system


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: logistics, business, strategy, analytics, math


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 8

Current computing Query: program, future, kera, ec, concept


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 6
50.012 Networks                                                  0.33558
01.104 Networked Life                                            0.27537
40.319 Statistical and Machine Learning                          0.26261
01.117 Brain-Inspired Computing and its Applications (Term 8)    0.25574
40.305 Advanced Topics in Stochastic Modelling#                  0.25503
50.020 Network Security                                          0.22059
40.232 Water Resources Management                                0.21678
50.035 Computer Vision                                           0.20015
01.107 Urban Transportation                                      0.18570
50.039 Theory and Practice of Deep Learning                      0.17916
Name: bm25Score, dtype: float64
['50.007 Machine Learning', '50.037 Blockchain Technology', '40.302 Advanced Optim/ 40.305 Advanced Stochastic', '50.039 Theory and Practice of Deep Learning', '50.021 Artificial Intelligence', ' 50.035 Comput

  dist = 1.0 - uv / np.sqrt(uu * vv)


40.317 Financial Systems Design                    0.12985
40.240 Investment Science                          0.12949
50.038 Computational Data Science                  0.10132
40.305 Advanced Topics in Stochastic Modelling#    0.06816
01.116 AI for Healthcare (Term 7)                  0.04796
40.232 Water Resources Management                  0.04635
50.021 Artificial Intelligence                     0.04129
40.324 Fundamentals of Investing                   0.03432
40.316 Game Theory                                 0.03161
40.230 Sustainable Engineering                     0.02882
Name: bm25Score, dtype: float64
['50.043 Database Systems / Database and Big Data Systems (for class 2021)', '50.021 Artificial Intelligence', '50.037 Blockchain Technology', '50.039 Theory and Practice of Deep Learning', '50.007 Machine Learning', ' 50.035 Computer Vision', '50.038 Computational Data Science', '40.324 Fundamentals of Investing', '40.316 Game Theory', '40.321 Airport Systems Modelling and S

  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 9

Current computing Query: logistics, analysis, operation, basic, r


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 10

Current computing Query: esd, network, evaluate, program, r


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: operation, basic, evaluation, price, evaluate


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 14

Current computing Query: infrastructure, metric, pytorch, model, client


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 6

Current computing Query: approach, logistics, shag, infrastructure, equity


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 9

Current computing Query: data, sql, different, analytics, model


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 11

Current computing Query: computational, best, server, certain, ec


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 10

Current computing Query: sklearn, metric, demand, schedule, fundamental


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: digitalisation, skill, long, technology, aviation


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 8

Current computing Query: r, hidden, artificial, schedule, simulate


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 5

Current computing Query: optimisation, decision, risk, aws, jupyter


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 6

Current computing Query: decentralizedapp, equity, math, ethereum, technology


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 6

Current computing Query: financial, value, metric, optimize, analysis


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 10

Current computing Query: risk, certain, computational, approach, c


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 9

Current computing Query: markov, supply, hidden, computational, payoff


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 8

Current computing Query: finance, value, long, average, business


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 8

Current computing Query: financial, spark, science, focus, search


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: supply, notebook, fundamental, know, basic


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 10

Current computing Query: r, linear, ethereum, c, machine


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 4

Current computing Query: science, future, problem, demand, pytorch


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: r, ec, kera, opponent, model


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 4

Current computing Query: markov, descent, algebra, math, research


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 8

Current computing Query: clojurescript, ai, analysis, background, science


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: optimisation, opencv, risk, chain, urban


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 8

Current computing Query: maing, descent, science, clojurescript, knowledge


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 4

Current computing Query: schedule, gradient, airport, analysis, system


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7

Current computing Query: logistics, business, strategy, analytics, math


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 9

Current computing Query: program, future, kera, ec, concept


  dist = 1.0 - uv / np.sqrt(uu * vv)


Number of terms in corpus: 7
50.012 Networks                                                  0.33558
01.104 Networked Life                                            0.27537
01.117 Brain-Inspired Computing and its Applications (Term 8)    0.25574
50.020 Network Security                                          0.23720
40.305 Advanced Topics in Stochastic Modelling#                  0.23017
40.232 Water Resources Management                                0.21678
50.035 Computer Vision                                           0.19097
01.107 Urban Transportation                                      0.18570
40.323 Equity Valuation                                          0.18498
50.039 Theory and Practice of Deep Learning                      0.17775
Name: bm25Score, dtype: float64
['50.007 Machine Learning', '50.037 Blockchain Technology', '40.302 Advanced Optim/ 40.305 Advanced Stochastic', '50.039 Theory and Practice of Deep Learning', '50.021 Artificial Intelligence', ' 50.035 Comput

  dist = 1.0 - uv / np.sqrt(uu * vv)



For k is 1, IDCG scorce is 0.27154
For k is 1, NDCG scorce is 1.0

For k is 2, DCG scorce is 0.27154
For k is 2, IDCG scorce is 0.35039
For k is 2, NDCG scorce is 0.7749650389565913

For k is 3, DCG scorce is 0.27154
For k is 3, IDCG scorce is 0.390819978608859
For k is 3, NDCG scorce is 0.6947955960863584

For k is 4, DCG scorce is 0.310965
For k is 4, IDCG scorce is 0.390819978608859
For k is 4, NDCG scorce is 0.7956732434889682

For k is 5, DCG scorce is 0.33856275384134304
For k is 5, IDCG scorce is 0.390819978608859
For k is 5, NDCG scorce is 0.8662882461804337

For k is 6, DCG scorce is 0.33856275384134304
For k is 6, IDCG scorce is 0.390819978608859
For k is 6, NDCG scorce is 0.8662882461804337

For k is 7, DCG scorce is 0.33856275384134304
For k is 7, IDCG scorce is 0.390819978608859
For k is 7, NDCG scorce is 0.8662882461804337

For k is 8, DCG scorce is 0.33856275384134304
For k is 8, IDCG scorce is 0.390819978608859
For k is 8, NDCG scorce is 0.8662882461804337

For k is 9,