In [None]:
#!/usr/bin/env python
# coding: utf-8
from __future__ import print_function

__doc__="""
Added jaccard similarity function
Settable list of tags and list of document numbers
doc0 set to 0 set to a given input file
Compute cosine similarity between all pairs of documents that share a given tag or is doc 0, and cos_sim of corresponding (tag set pairs)
Graph cos_sim(target doc, doc_no) vs doc no  and  cos_sim(tag label set(target doc), tagset(doc_no)) vs doc_no for each target tag / target doc graph pair
Graph corresponding cos(doc(x)) vs cos(tagset(x))
Output directory set to ..SIMILARITY_PLOTS/CC_PLOTS6_2
Computes mean and variance of cosine similarity for each target document/target tag graph pair.
Produces cc_statlog6_2_{start time}.csv & console log
"""

# Echo the module description at start-up so it appears at the top of the
# console log.
# NOTE(review): the description still names CC_PLOTS6_2 and cc_statlog6_2,
# while the __main__ block of this file writes cc_combined6_3_statlog_* and
# CC_PLOTS6_3_* -- confirm which naming is intended.
print(__doc__)

# Module metadata.
__source__ = 'cc_combined6_3.py'
__author__ = "Frank J. Greco"
__copyright__ = "Copyright 2015-2018, Frank J. Greco"
__credits__ = []
__license__ = "Apache"
__version__ = "1.0.1"
__email__ = ""
__status__ = "Development"

In [None]:
# IPython magics: these two lines only work inside an IPython/Jupyter session
# (they are not plain Python). They enable the autoreload extension in mode 2.
%load_ext autoreload
%autoreload 2
#%reload_ext autoreload
import sys
# NOTE(review): hard-coded, machine-specific directory added to the import
# path -- presumably where the local cc_metrics / preprocess modules live;
# confirm and adjust when running on another machine.
sys.path.append('/Users/fjgreco/Dev-Atlas')

In [4]:
import os
import sys
import collections

import re

import logging
import datetime

from bokeh.plotting import figure, output_file, show
from bokeh.layouts import row
from bokeh.io import save
from bokeh.models import ColumnDataSource, Range1d, LabelSet, Label

import statistics

from scipy.stats.stats import pearsonr
from scipy.stats.stats import spearmanr

import numpy as numpy

import pandas as pd

from cc_metrics import calculate_similarities

from preprocess import create_tag_dict
from preprocess import create_tag_sub_dict
from preprocess import create_text_dict_l

from bkcharts import HeatMap, show, output_file
from bokeh.palettes import RdGy11 as palette  # @UnresolvedImport
from bokeh.models import HoverTool
from bokeh.models import CrosshairTool

#
# Plot Results
#
def heatmap3(c3,title="Cosine Similarity",output_filename='cossim_heatmap.html'):
    """Build a bkcharts HeatMap for a matrix of similarity scores.

    Args:
        c3: matrix-like data accepted by ``pd.DataFrame``.
        title: chart title.
        output_filename: passed to Bokeh's ``output_file``; the chart itself
            is neither shown nor saved here.

    Returns:
        The HeatMap object.
    """
    crosshair_tool = CrosshairTool()

    hover_tool = HoverTool(tooltips=[
        ("index", "$index"),
        ("(x,y)", "(@x,@y)"),
        ("score", "@values"),
    ])

    frame = pd.DataFrame(c3)
    row_labels = list(frame.index)
    column_labels = list(frame.columns)

    # apply(tuple) runs per column (default axis); the loop assumes that
    # iterating the result yields one tuple of scores per column -- TODO
    # confirm for the pandas version in use.
    flat_scores = []
    for column_values in frame.apply(tuple):
        flat_scores.extend(column_values)

    # Every column label is repeated once per row so that it lines up with
    # the repeated row labels below.
    repeated_columns = []
    for label in column_labels:
        repeated_columns.extend([label] * len(row_labels))

    heatmap_data = {
        'transcript-x': row_labels * len(column_labels),
        'transcript-y': repeated_columns,
        'score': flat_scores,
    }

    output_file(output_filename)

    return HeatMap(heatmap_data,
                   x='transcript-x',
                   y='transcript-y',
                   values='score',
                   title=title,
                   stat=None,
                   tools=[hover_tool, crosshair_tool],
                   palette=palette)

def heatmap4(title="Test pattern",output_filename='test_heatmap2.html'):
    """Build a bkcharts HeatMap of a fixed 3x3 identity matrix (test pattern).

    Args:
        title: chart title.
        output_filename: passed to Bokeh's ``output_file``; the chart itself
            is neither shown nor saved here.

    Returns:
        The HeatMap object.
    """
    crosshair_tool = CrosshairTool()

    hover_tool = HoverTool(tooltips=[
        ("index", "$index"),
        ("(x,y)", "(@x,@y)"),
        ("score", "@values"),
    ])

    # Hard-coded identity matrix used as the test pattern.
    frame = pd.DataFrame([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
    row_labels = list(frame.index)
    column_labels = list(frame.columns)

    # apply(tuple) runs per column (default axis); the loop assumes that
    # iterating the result yields one tuple of scores per column -- TODO
    # confirm for the pandas version in use.
    flat_scores = []
    for column_values in frame.apply(tuple):
        flat_scores.extend(column_values)

    repeated_columns = []
    for label in column_labels:
        repeated_columns.extend([label] * len(row_labels))

    heatmap_data = {
        'Index': row_labels * len(column_labels),
        'Cosine': repeated_columns,
        'score': flat_scores,
    }

    output_file(output_filename)

    return HeatMap(heatmap_data,
                   x='Index',
                   y='Cosine',
                   values='score',
                   title=title,
                   stat=None,
                   tools=[hover_tool, crosshair_tool],
                   palette=palette)

#
# Stat Results
#
def stat_results(doc_ids,
                 doc_no,
                 target_tag,
                 cosine_scores_text,
                 cosine_scores_tags,
                 jaccard_scores_text,
                 jaccard_scores_tags,
                 statlog,
                 cos_means,
                 jaccard_means):
    """Log summary statistics for one target document to the stat log.

    One CSV row is written to ``statlog``: the document key, the tag, the
    mean and variance of the document's text cosine scores, and the mean and
    variance of its tag Jaccard scores. In both cases the entry at position
    ``doc_no`` (the document compared with itself) is left out. When they can
    be computed, Pearson and Spearman results for text cosine scores versus
    tag Jaccard scores are appended to the same row.

    Args:
        doc_ids: document keys in matrix order; any iterable (for example
            ``dict.keys()``) is accepted.
        doc_no: position of the target document in ``doc_ids`` and in the
            score matrices.
        target_tag: tag label; also the key used in ``cos_means`` and
            ``jaccard_means``.
        cosine_scores_text: indexable by ``doc_no``; each row holds the text
            cosine scores of one document.
        cosine_scores_tags: unused; kept for interface compatibility.
        jaccard_scores_text: indexable by ``doc_no``; text Jaccard scores.
        jaccard_scores_tags: indexable by ``doc_no``; tag Jaccard scores.
        statlog: writable text stream.
        cos_means: dict of lists; the cosine mean is appended to
            ``cos_means[target_tag]``.
        jaccard_means: dict of lists; the Jaccard mean is appended to
            ``jaccard_means[target_tag]``.

    Raises:
        statistics.StatisticsError: if fewer than two other documents exist.
            Nothing is written and neither means dict is touched in that case.
    """
    # dict.keys() is a non-indexable view on Python 3; a list works everywhere.
    doc_ids = list(doc_ids)

    print("doc_no:", doc_no)
    doc_key = doc_ids[doc_no]
    print("doc_key:", doc_key)

    cosine_text_row = cosine_scores_text[doc_no]
    jaccard_text_row = jaccard_scores_text[doc_no]
    jaccard_tags_row = jaccard_scores_tags[doc_no]

    # Drop the self-comparison entry before computing the statistics.
    cosine_others = [score for position, score in enumerate(cosine_text_row)
                     if position != doc_no]
    jaccard_others = [score for position, score in enumerate(jaccard_tags_row)
                      if position != doc_no]

    # Compute every statistic before touching statlog or the means dicts, so
    # a failure cannot leave a half-written row or lists of unequal length.
    cosine_mean = statistics.mean(cosine_others)
    cosine_variance = statistics.variance(cosine_others)
    jaccard_mean = statistics.mean(jaccard_others)
    jaccard_variance = statistics.variance(jaccard_others)

    fields = ['doc-' + str(doc_key), target_tag,
              str(cosine_mean), str(cosine_variance),
              str(jaccard_mean), str(jaccard_variance)]

    cos_means[target_tag].append(cosine_mean)
    jaccard_means[target_tag].append(jaccard_mean)

    try:
        pearson = pearsonr(cosine_text_row, jaccard_tags_row)
        spearman = spearmanr(cosine_text_row, jaccard_tags_row)
        print("Pearsonr Result: ", pearson)
        print(spearman)
        # Appended as further comma-separated fields; previously "pearson:"
        # was glued onto the Jaccard variance column.
        # NOTE(review): str(spearman) itself contains a comma, so it spans
        # two CSV columns.
        fields.extend(["pearson:", str(pearson[0]), str(pearson[1]), str(spearman)])
    except Exception:
        # Best effort: the row is still written without the correlation part.
        print("Error Summary Stats(1)", sys.exc_info())

    try:
        # NOTE(review): this compares text cosine with *text* Jaccard scores,
        # whereas the Pearson/Spearman above use *tag* Jaccard scores --
        # confirm that the difference is intended.
        print("Correlation Coefficients:\n", numpy.corrcoef(cosine_text_row, jaccard_text_row))
    except Exception:
        print("Error: correlation coefficients:", sys.exc_info())

    statlog.write(','.join(fields))
    statlog.write('\n')

    return

#
# Plot Results
#
def plot_results(doc_ids,
                 doc_no,
                 target_tag,
                 cosine_scores_text,
                 cosine_scores_tags,
                 jaccard_scores_text,
                 jaccard_scores_tags,
                 plot_dir):
    """Save a three-panel Bokeh page for one target document.

    Panels:
        1. text cosine scores (blue) and tag Jaccard scores (red) against the
           document index;
        2. text cosine scores against text Jaccard scores;
        3. tag Jaccard scores against the document keys.

    Panels 1 and 3 carry a label with n, mean and variance of the plotted
    scores, computed without the entry at position ``doc_no`` (the document
    compared with itself).

    Args:
        doc_ids: document keys in matrix order; any iterable (for example
            ``dict.keys()``) is accepted.
        doc_no: position of the target document in ``doc_ids`` and in the
            score matrices.
        target_tag: tag label used in titles and in the output file name.
        cosine_scores_text: indexable by ``doc_no``; text cosine scores.
        cosine_scores_tags: unused; kept for interface compatibility.
        jaccard_scores_text: indexable by ``doc_no``; text Jaccard scores.
        jaccard_scores_tags: indexable by ``doc_no``; tag Jaccard scores.
        plot_dir: directory that receives
            ``cc_cluster_<tag>_doc_<key>.html``.

    Raises:
        statistics.StatisticsError: if fewer than two other documents exist.
    """
    # dict.keys() is a non-indexable view on Python 3; a list can be indexed
    # and handed to bokeh as plot data on both Python 2 and 3.
    doc_ids = list(doc_ids)

    print("doc_no:",doc_no)
    doc_key=doc_ids[doc_no]
    print("doc_key:",doc_key)

    doc_indexes = list(range(len(doc_ids)))
    cosine_text_row = cosine_scores_text[doc_no]
    jaccard_text_row = jaccard_scores_text[doc_no]
    jaccard_tags_row = jaccard_scores_tags[doc_no]

    #########################################
    # create  plot 1
    #########################################
    cosine_citation = _stats_citation(cosine_text_row, doc_no)

    p1 = figure(title="Cosine Similarity between Doc Index: " + str(doc_no)+' CaseId: '+ str(doc_key) + " and each case tagged with:" + target_tag,
                x_axis_label='Doc Index', y_axis_label='Cosine(Blue)/Jaccard(Red)')

    p1.add_layout(cosine_citation)

    p1.circle(doc_indexes, cosine_text_row, color="blue", size=5, legend="text")

    p1.circle(doc_indexes, jaccard_tags_row, color="red", size=2, legend="tags")

    #########################################
    # create  plot 2
    #########################################
    p2 = figure(title="COS similarity vs Jaccard similarity; CaseId " + str(doc_key), x_axis_label='COS metric', y_axis_label='Jaccard metric')

    p2.circle(cosine_text_row, jaccard_text_row, color="green")

    #########################################
    # create  plot 3
    #########################################
    jaccard_citation = _stats_citation(jaccard_tags_row, doc_no)

    p3 = figure(title="Jaccard similarity between Doc Index: " + str(doc_no)+' CaseId: '+ str(doc_key) + " and each case tagged with:" + target_tag,
                x_axis_label='Case ID', y_axis_label='Jaccard Metric')
    p3.add_layout(jaccard_citation)

    p3.circle(doc_ids, jaccard_tags_row, color="blue")

    ##########################################################################
    # output to HTML file
    ##########################################################################
    output_file(plot_dir+"/cc_cluster_"+target_tag+"_doc_"+str(doc_key)+".html")

    save(row(p1,p2,p3))

    return


def _stats_citation(scores, doc_no):
    """Return the boxed "n / mean / variance" Label for one row of scores.

    The entry at position ``doc_no`` is excluded before the statistics are
    computed.
    """
    others = [score for position, score in enumerate(scores) if position != doc_no]

    text = "n: " + str(len(others)) + " mean: " + str(statistics.mean(others)) +  ' variance: ' + str(statistics.variance(others))

    return Label(x=100, y=400, x_units='screen', y_units='screen',
                 text=text, render_mode='css',
                 border_line_color='black', border_line_alpha=1.0,
                 background_fill_color='white', background_fill_alpha=1.0)


Added jaccard similarity function
Settable list of tags and list of document numbers
doc0 set to 0 set to a given input file
Compute cosine similarity between all pairs of documents that share a given tag or is doc 0, and cos_sim of corresponding (tag set pairs)
Graph cos_sim(target doc, doc_no) vs doc no  and  cos_sim(tag label set(target doc), tagset(doc_no)) vs doc_no for each target tag / target doc graph pair
Graph corresponding cos(doc(x)) vs cos(tagset(x))
Output directory set to ..SIMILARITY_PLOTS/CC_PLOTS6_2
Computes mean and variance of cosine similarity for each target document/target tag graph pair.
Produces cc_statlog6_2_{start time}.csv & console log



In [6]:
def main():
    """Run the similarity analysis for every tag in ``target_tags``.

    Reads the module-level settings ``statlog_filename``, ``xls_pathname``,
    ``transcript_dir``, ``target_tags`` and ``plot_dir`` (assigned in the
    ``__main__`` block of this file).

    For each tag: builds the tag and text sets, computes their cosine and
    Jaccard similarity matrices, saves a pair of heatmaps to
    ``heatmap_<tag>.html``, then calls ``stat_results`` and ``plot_results``
    once per document. A failure for a single document is printed and the
    run continues. Finally writes per-tag means of means and, when more than
    one tag produced them, Pearson/Spearman results across tags.
    """
    cos_means = dict()

    jaccard_means = dict()

    # The context manager closes the stat log even if an error escapes.
    with open(statlog_filename, 'w') as statlog:
        statlog.write(','.join(
            ['doc_key', 'target_tag', 'cs-mean', 'cs-variance', "js_mean", "js_variance"]))
        statlog.write('\n')

        tag_dict = create_tag_dict(xls_pathname, v=True)

        #
        # Analyze tags and text
        #
        for target_tag in target_tags:

            cos_means[target_tag] = []

            jaccard_means[target_tag] = []

            tag_sub_dict = create_tag_sub_dict(tag_dict, target_tag)

            train_sub_dict = create_text_dict_l(transcript_dir, tag_sub_dict=tag_sub_dict)

            # Fix the document order once; matrix row i belongs to doc_ids[i].
            # A list (rather than dict.keys()) can be indexed on Python 3.
            doc_ids = list(train_sub_dict.keys())

            # presumably each value is a (text, tags) pair -- verify against
            # preprocess.create_text_dict_l
            tag_train_set = [train_sub_dict[doc_id][1] for doc_id in doc_ids]
            text_train_set = [train_sub_dict[doc_id][0] for doc_id in doc_ids]

            print("\nCalculate tag_train_set similarities:")
            cosine_scores_tags, jaccard_scores_tags = calculate_similarities(tag_train_set)

            print("\nCalculate text_train_set similarities:")
            cosine_scores_text, jaccard_scores_text = calculate_similarities(text_train_set)

            heatmap_fn = 'heatmap_' + target_tag + '.html'

            print(heatmap_fn)

            # Pass the per-tag file name on; without it heatmap3 falls back
            # to its default name and every tag overwrites the same file.
            h1 = heatmap3(cosine_scores_text,
                          title='cosine_scores_text: ' + target_tag,
                          output_filename=heatmap_fn)

            h2 = heatmap3(jaccard_scores_tags,
                          title='jaccard_scores_tags: ' + target_tag,
                          output_filename=heatmap_fn)

            save(row(h1, h2))

            print("\ndoc_ids:", doc_ids)

            for doc_no in range(len(doc_ids)):

                print('\nmain: doc_no:', doc_no, 'target_tag', target_tag)

                try:
                    stat_results(doc_ids,
                                 doc_no,
                                 target_tag,
                                 cosine_scores_text,
                                 cosine_scores_tags,
                                 jaccard_scores_text,
                                 jaccard_scores_tags,
                                 statlog,
                                 cos_means,
                                 jaccard_means)
                except Exception:
                    # Best effort: report and go on with the next document.
                    print("main: stat_results error:", "target_tag", target_tag, "doc_no", doc_no, sys.exc_info())

                try:
                    plot_results(doc_ids,
                                 doc_no,
                                 target_tag,
                                 cosine_scores_text,
                                 cosine_scores_tags,
                                 jaccard_scores_text,
                                 jaccard_scores_tags,
                                 plot_dir)
                except Exception:
                    print("main: plot_results error:", "target_tag", target_tag, "doc_no", doc_no, sys.exc_info())

        #
        # Test for correlation
        #
        sim_js = []

        sim_cs = []

        for item in sorted(cos_means.keys()):
            try:
                statlog.write(','.join(["Target tag:", item, "n:", str(len(cos_means[item]))]))
                statlog.write(',')
                mean_cos_means = statistics.mean(cos_means[item])
                statlog.write(
                    ','.join(["Mean of Cosine Similarity means:  ", str(mean_cos_means), ' variance: ',
                              str(statistics.variance(cos_means[item]))]))
                statlog.write(',')
                mean_jaccard_means = statistics.mean(jaccard_means[item])
                statlog.write(
                    ','.join(["Mean of Jaccard Similarity means: ", str(mean_jaccard_means), ' variance: ',
                              str(statistics.variance(jaccard_means[item]))]))

                statlog.write('\n')
                sim_cs.append(mean_cos_means)
                sim_js.append(mean_jaccard_means)

            except Exception:
                print("main:", "error printing summary stats", "item:", item, sys.exc_info())
                # Terminate the partially written row.
                statlog.write('\n')

        #
        # Print Correlation Statistics for full run
        #
        if len(sim_cs) > 1:

            print("\nsim_cs:", sim_cs)
            statlog.write(','.join(["sim_cs:", str(sim_cs)]))
            statlog.write('\n')

            print("\nsim_js", sim_js)
            statlog.write(','.join(["sim_js:", str(sim_js)]))
            statlog.write('\n')

            # Each correlation is computed once and reused for console and log.
            pearson_result = pearsonr(sim_cs, sim_js)
            print("\nPearsonr RESULT: ", pearson_result)
            statlog.write(','.join(["Pearsonr RESULT:", str(pearson_result)]))
            statlog.write('\n')

            spearman_result = spearmanr(sim_cs, sim_js)
            print("\nSpearmanr RESULT:", spearman_result)
            statlog.write(','.join(["Spearmanr RESULT:", str(spearman_result)]))
            statlog.write('\n')

            print("\nCorrelation Coefficients\n", numpy.corrcoef(sim_cs, sim_js))

        et = 'End time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now())
        print(et)

In [7]:
if __name__ == "__main__":

    # A single start timestamp names both the stat log and the plot directory
    # of this run.
    st = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print("cc_combined6_3: Start time:", st)

    # CSV stat log written by main().
    statlog_filename = "../SIMILARITY_PLOTS/cc_combined6_3_statlog_" + st + '.csv'
    print('statlog:', statlog_filename)

    # Spreadsheet handed to create_tag_dict() in main().
    xls_pathname = '../CC_FILES_TAGLIST/TW Case List.xlsx'
    print('Case List', xls_pathname)

    # Directory handed to create_text_dict_l() in main().
    transcript_dir = '../CC_TRANSCRIPTS3'
    print('transcript_dir', transcript_dir)

    # Tags to analyse. Alternatives used in earlier runs:
    #   ['TW01', 'TW02', ..., 'TW24']  (every tag from TW01 to TW24)
    #   ['all']
    target_tags = ['TW02', 'TW05']
    print("target tags:", target_tags)

    # Per-run output directory for the per-document plot pages.
    plot_dir = '../SIMILARITY_PLOTS/CC_PLOTS6_3_' + st
    print("plot_dir", plot_dir)

    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)

    main()

cc_combined6_3: Start time: 2018-03-24 09:26:02
statlog: ../SIMILARITY_PLOTS/cc_combined6_3_statlog_2018-03-24 09:26:02.csv
Case List ../CC_FILES_TAGLIST/TW Case List.xlsx
transcript_dir ../CC_TRANSCRIPTS3
target tags: ['TW02', 'TW05']
plot_dir ../SIMILARITY_PLOTS/CC_PLOTS6_3_2018-03-24 09:26:02

*** Begin process_worksheet ***

[u'CASE IDS', u'Sheet1', u'caseannotation-20170613T220819']

preprocess_worksheet: len(tag_dict.keys()):225
1 TW08,TW24,TW06,TW15
2 TW05,TW13,TW10,TW09,TW02
3 TW24TW06,TW13,TW01,TW17,TW15,TW08
4 TW24,TW13,TW01,TW17,TW13,TW01
5 TW24,TW24,TW24,TW15,TW16,TW01
6 TW24,TW06,TW08,TW05,TW16,TW03
9 TW24,TW24,TW15,TW19,TW11,TW05,TW02,TW18,TW11,TW13,TW19,TW16,TW19,TW17,TW11,TW01,TW04
10 TW16,TW17,TW13,TW01,TW11,TW18,TW02
12 TW24TW06,TW05,TW02,TW06,TW17,TW01
14 TW05,TW24,TW24,TW06,TW16,TW16,TW19,TW02,TW02,TW01
20 TW08
21 TW24,TW05,TW09,TW02
22 TW11,TW08,TW02
24 TW24,TW17,TW04,TW03,TW06,TW11,TW06,TW06,TW01
26 TW24,TW24,TW09,TW02,TW09,TW02
27 TW24,TW05,TW08,TW02
28 TW24TW17,

analyze_text: #keys: 131
['2', '9', '10', '12', '14', '21', '22', '26', '27', '29', '33', '34', '42', '43', '45', '48', '51', '53', '61', '64', '66', '68', '74', '75', '76', '86', '90', '94', '96', '99', '102', '104', '106', '107', '112', '115', '118', '120', '122', '123', '128', '129', '134', '137', '142', '146', '148', '150', '151', '153', '157', '158', '160', '161', '163', '164', '165', '166', '167', '171', '173', '177', '188', '191', '192', '194', '195', '207', '213', '214', '216', '217', '218', '221', '243', '244', '261', '268', '274', '280', '285', '289', '293', '297', '302', '448', '459', '460', '590', '595', '597', '603', '606', '608', '610', '611', '616', '621', '796', '797', '812', '813', '825', '826', '828', '837', '839', '840', '845', '846', '981', '990', '996', '1008', '1015', '1080', '1360', '1362', '1363', '1364', '1366', '1372', '1373', '1378', '1379', '1835', '1837', '1838', '1839', '1846', '1857']

Calculate tag_train_set similarities:

jaccard: len(m): 131

Calculate


main: doc_no: 22 target_tag TW02
doc_no: 22
doc_key: 74
Pearsonr Result:  (0.27018296702529115, 0.0018023500558797885)
SpearmanrResult(correlation=0.06843817021347953, pvalue=0.43732718213295296)
Correlation Coefficients:
 [[1.         0.29549792]
 [0.29549792 1.        ]]
doc_no: 22
doc_key: 74

main: doc_no: 23 target_tag TW02
doc_no: 23
doc_key: 75
Pearsonr Result:  (0.18279811194645795, 0.036634846251779825)
SpearmanrResult(correlation=0.11349329907556957, pvalue=0.19680246434514787)
Correlation Coefficients:
 [[1.         0.55926309]
 [0.55926309 1.        ]]
doc_no: 23
doc_key: 75

main: doc_no: 24 target_tag TW02
doc_no: 24
doc_key: 76
Pearsonr Result:  (0.26936153360157783, 0.001863609749263582)
SpearmanrResult(correlation=0.14057123106082445, pvalue=0.10927647131940525)
Correlation Coefficients:
 [[1.         0.56010353]
 [0.56010353 1.        ]]
doc_no: 24
doc_key: 76

main: doc_no: 25 target_tag TW02
doc_no: 25
doc_key: 86
Pearsonr Result:  (0.13642214860271193, 0.120248513


main: doc_no: 50 target_tag TW02
doc_no: 50
doc_key: 157
Pearsonr Result:  (0.1998486943162053, 0.022103272720330743)
SpearmanrResult(correlation=0.08720984410393388, pvalue=0.32193740247882513)
Correlation Coefficients:
 [[1.         0.38899664]
 [0.38899664 1.        ]]
doc_no: 50
doc_key: 157

main: doc_no: 51 target_tag TW02
doc_no: 51
doc_key: 158
Pearsonr Result:  (0.13526825317756, 0.12344662079615555)
SpearmanrResult(correlation=0.022213640984961796, pvalue=0.8011639013756218)
Correlation Coefficients:
 [[1.         0.37847621]
 [0.37847621 1.        ]]
doc_no: 51
doc_key: 158

main: doc_no: 52 target_tag TW02
doc_no: 52
doc_key: 160
Pearsonr Result:  (0.011635546135330382, 0.8950610117295443)
SpearmanrResult(correlation=-0.06083338001189952, pvalue=0.4900455591799826)
Correlation Coefficients:
 [[1.        0.5407323]
 [0.5407323 1.       ]]
doc_no: 52
doc_key: 160

main: doc_no: 53 target_tag TW02
doc_no: 53
doc_key: 161
Pearsonr Result:  (0.09166914941446293, 0.2977177421286


main: doc_no: 78 target_tag TW02
doc_no: 78
doc_key: 274
Pearsonr Result:  (0.17051504390185826, 0.05150905856915706)
SpearmanrResult(correlation=0.004239434276258965, pvalue=0.9616702502368987)
Correlation Coefficients:
 [[1.         0.40380529]
 [0.40380529 1.        ]]
doc_no: 78
doc_key: 274

main: doc_no: 79 target_tag TW02
doc_no: 79
doc_key: 280
Pearsonr Result:  (0.2982475456936069, 0.0005402286731802025)
SpearmanrResult(correlation=0.12024529966408473, pvalue=0.17129665184399442)
Correlation Coefficients:
 [[1.       0.558432]
 [0.558432 1.      ]]
doc_no: 79
doc_key: 280

main: doc_no: 80 target_tag TW02
doc_no: 80
doc_key: 285
Pearsonr Result:  (0.19339806609625823, 0.026879273182183146)
SpearmanrResult(correlation=0.08191779801970885, pvalue=0.3522827899098071)
Correlation Coefficients:
 [[1.         0.15579745]
 [0.15579745 1.        ]]
doc_no: 80
doc_key: 285

main: doc_no: 81 target_tag TW02
doc_no: 81
doc_key: 289
Pearsonr Result:  (0.07808212829766563, 0.3753589485273


main: doc_no: 106 target_tag TW02
doc_no: 106
doc_key: 839
Pearsonr Result:  (0.07784649202554009, 0.37680603548475844)
SpearmanrResult(correlation=-0.08561941223225025, pvalue=0.3308745533302363)
Correlation Coefficients:
 [[1.         0.15836832]
 [0.15836832 1.        ]]
doc_no: 106
doc_key: 839

main: doc_no: 107 target_tag TW02
doc_no: 107
doc_key: 840
Pearsonr Result:  (0.1692298599385454, 0.05331982030419483)
SpearmanrResult(correlation=0.044088371473937564, pvalue=0.6170599889051985)
Correlation Coefficients:
 [[1.         0.25534129]
 [0.25534129 1.        ]]
doc_no: 107
doc_key: 840

main: doc_no: 108 target_tag TW02
doc_no: 108
doc_key: 845
Pearsonr Result:  (0.09160616025782489, 0.2980512442563379)
SpearmanrResult(correlation=-0.03670105498977951, pvalue=0.6772804164409756)
Correlation Coefficients:
 [[1.         0.18285968]
 [0.18285968 1.        ]]
doc_no: 108
doc_key: 845

main: doc_no: 109 target_tag TW02
doc_no: 109
doc_key: 846
Pearsonr Result:  (0.1351254051335063, 


jaccard: len(m): 71
heatmap_TW05.html

doc_ids: ['2', '6', '9', '12', '14', '21', '27', '28', '29', '45', '61', '68', '75', '86', '90', '94', '96', '98', '102', '104', '106', '107', '120', '128', '129', '134', '137', '146', '148', '150', '151', '158', '163', '165', '166', '167', '177', '188', '194', '207', '212', '213', '214', '221', '243', '274', '285', '289', '293', '302', '448', '459', '590', '595', '597', '603', '608', '618', '812', '828', '839', '840', '845', '1008', '1033', '1360', '1364', '1382', '1837', '1838', '1846']

main: doc_no: 0 target_tag TW05
doc_no: 0
doc_key: 2
Pearsonr Result:  (0.2802839385847468, 0.017910656023090964)
SpearmanrResult(correlation=0.1972738736346698, pvalue=0.09914705396132813)
Correlation Coefficients:
 [[1.         0.58569261]
 [0.58569261 1.        ]]
doc_no: 0
doc_key: 2

main: doc_no: 1 target_tag TW05
doc_no: 1
doc_key: 6
Pearsonr Result:  (0.0320172761521341, 0.7909619524526779)
SpearmanrResult(correlation=-0.06274888270767676, pvalue=0.6031


main: doc_no: 27 target_tag TW05
doc_no: 27
doc_key: 146
Pearsonr Result:  (0.08037736427813033, 0.5052005302109757)
SpearmanrResult(correlation=0.09457446682456329, pvalue=0.4327383408304175)
Correlation Coefficients:
 [[1.         0.67953789]
 [0.67953789 1.        ]]
doc_no: 27
doc_key: 146

main: doc_no: 28 target_tag TW05
doc_no: 28
doc_key: 148
Pearsonr Result:  (0.20087664595922436, 0.09300754571805701)
SpearmanrResult(correlation=0.08888209422652514, pvalue=0.461060965178531)
Correlation Coefficients:
 [[1.         0.65856769]
 [0.65856769 1.        ]]
doc_no: 28
doc_key: 148

main: doc_no: 29 target_tag TW05
doc_no: 29
doc_key: 150
Pearsonr Result:  (0.15129980658410233, 0.20784581881766706)
SpearmanrResult(correlation=0.05730039769746545, pvalue=0.6350422058637937)
Correlation Coefficients:
 [[1.         0.35757052]
 [0.35757052 1.        ]]
doc_no: 29
doc_key: 150

main: doc_no: 30 target_tag TW05
doc_no: 30
doc_key: 151
Pearsonr Result:  (0.13488302117910875, 0.26207292972


main: doc_no: 55 target_tag TW05
doc_no: 55
doc_key: 603
Pearsonr Result:  (0.388728086354962, 0.0008077079452066083)
SpearmanrResult(correlation=0.0897713275366607, pvalue=0.45657091117017923)
Correlation Coefficients:
 [[1.        0.5642005]
 [0.5642005 1.       ]]
doc_no: 55
doc_key: 603

main: doc_no: 56 target_tag TW05
doc_no: 56
doc_key: 608
Pearsonr Result:  (0.4128855681267478, 0.0003457786785989412)
SpearmanrResult(correlation=0.22954207634301624, pvalue=0.05415164741376232)
Correlation Coefficients:
 [[1.         0.67232764]
 [0.67232764 1.        ]]
doc_no: 56
doc_key: 608

main: doc_no: 57 target_tag TW05
doc_no: 57
doc_key: 618
Pearsonr Result:  (0.42801657624532086, 0.00019656355691307584)
SpearmanrResult(correlation=0.186980774981326, pvalue=0.118437060498846)
Correlation Coefficients:
 [[1.         0.62331627]
 [0.62331627 1.        ]]
doc_no: 57
doc_key: 618

main: doc_no: 58 target_tag TW05
doc_no: 58
doc_key: 812
Pearsonr Result:  (0.2963877130847304, 0.012081789166