In [1]:
%matplotlib notebook

import os, sys
sys.path.insert(1, os.path.join(sys.path[0], '..'))
import database
from snowballing.operations import reload, work_by_varname, load_work_map_all_years, find_citation
from snowballing.strategies import Strategy, State
import custom_strategies
from functools import reduce
from matplotlib_venn import venn2, venn2_circles
from matplotlib import pyplot as plt
import pandas as pd
from collections import OrderedDict
# Raise pandas' display limits so the wide tables built in this notebook are
# shown in full instead of being truncated.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

# NOTE(review): presumably the cut-off publication year used by
# custom_strategies -- confirm against that module.
custom_strategies.LIMIT_YEAR = 2015
# NOTE(review): reload() comes from snowballing.operations; presumably it
# (re)loads the work database -- confirm.
reload()
# Requirement for the matplotlib_venn import above:
# !pip install matplotlib-venn

In [2]:
# Keep only the works whose category is "snowball", as (varname, work) pairs,
# and collect their variable names in the same order.
selected = [
    (key, work)
    for key, work in load_work_map_all_years()
    if work.category == "snowball"
]
names = [key for key, _ in selected]

In [3]:

# Selected studies in presentation order. A study's 0-based position in this
# list is its id (shown elsewhere as S1..S22, i.e. id + 1).
order = OrderedDict(
    (varname, position)
    for position, varname in enumerate([
        'waina2001a',
        'wilkie2007a',
        'becker2008a',
        'petersen2015a',
        'kaneko2011a',
        'barreto2010a',
        'trienekens2009a',
        'guzmán2010a',
        'basili2010a',
        'sommerville1999a',
        'wang2005a',
        'martins2008a',
        'plösch2011a',
        'albuquerque2009a',
        'reiblein1997a',
        'mandić2010a',
        'trienekens2005a',
        'esfahani2011a',
        'becker2008b',
        'oConnor2015a',
        'kautz2000a',
        'mandić2010b',
    ])
)
# Reverse lookup (id -> varname), ordered by ascending id.
id_to_varname = OrderedDict(
    sorted((position, varname) for varname, position in order.items())
)
# Rebuild `selected` in id order as (varname, work) pairs, then label each
# study as "<year> S<n>" where n is its 1-based id.
selected = [
    (varname, work_by_varname(varname))
    for varname in id_to_varname.values()
]
names = [
    '{} S{}'.format(work.year, order[varname] + 1)
    for varname, work in selected
]

In [6]:
# Citation matrix among the selected studies: one row per citing study and one
# column per candidate cited study. Cell values:
#   '-'  the column study has a later year than the row study
#   'x'  find_citation(citer, cited) returned a truthy value
#   ''   otherwise
# The first cell of every row holds the citing study's varname ("id" column).
matrix = [
    [varname] + [
        '-' if cited.year > citer.year
        else 'x' if find_citation(citer, cited)
        else ''
        for _, cited in selected
    ]
    for varname, citer in selected
]
# Label rows and columns at construction time. Calling set_axis(0, names)
# relies on the pre-0.21 pandas argument order and on in-place mutation,
# which current pandas versions reject.
df = pd.DataFrame(matrix, index=names, columns=["id"] + names)
df = df.rename_axis("cited", axis="columns")
df = df.rename_axis("ref", axis="index")
def highlight_max(s):
    """Return one CSS declaration per element of the Series `s`.

    Intended for `Styler.apply`, which passes one Series at a time and
    expects a list of CSS strings of the same length.

    - 'background-color: grey'  where the element's label equals `s.name`
      (the diagonal when row and column labels coincide);
    - 'background-color: green' where the value is 'x';
    - ''                        otherwise.
    """
    # Series.items() replaces Series.iteritems(), which was removed in
    # pandas 2.0; items() is available on older versions as well.
    return [
        'background-color: grey' if k == s.name else
        'background-color: green' if v == 'x' else ''
        for k, v in s.items()
    ]
# Style the citation matrix: colour cells via highlight_max, centre the cell
# text, and centre the header cells as well.
df_style = (
    df.style
    .apply(highlight_max)
    .set_properties(**{'text-align': 'center'})
    .set_table_styles([
        {"selector": "th", "props": [("text-align", "center")]},
    ])
)
df_style

cited,id,2001 S1,2007 S2,2008 S3,2015 S4,2011 S5,2010 S6,2009 S7,2010 S8,2010 S9,1999 S10,2005 S11,2008 S12,2011 S13,2009 S14,1997 S15,2010 S16,2005 S17,2011 S18,2008 S19,2015 S20,2000 S21,2010 S22
ref,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2001 S1,waina2001a,,-,-,-,-,-,-,-,-,,-,-,-,-,,-,-,-,-,-,,-
2007 S2,wilkie2007a,,,-,-,-,-,-,-,-,,,-,-,-,,-,,-,-,-,,-
2008 S3,becker2008a,,,,-,-,-,-,-,-,,,,-,-,,-,,-,,-,,-
2015 S4,petersen2015a,,,,,x,,,,x,,,,,,,,,,,,,x
2011 S5,kaneko2011a,,,,-,,,,,,,,,,,,,,,,-,,
2010 S6,barreto2010a,,,,-,-,,,,,,,,-,,,,,-,x,-,,
2009 S7,trienekens2009a,,,,-,-,-,,-,-,,,,-,,,-,,-,,-,,-
2010 S8,guzmán2010a,,,,-,-,,x,,x,,,,-,,,,,-,,-,,
2010 S9,basili2010a,,,,-,-,,,,,,,,-,,,,,-,,-,,
1999 S10,sommerville1999a,-,-,-,-,-,-,-,-,-,,-,-,-,-,,-,-,-,-,-,-,-


In [27]:
# Example: in backward snowballing, 2010 S8 (guzmán2010a) finds S7 and S9. In forward snowballing, S5 (kaneko2011a) finds 2015 S4 (petersen2015a), which cites it.

In [16]:
# Save the styled citation matrix as an HTML file.
# open() does not create missing directories, so make sure "output" exists.
os.makedirs("output", exist_ok=True)
# Styler.render() was removed in pandas 2.0; prefer Styler.to_html() and fall
# back to render() only on pandas versions that predate to_html().
styler_to_html = getattr(df_style, "to_html", None) or df_style.render
with open("output/table.html", "wb") as html:
    html.write(styler_to_html().encode("utf-8"))

In [None]:
!pip install pyside


In [26]:
# Per-study summary table. For every study in `order` it lists how many works
# are in strategy.ref[work] ("backward") and strategy.rev_ref[work]
# ("forward"), how many of those have category "snowball" ("selected"), and
# the comma-separated metakeys of each group.
strategy = Strategy({})
# The header is stored as the first row of the matrix, so it appears as
# row 0 of the resulting DataFrame.
matrix = [[
    "S", "varname",
    "total backward", "selected backward",
    "total forward", "selected forward",
    "backward list", "selected backward list",
    "forward list", "selected forward list"
]]
for varname, index in order.items():
    work = work_by_varname(varname)
    backward_all = strategy.ref[work]
    backward_snowball = [w for w in backward_all if w.category == "snowball"]
    forward_all = strategy.rev_ref[work]
    forward_snowball = [w for w in forward_all if w.category == "snowball"]
    groups = (backward_all, backward_snowball, forward_all, forward_snowball)
    # Row layout: label, varname, the four counts, then the four metakey lists.
    matrix.append(
        ["S{}".format(index + 1), varname]
        + [len(group) for group in groups]
        + [", ".join(w.metakey for w in group) for group in groups]
    )
# Truncate long list cells in the display; raise this (e.g. to 1000) to see
# the complete lists.
pd.set_option('display.max_colwidth', 50)
df = pd.DataFrame(matrix)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,S,varname,total backward,selected backward,total forward,selected forward,backward list,selected backward list,forward list,selected forward list
1,S1,waina2001a,26,0,2,0,"whitney1994a, dunaway1996a, dedolph1994a, team...",,"dounos2010a, ye2010a",
2,S2,wilkie2007a,20,0,10,0,"anacleto2004a, anacleto2004b, chrissis2003a, c...",,"napier2009a, lester2010a, laporte2009a, zarour...",
3,S3,becker2008a,24,0,1,0,"akao1990a, biró1999a, brodman1995a, chrissis20...",,spósito2016a,
4,S4,petersen2015a,57,3,11,0,"ali2012a, ardimento2006a, baca2013a, basili200...","basili2010a, kaneko2011a, mandić2010b","petersen2014a, tahir2016a, trinkenreich2017a, ...",
5,S5,kaneko2011a,11,0,15,1,"kathuria2007a, basili2007b, basili2009a, basil...",,"kobori2014a, aoki2016a, kobori2016a, razón2014...",petersen2015a
6,S6,barreto2010a,22,1,10,0,"conradi2002a, becker2008b, softex2010a, rocha2...",becker2008b,"lepmets2012a, barcellos2010a, barreto2010b, va...",
7,S7,trienekens2009a,13,0,22,1,"balla2001a, boldyreff1997a, boltzmann2000a, ha...",,"guzmán2010a, colomo2011a, haigh2010a, lee2013a...",guzmán2010a
8,S8,guzmán2010a,45,2,21,0,"basili2010a, asgarkhani2006a, basili2009b, nee...","basili2010a, trienekens2009a","lepmets2012a, becker2012a, pryor2011a, vasconc...",
9,S9,basili2010a,11,0,142,4,"basili1994a, basili2007a, kaplan1992a, usddusa...",,"wallace2014a, paternoster2014a, novais2013a, l...","guzmán2010a, mandić2010a, mandić2010b, peterse..."
