# PyFLAGR

In [2]:
# Import the PyFLAGR modules for rank aggregation
import pyflagr.Linear as Linear
import pyflagr.Majoritarian as Majoritarian
import pyflagr.MarkovChains as MarkovChains
import pyflagr.Kemeny as Kemeny
import pyflagr.RRA as RRA
import pyflagr.Weighted as Weighted

import numpy as np
import pandas as pd

In [3]:
# Per-voter preference tables for query Q1: one row per item ("feature") with
# the score that voter assigned to it. These frames are also the input for the
# ranx runs built later in the notebook.
voter1_preferences = pd.DataFrame({
    "query": ["Q1", "Q1", "Q1", "Q1"],
    "feature": ["A", "B", "C", "D"],
    "score": [4.0, 3.0, 2.0, 1.0]
})

voter2_preferences = pd.DataFrame({
    "query": ["Q1", "Q1", "Q1", "Q1"],
    "feature": ["A", "B", "C", "D"],
    "score": [1.0, 2.0, 3.0, 4.0]
})

voter3_preferences = pd.DataFrame({
    "query": ["Q1", "Q1", "Q1", "Q1"],
    "feature": ["A", "B", "C", "D"],
    "score": [2.0, 1.0, 4.0, 3.0]
})

# Voter id -> preference table; dict order fixes the V1, V2, V3 order in which
# the voters appear for every item.
voter_preferences = {
    "V1": voter1_preferences,
    "V2": voter2_preferences,
    "V3": voter3_preferences,
}

# Long-format vote table: one row per (item position, voter) pair, grouped by
# item so the three votes for the same row position sit next to each other.
# All records are collected first and the frame is built once; growing a frame
# with pd.concat inside a loop copies it on every iteration and starts from an
# empty frame whose dtypes concat has to guess around.
# Query, item code and score are read from each voter's own table, so a score
# is never paired with another voter's item label.
vote_records = [
    {
        "Query": preferences["query"].iloc[row],
        "Voter": voter_id,
        "Item Code": preferences["feature"].iloc[row],
        "Item Score": preferences["score"].iloc[row],
        "Algorithm/Dataset": "test",
    }
    for row in range(len(voter1_preferences))
    for voter_id, preferences in voter_preferences.items()
]

# Passing `columns` pins the column order and keeps the schema even when there
# are no records.
testdata = pd.DataFrame(
    vote_records,
    columns=["Query", "Voter", "Item Code", "Item Score", "Algorithm/Dataset"],
)

# Render the assembled vote table (one row per voter/item pair) as the cell output.
display(testdata)

Unnamed: 0,Query,Voter,Item Code,Item Score,Algorithm/Dataset
0,Q1,V1,A,4.0,test
1,Q1,V2,A,1.0,test
2,Q1,V3,A,2.0,test
3,Q1,V1,B,3.0,test
4,Q1,V2,B,2.0,test
5,Q1,V3,B,1.0,test
6,Q1,V1,C,2.0,test
7,Q1,V2,C,3.0,test
8,Q1,V3,C,4.0,test
9,Q1,V1,D,1.0,test


In [4]:
# Write the vote table to "out.csv" in the working directory, without a header
# row and without the index column.
# NOTE(review): no visible cell reads out.csv back (the aggregation below is fed
# the DataFrame directly) — presumably kept for file-based input; confirm or drop.
testdata.to_csv("out.csv", index=False, header=False)


In [5]:

# Replace every column label with None, so the frame no longer has usable names.
# NOTE(review): this mutates `testdata` in place — every later cell sees the
# unlabeled frame, and the labelled version can only be recovered by re-running
# the cell that builds it. Presumably done so PyFLAGR receives header-less
# input; confirm it is actually required.
testdata.columns = [None] * len(testdata.columns)
display(testdata)

Unnamed: 0,None,None.1,None.2,None.3,None.4
0,Q1,V1,A,4.0,test
1,Q1,V2,A,1.0,test
2,Q1,V3,A,2.0,test
3,Q1,V1,B,3.0,test
4,Q1,V2,B,2.0,test
5,Q1,V3,B,1.0,test
6,Q1,V1,C,2.0,test
7,Q1,V2,C,3.0,test
8,Q1,V3,C,4.0,test
9,Q1,V1,D,1.0,test


In [6]:
# Instantiate the Agglomerative aggregator from PyFLAGR's Weighted module.
# NOTE(review): the meaning of c1, c2 and eval_pts is not visible in this
# notebook — check the PyFLAGR documentation before tuning them.
agg = Weighted.Agglomerative(c1=0.1, c2=0.2, eval_pts=7)

# Aggregate the in-memory vote table; the return value is shown as the cell output.
# NOTE(review): the recorded output is a 2-tuple (result frame, empty frame), and
# the result frame has a fifth row holding NaN with score -10.0 although only four
# items (A-D) were voted on — verify the expected input layout.
agg.aggregate(input_df=testdata)

(      Query Voter  ItemID      Score
 Q1  PyFLAGR     C       1   2.503846
 Q1  PyFLAGR     A       2   1.773077
 Q1  PyFLAGR     D       3   0.246154
 Q1  PyFLAGR     B       4  -0.484615
 Q1  PyFLAGR   NaN       5 -10.000000,
 Empty DataFrame
 Columns: []
 Index: [])

# ranx

In [7]:
from ranx import Run, fuse, evaluate, optimize_fusion

In [8]:
# Build one ranx Run per voter. The three preference tables share the same
# column layout, so a single Run.from_df call is applied to each in turn and the
# results are unpacked in voter order.
run1, run2, run3 = (
    Run.from_df(preferences, q_id_col="query", doc_id_col="feature", score_col="score")
    for preferences in (voter1_preferences, voter2_preferences, voter3_preferences)
)

In [9]:
# Fuse the voter runs into a single ranking with ranx.
# NOTE(review): only run1 and run2 are passed (with two matching weights); run3
# is defined in this notebook but never fused, unlike the PyFLAGR section which
# uses all three voters — confirm voter 3 is excluded on purpose.
combined_run = fuse(
    runs=[run1, run2],  # A list of Run instances to fuse
    norm="min-max",       # The normalization strategy to apply before fusion
    method="w_condorcet",         # The fusion algorithm to use
    params={"weights": [2, 1]}      # The weights to apply to the runs before fusion
)

In [10]:
# Show the fused run as a DataFrame (the recorded output has q_id, doc_id and score columns).
combined_run.to_dataframe()

Unnamed: 0,q_id,doc_id,score
0,Q1,A,4.0
1,Q1,B,3.0
2,Q1,C,2.0
3,Q1,D,1.0
