In [12]:
import pandas as pd

data = pd.read_csv('ratp.csv')
display(data)

Unnamed: 0,peak-entering-passengers/h,peak-passing-passengers/h,off-peak-entering-passengers/h,off-peak-passing-passengers/h,"strategic priority [0,10]","Station degradation level ([0,20] scale)","connectivity index [0,100]",name
0,85000,8100,35500,3450,75,16.2,88,Odéon (Ligne 4)
1,81000,8100,37500,3150,67,17.6,95,Place d'Italie (Lign 6)
2,74000,8900,37000,4050,68,16.8,79,Jussieu (Ligne 7)
3,74000,7100,42000,4550,77,15.2,73,Nation (Ligne 9)
4,72000,7500,33000,4250,88,13.2,93,La Motte Picquet-Grenelle (Ligne 10)
5,71000,7300,31500,4600,76,15.8,93,Porte d'Orléans (Ligne 4)
6,79000,6900,39000,3800,67,16.8,79,Daumenil (Ligne 6)
7,57000,7600,40500,3800,82,17.2,77,Vaugirard (Ligne 12)
8,84000,7900,34000,3300,74,15.8,85,Oberkampf (Ligne 9)
9,72000,8700,36000,4000,66,16.6,78,Reuilly-Diderot (Ligne 1)


In [13]:
WEIGHTS = [0.021, 0.188, 0.038, 0.322, 16.124, 67.183, 16.124]

def score(row):
    s = 0
    for i, w in enumerate(WEIGHTS):
        s += row.iloc[i] * w
    return s

data['score'] = data.apply(lambda row: score(row), axis=1)

# sort by score descending
data = data.sort_values(by='score', ascending=False)
display(data[['score'] + list(data.columns[:-1])])


Unnamed: 0,score,peak-entering-passengers/h,peak-passing-passengers/h,off-peak-entering-passengers/h,off-peak-passing-passengers/h,"strategic priority [0,10]","Station degradation level ([0,20] scale)","connectivity index [0,100]",name
0,9484.2766,85000,8100,35500,3450,75,16.2,88,Odéon (Ligne 4)
1,9457.6088,81000,8100,37500,3150,67,17.6,95,Place d'Italie (Lign 6)
2,9436.2024,74000,8900,37000,4050,68,16.8,79,Jussieu (Ligne 7)
3,9389.6816,74000,7100,42000,4550,77,15.2,73,Nation (Ligne 9)
4,9349.7596,72000,7500,33000,4250,88,13.2,93,La Motte Picquet-Grenelle (Ligne 10)
5,9328.0474,71000,7300,31500,4600,76,15.8,93,Porte d'Orléans (Ligne 4)
9,9240.6938,72000,8700,36000,4000,66,16.6,78,Reuilly-Diderot (Ligne 1)
8,9229.0074,84000,7900,34000,3300,74,15.8,85,Oberkampf (Ligne 9)
6,9144.5784,79000,6900,39000,3800,67,16.8,79,Daumenil (Ligne 6)
7,9107.6636,57000,7600,40500,3800,82,17.2,77,Vaugirard (Ligne 12)


In [None]:
def get_record(data, index):
    return data.iloc[index].tolist()[:-2]

def get_two_consecutive_records(data):
    for i in range(len(data) - 1):
        yield data.iloc[i].tolist()[:-2], data.iloc[i + 1].tolist()[:-2]    

# map: record(tuple) -> name
record_dict = {}
for i in range(len(data)):
    record = get_record(data, i)
    record_dict[tuple(record)] = data.iloc[i]['name']

In [11]:
ONE_TO_MANY_EXPLAINED = 0
MANY_TO_ONE_EXPLAINED = 0
COMBINED_EXPLAINED = 0

EXPLANATION_TYPES = ["1-m", "m-1", ["1-m", "m-1"]]

INFEASIBLE_RECORDS = []


from script import find_explanation

for record1, record2 in get_two_consecutive_records(data):

    for explanation_type in EXPLANATION_TYPES:
        explanation = find_explanation(record1, record2, explanation_type, verbose=False)
        if explanation_type == "1-m" and explanation["status"] != "infeasible":
            ONE_TO_MANY_EXPLAINED += 1
        elif explanation_type == "m-1" and explanation["status"] != "infeasible":
            MANY_TO_ONE_EXPLAINED += 1
        elif explanation_type == ["1-m", "m-1"] and explanation["status"] != "infeasible":
            COMBINED_EXPLAINED += 1
        else:
            INFEASIBLE_RECORDS.append((record1, record2, explanation_type))


print(f"One-to-Many Explained: {ONE_TO_MANY_EXPLAINED / 10 * 100}%")
print(f"Many-to-One Explained: {MANY_TO_ONE_EXPLAINED / 10 * 100}%")
print(f"Combined Explained: {COMBINED_EXPLAINED / 10 * 100}%")
print("=" * 40)
print(f"Infeasible Records: {len(INFEASIBLE_RECORDS)}")
for rec1, rec2, expl_type in INFEASIBLE_RECORDS:
    print(f"Record1: {rec1}")
    print(f"Record2: {rec2}")
    print(f"Explanation Type: {expl_type}")
    print("-" * 40)



One-to-Many Explained: 20.0%
Many-to-One Explained: 30.0%
Combined Explained: 50.0%
Infeasible Records: 17
Record1: [81000.0, 8100.0, 37500.0, 3150.0, 67.0, 17.6, 95.0]
Record2: [74000.0, 8900.0, 37000.0, 4050.0, 68.0, 16.8, 79.0]
Explanation Type: 1-m
----------------------------------------
Record1: [81000.0, 8100.0, 37500.0, 3150.0, 67.0, 17.6, 95.0]
Record2: [74000.0, 8900.0, 37000.0, 4050.0, 68.0, 16.8, 79.0]
Explanation Type: m-1
----------------------------------------
Record1: [81000.0, 8100.0, 37500.0, 3150.0, 67.0, 17.6, 95.0]
Record2: [74000.0, 8900.0, 37000.0, 4050.0, 68.0, 16.8, 79.0]
Explanation Type: ['1-m', 'm-1']
----------------------------------------
Record1: [74000.0, 8900.0, 37000.0, 4050.0, 68.0, 16.8, 79.0]
Record2: [74000.0, 7100.0, 42000.0, 4550.0, 77.0, 15.2, 73.0]
Explanation Type: 1-m
----------------------------------------
Record1: [74000.0, 8900.0, 37000.0, 4050.0, 68.0, 16.8, 79.0]
Record2: [74000.0, 7100.0, 42000.0, 4550.0, 77.0, 15.2, 73.0]
Explanatio

In [8]:
from script import find_explanation

for record1, record2 in get_two_consecutive_records(data):

    explanation = find_explanation(record1, record2)
    print(f"Explanation: {explanation}")
    print("-" * 40)
    break


WEIGHTED CONTRIBUTION TABLE (Record 1 - Record 2)
|   A    |   B    |   C    |   D    |   E    |   F    |   G    |
|---------|---------|---------|---------|---------|---------|---------|
| +84.000 |  +0.000 | -76.000 | +96.600 | +128.992 | -94.056 | -112.868 |
|---------|---------|---------|---------|---------|---------|---------|

Total weighted difference: +26.668
[✓] Record 1 is preferred to Record 2 (positive total)

PROS AND CONS ANALYSIS

Pros (positive contributions):
  A: +84.000
  D: +96.600
  E: +128.992

Cons (negative contributions):
  C: -76.000
  F: -94.056
  G: -112.868

SOLVING FOR (1-m) EXPLANATION

OPTIMAL (1-m) EXPLANATION FOUND!

Minimum number of trade-offs: 3

--------------------------------------------------------------------------------
TRADE-OFFS:
--------------------------------------------------------------------------------

  Trade-off: (A, {C})
    - Pro contribution [A]: +84.000
    - Con contribution [C]: -76.000
    - Total: +8.000
    - Valid: True



In [None]:
from script import find_explanation

record1 = [85, 81, 71, 69, 75, 81, 88]
record2 = [81, 81, 75, 63, 67, 88, 95]

# For (1-m) explanation
result = find_explanation(record1, record2, explanation_type="1-m")

# For (m-1) explanation
result = find_explanation(record1, record2, explanation_type="m-1")

# For hybrid explanation
result = find_explanation(record1, record2, explanation_type=["1-m", "m-1"])

