# Data Envelopment Analysis

In [100]:
import pandas as pd
from pulp import *

# Render every float in DataFrame/Series output with exactly three decimals.
pd.set_option('display.float_format', '{:.3f}'.format)

In [101]:
def _read_dmu_table(path):
    """Read a ';'-separated table and label its first column 'name'.

    Returns the frame together with the list of its remaining column names.
    """
    table = pd.read_csv(path, sep=';')
    measure_columns = list(table.columns[1:])
    table.columns = ['name'] + measure_columns
    return table, measure_columns


df_input, input_columns = _read_dmu_table("data/inputs.csv")
df_output, output_columns = _read_dmu_table("data/outputs.csv")

# Join inputs and outputs on the shared 'name' column (default inner join).
df = df_input.merge(df_output, on='name')
df

Unnamed: 0,name,i1,i2,i3,i4,o1,o2
0,WAW,10.5,36,129.4,7.0,9.5,129.7
1,KRK,3.1,19,31.6,7.9,2.9,31.3
2,KAT,3.6,32,57.4,10.5,2.4,21.1
3,WRO,1.5,12,18.0,3.0,1.5,18.8
4,POZ,1.5,10,24.0,4.0,1.3,16.2
5,LCJ,0.6,12,24.0,3.9,0.3,4.2
6,GDN,1.0,15,42.9,2.5,2.0,23.6
7,SZZ,0.7,10,25.7,1.9,0.3,4.2
8,BZG,0.3,6,3.4,1.2,0.3,6.2
9,RZE,0.6,6,11.3,2.7,0.3,3.5


## Obliczanie efektywności

In [102]:
def efficiency():
    """Solve one minimisation LP per row of the global ``df`` and return theta.

    For each row (DMU) the model has a non-negative variable ``theta`` and one
    non-negative variable per row of ``df``. For every column listed in
    ``input_columns`` the weighted sum of that column must not exceed the
    DMU's own value times ``theta``; for every column listed in
    ``output_columns`` the weighted sum must be at least the DMU's own value.
    ``theta`` is minimised with GLPK.

    Returns:
        dict mapping each index label of ``df`` to the solved value of theta.
    """
    thetas = {}
    for dmu_label, dmu_row in df.iterrows():
        model = LpProblem("dea", LpMinimize)
        theta = LpVariable("theta", 0)
        # One non-negative multiplier per DMU in the data set.
        multipliers = {label: LpVariable(f"x_{label}", 0) for label in df.index}

        for column in df.columns:
            is_input = column in input_columns
            is_output = column in output_columns
            if not (is_input or is_output):
                # e.g. the 'name' column takes no part in the model
                continue
            combination = lpSum(value*multipliers[label] for label, value in df[column].items())
            if is_input:
                model += combination <= dmu_row[column]*theta
            if is_output:
                model += combination >= dmu_row[column]

        # Objective is added after the constraints, as a bare expression.
        model += theta
        model.solve(solver=GLPK(msg=False))
        thetas[dmu_label] = theta.varValue

    return thetas

In [103]:
# Solve the model for every DMU and report the score next to the DMU's name.
results = efficiency()
for idx in results:
    dmu_name = df.loc[idx, 'name']
    eff = results[idx]
    print(f"DMU index: {idx}, DMU name: {dmu_name}, Efficiency: {eff:.3f}")

DMU index: 0, DMU name: WAW, Efficiency: 1.000
DMU index: 1, DMU name: KRK, Efficiency: 1.000
DMU index: 2, DMU name: KAT, Efficiency: 0.591
DMU index: 3, DMU name: WRO, Efficiency: 1.000
DMU index: 4, DMU name: POZ, Efficiency: 0.800
DMU index: 5, DMU name: LCJ, Efficiency: 0.300
DMU index: 6, DMU name: GDN, Efficiency: 1.000
DMU index: 7, DMU name: SZZ, Efficiency: 0.271
DMU index: 8, DMU name: BZG, Efficiency: 1.000
DMU index: 9, DMU name: RZE, Efficiency: 0.409
DMU index: 10, DMU name: IEG, Efficiency: 0.258


## Obliczanie HCU i poprawek

In [104]:
# For each DMU, scale every input by its efficiency score (reported as HCU)
# and show how far the current input value is from that scaled value.
for idx, eff in results.items():
    header = f"DMU index: {idx}, DMU name: {df.loc[idx, 'name']}"
    print("-"*len(header), header, sep="\n")
    for input_column in input_columns:
        current_value = df.loc[idx, input_column]
        hcu = eff * current_value
        improvement = current_value - hcu
        print(f"For input: {input_column}: HCU: {hcu:.3f}, improvement needed: {improvement:.3f}")

---------------------------
DMU index: 0, DMU name: WAW
For input: i1: HCU: 10.500, improvement needed: 0.000
For input: i2: HCU: 36.000, improvement needed: 0.000
For input: i3: HCU: 129.400, improvement needed: 0.000
For input: i4: HCU: 7.000, improvement needed: 0.000
---------------------------
DMU index: 1, DMU name: KRK
For input: i1: HCU: 3.100, improvement needed: 0.000
For input: i2: HCU: 19.000, improvement needed: 0.000
For input: i3: HCU: 31.600, improvement needed: 0.000
For input: i4: HCU: 7.900, improvement needed: 0.000
---------------------------
DMU index: 2, DMU name: KAT
For input: i1: HCU: 2.128, improvement needed: 1.472
For input: i2: HCU: 18.919, improvement needed: 13.081
For input: i3: HCU: 33.935, improvement needed: 23.465
For input: i4: HCU: 6.208, improvement needed: 4.292
---------------------------
DMU index: 3, DMU name: WRO
For input: i1: HCU: 1.500, improvement needed: 0.000
For input: i2: HCU: 12.000, improvement needed: 0.000
For input: i3: HCU: 18.

## Obliczanie superefektywności

In [105]:
# One maximisation LP per DMU: choose non-negative input weights (v) and output
# weights (u) so that the DMU's weighted inputs equal 1, every OTHER DMU's
# weighted outputs do not exceed its weighted inputs, and the DMU's own
# weighted outputs are as large as possible. The DMU under evaluation is left
# out of the ratio constraints.
solutions = {}
for idx_dmu in df.index:
    decision_variables_v = {column: LpVariable(f"v_{column}", 0) for column in input_columns}
    decision_variables_u = {column: LpVariable(f"u_{column}", 0) for column in output_columns}
    input_weights = list(decision_variables_v.values())
    output_weights = list(decision_variables_u.values())

    problem = LpProblem("dea", LpMaximize)

    # Normalisation: weighted inputs of the evaluated DMU sum to 1.
    own_inputs = df.loc[idx_dmu, input_columns]
    problem += lpSum(value*variable for value, variable in zip(own_inputs, input_weights)) == 1

    for idx in df.index:
        if idx != idx_dmu:
            weighted_outputs = lpSum(value*variable for value, variable in zip(df.loc[idx, output_columns], output_weights))
            weighted_inputs = lpSum(value*variable for value, variable in zip(df.loc[idx, input_columns], input_weights))
            problem += weighted_outputs <= weighted_inputs

    # Objective: weighted outputs of the evaluated DMU.
    own_outputs = df.loc[idx_dmu, output_columns]
    problem += lpSum(value*variable for value, variable in zip(own_outputs, output_weights))
    problem.solve(solver=GLPK(msg=False))

    # Keep every solved variable, keyed by its solver name.
    solution = {variable.name: variable.varValue for variable in problem.variables()}
    solutions[idx_dmu] = solution

In [106]:
# Super-efficiency of a DMU = its own outputs weighted by the solved output weights.
for idx, solution in solutions.items():
    # Look each weight up by its variable name ("u_<column>") instead of pairing
    # columns positionally with whatever order the solver listed its variables
    # in; positional pairing silently mismatches once that order differs from
    # output_columns (e.g. "u_o10" sorts before "u_o2").
    # NOTE(review): assumes column names appear verbatim in the variable names.
    super_efficiency = sum(df.loc[idx, column]*solution[f"u_{column}"] for column in output_columns)
    print(f"DMU index: {idx}, DMU name: {df.loc[idx, 'name']}, Efficiency: {super_efficiency:.3f}")

DMU index: 0, DMU name: WAW, Efficiency: 2.278
DMU index: 1, DMU name: KRK, Efficiency: 1.124
DMU index: 2, DMU name: KAT, Efficiency: 0.591
DMU index: 3, DMU name: WRO, Efficiency: 1.040
DMU index: 4, DMU name: POZ, Efficiency: 0.800
DMU index: 5, DMU name: LCJ, Efficiency: 0.300
DMU index: 6, DMU name: GDN, Efficiency: 2.000
DMU index: 7, DMU name: SZZ, Efficiency: 0.271
DMU index: 8, DMU name: BZG, Efficiency: 1.746
DMU index: 9, DMU name: RZE, Efficiency: 0.409
DMU index: 10, DMU name: IEG, Efficiency: 0.258


## Obliczanie efektywności krzyżowej

In [110]:
# Cross-efficiency matrix. Row = DMU whose weights are used, column = DMU that
# is rated with those weights; the diagonal holds each DMU's own efficiency.
df_cross = pd.DataFrame(0.0, index=df.index, columns=df.index)
efficiencies = efficiency()
for idx_dmu in df.index:
    decision_variables_v = {column: LpVariable(f"v_{column}", 0) for column in input_columns}
    decision_variables_u = {column: LpVariable(f"u_{column}", 0) for column in output_columns}

    # Column totals over every DMU except the one whose weights are being chosen.
    other_dmus = df.drop(index=idx_dmu)
    sums_inputs = {column: other_dmus[column].sum() for column in input_columns}
    sums_outputs = {column: other_dmus[column].sum() for column in output_columns}

    problem = LpProblem("dea", LpMinimize)

    # Normalisation: weighted total inputs of the other DMUs equal 1.
    problem += lpSum(value*variable for value, variable in zip(sums_inputs.values(), decision_variables_v.values())) == 1

    for idx in df.index:
        weighted_outputs = lpSum(value*variable for value, variable in zip(df.loc[idx, output_columns], decision_variables_u.values()))
        weighted_inputs = lpSum(value*variable for value, variable in zip(df.loc[idx, input_columns], decision_variables_v.values()))
        if idx == idx_dmu:
            # The chosen weights must reproduce this DMU's own efficiency score.
            problem += weighted_outputs == efficiencies[idx_dmu]*weighted_inputs
        else:
            problem += weighted_outputs <= weighted_inputs

    # Objective: minimise the weighted total outputs of the other DMUs.
    problem += lpSum(value*variable for value, variable in zip(sums_outputs.values(), decision_variables_u.values()))
    problem.solve(solver=GLPK(msg=False))

    # Read the solved weights straight from the variable objects, in column
    # order. Filtering solver variables by name prefix and pairing them
    # positionally with the columns breaks as soon as the solver's variable
    # order differs from the column order (e.g. "u_o10" sorts before "u_o2").
    output_weights = [variable.varValue for variable in decision_variables_u.values()]
    input_weights = [variable.varValue for variable in decision_variables_v.values()]

    for idx_dmu2 in df.index:
        if idx_dmu == idx_dmu2:
            df_cross.loc[idx_dmu, idx_dmu2] = efficiencies[idx_dmu]
        else:
            numerator = sum(value*weight for value, weight in zip(df.loc[idx_dmu2, output_columns], output_weights))
            denominator = sum(value*weight for value, weight in zip(df.loc[idx_dmu2, input_columns], input_weights))
            df_cross.loc[idx_dmu, idx_dmu2] = round(numerator/denominator, 3)

# Label both axes with the DMU names for display.
df_cross.index = df['name']
df_cross.columns = df['name']
df_cross

name,WAW,KRK,KAT,WRO,POZ,LCJ,GDN,SZZ,BZG,RZE,IEG
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
WAW,1.0,0.214,0.108,0.338,0.219,0.058,0.509,0.119,0.279,0.07,0.011
KRK,0.8,1.0,0.456,0.908,0.59,0.136,0.508,0.127,0.961,0.289,0.001
KAT,0.913,1.0,0.591,1.0,0.774,0.259,1.0,0.238,0.973,0.409,0.002
WRO,0.997,1.0,0.47,1.0,0.649,0.153,0.615,0.156,1.0,0.295,0.003
POZ,1.0,1.0,0.563,1.0,0.8,0.255,1.0,0.243,0.909,0.403,0.006
LCJ,0.595,0.491,0.278,0.605,0.512,0.3,1.0,0.261,1.0,0.273,0.078
GDN,0.452,0.468,0.333,0.5,0.433,0.25,1.0,0.214,0.5,0.25,0.025
SZZ,1.0,0.755,0.371,0.856,0.737,0.273,1.0,0.271,1.0,0.346,0.036
BZG,0.55,0.543,0.202,0.573,0.37,0.096,0.302,0.09,1.0,0.17,0.005
RZE,0.903,0.996,0.591,1.0,0.77,0.261,1.0,0.238,1.0,0.409,0.002


In [111]:
# Column-wise mean: the average score each DMU (column) receives across all
# rows of the cross-efficiency matrix.
df_cross.mean(axis=0)

name
WAW   0.794
KRK   0.718
KAT   0.383
WRO   0.756
POZ   0.574
LCJ   0.213
GDN   0.812
SZZ   0.201
BZG   0.863
RZE   0.287
IEG   0.039
dtype: float64