# Data Envelopment Analysis

In [17]:
import pandas as pd
from pulp import *

In [18]:
# Inputs table: the first CSV column is renamed to 'name' (the DMU key);
# every remaining column is recorded as an input criterion.
df_input = pd.read_csv("data/inputs_test.csv", sep=';')
input_columns = df_input.columns[1:].tolist()
df_input.columns = ['name', *input_columns]

# Outputs table: same layout, remaining columns are the output criteria.
df_output = pd.read_csv("data/outputs_test.csv", sep=';')
output_columns = df_output.columns[1:].tolist()
df_output.columns = ['name', *output_columns]

# One row per DMU holding both its inputs and its outputs (joined on 'name').
df = df_input.merge(df_output, on='name')
df

Unnamed: 0,name,i1,o1,o2
0,A,160,48,68
1,B,160,12,80
2,C,160,45,87
3,D,160,31,100
4,E,160,20,70
5,F,160,20,105
6,G,160,36,53
7,H,160,15,65


## Obliczanie efektywności

In [19]:
# Input-oriented DEA efficiency: one linear program per evaluated DMU.
#   minimise    theta
#   subject to  for every input column:   sum_j x_j * input_j  <= theta * input_evaluated
#               for every output column:  sum_j x_j * output_j >= output_evaluated
#               theta >= 0, x_j >= 0
# `solutions` maps each DMU's index label to {variable name: optimal value}.
solutions = {}
for idx_dmu, dmu in df.iterrows():

    problem = LpProblem("dea", LpMinimize)

    # Factor by which the evaluated DMU's inputs are scaled on the right-hand side.
    theta = LpVariable("theta", 0)

    # One non-negative weight per DMU, keyed by the DMU's index label.
    decision_variables = {idx: LpVariable(f"x_{idx}", 0) for idx in df.index}

    for column in df.columns:
        if column in input_columns:
            problem += lpSum(value*decision_variables[idx] for value, idx in zip(df.loc[:, column], df.index)) <= dmu[column]*theta
        if column in output_columns:
            problem += lpSum(value*decision_variables[idx] for value, idx in zip(df.loc[:, column], df.index)) >= dmu[column]

    # A bare expression added to the problem becomes its objective.
    problem += theta
    problem.solve(solver=GLPK(msg=False))

    # Fail here, next to the cause, instead of storing None/meaningless values
    # that would only blow up later when the results are formatted.
    status = LpStatus[problem.status]
    if status != "Optimal":
        raise RuntimeError(
            f"DEA efficiency model for DMU index {idx_dmu} was not solved to optimality (status: {status})"
        )

    solution = {variable.name: variable.varValue for variable in problem.variables()}
    solutions[idx_dmu] = solution

In [20]:
# Report the optimal theta (efficiency score) found for every DMU.
for idx in solutions:
    solution = solutions[idx]
    dmu_name = df.loc[idx, 'name']
    efficiency = solution['theta']
    print(f"DMU index: {idx}, DMU name: {dmu_name}, Efficiency: {efficiency:.3f}")

DMU index: 0, DMU name: A, Efficiency: 1.000
DMU index: 1, DMU name: B, Efficiency: 0.762
DMU index: 2, DMU name: C, Efficiency: 1.000
DMU index: 3, DMU name: D, Efficiency: 1.000
DMU index: 4, DMU name: E, Efficiency: 0.693
DMU index: 5, DMU name: F, Efficiency: 1.000
DMU index: 6, DMU name: G, Efficiency: 0.755
DMU index: 7, DMU name: H, Efficiency: 0.629


## Obliczanie HCU i poprawek

In [21]:
# For every DMU and every input: the HCU level is the current input scaled by
# theta, and the improvement is the gap between the current level and the HCU.
for idx, solution in solutions.items():
    dmu_name = df.loc[idx, 'name']
    header = f"DMU index: {idx}, DMU name: {dmu_name}"
    separator = "-" * len(header)
    print(separator)
    print(header)
    for input_column in input_columns:
        current_level = df.loc[idx, input_column]
        hcu = solution['theta'] * current_level
        improvement = current_level - hcu
        print(f"For input: {input_column}: HCU: {hcu:.3f}, improvement needed: {improvement:.3f}")

-------------------------
DMU index: 0, DMU name: A
For input: i1: HCU: 160.000, improvement needed: 0.000
-------------------------
DMU index: 1, DMU name: B
For input: i1: HCU: 121.905, improvement needed: 38.095
-------------------------
DMU index: 2, DMU name: C
For input: i1: HCU: 160.000, improvement needed: 0.000
-------------------------
DMU index: 3, DMU name: D
For input: i1: HCU: 160.000, improvement needed: 0.000
-------------------------
DMU index: 4, DMU name: E
For input: i1: HCU: 110.916, improvement needed: 49.084
-------------------------
DMU index: 5, DMU name: F
For input: i1: HCU: 160.000, improvement needed: 0.000
-------------------------
DMU index: 6, DMU name: G
For input: i1: HCU: 120.860, improvement needed: 39.140
-------------------------
DMU index: 7, DMU name: H
For input: i1: HCU: 100.717, improvement needed: 59.283


## Obliczanie superefektywności

In [23]:
# Super-efficiency (multiplier form): one linear program per evaluated DMU.
#   maximise    sum_r u_r * output_r(evaluated)
#   subject to  sum_i v_i * input_i(evaluated) == 1
#               sum_r u_r * output_r(j) <= sum_i v_i * input_i(j)   for every OTHER DMU j
#               u_r >= 0, v_i >= 0
# The evaluated DMU is left out of the "<=" constraints, so its own score is
# not capped by its own constraint.
# `solutions` maps each DMU's index label to {variable name: optimal value}.
solutions = {}
for idx_dmu in df.index:

    problem = LpProblem("dea", LpMaximize)

    # Weights are named after the column's position in df ("v_<pos>" for
    # inputs, "u_<pos>" for outputs); the reporting cell relies on these names.
    decision_variables_v = {}
    decision_variables_u = {}
    for idx_col, column in enumerate(df.columns):
        if column in input_columns:
            decision_variables_v[column] = LpVariable(f"v_{idx_col}", 0)
        if column in output_columns:
            decision_variables_u[column] = LpVariable(f"u_{idx_col}", 0)

    for idx in df.index:
        if idx == idx_dmu:
            # Skipping the evaluated DMU is what turns efficiency into super-efficiency.
            continue
        weighted_outputs = lpSum(value*variable for value, variable in zip(df.loc[idx, output_columns], decision_variables_u.values()))
        weighted_inputs = lpSum(value*variable for value, variable in zip(df.loc[idx, input_columns], decision_variables_v.values()))
        problem += weighted_outputs <= weighted_inputs

    # Normalisation: the evaluated DMU's weighted inputs are fixed to 1.
    problem += lpSum(value*variable for value, variable in zip(df.loc[idx_dmu, input_columns], decision_variables_v.values())) == 1

    # Objective: the evaluated DMU's weighted outputs.
    problem += lpSum(value*variable for value, variable in zip(df.loc[idx_dmu, output_columns], decision_variables_u.values()))
    problem.solve(solver=GLPK(msg=False))

    # With the evaluated DMU excluded the model is not guaranteed to have a
    # finite optimum; stop here rather than report values from a failed solve.
    status = LpStatus[problem.status]
    if status != "Optimal":
        raise RuntimeError(
            f"Super-efficiency model for DMU index {idx_dmu} was not solved to optimality (status: {status})"
        )

    solution = {variable.name: variable.varValue for variable in problem.variables()}
    solutions[idx_dmu] = solution

In [26]:
# Super-efficiency of a DMU = its outputs weighted by the optimal output weights.
# The weights are looked up by variable name ("u_<position of the column in df>")
# instead of by their order in the solution dict: that order is alphabetical
# ("u_10" comes before "u_2"), and a weight that never received a non-zero
# coefficient is missing from the solution altogether, so a positional pairing
# with output_columns can silently mix up the weights.
column_positions = {column: position for position, column in enumerate(df.columns)}
for idx, solution in solutions.items():
    # A weight absent from the solution had only zero coefficients, so it contributes nothing.
    output_weights = [solution.get(f"u_{column_positions[column]}", 0.0) for column in output_columns]
    super_efficiency = sum(value*weight for value, weight in zip(df.loc[idx, output_columns], output_weights))
    print(f"DMU index: {idx}, DMU name: {df.loc[idx, 'name']}, Efficiency: {super_efficiency:.3f}")

DMU index: 0, DMU name: A, Efficiency: 1.067
DMU index: 1, DMU name: B, Efficiency: 0.762
DMU index: 2, DMU name: C, Efficiency: 1.084
DMU index: 3, DMU name: D, Efficiency: 1.024
DMU index: 4, DMU name: E, Efficiency: 0.693
DMU index: 5, DMU name: F, Efficiency: 1.050
DMU index: 6, DMU name: G, Efficiency: 0.755
DMU index: 7, DMU name: H, Efficiency: 0.629


## Obliczanie efektywności krzyżowej