In [24]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

In [25]:
def get_weights(X):
    """Return the row sums of ``X`` divided by the sum of all its entries.

    The weights rounded to 3 decimals are printed as a side effect; the
    returned values are not rounded.

    Parameters
    ----------
    X : 2-D array-like
        Matrix whose rows are summed (``axis=1``).

    Returns
    -------
    numpy.ndarray (for ndarray input)
        One weight per row of ``X``. The weights add up to 1 whenever the
        total of ``X`` is non-zero.
    """
    # The original pre-allocated a zero vector here that was overwritten
    # before being read; that dead assignment has been removed.
    sum_row = np.sum(X, axis=1)
    suma_all = np.sum(sum_row)
    weights = sum_row / suma_all
    print(np.round(weights, 3))
    return weights

In [26]:
def ratio_consistency(n):
    """Return the reference index used as denominator of the consistency ratio.

    Presumably this is Saaty's random consistency index for a matrix of
    order ``n`` (verify the tabulated values against your reference).

    * ``n < 3``: returns 0.
    * ``3 <= n <= 15``: returns the tabulated value for that order.
    * otherwise: a straight line is fitted by ordinary least squares to the
      tabulated values raised to the 6th power (as a function of the order),
      and the 6th root of the prediction at ``n`` is returned.
    """
    # Tabulated values for orders 3, 4, ..., 15 (position k holds order k + 3).
    ri_table = np.array([ 0.58, 0.9, 1.11, 1.24, 1.32, 1.41, 1.45, 1.49, 1.51, 1.48, 1.56, 1.57, 1.59])

    if n < 3:
        return 0
    if 3 <= n <= 15:
        return ri_table[n - 3]

    # Extrapolation beyond the table: regress table**6 on [1, order].
    design = sm.add_constant(np.arange(3, len(ri_table) + 3))
    fitted = sm.OLS(ri_table**6, design).fit()
    return fitted.predict([1, n])[0]**(1/6)


In [27]:
def ahp_method(dataset, tol = 0.008):
    """Derive priority weights and a consistency ratio from a pairwise matrix.

    The matrix is squared repeatedly. After each squaring the normalized row
    sums (``get_weights``) are taken as the weight vector, and the iteration
    stops once the largest absolute change between two consecutive weight
    vectors is below ``tol``. Progress and results are printed.

    Parameters
    ----------
    dataset : 2-D square array-like
        Pairwise comparison matrix. It is copied, never modified.
    tol : float, optional
        Threshold on the infinity norm of the change in the weights.

    Returns
    -------
    weights : numpy.ndarray
        Weight vector from the last iteration.
    rc : float
        Consistency index divided by ``ratio_consistency(n)``. Reported as
        0.0 when that reference index is 0 (orders below 3), instead of
        dividing by zero.

    Notes
    -----
    The loop has no iteration cap: it only ends when ``tol`` is reached.
    """
    banner = '#' * 50
    pairwise = np.copy(dataset)
    n = pairwise.shape[1]

    print(banner)
    print('#' *18 + ' AHP Method ' + '#'*20)
    print(banner)

    previous_weights = get_weights(pairwise)
    powered = pairwise
    while True:
        # np.matmul returns a new array, so `pairwise` itself is never altered.
        powered = np.matmul(powered, powered)
        actual_weights = get_weights(powered)
        norma = np.max(np.abs(actual_weights - previous_weights))
        print('Norm Inf: ', norma)
        if norma < tol:
            break
        previous_weights = actual_weights
    weights = actual_weights

    # Estimate lambda_max as the mean of (A w)_i / w_i on the original matrix.
    V1 = np.matmul(pairwise, weights)
    V1_per = V1/weights
    lamb_max = np.mean(V1_per)

    # Guard the two divisions that blow up for tiny matrices:
    # n == 1 makes (n - 1) zero, and ratio_consistency returns 0 for n < 3.
    cons_ind = (lamb_max - n)/(n - 1) if n > 1 else 0.0
    indicador = ratio_consistency(n)
    rc = cons_ind/indicador if indicador != 0 else 0.0

    print(banner)
    print('lamb_max :', round(lamb_max, 3)  )
    print('CI :', round(cons_ind, 3) )
    print('CR : ', round(rc, 5) )
    print(banner)
    # Weights
    for i in range(0, weights.shape[0]):
        print('w(g'+str(i+1)+'): ', round(weights[i], 3))
    print(banner)
    return weights, rc


In [28]:
# Load the movie table. The labels at positions 1-8 of the raw sheet are kept
# as the criteria columns used later by the simple weighted ranking.
dataset_original = pd.read_excel('imdbmoviesdata.xlsx')
columnas_interes = dataset_original.columns[1:9]
# Use the 'Nombre' column as the row index and remove it from the columns.
dataset_original = dataset_original.set_index('Nombre')

In [30]:
# Work on a copy so that dataset_original keeps the raw values.
dataset_normalized = dataset_original.copy()

# Min-max normalization helpers
def norm_column_min(col):
    """Min-max scale ``col`` to [0, 1] with the direction reversed.

    The largest value maps to 0 and the smallest to 1, so this suits
    criteria where a smaller raw value should score higher.

    A constant column has zero spread; it is returned as all zeros instead
    of the NaN values that a 0/0 division would produce.

    Parameters
    ----------
    col : pandas.Series (or any array type with ``max``/``min``/``astype``)

    Returns
    -------
    Same container type as ``col`` with float values.
    """
    highest = col.max()
    span = highest - col.min()
    distance = highest - col
    if span == 0:
        # Every entry equals the maximum, so the distances are already 0.
        return distance.astype(float)
    return distance/span

def norm_column_max(col):
    """Min-max scale ``col`` to [0, 1].

    The smallest value maps to 0 and the largest to 1, so this suits
    criteria where a larger raw value should score higher.

    A constant column has zero spread; it is returned as all zeros instead
    of the NaN values that a 0/0 division would produce.

    Parameters
    ----------
    col : pandas.Series (or any array type with ``max``/``min``/``astype``)

    Returns
    -------
    Same container type as ``col`` with float values.
    """
    lowest = col.min()
    span = col.max() - lowest
    distance = col - lowest
    if span == 0:
        # Every entry equals the minimum, so the distances are already 0.
        return distance.astype(float)
    return distance/span

# Columns scaled so that the maximum maps to 1 ...
column_norm_max = ['Popularidad', 'Ventas', 'Duración', 'PromedioVotos', 'AñoEstreno', 'Votos']
# ... and columns scaled so that the minimum maps to 1.
column_norm_min = ['Presupuesto']

# Rescale every listed column of dataset_normalized with its matching helper.
for normalizer, columns in ((norm_column_max, column_norm_max),
                            (norm_column_min, column_norm_min)):
    for col in columns:
        dataset_normalized[col] = normalizer(dataset_normalized[col])

#### ponderación simple

In [31]:
# Separate copy for the simple weighting approach; dataset_normalized is left
# untouched for the AHP section.
pond_simple = dataset_normalized.copy()

In [32]:
# Simple-weighting score per genre: the first genre listed gets 16 and each
# following one gets one point less, down to 1 for the last.
# The 'Familiy' spelling is kept as-is; presumably it matches the labels in
# the dataset (verify against the spreadsheet).
dict_pond_simple_gen = {
    genre: score
    for score, genre in zip(
        range(16, 0, -1),
        [
            'Animation', 'Fantasy', 'Science Fiction', 'Adventure',
            'Action', 'War', 'Mystery', 'Horror',
            'Thriller', 'Crime', 'Familiy', 'Comedy',
            'Romance', 'Drama', 'Western', 'Music',
        ],
    )
}

# Replace each genre label with its score, then rescale the scores to [0, 1].
# Labels missing from dict_pond_simple_gen come out of .map() as NaN.
pond_simple['Género'] = norm_column_max(
    pond_simple['Género'].map(dict_pond_simple_gen)
)

In [33]:
# Raw importance assigned to each criterion, in the order of columnas_interes,
# rescaled so that the weights add up to 1.
pond_fields = np.array([7, 1, 3, 2, 4, 8, 6 ,5])
pond_fields = pond_fields/pond_fields.sum()

# Weighted sum of the normalized criteria, sorted best-first.
pond_simple['Ranking'] = pond_simple[columnas_interes].dot(pond_fields)
pond_simple = pond_simple.sort_values(by=['Ranking'], ascending=False)

In [34]:
# Show the simple-weighting table, sorted by 'Ranking' (highest first).
pond_simple

Unnamed: 0_level_0,Popularidad,Presupuesto,Ventas,Duración,Género,Votos,PromedioVotos,AñoEstreno,Ranking
Nombre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Interstellar,0.756284,0.611786,0.223503,0.458333,0.800000,0.664958,0.921569,0.981818,0.734753
Jurassic World,1.000000,0.647082,0.544124,0.314103,0.733333,0.569027,0.627451,1.000000,0.726609
Inception,0.283645,0.623551,0.296756,0.391026,0.733333,1.000000,0.901961,0.909091,0.699220
Mad Max: Fury Road,0.861539,0.647082,0.136023,0.301282,0.733333,0.632879,0.745098,1.000000,0.698762
Avatar,0.285741,0.442369,1.000000,0.435897,0.733333,0.865840,0.745098,0.890909,0.697209
...,...,...,...,...,...,...,...,...,...
Perfect,0.003811,0.955328,0.004609,0.285256,0.133333,0.000307,0.196078,0.454545,0.154204
Cool as Ice,0.010723,0.985917,0.000393,0.208333,0.133333,0.000410,0.117647,0.563636,0.153875
One-Eyed Jacks,0.004955,0.985917,0.001510,0.368590,0.066667,0.000717,0.549020,0.018182,0.150548
The Night of the Iguana,0.005234,0.992976,0.004278,0.317308,0.133333,0.000512,0.470588,0.072727,0.150046


#### AHP

In [35]:
# Pairwise comparison values between genres, keyed '<genre A>-<genre B>'.
# create_matrix() stores each value at (row A, column B) and its reciprocal
# at (row B, column A), so every unordered pair appears only once here.
dict_gen = {'Action-Adventure': 2.0,
 'Action-Animation': 0.5,
 'Action-Comedy': 4.0,
 'Action-Crime': 5.0,
 'Action-Drama': 6.0,
 'Action-Familiy': 7.0,
 'Action-Fantasy': 4.0,
 'Action-Horror': 3.0,
 'Action-Music': 5.0,
 'Action-Mystery': 2.0,
 'Action-Romance': 8.0,
 'Action-Science Fiction': 2.0,
 'Action-Thriller': 5.0,
 'Action-War': 2.0,
 'Action-Western': 6.0,
 'Adventure-Animation': 2.0,
 'Adventure-Comedy': 5.0,
 'Adventure-Crime': 5.0,
 'Adventure-Drama': 7.0,
 'Adventure-Familiy': 7.0,
 'Adventure-Fantasy': 3.0,
 'Adventure-Horror': 4.0,
 'Adventure-Music': 6.0,
 'Adventure-Mystery': 3.0,
 'Adventure-Romance': 7.0,
 'Adventure-Science Fiction': 2.0,
 'Adventure-Thriller': 4.0,
 'Adventure-War': 3.0,
 'Adventure-Western': 4.0,
 'Animation-Comedy': 5.0,
 'Animation-Crime': 6.0,
 'Animation-Drama': 2.0,
 'Animation-Familiy': 5.0,
 'Animation-Fantasy': 2.0,
 'Animation-Horror': 3.0,
 'Animation-Music': 6.0,
 'Animation-Mystery': 2.0,
 'Animation-Romance': 7.0,
 'Animation-Science Fiction': 4.0,
 'Animation-Thriller': 7.0,
 'Animation-War': 4.0,
 'Animation-Western': 5.0,
 'Comedy-Crime': 3.0,
 'Comedy-Drama': 4.0,
 'Comedy-Familiy': 4.0,
 'Comedy-Fantasy': 1.0,
 'Comedy-Horror': 3.0,
 'Comedy-Music': 2.0,
 'Comedy-Mystery': 6.0,
 'Comedy-Romance': 3.0,
 'Comedy-Science Fiction': 7.0,
 'Comedy-Thriller': 2.0,
 'Comedy-War': 2.0,
 'Comedy-Western': 3.0,
 'Crime-Drama': 4.0,
 'Crime-Familiy': 6.0,
 'Crime-Fantasy': 4.0,
 'Crime-Horror': 7.0,
 'Crime-Music': 2.0,
 'Crime-Mystery': 5.0,
 'Crime-Romance': 1.0,
 'Crime-Science Fiction': 0.5,
 'Crime-Thriller': 7.0,
 'Crime-War': 8.0,
 'Crime-Western': 2.0,
 'Drama-Familiy': 5.0,
 'Drama-Fantasy': 4.0,
 'Drama-Horror': 8.0,
 'Drama-Music': 2.0,
 'Drama-Mystery': 3.0,
 'Drama-Romance': 4.0,
 'Drama-Science Fiction': 5.0,
 'Drama-Thriller': 6.0,
 'Drama-War': 3.0,
 'Drama-Western': 4.0,
 'Familiy-Fantasy': 3.0,
 'Familiy-Horror': 4.0,
 'Familiy-Music': 5.0,
 'Familiy-Mystery': 4.0,
 'Familiy-Romance': 2.0,
 'Familiy-Science Fiction': 3.0,
 'Familiy-Thriller': 4.0,
 'Familiy-War': 5.0,
 'Familiy-Western': 3.0,
 'Fantasy-Horror': 2.0,
 'Fantasy-Music': 4.0,
 'Fantasy-Mystery': 5.0,
 'Fantasy-Romance': 6.0,
 'Fantasy-Science Fiction': 3.0,
 'Fantasy-Thriller': 2.0,
 'Fantasy-War': 3.0,
 'Fantasy-Western': 4.0,
 'Horror-Music': 5.0,
 'Horror-Mystery': 6.0,
 'Horror-Romance': 7.0,
 'Horror-Science Fiction': 5.0,
 'Horror-Thriller': 43.0,  # NOTE(review): only value outside the 0.5-8 range used elsewhere; looks like a typo (4.0 or 3.0?) - confirm
 'Horror-War': 2.0,
 'Horror-Western': 4.0,
 'Music-Mystery': 5.0,
 'Music-Romance': 6.0,
 'Music-Science Fiction': 6.0,
 'Music-Thriller': 7.0,
 'Music-War': 7.0,
 'Music-Western': 5.0,
 'Mystery-Romance': 3.0,
 'Mystery-Science Fiction': 2.0,
 'Mystery-Thriller': 3.0,
 'Mystery-War': 4.0,
 'Mystery-Western': 5.0,
 'Romance-Science Fiction': 6.0,
 'Romance-Thriller': 7.0,
 'Romance-War': 1.0,
 'Romance-Western': 2.0,
 'Science Fiction-Thriller': 3.0,
 'Science Fiction-War': 4.0,
 'Science Fiction-Western': 5.0,
 'Thriller-War': 6.0,
 'Thriller-Western': 7.0,
 'War-Western': 8.0}


In [36]:
def create_matrix(lista_, dict_gen):
    """Build a reciprocal pairwise-comparison matrix.

    Parameters
    ----------
    lista_ : sequence of str
        Item labels; item ``i`` occupies row and column ``i``.
    dict_gen : dict
        Comparison values keyed ``'<label a>-<label b>'``.

    Returns
    -------
    numpy.ndarray
        Square float matrix with ones on the diagonal. For every pair
        ``i < j`` the value stored under ``'lista_[i]-lista_[j]'`` is placed
        at ``[i, j]`` and its reciprocal at ``[j, i]``. If only the reversed
        key ``'lista_[j]-lista_[i]'`` exists, its value is placed at
        ``[j, i]`` and the reciprocal at ``[i, j]``, so the result no longer
        depends on the order in which the labels are listed.

    Raises
    ------
    KeyError
        If neither orientation of a pair is present in ``dict_gen``.
    """
    n_les = len(lista_)
    matriz = np.eye(n_les)
    for i in range(n_les):
        # Only the upper triangle is visited; the lower one is filled by reciprocity.
        for j in range(i + 1, n_les):
            direct_key = lista_[i]+'-'+lista_[j]
            if direct_key in dict_gen:
                matriz[i, j] = dict_gen[direct_key]
                matriz[j, i] = 1/matriz[i, j]
                continue
            reverse_key = lista_[j]+'-'+lista_[i]
            if reverse_key not in dict_gen:
                # Report the key the caller was originally expected to provide.
                raise KeyError(direct_key)
            matriz[j, i] = dict_gen[reverse_key]
            matriz[i, j] = 1/matriz[j, i]
    return matriz

In [37]:
# The genre labels are read from dataset_original: a later cell replaces
# dataset_normalized['Género'] with numeric weights, so reading the labels
# from there would make this cell fail when re-run. On a fresh run both
# columns hold the same labels in the same order.
lista_generos = dataset_original['Género'].unique()
matriz_generos = create_matrix(lista_generos, dict_gen)
weights, rc = ahp_method(matriz_generos)

##################################################
################## AHP Method ####################
##################################################
[0.104 0.105 0.102 0.069 0.08  0.077 0.058 0.055 0.125 0.066 0.035 0.034
 0.03  0.028 0.023 0.008]
[0.125 0.134 0.118 0.076 0.096 0.09  0.061 0.052 0.091 0.045 0.024 0.025
 0.024 0.015 0.015 0.01 ]
Norm Inf:  0.03398684501632282
[0.139 0.146 0.128 0.079 0.088 0.078 0.052 0.047 0.073 0.043 0.026 0.025
 0.028 0.018 0.019 0.012]
Norm Inf:  0.018501364840947443
[0.136 0.144 0.126 0.078 0.089 0.08  0.053 0.049 0.076 0.043 0.026 0.025
 0.028 0.018 0.019 0.011]
Norm Inf:  0.0035543197717770036
##################################################
lamb_max : 23.875
CI : 0.525
CR :  0.33863
##################################################
w(g1):  0.136
w(g2):  0.144
w(g3):  0.126
w(g4):  0.078
w(g5):  0.089
w(g6):  0.08
w(g7):  0.053
w(g8):  0.049
w(g9):  0.076
w(g10):  0.043
w(g11):  0.026
w(g12):  0.025
w(g13):  0.028
w(g14):  0.018
w(g15):  0

In [38]:
# Lookup table: genre label -> AHP weight (same order as lista_generos).
dict_weights = dict(zip(lista_generos, weights))

def get_weight(genero):
    """Return the AHP weight stored for ``genero``; KeyError if it is unknown."""
    return dict_weights[genero]

# Map from the untouched labels in dataset_original instead of the column
# being overwritten: the cell can then be re-run without hitting a KeyError
# on values that were already converted to weights.
dataset_normalized['Género'] = dataset_original['Género'].apply(get_weight)

In [40]:
# Criteria columns compared in the AHP criteria matrix. 'Ranking' is excluded
# explicitly: it is added to dataset_normalized by a later cell and would
# otherwise be picked up as a criterion if this cell were re-run afterwards.
campos_peliculas = dataset_normalized.columns.drop('Ranking', errors='ignore')

In [41]:
# Pairwise comparison values between ranking criteria, keyed
# '<criterion A>-<criterion B>'. create_matrix() stores each value at
# (row A, column B) and its reciprocal at (row B, column A).
dict_peliculas = {'Popularidad-Presupuesto': 1,
 'Popularidad-Ventas': 3,
 'Popularidad-Duración':6,
 'Popularidad-Género': 9,
 'Popularidad-Votos': 1,
 'Popularidad-PromedioVotos': 1,
 'Popularidad-AñoEstreno': 9,
 'Presupuesto-Ventas': 1,
 'Presupuesto-Duración': 0.3,
 'Presupuesto-Género': 0.2,
 'Presupuesto-Votos': 0.5,
 'Presupuesto-PromedioVotos': 1,
 'Presupuesto-AñoEstreno': 0.1,
 'Ventas-Duración': 5,
 'Ventas-Género': 1,
 'Ventas-Votos': 0.6,
 'Ventas-PromedioVotos': 0.1,
 'Ventas-AñoEstreno': 0.7,
 'Duración-Género': 5,
 'Duración-Votos': 3,
 'Duración-PromedioVotos': 3,
 'Duración-AñoEstreno': 1,
 'Género-Votos': 0.5,
 'Género-PromedioVotos': 0.7,
 'Género-AñoEstreno': 0.4,
 'Votos-PromedioVotos': 1,
 'Votos-AñoEstreno': 0.8,
 'PromedioVotos-AñoEstreno': 4}

In [42]:
# Build the criteria comparison matrix and derive one weight per criterion.
# Note: `rc` is reassigned here, replacing the consistency ratio that was
# obtained earlier for the genre matrix.
matriz_peliculas = create_matrix(campos_peliculas, dict_peliculas)
weights_peliculas, rc = ahp_method(matriz_peliculas)

##################################################
################## AHP Method ####################
##################################################
[0.262 0.043 0.082 0.141 0.075 0.083 0.167 0.148]
[0.302 0.055 0.096 0.133 0.053 0.087 0.175 0.099]
Norm Inf:  0.0490350847147385
[0.267 0.06  0.094 0.137 0.062 0.09  0.178 0.113]
Norm Inf:  0.03461006884371598
[0.271 0.06  0.095 0.135 0.061 0.09  0.178 0.11 ]
Norm Inf:  0.004265877785073535
##################################################
lamb_max : 11.93
CI : 0.561
CR :  0.39815
##################################################
w(g1):  0.271
w(g2):  0.06
w(g3):  0.095
w(g4):  0.135
w(g5):  0.061
w(g6):  0.09
w(g7):  0.178
w(g8):  0.11
##################################################


In [61]:
# AHP score: weighted sum of the criteria columns using the AHP weights.
dataset_normalized['Ranking'] = dataset_normalized.loc[:, campos_peliculas] @ weights_peliculas
# From here on dataset_normalized holds only the ten best-scored rows.
dataset_normalized = (
    dataset_normalized
    .sort_values('Ranking', ascending=False)
    .iloc[:10]
)

In [63]:
# Top 10 rows according to the simple weighting ranking.
pond_simple.head(10)

Unnamed: 0_level_0,Popularidad,Presupuesto,Ventas,Duración,Género,Votos,PromedioVotos,AñoEstreno,Ranking
Nombre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Interstellar,0.756284,0.611786,0.223503,0.458333,0.8,0.664958,0.921569,0.981818,0.734753
Jurassic World,1.0,0.647082,0.544124,0.314103,0.733333,0.569027,0.627451,1.0,0.726609
Inception,0.283645,0.623551,0.296756,0.391026,0.733333,1.0,0.901961,0.909091,0.69922
Mad Max: Fury Road,0.861539,0.647082,0.136023,0.301282,0.733333,0.632879,0.745098,1.0,0.698762
Avatar,0.285741,0.442369,1.0,0.435897,0.733333,0.86584,0.745098,0.890909,0.697209
The Avengers,0.231307,0.48237,0.546291,0.375,0.866667,0.911448,0.784314,0.945455,0.685605
Star Wars: The Force Awakens,0.338518,0.52943,0.743537,0.352564,0.733333,0.541355,0.823529,1.0,0.640004
Guardians of the Galaxy,0.433683,0.600021,0.277993,0.304487,0.733333,0.574152,0.901961,0.981818,0.636838
The Dark Knight Rises,0.199571,0.411779,0.388631,0.445513,0.733333,0.688019,0.823529,0.945455,0.610323
The Lord of the Rings: The Return of the King,0.215679,0.778851,0.402239,0.560897,0.8,0.576612,0.901961,0.781818,0.604191


In [64]:
# op1: index labels of the AHP top 10 (dataset_normalized was already cut
# down to ten rows when the AHP ranking was computed).
# op2: index labels of the simple-weighting top 10.
op1 = dataset_normalized.head(10).index 
op2 = pond_simple.head(10).index

In [69]:
# Labels that appear in only one of the two top-10 lists:
# (only in the AHP top 10, only in the simple-weighting top 10)

op1.difference(op2), op2.difference(op1)

(Index(['The Dark Knight'], dtype='object', name='Nombre'),
 Index(['The Dark Knight Rises'], dtype='object', name='Nombre'))