# Análisis de resultados de varias carteras y posibilidad de representación gráfica

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
from sklearn.metrics import confusion_matrix, classification_report

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import recall_score
from sklearn.metrics import make_scorer

from sklearn.linear_model import LogisticRegression

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load only the columns used later in the notebook (prices, valuation ratios and
# their quantile/variation variants, the target label and the identifying keys).
df = pd.read_csv("../tablas/dataformodel.csv", usecols=['Price_d', 'Price_d+180',
                                                        'quantile_PER', 'var_quantile_PER','PER','var_PER',
                                                        'quantile_PBC', 'var_quantile_PBC','var_PBC',
                                                        'quantile_ROA', 'var_quantile_ROA','var_ROA','ROA',
                                                        'Etiqueta', 'Periodo','Ticker','Sector'])

# Treat +/-inf as missing, then keep only rows that are complete in EVERY loaded
# column (a single dropna() replaces the former per-column filtering loop), and
# renumber the index from 0.
df = (
    df.replace([np.inf, -np.inf], np.nan)
      .dropna()
      .reset_index(drop=True)
)

In [3]:
def resultado(row):
    """Return the fractional gain of one position over the 180-day horizon.

    `row` must support item access for 'Prediction', 'Price_d' and
    'Price_d+180'. A 'Peor' prediction is scored as a short position
    (gain when the price falls); any other prediction is scored as a long
    position (gain when the price rises). The result is relative to 'Price_d'.
    """
    is_short = row['Prediction'] == 'Peor'
    entry_price = row['Price_d']
    exit_price = row['Price_d+180']
    if not is_short:
        return (exit_price - entry_price) / entry_price
    return (entry_price - exit_price) / entry_price

## Sector energético

In [4]:
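# Disabled scratch calculation: 15% of the number of distinct tickers in the sector
# (presumably used to choose the portfolio size - confirm). Note that `df_sector`
# is only defined in the next cell, so this cannot run here on a fresh kernel.
#df_sector.Ticker.nunique()*0.15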

In [5]:
# Walk-forward back-test for the Energy sector.
# For each selected period a logistic regression is fitted on all strictly
# earlier periods, the `titulos` stocks with the highest class-0 probability
# are selected, and their mean result is stored and compared with the index.
df_sector = df[df.Sector == 'Energy']
df_var_indice = pd.read_csv("../tablas/var_indice_energy.csv")

# unique() keeps first-appearance order; the slice skips the first 10 and the
# last 2 periods. NOTE(review): this assumes the rows are ordered by period.
lista_periodos = df_sector.Periodo.unique()[10:-2]
titulos = 9       # number of stocks held in each period
inversion = 100   # scale factor applied to the fractional result of each stock
ratios = ['quantile_PER','var_quantile_PER','quantile_PBC','var_quantile_PBC']

# Per-period records are collected in a list and turned into a DataFrame once,
# instead of growing a DataFrame with DataFrame.append (removed in pandas 2.0).
filas_resumen = []

for trim_seleccionado in lista_periodos:
    # Train on every period strictly before the selected one (string comparison).
    df_train = df_sector[df_sector.Periodo < trim_seleccionado]
    df_test = df_sector[df_sector.Periodo == trim_seleccionado]

    X_train = df_train[ratios].values
    y_train = df_train['Etiqueta'].values
    X_test = df_test[ratios].values
    y_test = df_test['Etiqueta'].values

    clf = LogisticRegression(max_iter=200)
    clf.fit(X_train, y_train)

    # Column 0 of predict_proba belongs to clf.classes_[0]; with the
    # 'Mejor'/'Peor' labels seen in the saved output that is presumably 'Mejor'
    # (scikit-learn sorts class labels) - confirm if other labels exist.
    df_prob = pd.DataFrame(
        {'Probabilidad': clf.predict_proba(X_test)[:, 0],
         'Prediction': clf.predict(X_test),
         'Actual': y_test},
        index=df_test.index,
    )

    # Keep the `titulos` rows with the highest probability. A variant that also
    # added the `titulos` lowest-probability rows was tried and is disabled.
    cartera = df_prob.sort_values('Probabilidad').tail(titulos).join(df_test)

    # Explicit copy so the new column is not written into a slice of `cartera`.
    res_cartera = cartera[['Prediction','Price_d','Price_d+180']].copy()
    res_cartera['Resultado'] = res_cartera.apply(resultado, axis=1) * inversion

    filas_resumen.append({'Periodo': trim_seleccionado,
                          'Rentabilidad': res_cartera.Resultado.mean()})

resumen = pd.DataFrame(filas_resumen, columns=['Periodo','Rentabilidad'])

# Attach the index figures for the same periods and drop the two index-level
# columns that are not shown in the summary.
resumen = resumen.merge(df_var_indice, on='Periodo', how='left')
resumen = resumen.drop(columns=['Indice_d_100','Indice_d+180_100'])

resumen
    

Unnamed: 0,Periodo,Rentabilidad,Sector,Rent_indice
0,2016Q3,3.659661,Energy,3.321205
1,2016Q4,-13.855297,Energy,-16.350641
2,2017Q1,-10.140135,Energy,-1.705814
3,2017Q2,11.044763,Energy,14.107515
4,2017Q3,36.769337,Energy,7.73418
5,2017Q4,32.065747,Energy,13.109785
6,2018Q1,-10.142635,Energy,-6.353095
7,2018Q2,-23.569042,Energy,-17.100456
8,2018Q3,6.276862,Energy,-2.773689
9,2018Q4,-16.292797,Energy,-12.314615


In [6]:
# Simple (non-compounded) sum of the per-period portfolio results in `resumen`.
resumen['Rentabilidad'].sum()

15.816463563093016

In [7]:
# Simple sum of the `Rent_indice` column (presumably the sector index return
# per period, taken from the merged index file) for comparison.
resumen['Rent_indice'].sum()

-18.32562437835808

In [8]:
# `cartera` is reassigned on every iteration of the back-test loop, so this
# displays only the portfolio of the last period processed (2018Q4 in the
# saved output).
cartera

Unnamed: 0,Probabilidad,Prediction,Actual,Ticker,Sector,Periodo,Price_d,Price_d+180,Etiqueta,PER,ROA,var_PER,var_PBC,var_ROA,quantile_PER,quantile_PBC,quantile_ROA,var_quantile_PER,var_quantile_PBC,var_quantile_ROA
8297,0.501745,Mejor,Peor,OII,Energy,2018Q4,15.45,12.29,Peor,-23.735896,-0.022704,0.197946,0.161586,-0.00604,15.885947,12.627291,12.830957,-0.173656,-4.922378,-3.725334
3311,0.502978,Mejor,Peor,CXO,Energy,2018Q4,110.65,72.1,Peor,14.544953,0.057542,-9.93439,0.279857,-1.137182,27.087576,15.885947,93.482688,19.968371,-8.617364,70.30388
4804,0.511431,Mejor,Mejor,GEOS,Energy,2018Q4,14.79,15.02,Mejor,-948.136028,-0.00104,-0.960527,-0.088836,23.577721,2.03666,13.441955,23.421589,-12.532877,0.196922,7.858675
10720,0.51269,Mejor,Mejor,TRGP,Energy,2018Q4,37.52,32.08,Mejor,-62.988784,-0.008106,2.064487,0.238698,-0.597939,10.590631,14.052953,18.329939,5.623744,-7.801351,-6.835624
7083,0.512723,Mejor,Peor,MDR,Energy,2018Q4,8.0,4.47,Peor,-0.523253,-0.293346,4426.306998,-0.531838,-0.999717,24.032587,20.977597,0.814664,23.535898,13.361703,-26.66878
3608,0.514011,Mejor,Mejor,DRQ,Energy,2018Q4,43.35,45.22,Mejor,-20.738119,-0.062819,6.271546,-0.074831,-0.872453,16.293279,21.588595,7.535642,10.001888,1.886608,-15.312041
7611,0.519783,Mejor,Mejor,MTRX,Energy,2018Q4,23.65,20.5,Mejor,-43.220081,-0.026312,1.034938,-0.313981,-0.636818,13.034623,29.327902,12.016293,3.928663,8.301412,-10.003574
2833,0.537275,Mejor,Mejor,CQP,Energy,2018Q4,38.21,39.88,Mejor,37.948735,0.019528,-0.056258,-0.117178,-0.10351,34.215886,94.501018,59.266802,-1.711266,0.957972,0.491968
5959,0.556885,Mejor,Peor,IO,Energy,2018Q4,12.18,8.57,Peor,-8.830705,-0.066118,1.057897,-0.766927,-0.568196,20.162933,95.723014,7.331976,1.123198,24.199835,-7.237561


In [9]:
# `res_cartera` is likewise left over from the last loop iteration: per-stock
# prediction, entry/exit prices and the result scaled by `inversion`.
res_cartera

Unnamed: 0,Prediction,Price_d,Price_d+180,Resultado
8297,Mejor,15.45,12.29,-20.453074
3311,Mejor,110.65,72.1,-34.839584
4804,Mejor,14.79,15.02,1.555105
10720,Mejor,37.52,32.08,-14.498934
7083,Mejor,8.0,4.47,-44.125
3608,Mejor,43.35,45.22,4.313725
7611,Mejor,23.65,20.5,-13.319239
2833,Mejor,38.21,39.88,4.370584
5959,Mejor,12.18,8.57,-29.638752


## Sector tecnológico

In [10]:
# Walk-forward back-test for the Technology sector.
# For each selected period a logistic regression is fitted on all strictly
# earlier periods, the `titulos` stocks with the highest class-0 probability
# are selected, and their mean result is stored and compared with the index.
df_sector = df[df.Sector == 'Technology']
df_var_indice = pd.read_csv("../tablas/var_indice_tec.csv")

# unique() keeps first-appearance order; the slice skips the first 10 and the
# last 2 periods. NOTE(review): this assumes the rows are ordered by period.
lista_periodos = df_sector.Periodo.unique()[10:-2]
titulos = 9       # number of stocks held in each period
inversion = 100   # scale factor applied to the fractional result of each stock
ratios = ['quantile_PER','var_quantile_PER','quantile_PBC','var_quantile_PBC']

# Per-period records are collected in a list and turned into a DataFrame once,
# instead of growing a DataFrame with DataFrame.append (removed in pandas 2.0).
filas_resumen = []

for trim_seleccionado in lista_periodos:
    # Train on every period strictly before the selected one (string comparison).
    df_train = df_sector[df_sector.Periodo < trim_seleccionado]
    df_test = df_sector[df_sector.Periodo == trim_seleccionado]

    X_train = df_train[ratios].values
    y_train = df_train['Etiqueta'].values
    X_test = df_test[ratios].values
    y_test = df_test['Etiqueta'].values

    clf = LogisticRegression(max_iter=200)
    clf.fit(X_train, y_train)

    # Column 0 of predict_proba belongs to clf.classes_[0]; with 'Mejor'/'Peor'
    # labels that is presumably 'Mejor' (scikit-learn sorts class labels) -
    # confirm if other labels exist.
    df_prob = pd.DataFrame(
        {'Probabilidad': clf.predict_proba(X_test)[:, 0],
         'Prediction': clf.predict(X_test),
         'Actual': y_test},
        index=df_test.index,
    )

    # Keep the `titulos` rows with the highest probability. A variant that also
    # added the `titulos` lowest-probability rows was tried and is disabled.
    cartera = df_prob.sort_values('Probabilidad').tail(titulos).join(df_test)

    # Explicit copy so the new column is not written into a slice of `cartera`.
    res_cartera = cartera[['Prediction','Price_d','Price_d+180']].copy()
    res_cartera['Resultado'] = res_cartera.apply(resultado, axis=1) * inversion

    filas_resumen.append({'Periodo': trim_seleccionado,
                          'Rentabilidad': res_cartera.Resultado.mean()})

resumen = pd.DataFrame(filas_resumen, columns=['Periodo','Rentabilidad'])

# Attach the index figures for the same periods and drop the two index-level
# columns that are not shown in the summary.
resumen = resumen.merge(df_var_indice, on='Periodo', how='left')
resumen = resumen.drop(columns=['Indice_d_100','Indice_d+180_100'])

resumen
    

Unnamed: 0,Periodo,Rentabilidad,Sector,Rent_indice
0,2016Q3,30.212795,Technology,26.219028
1,2016Q4,18.251501,Technology,14.742426
2,2017Q1,15.072055,Technology,14.64383
3,2017Q2,15.20305,Technology,10.98294
4,2017Q3,18.370199,Technology,6.243837
5,2017Q4,22.228729,Technology,13.469898
6,2018Q1,4.950108,Technology,1.861147
7,2018Q2,-5.391662,Technology,-6.22383
8,2018Q3,16.404222,Technology,4.363272
9,2018Q4,24.205029,Technology,-1.587486


In [11]:
# Simple (non-compounded) sum of the per-period portfolio results in `resumen`.
resumen['Rentabilidad'].sum()

159.50602562515957

In [12]:
# Simple sum of the `Rent_indice` column (presumably the sector index return
# per period, taken from the merged index file) for comparison.
resumen['Rent_indice'].sum()

84.71506275039

## Sector sanitario

In [13]:
# Walk-forward back-test for the Healthcare sector.
# For each selected period a logistic regression is fitted on all strictly
# earlier periods, the `titulos` stocks with the highest class-0 probability
# are selected, and their mean result is stored and compared with the index.
df_sector = df[df.Sector == 'Healthcare']
df_var_indice = pd.read_csv("../tablas/var_indice_health.csv")

# unique() keeps first-appearance order; the slice skips the first 10 and the
# last 2 periods. NOTE(review): this assumes the rows are ordered by period.
lista_periodos = df_sector.Periodo.unique()[10:-2]
titulos = 9       # number of stocks held in each period
inversion = 100   # scale factor applied to the fractional result of each stock
ratios = ['quantile_PER','var_quantile_PER','quantile_PBC','var_quantile_PBC']

# Per-period records are collected in a list and turned into a DataFrame once,
# instead of growing a DataFrame with DataFrame.append (removed in pandas 2.0).
filas_resumen = []

for trim_seleccionado in lista_periodos:
    # Train on every period strictly before the selected one (string comparison).
    df_train = df_sector[df_sector.Periodo < trim_seleccionado]
    df_test = df_sector[df_sector.Periodo == trim_seleccionado]

    X_train = df_train[ratios].values
    y_train = df_train['Etiqueta'].values
    X_test = df_test[ratios].values
    y_test = df_test['Etiqueta'].values

    clf = LogisticRegression(max_iter=200)
    clf.fit(X_train, y_train)

    # Column 0 of predict_proba belongs to clf.classes_[0]; with 'Mejor'/'Peor'
    # labels that is presumably 'Mejor' (scikit-learn sorts class labels) -
    # confirm if other labels exist.
    df_prob = pd.DataFrame(
        {'Probabilidad': clf.predict_proba(X_test)[:, 0],
         'Prediction': clf.predict(X_test),
         'Actual': y_test},
        index=df_test.index,
    )

    # Keep the `titulos` rows with the highest probability. A variant that also
    # added the `titulos` lowest-probability rows was tried and is disabled.
    cartera = df_prob.sort_values('Probabilidad').tail(titulos).join(df_test)

    # Explicit copy so the new column is not written into a slice of `cartera`.
    res_cartera = cartera[['Prediction','Price_d','Price_d+180']].copy()
    res_cartera['Resultado'] = res_cartera.apply(resultado, axis=1) * inversion

    filas_resumen.append({'Periodo': trim_seleccionado,
                          'Rentabilidad': res_cartera.Resultado.mean()})

resumen = pd.DataFrame(filas_resumen, columns=['Periodo','Rentabilidad'])

# Attach the index figures for the same periods and drop the two index-level
# columns that are not shown in the summary.
resumen = resumen.merge(df_var_indice, on='Periodo', how='left')
resumen = resumen.drop(columns=['Indice_d_100','Indice_d+180_100'])

resumen
    

Unnamed: 0,Periodo,Rentabilidad,Sector,Rent_indice
0,2016Q3,18.517093,Healthcare,12.645788
1,2016Q4,21.349966,Healthcare,13.015997
2,2017Q1,13.249167,Healthcare,17.794434
3,2017Q2,0.105274,Healthcare,18.158181
4,2017Q3,10.052847,Healthcare,10.637817
5,2017Q4,30.914916,Healthcare,14.147483
6,2018Q1,5.328366,Healthcare,4.683635
7,2018Q2,-0.254736,Healthcare,-4.906961
8,2018Q3,8.509617,Healthcare,-0.782594
9,2018Q4,20.272186,Healthcare,-0.748025


In [14]:
# Simple (non-compounded) sum of the per-period portfolio results in `resumen`.
resumen['Rentabilidad'].sum()

128.04469649828624

In [15]:
# Simple sum of the `Rent_indice` column (presumably the sector index return
# per period, taken from the merged index file) for comparison.
resumen['Rent_indice'].sum()

84.64575429344129

## Sector industrial

In [16]:
# Walk-forward back-test for the Industrials sector.
# For each selected period a logistic regression is fitted on all strictly
# earlier periods, the `titulos` stocks with the highest class-0 probability
# are selected, and their mean result is stored and compared with the index.
df_sector = df[df.Sector == 'Industrials']
df_var_indice = pd.read_csv("../tablas/var_indice_ind.csv")

# unique() keeps first-appearance order; the slice skips the first 10 and the
# last 2 periods. NOTE(review): this assumes the rows are ordered by period.
lista_periodos = df_sector.Periodo.unique()[10:-2]
titulos = 9       # number of stocks held in each period
inversion = 100   # scale factor applied to the fractional result of each stock
ratios = ['quantile_PER','var_quantile_PER','quantile_PBC','var_quantile_PBC']

# Per-period records are collected in a list and turned into a DataFrame once,
# instead of growing a DataFrame with DataFrame.append (removed in pandas 2.0).
filas_resumen = []

for trim_seleccionado in lista_periodos:
    # Train on every period strictly before the selected one (string comparison).
    df_train = df_sector[df_sector.Periodo < trim_seleccionado]
    df_test = df_sector[df_sector.Periodo == trim_seleccionado]

    X_train = df_train[ratios].values
    y_train = df_train['Etiqueta'].values
    X_test = df_test[ratios].values
    y_test = df_test['Etiqueta'].values

    clf = LogisticRegression(max_iter=200)
    clf.fit(X_train, y_train)

    # Column 0 of predict_proba belongs to clf.classes_[0]; with 'Mejor'/'Peor'
    # labels that is presumably 'Mejor' (scikit-learn sorts class labels) -
    # confirm if other labels exist.
    df_prob = pd.DataFrame(
        {'Probabilidad': clf.predict_proba(X_test)[:, 0],
         'Prediction': clf.predict(X_test),
         'Actual': y_test},
        index=df_test.index,
    )

    # Keep the `titulos` rows with the highest probability. A variant that also
    # added the `titulos` lowest-probability rows was tried and is disabled.
    cartera = df_prob.sort_values('Probabilidad').tail(titulos).join(df_test)

    # Explicit copy so the new column is not written into a slice of `cartera`.
    res_cartera = cartera[['Prediction','Price_d','Price_d+180']].copy()
    res_cartera['Resultado'] = res_cartera.apply(resultado, axis=1) * inversion

    filas_resumen.append({'Periodo': trim_seleccionado,
                          'Rentabilidad': res_cartera.Resultado.mean()})

resumen = pd.DataFrame(filas_resumen, columns=['Periodo','Rentabilidad'])

# Attach the index figures for the same periods and drop the two index-level
# columns that are not shown in the summary.
resumen = resumen.merge(df_var_indice, on='Periodo', how='left')
resumen = resumen.drop(columns=['Indice_d_100','Indice_d+180_100'])

resumen
    

Unnamed: 0,Periodo,Rentabilidad,Sector,Rent_indice
0,2016Q3,18.96528,Industrials,16.337043
1,2016Q4,14.385398,Industrials,5.145122
2,2017Q1,14.55216,Industrials,9.158635
3,2017Q2,11.714803,Industrials,12.086988
4,2017Q3,10.534991,Industrials,5.721835
5,2017Q4,16.549017,Industrials,10.367824
6,2018Q1,-6.414947,Industrials,3.020887
7,2018Q2,2.012536,Industrials,-3.011448
8,2018Q3,24.993994,Industrials,12.265015
9,2018Q4,-1.673825,Industrials,10.899798


In [17]:
# Simple (non-compounded) sum of the per-period portfolio results in `resumen`.
resumen['Rentabilidad'].sum()

105.61940695417346

In [18]:
# Simple sum of the `Rent_indice` column (presumably the sector index return
# per period, taken from the merged index file) for comparison.
resumen['Rent_indice'].sum()

81.99169861220004