# Análisis de resultados de varias carteras y posibilidad de representación gráfica

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
from sklearn.metrics import confusion_matrix, classification_report

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import recall_score
from sklearn.metrics import make_scorer

from sklearn.tree import DecisionTreeClassifier

import warnings
warnings.filterwarnings("ignore")

In [3]:
# Load only the columns this notebook uses: prices at day d and d+180, the
# PER / PBC / ROA ratio features, the target label and the row identifiers.
df = pd.read_csv(
    "../tablas/dataformodel.csv",
    usecols=[
        'Price_d', 'Price_d+180',
        'quantile_PER', 'var_quantile_PER', 'PER', 'var_PER',
        'quantile_PBC', 'var_quantile_PBC', 'var_PBC',
        'quantile_ROA', 'var_quantile_ROA', 'var_ROA',
        'Etiqueta', 'Periodo', 'Ticker',
    ],
)

# Treat +/-inf as missing, keep only rows that are complete in every column,
# and renumber the index so it is a clean 0..n-1 range.
# dropna() replaces the former per-column notnull() filter loop (same result,
# one pass, no intermediate frames).
df = (
    df.replace([np.inf, -np.inf], np.nan)
      .dropna()
      .reset_index(drop=True)
)
df

Unnamed: 0,Ticker,Periodo,Price_d,Price_d+180,Etiqueta,PER,var_PER,var_PBC,var_ROA,quantile_PER,quantile_PBC,quantile_ROA,var_quantile_PER,var_quantile_PBC,var_quantile_ROA
0,AE,2011Q3,19.32,37.15,Mejor,9.027693,1.753120,0.194199,-0.608789,14.893617,8.510638,68.085106,-4.255319,-6.382979,42.553191
1,AE,2011Q4,34.38,30.50,Peor,30.636161,-0.705326,-0.425966,1.043235,38.297872,31.914894,51.063830,23.404255,23.404255,-17.021277
2,AE,2012Q1,33.81,26.10,Peor,21.687745,0.412602,0.077265,-0.216129,14.583333,27.083333,56.250000,-23.714539,-4.831560,5.186170
3,AE,2012Q2,29.68,31.31,Peor,23.241413,-0.066849,0.191476,0.018057,27.083333,22.916667,50.000000,12.500000,-4.166667,-6.250000
4,AE,2012Q3,26.89,49.19,Mejor,13.725179,0.693341,0.178121,-0.263640,20.833333,10.416667,75.000000,-6.250000,-12.500000,25.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1334,XOM,2017Q4,66.45,71.99,Peor,33.772142,1.298990,0.119661,-0.527251,68.750000,52.083333,72.916667,10.416667,0.000000,-12.500000
1335,XOM,2018Q1,67.15,70.54,Mejor,61.662473,-0.452306,-0.010888,0.802848,56.250000,47.916667,58.333333,-12.500000,-4.166667,-14.583333
1336,XOM,2018Q2,70.82,64.68,Mejor,76.575246,-0.194747,-0.058745,0.177094,56.250000,41.666667,72.916667,0.000000,-6.250000,14.583333
1337,XOM,2018Q3,74.34,70.53,Peor,50.882394,0.504946,-0.031116,-0.356392,50.000000,56.250000,72.916667,-6.250000,14.583333,0.000000


In [4]:
def resultado(row):
    """Return the fractional profit of one position, signed by its prediction.

    Parameters
    ----------
    row : mapping
        Must provide 'Prediction', 'Price_d' and 'Price_d+180'.

    Returns
    -------
    The price change relative to 'Price_d'. When 'Prediction' is 'Peor' the
    sign is flipped (a price drop counts as a gain); for any other prediction
    a price rise counts as a gain.
    """
    bet_on_fall = row['Prediction'] == 'Peor'
    entry_price = row['Price_d']
    exit_price = row['Price_d+180']
    gain = (entry_price - exit_price) if bet_on_fall else (exit_price - entry_price)
    return gain / entry_price

In [5]:
# Backtest: for every selected quarter, train a decision tree on all earlier
# quarters, build a portfolio from the stocks at both ends of the predicted
# probability ranking, and record the portfolio's total result.

# NOTE(review): unique() keeps first-appearance order, so this slice assumes the
# periods show up in chronological order in `df` -- confirm against the CSV.
lista_periodos=df.Periodo.unique()[20:-1]
titulos = 7        # number of stocks taken from each end of the ranking
inversion = 100    # amount multiplied by each position's fractional result
ratios = ['quantile_PER','var_quantile_PER','quantile_PBC','var_quantile_PBC']

# Collect one record per period and build the frame once at the end
# (DataFrame.append was removed in pandas 2.0 and was quadratic anyway).
summary_rows = []

for trim_seleccionado in lista_periodos:
    # Periods are compared as strings; train strictly on earlier periods.
    df_train = df[df.Periodo<trim_seleccionado]
    df_test = df[df.Periodo==trim_seleccionado]

    X_train=df_train[ratios].values
    y_train=df_train['Etiqueta'].values

    X_test=df_test[ratios].values
    y_test=df_test['Etiqueta'].values

    # Seed the searched estimator as well, so the selected hyper-parameters
    # are reproducible across runs.
    grid_search = GridSearchCV(DecisionTreeClassifier(random_state=0),
                               param_grid = {"max_depth":np.arange(2,10),
                                             "min_samples_leaf": np.arange(2,20,1)},
                               cv=5,
                               scoring="accuracy"
                               )
    grid_search.fit(X_train,y_train)

    best_max_depth=grid_search.best_params_.get('max_depth')
    best_min_samples_leaf=grid_search.best_params_.get('min_samples_leaf')

    # GridSearchCV refits the best configuration on the full training set by
    # default, so best_estimator_ is already the tree with random_state=0,
    # max_depth=best_max_depth and min_samples_leaf=best_min_samples_leaf.
    clfTree = grid_search.best_estimator_

    # Column 0 of predict_proba is the probability of clfTree.classes_[0]
    # (scikit-learn sorts the labels, i.e. 'Mejor' when both 'Mejor' and 'Peor'
    # are present in the training labels).
    df_prob=pd.DataFrame({'Probabilidad':clfTree.predict_proba(X_test)[:,0],
                          'Prediction':clfTree.predict(X_test),'Actual':y_test},
                         index=df_test.index)

    # Ascending sort: despite the names, `best` holds the LOWEST probabilities
    # and `worst` the HIGHEST. The naming does not affect the result because
    # each position's direction is taken from its own 'Prediction'.
    ranked = df_prob.sort_values('Probabilidad')
    best=ranked.head(titulos)
    worst=ranked.tail(titulos)
    cartera=pd.concat([best, worst])
    # With fewer than 2*titulos stocks in the period both ends overlap; keep
    # each stock once so it is not counted twice.
    cartera = cartera[~cartera.index.duplicated()]

    cartera = cartera.join(df_test)
    # .copy() so the new column is written to an independent frame rather
    # than to a slice of `cartera`.
    res_cartera=cartera[['Prediction','Price_d','Price_d+180']].copy()

    res_cartera['Resultado']=res_cartera.apply(resultado,axis=1)*inversion

    summary_rows.append({'Periodo':trim_seleccionado,
                         'Rentabilidad':res_cartera['Resultado'].sum()})

resumen = pd.DataFrame(summary_rows, columns=['Periodo','Rentabilidad'])

# Attach the index data for the same periods and keep only the columns that
# remain after dropping the two index-level columns.
df_var_indice = pd.read_csv("../tablas/var_indice.csv")
resumen = resumen.merge(df_var_indice, on='Periodo', how='left')
resumen = resumen.drop(['Indice_d_100','Indice_d+180_100'], axis=1)

resumen

Unnamed: 0,Periodo,Rentabilidad,Rent_indice
0,2016Q3,61.005093,4.913342
1,2016Q4,2.552403,-13.766386
2,2017Q1,-74.563742,-1.002842
3,2017Q2,-502.718377,14.547461
4,2017Q3,-238.246779,9.638056
5,2017Q4,31.422908,13.341812
6,2018Q1,-26.610977,-5.813577
7,2018Q2,80.198545,-12.743212
8,2018Q3,-89.931965,1.828721
9,2018Q4,351.713878,-9.070494


In [6]:
# Total of the per-period portfolio results over all backtested periods.
resumen['Rentabilidad'].sum()

-405.1790128934525

In [7]:
# Total of the 'Rent_indice' column over the same periods, for comparison.
resumen['Rent_indice'].sum()

1.872881041126309