# Análisis de resultados de varias carteras y posibilidad de representación gráfica

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
from sklearn.metrics import confusion_matrix, classification_report

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import recall_score
from sklearn.metrics import make_scorer

from sklearn.linear_model import LogisticRegression

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load only the columns used later in the notebook: prices at d and d+180,
# the fundamental ratios with their quantiles/variations, the label and the
# period/ticker identifiers.
columnas_modelo = [
    'Price_d', 'Price_d+180',
    'quantile_PER', 'var_quantile_PER', 'PER', 'var_PER',
    'quantile_PBC', 'var_quantile_PBC', 'var_PBC',
    'quantile_ROA', 'var_quantile_ROA', 'var_ROA', 'ROA',
    'quantile_Dot_dudosos', 'var_quantile_Dot_dudosos',
    'Dot_dudosos', 'var_Dot_dudosos',
    'Etiqueta', 'Periodo', 'Ticker',
]
df = pd.read_csv("../tablas/dataformodel.csv", usecols=columnas_modelo)

# Treat +/-inf as missing, keep only rows that are complete in every loaded
# column, and renumber the index from 0. dropna() replaces the previous
# per-column filtering loop and yields the same rows in a single pass.
df = (
    df.replace([np.inf, -np.inf], np.nan)
      .dropna()
      .reset_index(drop=True)
)

In [3]:
def resultado(row):
    """Return the fractional gain of one position between day d and day d+180.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide 'Prediction', 'Price_d' and 'Price_d+180'.

    Returns
    -------
    The price change relative to 'Price_d'. When 'Prediction' is 'Peor' the
    sign is inverted (a price drop counts as a gain); for any other
    prediction a price rise counts as a gain.
    """
    entry_price = row['Price_d']
    exit_price = row['Price_d+180']

    # Any prediction other than 'Peor' is treated as a long position.
    if row['Prediction'] != 'Peor':
        return (exit_price - entry_price) / entry_price

    # 'Peor': short position, profit when the price falls.
    return (entry_price - exit_price) / entry_price

In [21]:
# Walk-forward backtest: for every evaluation period, fit a logistic
# regression on all earlier periods, rank that period's rows by predicted
# probability, build a portfolio from both ends of the ranking and record
# its mean result.

# Evaluation periods: skip the first 22 distinct periods (in order of first
# appearance in df) and the last one.
# NOTE(review): presumably the first 22 serve as training history and the
# last one is excluded on purpose -- confirm.
lista_periodos = df.Periodo.unique()[22:-1]
titulos = 7      # rows taken from each end of the probability ranking
inversion = 100  # scale factor applied to each position's fractional result
ratios = ['quantile_PER', 'var_quantile_PER', 'quantile_PBC', 'var_quantile_PBC']

# One record per period; the summary frame is built once after the loop
# instead of being grown row by row (DataFrame.append no longer exists in
# pandas >= 2.0).
filas_resumen = []

for trim_seleccionado in lista_periodos:
    # Train strictly on earlier periods, evaluate on the selected one.
    df_train = df[df.Periodo < trim_seleccionado]
    df_test = df[df.Periodo == trim_seleccionado]

    X_train = df_train[ratios].values
    y_train = df_train['Etiqueta'].values

    X_test = df_test[ratios].values
    y_test = df_test['Etiqueta'].values

    clf = LogisticRegression(max_iter=200)
    clf.fit(X_train, y_train)

    # Column 0 of predict_proba is the probability of clf.classes_[0].
    # NOTE(review): with labels 'Mejor'/'Peor' that would be P('Mejor') --
    # confirm against clf.classes_.
    df_prob = pd.DataFrame({'Probabilidad': clf.predict_proba(X_test)[:, 0],
                            'Prediction': clf.predict(X_test),
                            'Actual': y_test})
    # Reuse the test rows' index so the join below lines up with df_test.
    df_prob.index = df_test.index

    # `best` holds the `titulos` rows with the LOWEST 'Probabilidad' and
    # `worst` the `titulos` rows with the HIGHEST; the names are kept as they
    # were in the original cell.
    # NOTE(review): if a period has fewer than 2 * titulos rows, the two
    # selections overlap and those rows are counted twice.
    ranking = df_prob.sort_values('Probabilidad')
    best = ranking.head(titulos)
    worst = ranking.tail(titulos)
    cartera = pd.concat([best, worst])

    # Bring in the prices (and remaining columns) of the selected rows.
    cartera = cartera.join(df_test)
    # Explicit copy so that adding 'Resultado' does not write into a slice
    # of `cartera`.
    res_cartera = cartera[['Prediction', 'Price_d', 'Price_d+180']].copy()

    res_cartera['Resultado'] = res_cartera.apply(resultado, axis=1) * inversion

    filas_resumen.append({'Periodo': trim_seleccionado,
                          'Rentabilidad': res_cartera.Resultado.mean()})

# Per-period summary; `columns` keeps the expected layout even when no period
# was evaluated.
resumen = pd.DataFrame(filas_resumen, columns=['Periodo', 'Rentabilidad'])
    

# Attach the index data to each period of the summary (left join, so every
# backtested period is kept) and discard the two index-level columns.
df_var_indice = pd.read_csv("../tablas/var_indice.csv")
resumen = (
    resumen
    .merge(df_var_indice, on='Periodo', how='left')
    .drop(columns=['Indice_d_100', 'Indice_d+180_100'])
)

resumen
    

Unnamed: 0,Periodo,Rentabilidad,Rent_indice
0,2017Q1,2.938579,6.457547
1,2017Q2,1.991315,14.952894
2,2017Q3,-0.540114,9.71882
3,2017Q4,0.74719,4.260848
4,2018Q1,0.01076,-10.306496
5,2018Q2,0.001522,-15.166144
6,2018Q3,2.251273,0.75516
7,2018Q4,0.483783,-10.178698


In [22]:
# Average of the per-period portfolio results stored in `resumen`.
resumen['Rentabilidad'].mean()

0.9855384619575344

In [23]:
# Average of the 'Rent_indice' column over the same periods, for comparison
# with the portfolio average.
resumen['Rent_indice'].mean()

0.061741332757341505

In [24]:
# Per-position detail of the last period processed by the backtest loop
# (res_cartera is overwritten on every iteration, so only the final period
# remains).
res_cartera

Unnamed: 0,Prediction,Price_d,Price_d+180,Resultado
844,Peor,67.18,59.85,10.910985
813,Peor,14.28,16.02,-12.184874
931,Peor,39.75,32.57,18.062893
85,Peor,47.55,39.2,17.560463
785,Peor,64.13,59.33,7.484797
58,Peor,93.71,82.23,12.25056
725,Peor,45.78,43.27,5.482744
901,Mejor,46.78,43.04,-7.99487
174,Mejor,25.83,24.57,-4.878049
233,Mejor,11.32,9.99,-11.749117
