In [84]:
import yahooquery as yq
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [8]:
ibov_comp = pd.read_csv('IBOVDia_10-07-23.csv', sep=';', encoding='latin-1')
ibov_comp = ibov_comp.iloc[0:86, 0:2]
ibov_comp = ibov_comp['Código'] + '.SA'

rj_emp = pd.read_csv('emprj.csv', sep=';', encoding='latin-1')
rj_emp = rj_emp.iloc[:,0:1] + '.SA'

rj_emp, ibov_comp = rj_emp.values.tolist() , ibov_comp.values.tolist()

empresas = ibov_comp + rj_emp

empresas = pd.Series(empresas).drop_duplicates().tolist()


In [29]:
#TotalAssets StockholdersEquity TotalLiabilitiesNetMinorityInterest Balance Sheet
#Netincome TotalReveneu Income Statement

df = []
for ticker in empresas:
    try:
        data = yq.Ticker(ticker).balance_sheet()
        TotalAssets, StockHoldersEquity = data['TotalAssets'].iloc[-1], data['StockholdersEquity'].iloc[-1]
        data2 = yq.Ticker(ticker).income_statement()
        NetIncome, TotalRevenue = data2['NetIncome'].iloc[-1], data2['TotalRevenue'].iloc[-1]
        df.append([ticker, TotalAssets, StockHoldersEquity, NetIncome, TotalRevenue])
    except:
        print('erro', ticker)
        
#dict to dataframe

df = pd.DataFrame(df, columns=['ticker', 'TotalAssets', 'StockHoldersEquity', 'NetIncome', 'TotalRevenue'])
df['solvencia'] = df['ticker'].apply(lambda x : 0 if x in rj_emp else 1)


erro RAIZ4.SA


In [None]:
df_logit = df.copy()
df_logit['ROA'] = df_logit['NetIncome']/df_logit['TotalAssets']
df_logit['GA'] = df_logit['TotalRevenue']/df_logit['TotalAssets']
df_logit['EG'] = df_logit['StockHoldersEquity']/df_logit['TotalAssets']
df_logit = df_logit[['ticker', 'solvencia', 'ROA', 'GA', 'EG']]

df_logit.dropna(inplace=True)
df_logit

In [90]:

previsores = df_logit.iloc[:, 2:5].values
classe = df_logit.iloc[:, 1].values

X_train, X_test, y_train, y_test = train_test_split(previsores, classe, test_size=0.3, random_state=0)

logreg = LogisticRegression()

parameters = {'C': [0.01, 0.1, 1, 10, 100, 1000], 'penalty': ['l1', 'l2'], 'solver': ['liblinear', 'saga']}

grid_search = GridSearchCV(estimator=logreg, param_grid=parameters, scoring='accuracy', cv=10, n_jobs=-1)
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_

y_pred = best_model.predict(X_test)

accuracy = best_model.score(X_test, y_test)
print('Accuracy of logistic regression classifier on test set: {:.2f}'.format(accuracy))

# Obtém os parâmetros estimados
params = best_model.coef_
intercept = best_model.intercept_
# Imprime os parâmetros estimados
for i, param in enumerate(params[0]):
    print(f'Parâmetro do recurso {i+1}: {param}')

print(intercept)


Accuracy of logistic regression classifier on test set: 0.84
Parâmetro do recurso 1: 0.4519605030771912
Parâmetro do recurso 2: 1.7497392491787216
Parâmetro do recurso 3: 3.45762964923258
[0.46746027]


In [103]:
df_logit['Pi'] = (1) / (1 + np.exp(-(intercept + params[0][0]*df_logit['ROA'] + params[0][1]*df_logit['GA'] + params[0][2]*df_logit['EG'])))
df_logit['solvencia'] = df_logit['solvencia'].astype('bool')

df_logit['pred'] = np.where(np.logical_and(df_logit['solvencia'] == True, df_logit['Pi'] > 0.5), 1, 0)
acerto = df_logit['pred'].mean()
print('A precisão do modelo foi de: ',acerto)

A precisão do modelo foi de:  0.7904761904761904
