# Risk Factors

Para evitar cálculos desnecessários, que aumentariam o risco de erros na validação e no tratamento dos dados de mercado, foi utilizado o conjunto de dados de fatores de risco fornecido pelo *NEFIN*: https://nefin.com.br/data/risk_factors.html. O conjunto de dados é dividido em 6 arquivos separados e cobre o período a partir de 2 de janeiro de 2001.

If `read_excel` raises an error when reading ".xls" files:

### Library

In [None]:
# Initial Imports:
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime
import warnings

warnings.filterwarnings("ignore")


# To run models:
import statsmodels.api as sm
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from joblib import dump, load

# Import Linear Regression Model from SKLearn:
from sklearn.linear_model import LinearRegression

# For visualizations:
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline


# Project files
from fico.evaluation import *
from fico.portfolio import *

### Data 

In [None]:
# Load the NEFIN risk-factor table, using its "date" column as the index.
factors_csv = "../data/risk_factors/factors.csv"
factors = pd.read_csv(factors_csv, index_col="date")

### Functions:

In [None]:
# Ticker analysed in this first section (same pattern as the VALE3 section
# further down the notebook).
ticker = "ITUB3"
stock = choose_stock(ticker)

In [None]:
# Preview the first 20 rows of the data returned by choose_stock.
stock.head(20)

In [None]:
# Run process_stock on the selected stock; its output is what gets merged with
# the risk factors in a later cell.
# NOTE(review): process_stock is not defined in this notebook — presumably it
# comes from the fico star-imports.
stock_prepared = process_stock(stock)

In [None]:
# Preview the first rows of the processed stock data.
stock_prepared.head()

In [None]:
# Show the column labels of the unprocessed stock data.
stock.columns

In [None]:
# Combine the processed stock data with the risk-factor table into one
# DataFrame using the merge_portifolio helper (the spelling is the helper's own).
# NOTE(review): whether this is a concatenation or an index join is decided
# inside merge_portifolio — confirm there.
combined_df = merge_portifolio(stock_prepared, factors)

In [None]:
# Inspect the rows whose "Returns" value is exactly zero.
zero_return_rows = combined_df["Returns"] == 0
combined_df.loc[zero_return_rows]

## Split Train / Test Method:


In order to preserve the temporal order of the data:

In [None]:
# Split the combined data into train/test features and targets with the
# split_data helper; close_test is later attached to the test frame as "Close".
# NOTE(review): rate=0.8 presumably sends the first 80% of rows to training —
# confirm in split_data.
X_train, X_test, y_train, y_test, close_test = split_data(combined_df, rate=0.8)

## Make predictions:

In [None]:
# Fit a linear regression (with intercept) on the training split, then
# predict the target for the test split.
lin_reg_model = LinearRegression(fit_intercept=True).fit(X_train, y_train)
predictions = lin_reg_model.predict(X_test)

In [None]:
# Turn the y_test Series into a one-column DataFrame so that the prediction,
# price and signal columns can be added to it in the next cell.
y_test = y_test.to_frame()

In [None]:
# Attach the model predictions and the close_test values to the test frame.
y_test["Predictions"] = predictions
y_test["Close"] = close_test

# "Buy Signal" is 1.0 on rows where the prediction is greater than the value in
# "Returns" on that same row, and 0.0 otherwise.
y_test["Buy Signal"] = np.where(y_test["Predictions"] > y_test["Returns"], 1.0, 0.0)

# Discard rows that contain missing values.
y_test = y_test.dropna()

y_test.head()

In [None]:
# Preview the first 10 rows of the test frame with its added columns.
y_test.head(10)

In [None]:
# Build the signals DataFrame from the test frame with generate_signals and
# show its first rows.
signals_df = generate_signals(y_test)
display(signals_df.head())

In [None]:
# Show the metrics table returned by algo_evaluation for the signals:
display(algo_evaluation(signals_df))
# Show the metrics for the algorithm vs. the buy-and-hold strategy:
display(algo_vs_underlying(signals_df))
# Build the trade evaluation table and show it:
trade_evaluation_df = trade_evaluation(signals_df)
display(trade_evaluation_df)

## ANOVA Table / Other Visualizations for 3-Factor Models:

### ATT

In [None]:
# To run models:
import statsmodels.api as sm
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from joblib import dump, load

In [None]:
# Target is the "Returns" column; regressors are all remaining columns
# except "Close".
y = combined_df["Returns"]
X = combined_df.drop(columns=["Returns", "Close"])

# statsmodels' OLS does not add an intercept on its own, so add a constant
# column of ones to the regressors.
X = sm.add_constant(X)

# Positional 80/20 split that keeps the row order.
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Fit ordinary least squares and print the summary table.
# NOTE(review): the model is estimated on the *test* slice; X_train / y_train
# are not used in this cell — confirm this is intended.
model = sm.OLS(y_test, X_test)
model_results = model.fit()
print(model_results.summary())

In [None]:
# Draw the partial regression plot grid for the fitted OLS results on a
# 12x8 figure.
partregress_canvas = plt.figure(figsize=(12, 8))
fig = sm.graphics.plot_partregress_grid(model_results, fig=partregress_canvas)
plt.show()

In [None]:
# Plot a 20-bin histogram of the "Profit/Loss" column of the trade evaluation table:
trade_evaluation_df["Profit/Loss"].hist(bins=20)

In [None]:
# Preview the first rows of the trade evaluation table.
trade_evaluation_df.head()

In [None]:
# Preview the last rows of the trade evaluation table.
trade_evaluation_df.tail()

In [None]:
# Plot the output of underlying_returns(signals_df) — the cumulative returns —
# on a 20x10 figure.
# NOTE(review): underlying_returns is not defined in this notebook; presumably
# it comes from the fico star-imports.
underlying_returns(signals_df).plot(figsize=(20, 10))

# VALE3


In [None]:
# Repeat the analysis for VALE3: select the ticker and preview its data.
ticker = "VALE3"
stock = choose_stock(ticker)
stock.head()

In [None]:
# Run process_stock on the VALE3 data and preview the result.
stock_prepared = process_stock(stock)
stock_prepared.head()

In [None]:
# Combine the processed stock data with the risk-factor table into one
# DataFrame using the merge_portifolio helper, then preview it.
combined_df = merge_portifolio(stock_prepared, factors)
combined_df.head()

In [None]:
# Split the combined data into train/test features and targets with the
# split_data helper; close_test is later attached to the test frame as "Close".
# NOTE(review): rate=0.8 presumably sends the first 80% of rows to training —
# confirm in split_data.
X_train, X_test, y_train, y_test, close_test = split_data(combined_df, rate=0.8)

In [None]:
# Fit a linear regression (with intercept) on the training split, then
# predict the target for the test split.
lin_reg_model = LinearRegression(fit_intercept=True).fit(X_train, y_train)
predictions = lin_reg_model.predict(X_test)

In [None]:
# Turn the y_test Series into a one-column DataFrame so that the prediction,
# price and signal columns can be added to it in the next cell.
y_test = y_test.to_frame()

In [None]:
# Attach the model predictions and the close_test values to the test frame.
y_test["Predictions"] = predictions
y_test["Close"] = close_test

# "Buy Signal" is 1.0 on rows where the prediction is greater than the value in
# "Returns" on that same row, and 0.0 otherwise.
y_test["Buy Signal"] = np.where(y_test["Predictions"] > y_test["Returns"], 1.0, 0.0)

# Discard rows that contain missing values.
y_test = y_test.dropna()

y_test.head()

In [None]:
# Build the signals DataFrame from the test frame with generate_signals:
signals_df = generate_signals(y_test)
# Show the metrics table returned by algo_evaluation for the signals:
display(algo_evaluation(signals_df))
# Show the metrics for the algorithm vs. the buy-and-hold strategy:
display(algo_vs_underlying(signals_df))
# Build the trade evaluation table and show it:
trade_evaluation_df = trade_evaluation(signals_df)
display(trade_evaluation_df)

In [None]:
# Target is the "Returns" column; regressors are all remaining columns
# except "Close".
y = combined_df["Returns"]
X = combined_df.drop(columns=["Returns", "Close"])

# statsmodels' OLS does not add an intercept on its own, so add a constant
# column of ones to the regressors.
X = sm.add_constant(X)

# Positional 80/20 split that keeps the row order.
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Fit ordinary least squares and print the summary table.
# NOTE(review): the model is estimated on the *test* slice; X_train / y_train
# are not used in this cell — confirm this is intended.
model = sm.OLS(y_test, X_test)
model_results = model.fit()
print(model_results.summary())

In [None]:
# Draw the partial regression plot grid for the fitted OLS results on a
# 12x8 figure.
partregress_canvas = plt.figure(figsize=(12, 8))
fig = sm.graphics.plot_partregress_grid(model_results, fig=partregress_canvas)
plt.show()
# Plot the cumulative returns produced by underlying_returns on a 20x10 figure.
underlying_returns(signals_df).plot(figsize=(20, 10))