# **Exploratory Data Analysis: Sectors**

## **Initial Setup**

### Install Packages

In [None]:
%pip install pandas -q
%pip install plotly -q

### Import libs

In [1]:
import os
import itertools
import pandas as pd
from pathlib import Path
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as sp

### Pandas Config

In [2]:
# Lift pandas' display limits so wide/long frames are rendered without truncation.
for option_name in ('display.max_columns', 'display.max_rows'):
    pd.set_option(option_name, None)

### Create a default file path

In [3]:
# Directory holding the book datasets: <three levels above the working directory>/data/book.
project_root = Path.cwd().parent.parent.parent
file_path_book = str(project_root / "data" / "book")

## **Fundamentals**

#### Load data

In [4]:
# Read the fundamentals dataset from the book data directory and preview its first rows.
fundamentals_csv = file_path_book + "/fundamentals_book.csv"
df_fundamentals_book = pd.read_csv(fundamentals_csv)
df_fundamentals_book.head(5)

Unnamed: 0,ticker,long_name,sector,industry,market_cap,enterprise_value,total_revenue,profit_margins,operating_margins,dividend_rate,beta,ebitda,trailing_pe,forward_pe,volume,average_volume,fifty_two_week_low,fifty_two_week_high,price_to_sales_trailing_12_months,fifty_day_average,two_hundred_day_average,trailing_annual_dividend_rate,trailing_annual_dividend_yield,book_value,price_to_book,total_cash,total_cash_per_share,total_debt,earnings_quarterly_growth,revenue_growth,gross_margins,ebitda_margins,return_on_assets,return_on_equity,gross_profits,total_assets_approx,asset_turnover,earnings_growth_rate,dividend_payout_ratio,equity,debt_to_equity,roi,roce
0,ABCB4.SA,Banco ABC Brasil S.A.,Financial Services,Banks - Regional,4265434000.0,14773390000.0,1941779000.0,0.41576,0.38826,1.56,0.679,0.0,4.069768,4.706601,92300.0,747165.0,15.85,21.99,2.196663,19.3382,18.14667,1.55,0.080687,24.518,0.785138,7774306000.0,35.162,18298460000.0,0.001,0.003,0.0,0.0,0.0153,0.1568,1973086000.0,7774306000.0,0.249769,0.1,155000.0,-10524160000.0,-1.73871,0.131438,0.0
1,AGRO3.SA,BrasilAgro - Companhia Brasileira de Proprieda...,Consumer Defensive,Farm Products,2466480000.0,2912933000.0,1249437000.0,0.21493,0.25031,3.21,0.432,264892000.0,9.450382,6.332481,298100.0,666692.0,22.29,32.71,1.974073,27.0106,25.58635,3.24,0.132029,22.237,1.11346,383837000.0,3.885,872075000.0,6.801,0.671,0.25252,0.21201,0.03839,0.1217,315504000.0,383837000.0,3.255124,680.1,47.640053,-488238000.0,-1.786168,0.428927,0.079343
2,RAIL3.SA,Rumo S.A.,Industrials,Railroads,42288820000.0,55243050000.0,10317460000.0,0.07639,0.33544,0.07,0.227,4522541000.0,54.309525,21.72381,5733400.0,14644522.0,16.21,24.44,4.098764,22.5852,20.95235,0.066,0.002993,8.334,2.736981,7656040000.0,4.132,21843200000.0,3.935,0.121,0.34493,0.43834,0.04252,0.05163,3146360000.0,7656040000.0,1.347623,393.5,1.677255,-14187160000.0,-1.539646,0.186765,0.070519
3,ALPA3.SA,Alpargatas S.A.,Consumer Cyclical,Footwear & Accessories,5309793000.0,6482982000.0,4022153000.0,-0.05671,-0.06434,0.4,0.571,-198000.0,0.0,0.0,1100.0,3953.0,7.27,17.8,1.320137,8.7146,9.6354,0.0,0.0,7.867,1.008008,414288000.0,0.614,1550341000.0,0.0,-0.127,0.43246,-5e-05,-0.0091,-0.04153,1968303000.0,414288000.0,9.708591,0.0,0.0,-1136053000.0,-1.364673,0.620417,-2.9e-05
4,ALPA4.SA,Alpargatas S.A.,Consumer Cyclical,Footwear & Accessories,5350758000.0,6395236000.0,4022153000.0,-0.05671,-0.06434,0.43,0.571,-198000.0,0.0,14.555555,1132100.0,5605825.0,6.81,22.51,1.330322,8.3228,9.2729,0.0,0.0,7.867,0.99911,414288000.0,0.614,1550341000.0,0.0,-0.127,0.43246,-5e-05,-0.0091,-0.04153,1968303000.0,414288000.0,9.708591,0.0,0.0,-1136053000.0,-1.364673,0.62893,-2.9e-05


In [5]:
# Keep only the numeric columns for the statistical analysis below.
# "number" already matches every integer and float dtype, so the additional
# "int" / "float64" selectors were redundant.
# "Unnamed: 0" is the row index that was written out with the CSV, not a company
# metric; it is dropped so it does not leak into the correlations and plots built
# from this frame (errors="ignore" keeps the cell working if the column is absent).
df_fundamentals_numeric_cols = (
    df_fundamentals_book
    .select_dtypes(include="number")
    .drop(columns=["Unnamed: 0"], errors="ignore")
)
df_fundamentals_numeric_cols.head(5)

Unnamed: 0,market_cap,enterprise_value,total_revenue,profit_margins,operating_margins,dividend_rate,beta,ebitda,trailing_pe,forward_pe,volume,average_volume,fifty_two_week_low,fifty_two_week_high,price_to_sales_trailing_12_months,fifty_day_average,two_hundred_day_average,trailing_annual_dividend_rate,trailing_annual_dividend_yield,book_value,price_to_book,total_cash,total_cash_per_share,total_debt,earnings_quarterly_growth,revenue_growth,gross_margins,ebitda_margins,return_on_assets,return_on_equity,gross_profits,total_assets_approx,asset_turnover,earnings_growth_rate,dividend_payout_ratio,equity,debt_to_equity,roi,roce
0,4265434000.0,14773390000.0,1941779000.0,0.41576,0.38826,1.56,0.679,0.0,4.069768,4.706601,92300.0,747165.0,15.85,21.99,2.196663,19.3382,18.14667,1.55,0.080687,24.518,0.785138,7774306000.0,35.162,18298460000.0,0.001,0.003,0.0,0.0,0.0153,0.1568,1973086000.0,7774306000.0,0.249769,0.1,155000.0,-10524160000.0,-1.73871,0.131438,0.0
1,2466480000.0,2912933000.0,1249437000.0,0.21493,0.25031,3.21,0.432,264892000.0,9.450382,6.332481,298100.0,666692.0,22.29,32.71,1.974073,27.0106,25.58635,3.24,0.132029,22.237,1.11346,383837000.0,3.885,872075000.0,6.801,0.671,0.25252,0.21201,0.03839,0.1217,315504000.0,383837000.0,3.255124,680.1,47.640053,-488238000.0,-1.786168,0.428927,0.079343
2,42288820000.0,55243050000.0,10317460000.0,0.07639,0.33544,0.07,0.227,4522541000.0,54.309525,21.72381,5733400.0,14644522.0,16.21,24.44,4.098764,22.5852,20.95235,0.066,0.002993,8.334,2.736981,7656040000.0,4.132,21843200000.0,3.935,0.121,0.34493,0.43834,0.04252,0.05163,3146360000.0,7656040000.0,1.347623,393.5,1.677255,-14187160000.0,-1.539646,0.186765,0.070519
3,5309793000.0,6482982000.0,4022153000.0,-0.05671,-0.06434,0.4,0.571,-198000.0,0.0,0.0,1100.0,3953.0,7.27,17.8,1.320137,8.7146,9.6354,0.0,0.0,7.867,1.008008,414288000.0,0.614,1550341000.0,0.0,-0.127,0.43246,-5e-05,-0.0091,-0.04153,1968303000.0,414288000.0,9.708591,0.0,0.0,-1136053000.0,-1.364673,0.620417,-2.9e-05
4,5350758000.0,6395236000.0,4022153000.0,-0.05671,-0.06434,0.43,0.571,-198000.0,0.0,14.555555,1132100.0,5605825.0,6.81,22.51,1.330322,8.3228,9.2729,0.0,0.0,7.867,0.99911,414288000.0,0.614,1550341000.0,0.0,-0.127,0.43246,-5e-05,-0.0091,-0.04153,1968303000.0,414288000.0,9.708591,0.0,0.0,-1136053000.0,-1.364673,0.62893,-2.9e-05


### Dataset Infos

In [None]:
# (rows, columns) of the full fundamentals dataset.
df_fundamentals_book.shape

In [None]:
# Column dtypes and non-null counts.
df_fundamentals_book.info()

In [None]:
# Summary statistics of the dataset's columns.
df_fundamentals_book.describe()

In [None]:
# Number of distinct values in each column.
df_fundamentals_book.nunique()

#### Correlations

In [None]:
# Use the numeric-only frame as the input of the correlation analysis and list its columns.
# Note: this binds a second name to the same frame; no copy is made.
fundamentals_corr = df_fundamentals_numeric_cols
fundamentals_corr.columns

In [None]:
# Pairwise correlation matrix of the numeric columns.
# It is computed straight from the numeric frame rather than by reassigning
# `fundamentals_corr` from itself: the self-referential form was not idempotent, so
# running the cell a second time produced the correlation of the correlation matrix.
fundamentals_corr = df_fundamentals_numeric_cols.corr()
fundamentals_corr.head()

In [None]:
# Three-stop teal gradient used to colour the correlation heatmap.
color_scale = [
    [0, 'rgb(150, 245, 231)'],    # lightest shade
    [0.5, 'rgb(100, 195, 181)'],  # reference shade
    [1, 'rgb(50, 145, 131)'],     # darkest shade
]

# Annotated heatmap of the correlation matrix.
heatmap_options = dict(
    text_auto=True,
    aspect="auto",
    width=3000,
    height=1000,
    template="plotly_dark",
    title="Mapa de Calor: Correlação entre as variaveis",
    color_continuous_scale=color_scale,
)
fig = px.imshow(fundamentals_corr, **heatmap_options)
fig.show()

##### Dispersion Matrix

In [None]:
# Pairwise scatter plots of the raw numeric columns.
# The matrix has to be built from the observations themselves: passing the
# correlation matrix (as before) plotted correlation coefficients against each
# other instead of the companies' actual values.
fig = px.scatter_matrix(
    df_fundamentals_numeric_cols,
    dimensions=df_fundamentals_numeric_cols.columns,
    title='Gráficos de Dispersão para Todas as Colunas',
    template="plotly_dark",
)
fig.update_traces(marker=dict(color='rgb(100, 195, 181)'))

fig.update_layout(width=4000, height=8000, grid=dict(xgap=0.1, ygap=0.1))

fig.show()


##### <font color="yellow"> **Strong Positive Correlations:** </font> 

**trailing_annual_dividend_rate vs. trailing_annual_dividend_yield**: Correlation of **0.998640**.

* Reason: Both variables are related to annual dividend metrics, with an increase in the dividend rate leading to a higher dividend yield.

* Effect: The high correlation reflects this strong linear relationship.

**dividend_rate vs. trailing_annual_dividend_yield**: Correlation of **0.994015**.

* Reason: Increases in the dividend rate (dividend_rate) tend to result in a higher dividend yield (trailing_annual_dividend_yield).

* Effect: The positive correlation reflects this linear relationship.

**dividend_rate vs. trailing_annual_dividend_rate**: Correlation of **0.994015**.

* Reason: Both variables are directly related to dividend rates.

* Effect: The strong correlation indicates that an increase in the dividend rate (dividend_rate) is directly associated with an increase in the trailing annual dividend rate.

**fifty_two_week_low vs. fifty_two_week_high**: Correlation of **0.988344**.

* Reason: Both variables represent the extremes of a 52-week price range, with an increase in the minimum price affecting the maximum price.

* Effect: The high correlation reflects this significant positive linear relationship.

**volume vs. fifty_two_week_low**: Correlation of **0.983632**.

* Reason: Increases in trading volume can influence the 52-week low price.

* Effect: The positive correlation reflects the relationship between an increase in trading volume and an increase in the low price.

**total_revenue vs. gross_profits**: Correlation of **0.982982**.

* Reason: Total revenue (total_revenue) and gross profits are directly related to a company's financial performance.

* Effect: The strong correlation shows that an increase in revenue tends to result in higher gross profits.

**total_revenue vs. ebitda**: Correlation of **0.974159**.

* Reason: A company's total revenue (total_revenue) is linked to its earnings before interest, taxes, depreciation, and amortization (EBITDA).

* Effect: The positive correlation reflects the relationship between an increase in revenue and higher EBITDA.

**fifty_two_week_high vs. fifty_day_average**: Correlation of **0.968563**.

* Reason: The 52-week high price and the 50-day average price are related to the recent performance of an asset.

* Effect: The positive correlation indicates that an increase in the 50-day average price can affect the 52-week high price.

**market_cap vs. total_revenue**: Correlation of **0.966357**.

* Reason: A company's market capitalization is related to its financial performance represented by total revenue.

* Effect: Larger companies in terms of market capitalization generally generate more revenue, justifying the positive correlation.

**market_cap vs. enterprise_value**: Correlation of **0.963003**.


 * Reason: A company's market capitalization is related to its enterprise value, considering debt.


 * Effect: The positive correlation indicates that larger companies in terms of market capitalization may also have a higher enterprise value, considering debt.

**fifty_day_average vs. two_hundred_day_average**: Correlation of **0.961965**.


 * Reason: Both moving averages are often used in technical analysis to assess price trends.


 * Effect: The strong correlation suggests that changes in the 50-day average can affect the 200-day average.

**fifty_two_week_high vs. two_hundred_day_average**: Correlation of **0.950189**.


 * Reason: The 52-week high price and the 200-day average are significant indicators in technical analysis.


 * Effect: The correlation shows that the performance of the 52-week high price is linked to the 200-day average.

**ebitda vs. gross_profits**: Correlation of **0.939498**.


 * Reason: Earnings before interest, taxes, depreciation, and amortization (EBITDA) and gross profits are related to a company's profitability.


 * Effect: The positive correlation reflects that an increase in EBITDA generally results in higher gross profits.

**fifty_two_week_low vs. two_hundred_day_average**: Correlation of **0.935803**.


 * Reason: The 52-week low price and the 200-day average are used in technical analysis to identify support and resistance levels.


 * Effect: The positive correlation indicates that the 52-week low price is related to the 200-day average.

**fifty_two_week_low vs. fifty_day_average**: Correlation of **0.933126**.


 * Reason: Both variables are used to assess short-term price trends.


 * Effect: The positive correlation reflects the relationship between the 52-week low price and the 50-day average price.

**dividend_rate vs. trailing_annual_dividend_rate**: Correlation of **0.932857**.


 * Reason: Both variables are related to dividend rates.


 * Effect: The positive correlation suggests that an increase in the dividend rate (dividend_rate) is related to an increase in the trailing annual dividend rate.

**volume vs. fifty_two_week_high**: Correlation of **0.926501**.


 * Reason: Increased trading volume can influence the 52-week high price.


 * Effect: The positive correlation reflects the relationship between an increase in trading volume and an increase in the high price.

**market_cap vs. ebitda**: Correlation of **0.907940**.


 * Reason: A company's market capitalization is related to its earnings before interest, taxes, depreciation, and amortization (EBITDA).


 * Effect: The positive correlation shows that larger companies in terms of market capitalization may also have higher EBITDA.

**total_revenue vs. enterprise_value**: Correlation of **0.881551**.


 * Reason: A company's total revenue (total_revenue) is related to its enterprise value, considering debt.


 * Effect: The positive correlation reflects that companies with higher total revenue may also have a higher enterprise value, considering their financial obligations.

**volume vs. fifty_day_average**: Correlation of **0.852918**.


 * Reason: Increased trading volume can influence the 50-day average price.


 * Effect: The positive correlation shows that an increase in trading volume can affect the 50-day average price.

**average_volume vs. two_hundred_day_average**: Correlation of **0.808796**.


 * Reason: Average trading volume and the 200-day average are used in technical analysis to assess price trends.


 * Effect: The positive correlation suggests that average trading volume is related to the 200-day average.

**total_cash vs. gross_profits**: Correlation of **0.790580**.


 * Reason: Total cash and gross profits are related to a company's liquidity and profitability.


 * Effect: The positive correlation reflects that companies with more cash generally have higher gross profits.

**average_volume vs. fifty_day_average**: Correlation of **0.784489**.


 * Reason: Average trading volume and the 50-day average price are used in technical analysis.
 * Effect: The positive correlation suggests that average trading volume influences the 50-day average price.

**enterprise_value vs. ebitda**: Correlation of **0.775812**.


 * Reason: Enterprise value is related to earnings before interest, taxes, depreciation, and amortization (EBITDA).


 * Effect: The positive correlation indicates that enterprise value is linked to EBITDA.

**average_volume vs. fifty_two_week_low**: Correlation of **0.759226**.


 * Reason: Average trading volume and the 52-week low price are used in technical analysis to assess support and resistance.


 * Effect: The positive correlation suggests that average trading volume is related to the 52-week low price.

**gross_profits vs. gross_margins**: Correlation of **0.755418**.


 * Reason: Gross profits and gross profit margins are related to a company's operational efficiency.


 * Effect: The positive correlation reflects that an increase in gross profits generally results in higher gross profit margins.

**volume vs. two_hundred_day_average**: Correlation of **0.744661**.


 * Reason: Increased trading volume can influence the 200-day average.


 * Effect: The positive correlation suggests that trading volume affects the 200-day average.

**total_revenue vs. total_debt**: Correlation of **0.707458**.


 * Reason: A company's total revenue (total_revenue) is related to its total debt.


 * Effect: The positive correlation indicates that companies with higher total revenue may also have more total debt.

##### <font color="purple"> **Strong Negative Correlations:** </font> 

1. **enterprise_value vs. gross_margins**: Correlation of **-0.711904**.

* Reason: Higher gross margins can lead to lower enterprise values.

* Effect: The negative correlation indicates that companies with higher gross margins tend to have lower enterprise values.

2. **profit_margins vs. beta**: Correlation of **-0.779440**.

* Reason: Higher profit margins can lead to lower beta values.

* Effect: The negative correlation suggests that companies with higher profit margins tend to have lower beta values.

3. **operating_margins vs. total_cash_per_share**: Correlation of **-0.801788**.

* Reason: Companies with lower operating margins may have higher total cash per share.

* Effect: The negative correlation indicates that lower operating margins are associated with higher total cash per share.

4. **beta vs. profit_margins**: Correlation of **-0.779440**.

* Reason: Lower beta values can be associated with higher profit margins.

* Effect: The negative correlation suggests that companies with lower beta values tend to have higher profit margins.

5. **beta vs. fifty_two_week_low**: Correlation of **-0.717153**.

* Reason: Lower beta values are associated with higher 52-week low prices.

* Effect: The negative correlation indicates that companies with lower beta values tend to have higher 52-week low prices.

6. **beta vs. gross_margins**: Correlation of **-0.747430**.

* Reason: Companies with higher gross margins tend to have lower beta values.

* Effect: The negative correlation suggests that higher gross margins are associated with lower beta values.

7. **beta vs. return_on_equity**: Correlation of **-0.820595**.

* Reason: Higher return on equity is associated with lower beta values.

* Effect: The negative correlation indicates that companies with higher return on equity tend to have lower beta values.

8. **fifty_two_week_low vs. beta**: Correlation of **-0.717153**.

* Reason: Lower beta values are associated with higher 52-week low prices.
* Effect: The negative correlation suggests that companies with lower beta values tend to have higher 52-week low prices.

9. **total_cash vs. gross_margins**: Correlation of **-0.767081**.

* Reason: Companies with lower gross margins tend to have higher total cash.

* Effect: The negative correlation indicates that lower gross margins are associated with higher total cash.

10. **total_cash_per_share vs. operating_margins**: Correlation of **-0.801788**.


 * Reason: Companies with lower operating margins tend to have higher total cash per share.


 * Effect: The negative correlation indicates that lower operating margins are associated with higher total cash per share.

11. **total_debt vs. gross_margins**: Correlation of **-0.754986**.


 * Reason: Companies with higher gross margins tend to have lower total debt.


 * Effect: The negative correlation suggests that higher gross margins are associated with lower total debt.

12. **gross_margins vs. enterprise_value**: Correlation of **-0.711904**.


 * Reason: Higher gross margins can lead to lower enterprise values.


 * Effect: The negative correlation indicates that companies with higher gross margins tend to have lower enterprise values.

13. **gross_margins vs. beta**: Correlation of **-0.747430**.


 * Reason: Companies with higher gross margins tend to have lower beta values.


 * Effect: The negative correlation suggests that higher gross margins are associated with lower beta values.

14. **gross_margins vs. total_cash**: Correlation of **-0.767081**.


 * Reason: Companies with lower gross margins tend to have higher total cash.


 * Effect: The negative correlation indicates that lower gross margins are associated with higher total cash.

15. **gross_margins vs. total_debt**: Correlation of **-0.754986**.


 * Reason: Companies with higher gross margins tend to have lower total debt.


 * Effect: The negative correlation suggests that higher gross margins are associated with lower total debt.

16. **return_on_equity vs. beta**: Correlation of **-0.820595**.


 * Reason: Higher return on equity is associated with lower beta values.


 * Effect: The negative correlation indicates that companies with higher return on equity tend to have lower beta values.

#### Outliers

In [None]:
# Box plot of every float64 column, one column per cell of a fixed 5 x 8 grid,
# to expose outliers.
df = df_fundamentals_book.select_dtypes(include=["float64"])
grid_rows, grid_cols = 5, 8
fig = sp.make_subplots(rows=grid_rows, cols=grid_cols, subplot_titles=list(df.columns), shared_yaxes=True, horizontal_spacing=0.01, vertical_spacing=0.1)

# Walk the columns by flat position and convert it to grid coordinates, so that
# each column lands in the cell whose title it carries. The previous lookup
# `df.columns[col]` ignored the row: it skipped the first column and redrew
# columns 1..8 on every row. Columns beyond the grid capacity are not drawn.
for position, col_name in enumerate(df.columns[:grid_rows * grid_cols]):

    row_offset, col_offset = divmod(position, grid_cols)

    trace = go.Box(y=df[col_name], name=col_name, marker=dict(color='rgb(100, 195, 181)'))

    # plotly's subplot coordinates are 1-based.
    fig.add_trace(trace, row=row_offset + 1, col=col_offset + 1)

fig.update_layout(title_text="<b>Boxplot: Outliers", height=800, width=2500, template="plotly_dark")

fig.show()

##### Detailed View of Outliers by Column

**market_cap**: Outliers may indicate companies with market capitalizations significantly larger or smaller than the majority, reflecting the presence of market giants or very small companies.

**enterprise_value**: Outliers may suggest companies with exceptional enterprise values, possibly due to substantial debt or valuable assets.

**total_revenue**: Discrepant values may indicate revenues significantly above or below the average, representing extraordinary variations in financial performance.

**profit_margins and operating_margins**: Outliers in these columns may represent companies with exceptionally high or low profit or operating margins, reflecting notable financial efficiency or inefficiency.

**dividend_rate**: Outliers may indicate the presence or absence of dividends, with extremely high or low values reflecting unusual dividend payment strategies.

**beta**: Outliers may point to stocks with exceptionally high or low betas, reflecting notable volatility or stability relative to the market.

**ebitda**: Discrepant values may represent profits before interest, taxes, depreciation, and amortization significantly different from the average, suggesting extraordinary financial events.

**trailing_pe and forward_pe**: Outliers in these columns may indicate stocks with very high or low price-to-earnings ratios, reflecting possible market deviations.

**volume and average_volume**: Discrepant values may represent stocks traded in volumes much above or below the average, reflecting unusual investor interest.

**fifty_two_week_low and fifty_two_week_high**: Outliers in these columns may indicate stocks that have reached notable price extremes in the past 52 weeks, reflecting volatility or exceptional performance.

**price_to_sales_trailing_12_months**: Discrepant values may suggest stocks being notably evaluated in relation to sales, reflecting market distortions.

**fifty_day_average and two_hundred_day_average**: Outliers in these columns may indicate exceptionally high or low moving averages, reflecting unusual price trends.

**trailing_annual_dividend_rate and trailing_annual_dividend_yield**: Outliers may represent companies paying exceptionally high or low dividends compared to the market average.

**book_value**: Discrepant values may suggest companies with exceptional book values, reflecting the presence of significant assets or high liabilities.

**price_to_book**: Outliers may indicate stocks that are notably evaluated in relation to book value.

**total_cash and total_cash_per_share**: Outliers may represent companies with large cash reserves or substantially different cash values compared to the average.

**total_debt**: Discrepant values may suggest substantial debt or the absence of debt, reflecting different financial strategies.

**earnings_quarterly_growth and revenue_growth**: Outliers may indicate exceptional earnings or revenue growth rates, suggesting remarkable performance.

**gross_margins and ebitda_margins**: Discrepant values in these columns may represent significantly high or low gross or EBITDA margins, reflecting operational efficiency or notable inefficiency.

**return_on_assets and return_on_equity**: Outliers may indicate exceptional returns on assets or equity, reflecting remarkable financial performance.

**gross_profits and earnings_growth_rate**: Outliers may represent exceptional gross profits or earnings growth rates.

**dividend_payout_ratio and roi**: Discrepant values in these columns may indicate notable dividend payout policies or returns on investment.


##### Macro View of Outliers in Financial Data

**Company Size:** Outliers in market values, enterprise value, and revenue may reflect large conglomerates in diversified sectors or small startups.

**Financial Efficiency:** Outliers in profit margins and operating margins may indicate exceptionally efficient or inefficient companies in various sectors.

**Dividend Policies:** Outliers in dividend rates may represent distinct dividend payment strategies among companies in different sectors.

**Market Volatility:** Exceptional betas may suggest the unique volatility of stocks in specific sectors.

**Financial Events:** Outliers in metrics such as EBITDA may reflect non-recurring financial events, such as mergers, acquisitions, or restructurings.

**Investor Interest:** Stock volume and moving averages may be influenced by investor interest in specific sectors.

**Business Cycles:** Sectors with distinct economic cycles may lead to significant variations in financial performance.

**Debt Policies:** Different sectors have varied approaches to debt, resulting in varying debt values.

**Operational Performance:** Outliers in profit growth metrics, margins, and returns may reflect the unique performance of companies in different sectors.


#### Histograms and Distribution

In [None]:
# Histogram of every float64 column, one column per cell of a fixed 5 x 8 grid.
# `sp` and `go` are provided by the notebook's import cell, so they are not
# re-imported here.
df = df_fundamentals_book.select_dtypes(include=["float64"])
grid_rows, grid_cols = 5, 8
fig = sp.make_subplots(rows=grid_rows, cols=grid_cols, subplot_titles=list(df.columns), shared_yaxes=True, horizontal_spacing=0.01, vertical_spacing=0.1)

# Columns are placed row by row; any column beyond the grid capacity is skipped.
for position, col_name in enumerate(df.columns[:grid_rows * grid_cols]):
    row_offset, col_offset = divmod(position, grid_cols)
    trace = go.Histogram(x=df[col_name], name=col_name, marker=dict(color = 'rgb(100, 195, 181)'), showlegend=False)
    # plotly's subplot coordinates are 1-based.
    fig.add_trace(trace, row=row_offset + 1, col=col_offset + 1)

fig.update_layout(title_text="Histogram: Distribution", height=800, width=2500, template="plotly_dark")

fig.show()


**market_cap (Capitalização de Mercado):**

- Most companies have a market capitalization between **1.257716e+06** and **4.832232e+10**.

**enterprise_value (Valor Empresarial):**

- Most companies have an enterprise value between **-6.435227e+09** and **8.995235e+10**.

**total_revenue (Receita Total):**

- Most companies have total revenue between **-4.242400e+07** and **5.811812e+10**.

**profit_margins (Margens de Lucro):**

- Most companies have profit margins between **-2.162070** and **0.229886**.

**operating_margins (Margens Operacionais):**

- Most companies have operating margins close to **0**.

**dividend_rate (Taxa de Dividendos):**

- Most companies do not pay dividends.

**beta:**

- Most companies have a beta below **0.25**.

**ebitda:**

- Most companies have an EBITDA between **-1.011200e+09** and **2.910870e+11**.

**trailing_pe (P/E Ratio Atual):**

- Most companies have a P/E ratio between **0** and **22**.

**forward_pe (P/E Ratio Futuro):**

- Most companies have a future P/E ratio between **-10.142858** and **68.823524**.

**volume:**

- Most companies have trading volumes below **9450820.0**.

**average_volume:**

- Most companies have average trading volumes below **10634197.9**.

**fifty_two_week_low (Preço Mínimo das Ações em 52 Semanas):**

- Most companies have a 52-week low stock price between **0.100** and **24.295**.

**fifty_two_week_high (Preço Máximo das Ações em 52 Semanas):**

- Most companies have a 52-week high stock price between **1.570** and **6530.000**.

**price_to_sales_trailing_12_months (Price/Sales Ratio em 12 Meses):**

- Most companies have a Price/Sales ratio between **-1.871929** and **21.706719**.

**fifty_day_average (Média de 50 Dias das Ações):**

- Most companies have a 50-day average stock price below **71.79748**.

**two_hundred_day_average (Média de 200 Dias das Ações):**

- Most companies have a 200-day average stock price below **268.05739**.

**trailing_annual_dividend_rate (Taxa de Dividendos Anual Retroativa):**

- Most companies have a trailing annual dividend rate below **5.7786**.

**trailing_annual_dividend_yield (Yield de Dividendos Anual Retroativo):**

- Most companies have a trailing annual dividend yield below **0.026773**.

**book_value (Valor Contábil):**

- Most companies have a book value between **-3962.8900** and **21.3875**.

**price_to_book (Price/Book Ratio):**

- Most companies have a Price/Book ratio below **54.66667**.

**total_cash (Total de Caixa):**

- Most companies have total cash between **6.000** and **3.943251e+10**.

**total_cash_per_share (Total de Caixa por Ação):**

- Most companies have total cash per share below **54.2057**.

**total_debt (Dívida Total):**

- Most companies have total debt between **0** and **8.957765e+11**.

**earnings_quarterly_growth (Crescimento dos Lucros Trimestrais):**

- Most companies have quarterly earnings growth between **-0.9990** and **1.8895**.

**revenue_growth (Crescimento da Receita):**

- Most companies have revenue growth between **-0.9830** and **3.6830**.

**gross_margins (Margens Brutas):**

- Most companies have gross margins close to **0**.

**ebitda_margins (Margens EBITDA):**

- Most companies have EBITDA margins close to **0**.

**return_on_assets (ROA):**

- Most companies have a positive **ROA**.

**return_on_equity (ROE):**

- Most companies have a positive **ROE**.

**gross_profits (Lucros Brutos):**

- Most companies have gross profits between **-4.981650e+08** and **3.296165e+11**.

**earnings_growth_rate (Taxa de Crescimento de Ganhos):**

- Most companies have an earnings growth rate between **-99.90** and **2788.60**.

**dividend_payout_ratio (Taxa de Pagamento de Dividendos):**

- Most companies have a dividend payout ratio close to **0**.

**roi (Return on Investment):**

- Most companies have an **ROI** between **-8.232338** and **34.366094**.


### Sector Analysis

#### Quantidade de empresas por setor

In [None]:
# Number of companies in each sector, smallest sector first.
sector_column = df_fundamentals_book['sector']
sector_counts = sector_column.value_counts(ascending=True)
sector_counts

In [None]:
# Bar chart of the company count per sector, with the count written inside each bar.
bar_color = 'rgb(100, 195, 181)'
fig = px.bar(
    x=sector_counts.index,
    y=sector_counts.values,
    title="Número de Empresas por Setor",
    color_discrete_sequence=[bar_color] * len(sector_counts),
    template="plotly_dark",
    hover_name=sector_counts.index,
)
count_labels = sector_counts.values.astype(str)
fig.update_traces(text=count_labels, textposition="inside", textfont=dict(color="white"))
fig.update_layout(xaxis_title="Setor", yaxis_title="Número de Empresas")
fig.show()

#### Analise de Outliers

In [None]:
# Box plot of every numeric column, three subplots per row; hovering a point
# shows its value together with the company's long name.
subplot_titles = [str(col) for col in df_fundamentals_numeric_cols.columns]

# Size the grid from the number of COLUMNS. len() of a DataFrame is its row count,
# so the previous computation sized the grid by the number of companies instead
# (far too many empty rows, or too few rows when the dataset is short).
num_columns = len(subplot_titles)
num_rows = max(1, (num_columns + 2) // 3)  # ceiling of num_columns / 3, at least one row

fig = sp.make_subplots(rows=num_rows, cols=3, subplot_titles=subplot_titles)

for i, column in enumerate(df_fundamentals_numeric_cols.columns, start=1):

    # 1-based grid coordinates of the i-th column.
    row = (i - 1) // 3 + 1
    col = (i - 1) % 3 + 1

    trace = go.Box(y=df_fundamentals_book[column], name=column, marker_color='lightseagreen', boxpoints='outliers', jitter=0.7, hoverinfo='y+text',text=df_fundamentals_book['long_name'])

    fig.add_trace(trace, row=row, col=col)

fig.update_layout(title_text="Boxplot de Variáveis Numéricas por Setor", height=300*num_rows, showlegend=False, template="plotly_dark")
fig.show()


#### Distribuição

In [None]:
# Histogram of every numeric column, three subplots per row.
subplot_titles = [str(col) for col in df_fundamentals_numeric_cols.columns]
columns_per_row = 3

# Size the grid from the number of COLUMNS. len() of a DataFrame is its row count,
# so the previous computation sized the grid by the number of companies instead.
num_numeric_columns = len(subplot_titles)
num_rows = max(1, (num_numeric_columns + columns_per_row - 1) // columns_per_row)  # ceiling division

fig = sp.make_subplots(rows=num_rows, cols=columns_per_row, subplot_titles=subplot_titles)

for i, column in enumerate(df_fundamentals_numeric_cols.columns):

    # 1-based grid coordinates of the i-th column.
    row = i // columns_per_row + 1
    col = i % columns_per_row + 1

    fig.add_trace(go.Histogram(x=df_fundamentals_book[column],name=column, marker_color='lightseagreen'),row=row,col=col)

fig.update_layout(title = 'Histogramas por setor', height=300 * num_rows, showlegend=False, template="plotly_dark")
fig.show()

#### Dispersão entre pares de colunas

In [None]:
# Scatter plot for every unordered pair of numeric columns, laid out two per row.
num_cols = 2
combinations = list(itertools.combinations(df_fundamentals_numeric_cols.columns, 2))
num_rows = -(-len(combinations) // num_cols)  # ceiling division

pair_titles = [f'{col1} vs {col2}' for col1, col2 in combinations]
fig = sp.make_subplots(rows=num_rows, cols=num_cols, subplot_titles=pair_titles)

for pair_index, (col1, col2) in enumerate(combinations):
    # Flat pair index -> 0-based grid offsets; plotly expects 1-based coordinates.
    row_offset, col_offset = divmod(pair_index, num_cols)

    # Build the pair's scatter with plotly express and move its traces into the grid.
    pair_fig = px.scatter(df_fundamentals_numeric_cols, x=col1, y=col2, template="plotly_dark")
    for pair_trace in pair_fig['data']:
        fig.add_trace(pair_trace, row=row_offset + 1, col=col_offset + 1)

fig.update_layout(title='Matriz de Dispersão', height=200 * num_rows, showlegend=False, template="plotly_dark")
fig.show()

#### Market Cap

In [None]:
# Mean of every numeric column per sector, ordered by ascending market cap
# with ties broken by descending total revenue.
sector_means = (
    df_fundamentals_numeric_cols
    .groupby(df_fundamentals_book['sector'])
    .mean()
    .sort_values(by=['market_cap', 'total_revenue'], ascending=[True, False])
)
sector_means.head(5)

In [None]:
# Bar labels: the sector's mean market cap expressed in billions.
formatted_market_cap = (sector_means['market_cap'] / 1e9).map('{:.2f}B'.format).tolist()

fig = px.bar(
    sector_means,
    x=sector_means.index,
    y='market_cap',
    title='Market Cap Médio por Setor',
    color_discrete_sequence=['rgb(100, 195, 181)'],
    template="plotly_dark",
    hover_name=sector_means.index,
)
fig.update_traces(
    text=formatted_market_cap,
    textposition="outside",
    textfont={"color": "white"},
)
fig.update_layout(xaxis_title="Setor", yaxis_title="Market Cap Médio", height=520)
fig.show()

#### Lucratividade Por Setor

In [None]:
# Mean of every numeric column per sector, ordered by operating margin.
# `sector_averages` is reused by the net-margin and ROCE cells that follow.
sector_averages = (
    df_fundamentals_numeric_cols
    .groupby(df_fundamentals_book['sector'])
    .mean()
    .sort_values(by=["operating_margins"])
)

fig_op_margin = px.bar(x=sector_averages.index, y=sector_averages['operating_margins'], color_discrete_sequence=['rgb(100, 195, 181)'], title='Margem Operacional', hover_name=sector_averages.index)
# Margins are stored as fractions. Let plotly render them as whole percentages:
# computing `round(2) * 100` by hand leaks float artefacts such as
# 7.000000000000001 into the bar labels.
fig_op_margin.update_traces(texttemplate='%{y:.0%}', textposition="outside", textfont=dict(color="white"))
# The axis is titled "(%)", so its ticks are formatted as percentages as well.
fig_op_margin.update_layout(title='Margem Operacional Média por Setor', xaxis_title='Setor', yaxis_title='Margem Operacional (%)', yaxis_tickformat='.0%', template="plotly_dark")
fig_op_margin.show()

In [None]:
# Average net (profit) margin per sector, using the `sector_averages` frame
# built in the operating-margin cell.
fig_net_margin = go.Figure()
fig_net_margin.add_trace(go.Bar(x=sector_averages.index, y=sector_averages['profit_margins'], name='Margem Líquida', marker_color='rgb(100, 195, 181)'))
# Margins are stored as fractions. Format the labels as percentages instead of
# printing the raw floats at full precision.
fig_net_margin.update_traces(texttemplate='%{y:.2%}', textposition="outside", textfont=dict(color="white"))
# The axis is titled "(%)", so its ticks are formatted as percentages as well.
fig_net_margin.update_layout(title='Margem Líquida Média por Setor', xaxis_title='Setor', yaxis_title='Margem Líquida (%)', yaxis_tickformat='.0%', template='plotly_dark')

fig_net_margin.show()

In [None]:
# Average ROCE per sector, using the `sector_averages` frame built in the
# operating-margin cell.
fig_roce = go.Figure()
# The axis is titled "ROCE (%)" while the sample rows hold ROCE as a fraction
# (e.g. 0.079), so labels and ticks are rendered as percentages.
# NOTE(review): confirm 'roce' is a fraction for every row of the dataset.
fig_roce.add_trace(go.Bar(x=sector_averages.index, y=sector_averages['roce'], name='ROCE', marker_color='rgb(100, 195, 181)', texttemplate='%{y:.2%}', textposition='auto', textfont=dict(color="white")))
fig_roce.update_layout(title='Retorno sobre o Capital Empregado (ROCE) por Setor', xaxis_title='Setor', yaxis_title='ROCE (%)', yaxis_tickformat='.0%', template='plotly_dark')
fig_roce.show()

#### Análise de Dívida por Setor

In [None]:
# Leverage per sector: summed total debt divided by summed EBITDA and by
# summed book value, one bar chart per ratio.
#
# NOTE(review): `df` is not created in this cell; it has to exist already when
# the cell runs. Every value written to it comes from `df_fundamentals_book`.
df['total_debt'] = pd.to_numeric(df_fundamentals_book['total_debt'], errors='coerce')
df['ebitda'] = pd.to_numeric(df_fundamentals_book['ebitda'], errors='coerce')
# Assuming 'book_value' represents shareholders' equity.
# NOTE(review): in the sample rows 'book_value' looks like a per-share figure
# (24.518 for a company with a multi-billion market cap) - confirm before
# reading the second chart as a true debt-to-equity ratio.
df['total_equity'] = pd.to_numeric(df_fundamentals_book['book_value'], errors='coerce')

# Aggregate once per sector instead of repeating the same groupby four times.
sector_totals = df.groupby(df_fundamentals_book['sector'])[['total_debt', 'ebitda', 'total_equity']].sum()
sector_debt_ebitda = sector_totals['total_debt'] / sector_totals['ebitda']
sector_debt_equity = sector_totals['total_debt'] / sector_totals['total_equity']

fig = sp.make_subplots(rows=2, cols=1, subplot_titles=("Dívida/EBITDA por Setor", "Dívida/Capital Próprio por Setor"))

# Both series are dimensionless ratios, so they are labelled as multiples
# ("3.20x") rather than as currency amounts.
fig.add_trace(go.Bar(x=sector_debt_ebitda.index, y=sector_debt_ebitda, name='Dívida/EBITDA', marker_color='rgb(100, 195, 181)', text=["{:,.2f}x".format(val) for val in sector_debt_ebitda], textposition='outside', textfont=dict(color="white")), row=1, col=1)
fig.add_trace(go.Bar(x=sector_debt_equity.index, y=sector_debt_equity, name='Dívida/Capital Próprio', marker_color='rgb(100, 195, 181)', text=["{:,.2f}x".format(val) for val in sector_debt_equity], textposition='outside', textfont=dict(color="white")), row=2, col=1)

fig.update_layout(title_text="Análise de Dívida por Setor", height=1100, showlegend=False, template='plotly_dark')

fig.show()


#### Distribuição de Dividendos por Setor

In [None]:
# Average dividend rate (currency per share) and trailing dividend yield per
# sector, one bar chart for each.
sector_dividend_rate_avg = df_fundamentals_book.groupby('sector')['dividend_rate'].mean()
sector_dividend_yield_avg = df_fundamentals_book.groupby('sector')['trailing_annual_dividend_yield'].mean()

fig = sp.make_subplots(rows=2, cols=1, subplot_titles=("Taxa de Dividendos Média por Setor", "Rendimento de Dividendos Médio por Setor"))

fig.add_trace(go.Bar(x=sector_dividend_rate_avg.index, y=sector_dividend_rate_avg, name='Taxa de Dividendos', marker_color='rgb(100, 195, 181)', text=sector_dividend_rate_avg.apply(lambda x: f'R${x:,.2f}'), textposition='outside', textfont=dict(color="white")), row=1, col=1)
# The yield is stored as a fraction (0.0807 means 8.07%), so it needs the
# percent format type; ".2f" followed by a literal "%" would print "0.08%".
fig.add_trace(go.Bar(x=sector_dividend_yield_avg.index, y=sector_dividend_yield_avg, name='Rendimento de Dividendos', marker_color='rgb(100, 195, 181)', text=sector_dividend_yield_avg.apply(lambda x: f'{x:.2%}'), textposition='outside', textfont=dict(color="white")),row=2, col=1)

fig.update_layout(title_text="Distribuição de Dividendos por Setor", height=1000, showlegend=False, template="plotly_dark")

fig.show()


#### Valuation por Setor

In [None]:
# Average trailing P/E and price-to-book per sector, each sorted ascending
# and drawn in its own subplot row.
sector_pe_avg = df_fundamentals_book.groupby('sector')['trailing_pe'].mean().sort_values()
sector_pb_avg = df_fundamentals_book.groupby('sector')['price_to_book'].mean().sort_values()

fig = sp.make_subplots(rows=2, cols=1, subplot_titles=("P/E Ratio Médio por Setor", "P/B Ratio Médio por Setor"))

valuation_panels = [(sector_pe_avg, 'P/E Ratio'), (sector_pb_avg, 'P/B Ratio')]
for panel_row, (ratio_by_sector, trace_name) in enumerate(valuation_panels, start=1):
    fig.add_trace(
        go.Bar(
            x=ratio_by_sector.index,
            y=ratio_by_sector,
            name=trace_name,
            marker_color='rgb(100, 195, 181)',
            # Multiples are labelled with two decimals and an "x" suffix.
            text=ratio_by_sector.apply(lambda x: f"{x:.2f}x"),
            textposition='outside',
            textfont=dict(color="white"),
        ),
        row=panel_row,
        col=1,
    )

fig.update_layout(title_text="Valuation por Setor: P/E e P/B Ratios", height=1100, showlegend=False, template="plotly_dark")
fig.show()

#### Eficiência por Setor:

In [None]:
# Efficiency per sector: mean return on assets (ROA) and mean asset turnover,
# one bar chart for each, both ordered by descending ROA.
#
# NOTE(review): the four assignments below write to `df`, which is not created
# in this cell, and the aggregation that follows reads `df_fundamentals_book`
# instead - so they do not influence either chart. Kept to preserve the
# cell's side effects; confirm whether they are still needed.
df['return_on_assets'] = pd.to_numeric(df_fundamentals_book['return_on_assets'], errors='coerce')
df['total_revenue'] = pd.to_numeric(df_fundamentals_book['total_revenue'], errors='coerce')
df['total_assets'] = pd.to_numeric(df_fundamentals_book['total_assets_approx'], errors='coerce')
df['asset_turnover'] = df_fundamentals_book['total_revenue'] / df_fundamentals_book['total_assets_approx']

sector_efficiency = (
    df_fundamentals_book
    .groupby('sector')
    .agg({'return_on_assets': 'mean', 'asset_turnover': 'mean'})
    .reset_index()
    .sort_values(by='return_on_assets', ascending=False)
)

fig1 = go.Figure()
# ROA is stored as a fraction (0.0153 means 1.53%), so it needs the percent
# format type; ".2f" followed by a literal "%" would print "0.02%".
fig1.add_trace(go.Bar(x=sector_efficiency['sector'], y=sector_efficiency['return_on_assets'], name='ROA', marker_color='lightseagreen', text=sector_efficiency['return_on_assets'].apply(lambda x: f"{x:.2%}"), textposition='outside'))
fig1.update_layout(title='Eficiência por Setor: ROA',xaxis_tickangle=-45, barmode='group', template='plotly_dark', height=600)
fig1.show()

fig2 = go.Figure()
# Asset turnover is a plain ratio, labelled with two decimals.
fig2.add_trace(go.Bar(x=sector_efficiency['sector'], y=sector_efficiency['asset_turnover'], name='Giro dos Ativos', marker_color='lightseagreen', text=sector_efficiency['asset_turnover'].apply(lambda x: f"{x:.2f}"), textposition='outside'))
fig2.update_layout(title='Eficiência por Setor: Giro dos Ativos', xaxis_tickangle=-45, barmode='group', template='plotly_dark', height=600)
fig2.show()

#### Análise de Risco por Setor

In [None]:
# Standard deviation of ROA within each sector, largest first.
sector_volatility = (
    df_fundamentals_book
    .groupby('sector')['return_on_assets']
    .std()
    .reset_index()
    .sort_values(by='return_on_assets', ascending=False)
)

roa_std_labels = sector_volatility['return_on_assets'].apply(lambda x: f"{x:.2f}")

fig1 = go.Figure()
fig1.add_trace(
    go.Bar(
        x=sector_volatility['sector'],
        y=sector_volatility['return_on_assets'],
        name='Volatilidade',
        marker_color='lightseagreen',
        text=roa_std_labels,
        textposition='outside',
    )
)
fig1.update_layout(title='Volatilidade (Desvio Padrão) do ROA por Setor', xaxis_tickangle=-45, template='plotly_dark', height=550)
fig1.show()


In [None]:

# Mean of the 'debt_to_equity' column per sector, largest first.
sector_debt_equity = (
    df_fundamentals_book
    .groupby('sector')['debt_to_equity']
    .mean()
    .reset_index()
    .sort_values(by='debt_to_equity', ascending=False)
)

fig = go.Figure()
fig.add_trace(
    go.Bar(
        x=sector_debt_equity['sector'],
        y=sector_debt_equity['debt_to_equity'],
        name='Dívida/Patrimônio',
        marker_color='lightseagreen',
    )
)
fig.update_layout(title='Relação Dívida/Patrimônio por Setor', xaxis_tickangle=-45, template='plotly_dark')
fig.show()


#### Ranking dos setores

In [None]:
# Per-company score: row-wise mean of the three profitability columns.
# The score is stored as a new column on `df_fundamentals_book`.
score_inputs = ['profit_margins', 'operating_margins', 'return_on_equity']
df_fundamentals_book['investment_score'] = df_fundamentals_book[score_inputs].mean(axis=1)

# Average the score per sector and rank sectors from highest to lowest.
investment_score_mean = (
    df_fundamentals_book
    .groupby('sector')['investment_score']
    .mean()
    .sort_values(ascending=False)
    .reset_index()
)

fig = px.bar(
    investment_score_mean,
    x='sector',
    y='investment_score',
    title='Escore de Investimento por Setor',
    labels={'investment_score': 'Escore de Investimento (%)', 'sector': 'Setor'},
    template='plotly_dark',
    height=550,
)
# Labels are rendered by plotly as percentages with two decimals.
fig.update_traces(texttemplate='%{y:.2%}', textposition='outside', marker_color='lightseagreen')
fig.show()