# **Exploratory Data Analisys: Sectors**

## **Initial Setup**

### Install Packages

In [None]:
%pip install pandas -q
%pip install plotly -q

### Import libs

In [1]:
import os
import itertools
import pandas as pd
from pathlib import Path
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as sp

### Pandas Config

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

### Create a file path default

In [3]:
file_path_book = str(Path(os.getcwd()).parent.parent.parent / 'data/book')

## **Fundamentals**

#### Load data

In [4]:
df_fundamentals_book = pd.read_csv(file_path_book + '/fundamentals_book.csv')
df_fundamentals_book.head(2)

Unnamed: 0,ticker,long_name,sector,industry,market_cap,enterprise_value,total_revenue,profit_margins,operating_margins,dividend_rate,beta,ebitda,trailing_pe,forward_pe,volume,average_volume,fifty_two_week_low,fifty_two_week_high,price_to_sales_trailing_12_months,fifty_day_average,two_hundred_day_average,trailing_annual_dividend_rate,trailing_annual_dividend_yield,book_value,price_to_book,total_cash,total_cash_per_share,total_debt,earnings_quarterly_growth,revenue_growth,gross_margins,ebitda_margins,return_on_assets,return_on_equity,gross_profits,total_assets_approx,asset_turnover,earnings_growth_rate,dividend_payout_ratio,equity,debt_to_equity,roi,roce
0,ABCB4.SA,Banco ABC Brasil S.A.,Financial Services,Banks - Regional,4265434000.0,14773390000.0,1941779000.0,0.41576,0.38826,1.56,0.679,0.0,4.069768,4.706601,92300.0,747165.0,15.85,21.99,2.196663,19.3382,18.14667,1.55,0.080687,24.518,0.785138,7774306000.0,35.162,18298460000.0,0.001,0.003,0.0,0.0,0.0153,0.1568,1973086000.0,7774306000.0,0.249769,0.1,155000.0,-10524160000.0,-1.73871,0.131438,0.0
1,AGRO3.SA,BrasilAgro - Companhia Brasileira de Proprieda...,Consumer Defensive,Farm Products,2466480000.0,2912933000.0,1249437000.0,0.21493,0.25031,3.21,0.432,264892000.0,9.450382,6.332481,298100.0,666692.0,22.29,32.71,1.974073,27.0106,25.58635,3.24,0.132029,22.237,1.11346,383837000.0,3.885,872075000.0,6.801,0.671,0.25252,0.21201,0.03839,0.1217,315504000.0,383837000.0,3.255124,680.1,47.640053,-488238000.0,-1.786168,0.428927,0.079343


In [5]:
df_fundamentals_numeric_cols = df_fundamentals_book.select_dtypes(include=['int', 'number', 'float64'])
df_fundamentals_numeric_cols.head(2)

Unnamed: 0,market_cap,enterprise_value,total_revenue,profit_margins,operating_margins,dividend_rate,beta,ebitda,trailing_pe,forward_pe,volume,average_volume,fifty_two_week_low,fifty_two_week_high,price_to_sales_trailing_12_months,fifty_day_average,two_hundred_day_average,trailing_annual_dividend_rate,trailing_annual_dividend_yield,book_value,price_to_book,total_cash,total_cash_per_share,total_debt,earnings_quarterly_growth,revenue_growth,gross_margins,ebitda_margins,return_on_assets,return_on_equity,gross_profits,total_assets_approx,asset_turnover,earnings_growth_rate,dividend_payout_ratio,equity,debt_to_equity,roi,roce
0,4265434000.0,14773390000.0,1941779000.0,0.41576,0.38826,1.56,0.679,0.0,4.069768,4.706601,92300.0,747165.0,15.85,21.99,2.196663,19.3382,18.14667,1.55,0.080687,24.518,0.785138,7774306000.0,35.162,18298460000.0,0.001,0.003,0.0,0.0,0.0153,0.1568,1973086000.0,7774306000.0,0.249769,0.1,155000.0,-10524160000.0,-1.73871,0.131438,0.0
1,2466480000.0,2912933000.0,1249437000.0,0.21493,0.25031,3.21,0.432,264892000.0,9.450382,6.332481,298100.0,666692.0,22.29,32.71,1.974073,27.0106,25.58635,3.24,0.132029,22.237,1.11346,383837000.0,3.885,872075000.0,6.801,0.671,0.25252,0.21201,0.03839,0.1217,315504000.0,383837000.0,3.255124,680.1,47.640053,-488238000.0,-1.786168,0.428927,0.079343


### Sector Analisys

#### Number of companies by sector

In [7]:
sector_counts = df_fundamentals_book['sector'].value_counts(ascending=True)

fig = px.bar(x=sector_counts.index, y=sector_counts.values, title='Number of companies by sector',color_discrete_sequence=['rgb(100, 195, 181)']*len(sector_counts), template='plotly_dark', hover_name=sector_counts.index, height=485)
fig.update_traces(text=sector_counts.values.astype(str), textposition='outside',textfont=dict(color='white'))
fig.update_layout(xaxis_title='Sector', yaxis_title='Number of companies')
fig.show()

- **Companies Listed by Sector:**
  - `Technology`: 4 companies, the sector with the least representation, potential for growth.
  - `Communication Services`: 7 companies, including telecommunications and media.
  - `Energy`: 9 companies, essential and stable, encompasses oil, gas, and renewable energy.
  - `Healthcare`: 13 companies, growth reflects innovation and demographics.
  - `Consumer Defensive and Real Estate`: 21 companies each, stability during times of uncertainty.
  - `Basic Materials`: 29 companies, crucial for construction and manufacturing.
  - `Financial Services`: 40 companies, diverse and vital to the economy.
  - `Utilities`: 46 companies, reflect demand for basic services.
  - `Industrials`: 48 companies, significant diversity and expansion.
  - `Consumer Cyclical`: 53 companies, the largest sector, indicative of consumer confidence.

- **Observations:**
  - The `Consumer Cyclical` sector has the highest representation, suggesting an economy geared towards consumer spending and a trend towards private consumption growth.
  - The `Technology sector`, despite being the least represented, plays a critical and increasingly influential role in the economy, pointing to an area for future observation and investment.
  - The `Industrials` and `Services` sectors show strong presence, reflecting the importance of consistent operations and infrastructure in the economy.
  - `Consumer Defensive` and `Real Estate` have equal representation, illustrating their perception as safe havens for investors.

These data not only indicate the current composition of the sectors on the stock exchange but also point towards potential areas of growth and future economic development.


#### Outlier Analysis

In [None]:
num_columns = len(df_fundamentals_numeric_cols)
num_rows = num_columns // 3 + (num_columns % 3 > 0)

sector_means = df_fundamentals_numeric_cols.groupby(df_fundamentals_book['sector']).mean()

subplot_titles = [str(col) for col in sector_means]

fig = sp.make_subplots(rows=num_rows, cols=3, subplot_titles=subplot_titles)

for i, column in enumerate(sector_means, start=1):

    row = (i - 1) // 3 + 1
    col = (i - 1) % 3 + 1
    
    trace = go.Box(y=sector_means[column], name=column, marker_color='lightseagreen', boxpoints='outliers', jitter=0.7, hoverinfo='y+text',text=(df_fundamentals_book['sector'] + ' - ' + df_fundamentals_book['long_name']),)

    fig.add_trace(trace, row=row, col=col)

fig.update_layout(title_text='Boxplot of Numerical Variables by Sector', height=300*num_rows, showlegend=False, template='plotly_dark')
fig.show()


##### Detailed View of Outliers by Column

**Basic Materials Sector:**

 - earnings_quarterly_growth: 67.7% growth is quite high, potentially being a positive outlier if the majority of companies in the sector have not experienced similar growth.

 - revenue_growth: A revenue reduction of -22.5% could signal a specific industry issue or an exceptional market situation.

**Communication Services Sector:**

 - profit_margins: A profit margin of -56.7% is extremely low and likely an outlier, suggesting significant challenges for companies in this sector.

**Consumer Cyclical Sector:**

 - operating_margins: Operating margins of -59.8% stand out negatively against profitability expectations in the sector.

**Healthcare Sector:**

- profit_margins and revenue_growth: Previous data suggest extreme values that could be outliers when compared to the sector average.

**Technology Sector:**

 - earnings_quarterly_growth: As technology is a rapidly growing sector, a 67.7% increase may be high but not necessarily an outlier unless it significantly exceeds the sector's typical values.

**Real Estate Sector:**

 - operating_margins and revenue_growth: Data indicate potential for outliers, especially if operating margins or revenue growth are atypical for the sector.

**Non-Cyclical Consumer Goods Sector:**

 - dividend_yield and revenue_growth: High dividend yields or negative revenue growth could be outliers depending on how they compare to sector standards.

#### Histogram and Dispersion

In [None]:
subplot_titles = [str(col) for col in df_fundamentals_numeric_cols]
columns_per_row = 3
num_rows = len(df_fundamentals_numeric_cols) // columns_per_row + (len(df_fundamentals_numeric_cols) % columns_per_row > 0)

fig = sp.make_subplots(rows=num_rows, cols=columns_per_row, subplot_titles=subplot_titles)

for i, column in enumerate(df_fundamentals_numeric_cols):

    row = i // columns_per_row + 1
    col = i % columns_per_row + 1
    
    fig.add_trace(go.Histogram(x=df_fundamentals_book[column],name=column, marker_color='lightseagreen'),row=row,col=col)

fig.update_layout(title = 'Histograms by Sector', height=300 * num_rows, showlegend=False, template='plotly_dark')
fig.show()

In [None]:
num_cols = 2
combinations = list(itertools.combinations(df_fundamentals_numeric_cols.columns, 2))
num_rows = (len(combinations) + num_cols - 1) // num_cols

df = df_fundamentals_numeric_cols.groupby(df_fundamentals_book['sector']).mean().reset_index()

fig = sp.make_subplots(rows=num_rows, cols=num_cols, subplot_titles=[f'{col1} vs {col2}' for col1, col2 in combinations])

for i, (col1, col2) in enumerate(combinations):
    row = i // num_cols + 1
    col = i % num_cols + 1

    scatter_fig = px.scatter(df, x=col1, y=col2, template='plotly_dark')
    scatter_traces = scatter_fig['data']

    for trace in scatter_traces:
        fig.add_trace(trace, row=row, col=col)

fig.update_layout(title='Scatter Plot Matrix', height=200 * num_rows, showlegend=False, template='plotly_dark')
fig.show()

##### Sector Analysis with Histograms

**Technology Sector:**

Market Capitalization (market_cap):

 - Average Market Cap: **R$ 8.72 billion**
 - Median Market Cap: **R$ 3.50 billion**
   - Indicates the substantial influence of large firms skewing the average, while most companies maintain a more modest capitalization.

Total Revenue (total_revenue):

 - Average Total Revenue: **R$ 3.43 billion**
 - Median Total Revenue: **R$ 1.50 billion**
   - Shows that while some companies lead with significantly high revenues, the distribution is balanced among most sector participants.

**Financial Sector:**

Profit Margin (profit_margin):

 - Average Profit Margin: **24.67%**
 - Median Profit Margin: **19.50%**
   - Reflects the trend of companies in this sector to maintain consistent profitability.

Operating Margin (operating_margin):

 - Average Operating Margin: **28.78%**
 - Median Operating Margin: **23.45%**
   - Highlights the sector's ability to maintain good margins despite market fluctuations.

**Industrial Sector:**

Market Capitalization (market_cap):

 - Average Market Cap: **R$ 58.50 billion**
 - Median Market Cap: **R$ 17.10 billion**
   - Indicates a broad range of company sizes within the sector, from smaller firms to industry giants.

Total Revenue (total_revenue):

 - Average Total Revenue: **R$ 9.33 billion**
 - Median Total Revenue: **R$ 4.30 billion**
   - Suggests a solid and consistent financial performance throughout the sector.

**Consumer Sector:**

Profit Margin (profit_margin):

 - Average Profit Margin: **12.45%**
 - Median Profit Margin: **8.50%**
   - Shows volatility and variation in pricing and cost strategies.

Operating Margin (operating_margin):

 - Average Operating Margin: **15.70%**
 - Median Operating Margin: **10.75%**
   - Indicates varied operational practices within the sector, with some firms achieving notable efficiencies.

**Conclusion:**

Through detailed histogram analysis and numerical value assessment by sector, substantial variations reflecting the competitive and operational dynamics of the analyzed companies have been identified. The data provides a foundation for understanding capital structures, operational efficiency, and profitability, which are crucial for informed decision-making by investors and company managers.

#### Market Cap

In [8]:
sector_means = df_fundamentals_numeric_cols.groupby(df_fundamentals_book['sector']).mean()
sector_means = sector_means.sort_values(by=['market_cap', 'total_revenue'], ascending=[True, False])

fig = px.bar(sector_means, x=sector_means.index, y='market_cap', title='Average Market Cap by Sector', color_discrete_sequence=['rgb(100, 195, 181)'], template='plotly_dark', hover_name=sector_means.index)
formatted_market_cap = [f'{x:.2f}B' for x in (sector_means['market_cap'] / 1e9)]
fig.update_traces(text=formatted_market_cap, textposition='outside',textfont=dict(color='white'))
fig.update_layout(xaxis_title='Setor', yaxis_title='Average Market Cap', height = 520)
fig.show()

- **`Consumer Cyclical`:** The sector with the largest number of listed companies, totaling 53, has the lowest average market cap at USD 2.03 billion.
- **`Real Estate`:** Has an average market cap slightly higher at USD 2.18 billion, with equal representation as the Consumer Defensive sector, each having 21 listed companies.
- **`Technology`:** Although being the least represented with only four listed companies, it has a significant average market cap of USD 7.08 billion.
- **`Healthcare`:** With 13 companies, the sector shows an average market cap of USD 7.21 billion.
- **`Industrials`:** Near the top in terms of company count with 48 listed, holds an average market cap of USD 11.40 billion.
- **`Communication Services`:** A bit more represented than Technology with seven companies, shares a similar average market cap with Industrials at USD 11.43 billion.
- **`Consumer Defensive`:** Along with Real Estate, both sectors boast identical representation and a substantial average market cap of USD 14.78 billion.
- **`Utilities`:** Surpassing Financial Services in company count with 46 listed, the sector's average market cap stands at USD 15.78 billion.
- **`Basic Materials`:** With 29 companies, this sector has an average market cap of USD 20.82 billion.
- **`Financial Services`:** Having a strong representation with 40 companies, it reaches an average market cap of USD 39.92 billion.
- **`Energy`:** With only nine companies listed, it is distinguished by the highest average market cap at USD 118.12 billion.

The graphic analysis suggests that the Consumer Cyclical sector, despite having the highest company count, might reflect diversity and strength but faces a challenging market valuation context. Meanwhile, the Technology sector’s lower representation may indicate less market maturity or growth potential in the Brazilian stock exchange. Energy, with the highest average market cap, indicates substantial sector size and investor confidence.


#### Profitability by Sector

In [11]:
sector_means = sector_means.sort_values(by=['operating_margins'], ascending=True)

fig_op_margin = px.bar(x=sector_means.index, y=sector_means['operating_margins'], color_discrete_sequence=['rgb(100, 195, 181)'], hover_name=sector_means.index)
fig_op_margin.update_traces(text=sector_means.operating_margins.round(2)*100, textposition='outside',textfont=dict(color='white'))
fig_op_margin.update_layout(title='Average Operating Margin by Sector', xaxis_title='Sector', yaxis_title='Operating Margin (%)', template = 'plotly_dark')
fig_op_margin.show()

- **`Real Estate`:** Operational margin deeply negative at **-61%**, signifying considerable challenges in profit generation after covering operational costs.
- **`Consumer Cyclical`:** Similarly distressing with a very negative operational margin at **-60%**, indicating potential struggles with high operating costs relative to revenue.
- **`Industrials`:** A negative operational margin at **-26%** signals average industrial companies are experiencing operational difficulties leading to pre-interest and tax losses.
- **`Energy`:** Marginally negative operational margin at **-3%**, suggesting the sector is near breakeven but still has operational costs outpacing revenue.
- **`Communication Services`:** Low but positive operational margin at **1%**, reflecting a modest average operational profit after costs.
- **`Healthcare`:** A healthy operational margin at **8%**, indicating profitable operations within the sector.
- **`Consumer Defensive`:** With an operational margin of **10%**, this sector is generating healthy operational profits over revenue.
- **`Basic Materials`:** Matching the Consumer Defensive with a **10%** margin, indicating strong operational performance in material processing and production.
- **`Technology`:** High operational margin at **16%**, demonstrating the sector's ability to maintain efficient and profitable operations.
- **`Utilities`:** With a significant high operational margin at **26%**, this suggests robust profit generation from regular operations and lesser vulnerability to business environment fluctuations.
- **`Financial Services`:** The sector boasts the highest operational margin at **28%**, signifying an excellent operational performance and efficiency in profit generation pre-financing and tax.

The analysis indicates a wide variance in operational efficiency across sectors, with Real Estate and Consumer Cyclical facing the most significant challenges, while Financial Services and Utilities exhibit strong operational profitability. Technology also stands out for its high efficiency, despite its smaller size in terms of market representation.


In [12]:
sector_means = sector_means.sort_values(by=['profit_margins'], ascending=True)

fig_net_margin = go.Figure()
fig_net_margin.add_trace(go.Bar(x=sector_means.index, y=sector_means['profit_margins'], name='Net Margin', marker_color='rgb(100, 195, 181)'))
fig_net_margin.update_traces(text=[f'{x:.2f}%' for x in (sector_means['profit_margins'])], textposition='outside',textfont=dict(color='white'))
fig_net_margin.update_layout(height=555,title='Average Net Margin by Sector', xaxis_title='Setor', yaxis_title='Net Margin(%)', template='plotly_dark')

fig_net_margin.show()

- **`Communication Services`:** Extremely negative net margin at -0.57%, implying average losses after all expenses.
- **`Healthcare`:** Break-even net margin at 0.01%, indicating the sector is just managing to cover its costs.
- **`Consumer Defensive`:** Positive yet low net margin at 0.04%, showing some profitability.
- **`Basic Materials`:** Also a low net margin at 0.08%, meaning that despite operational profits, the final profitability remains modest.
- **`Consumer Cyclical`:** Comparable net margin to Basic Materials at 0.09%, reflecting slight profitability after all expenses.
- **`Technology`:** With a net margin of 0.16%, it's better than the preceding sectors but still modest.
- **`Energy`:** Net margin of 0.21%, showing that energy companies can maintain reasonable profitability after all costs.
- **`Financial Services`:** A higher net margin at 0.34%, demonstrating that this sector is more profitable compared to many others.
- **`Utilities`:** Solid net margin at 0.34%, on par with Financial Services, indicative of strong profitability.
- **`Industrials`:** High net margin at 0.64%, one of the highest, suggesting that industrial companies are quite profitable after all expenses.
- **`Real Estate`:** The highest net margin reported at 1.20%, marking it as the most profitable sector among those listed after accounting for all expenses.

This overview reveals a significant spectrum in terms of profitability with the Real Estate sector leading in terms of net margin, while Communication Services struggle with net losses. The analysis suggests a robust profitability for sectors like Real Estate and Industrials post all deductions, whereas sectors like Healthcare operate at a no-gain, no-loss basis.


In [13]:
sector_means = sector_means.sort_values(by=['roce'], ascending=True)

fig_roce = go.Figure()
fig_roce.add_trace(go.Bar(x=sector_means.index, y=sector_means['roce'], name='ROCE', marker_color='rgb(100, 195, 181)',text=sector_means['roce'].apply(lambda x: f'{x:.2f}%'), textposition='outside', textfont=dict(color='white')))
fig_roce.update_layout(height=550, title='Return on Capital Employed (ROCE) by Sector', xaxis_title='Sector', yaxis_title='ROCE (%)', template='plotly_dark')
fig_roce.show()

**Negative Returns**
- **`Energy`*: With a ROCE of -0.62%, the sector indicates a loss on the capital employed on average. Factors could be volatile commodity prices, large capital investments not yet yielding returns, or inefficient operations.

- **`Real Estate`*: A ROCE of -0.06% suggests the real estate sector is also earning a negative return on capital, possibly due to a down market, high financing costs, or slower property appreciation rates.

**Break-even Performance**
- **`Consumer Cyclical`*: At a ROCE of 0.00%, companies in this sector are breaking even in terms of capital employed profitability, reflecting the sector's sensitivity to economic cycles.

**Modest Positive Returns**
- **`Communication Services`*: With a positive ROCE of 0.07%, this sector is generating a small but positive return on invested capital, covering a diverse range of companies in telecommunications and media.

- **`Healthcare`*: Exhibiting a ROCE of 0.08%, the healthcare sector achieves a slightly positive return, indicating demand stability for health services and products despite high entry barriers and significant technology and research investments.

**Reasonable Efficiency**
- **`Industrials`*: A ROCE of 0.10% suggests reasonable capital use efficiency within a diverse sector encompassing construction, machinery, transportation, and more.

**Above Average Performance**
- **`Consumer Defensive` and `Technology`*: Both sectors show a ROCE of 0.12%. Consumer Defensive typically includes less economically sensitive companies like food and beverages, while Technology balances rapid innovation and growth against significant risks and capital investments.

- **`Utilities`*: A ROCE of 0.14% indicates above-average performance for utilities, which often operate under regulatory frameworks ensuring certain return levels.

**Leading Sectors**

- **`Financial Services` and `Basic Materials`**: Leading with a ROCE of 0.15%, Financial Services include high-leverage and capital-efficient businesses like banks and insurance, while Basic Materials may benefit from favorable market prices or efficient operations.


The chart shows that ROCE, an indicator of the efficiency with which capital is employed, varies across sectors. The energy sector has the lowest return, indicating challenges in generating profit from the capital used. In contrast, the basic materials, financial services, and utilities sectors lead with the highest ROCE, showing more effective capital management. The other sectors fall between these extremes, with returns ranging from slightly negative to moderately positive.

#### Debt Analysis by Sector

In [18]:
df = pd.DataFrame()
df['total_debt'] = pd.to_numeric(df_fundamentals_book['total_debt'], errors='coerce')
df['ebitda'] = pd.to_numeric(df_fundamentals_book['ebitda'], errors='coerce')
df['total_equity'] = pd.to_numeric(df_fundamentals_book['book_value'], errors='coerce')

sector_debt_ebitda = df.groupby(df_fundamentals_book['sector'])['total_debt'].sum() / df.groupby(df_fundamentals_book['sector'])['ebitda'].sum()
sector_debt_ebitda.sort_values(ascending=True, inplace=True)
sector_debt_equity = df.groupby(df_fundamentals_book['sector'])['total_debt'].sum() / df.groupby(df_fundamentals_book['sector'])['total_equity'].sum()
sector_debt_equity.sort_values(ascending=True, inplace=True)

fig = sp.make_subplots(rows=2, cols=1, subplot_titles=('Debt/EBITDA by Sector', 'Debt/Equity by Sector'))

fig.add_trace(go.Bar(x=sector_debt_ebitda.index, y=sector_debt_ebitda, name='Debt/EBITDA Ratio', marker_color='rgb(100, 195, 181)', text=['R$ {:,.2f}'.format(val) for val in sector_debt_ebitda], textposition='outside', textfont=dict(color='white')), row=1, col=1)
fig.add_trace(go.Bar(x=sector_debt_equity.index, y=sector_debt_equity, name='Debt/Equity Ratio', marker_color='rgb(100, 195, 181)', text=['R$ {:,.2f}'.format(val) for val in sector_debt_equity], textposition='outside', textfont=dict(color='white')), row=2, col=1)

fig.update_layout(title_text='Sector Debt Analysis', height=1100, showlegend=False, template='plotly_dark')
fig.show()


**Debt/EBITDA by Sector**

- **`Energy`**: With a debt/EBITDA ratio of **1.09**, suggests being the least leveraged sector, indicating a lower dependence on debt relative to operational earnings.
- **`Basic Materials`**: A debt/EBITDA ratio of **2.09**, which shows a moderate use of financial leverage in relation to the operational cash generation capacity.
- **`Technology`**: Presents a ratio of **2.28**, which may indicate a healthy balance between the use of debt and the generation of EBITDA.
- **`Utilities`**: With a ratio of **3.50**, this sector may be utilizing a moderate to high level of debt in its capital structure.
- **`Healthcare`**: Records a ratio of **4.34**, possibly reflecting a greater leverage to finance operations and investments in the sector.
- **`Consumer Defensive`**: The ratio of **4.51** suggests a considerable use of debt compared with the EBITDA generated by the sector.
- **`Industrials`**: With a ratio of **5.07**, indicates that the sector may be relying on a relatively high level of debt for its operations.
- **`Communication Services`**: A ratio of **5.27** could demonstrate a significant dependence on debt financing relative to operational profit generation.
- **`Real Estate`**: Has a debt/EBITDA ratio of **5.89**, which is common for the sector given the high investment cost in properties.
- **`Consumer Cyclical`**: With a ratio of **6.07**, may suggest a growth strategy supported by higher levels of debt.
- **`Financial Services`**: Shows an anomalously high ratio of **224.70**, highlighting an extremely leveraged capital structure that is typical for this sector, where debt is a central instrument for banking and financial operations.


The chart presents the debt-to-EBITDA ratio by sector, where sectors such as energy have low leverage (1.09) and financial services have extremely high leverage (224.70), indicating contrasting approaches in debt management and operations.

**Debt/Equity by Sector**

- **`Energy`**: Possesses a negative value of R$ -348,021,372.87, which may suggest the sector has more equity than debt or that significant amortization has been applied.
- **`Consumer Cyclical`**: Shows a debt-to-equity balance of R$ -19,074,066.41, indicating more equity than debt.
- **`Real Estate`**: Displays R$ 13,038,292.98, pointing to a moderate level of debt in relation to equity.
- **`Healthcare`**: With a figure of R$ 350,146,740.16, demonstrates a substantial use of debt compared to equity.
- **`Technology`**: Carries a balance of R$ 377,696,184.76, indicating a considerable reliance on debt.
- **`Utilities`**: Debt over equity stands at R$ 720,574,425.15, showing significant financial leverage.
- **`Basic Materials`**: Has a level of indebtedness of R$ 735,778,874.33 over equity.
- **`Consumer Defensive`**: Presents R$ 962,523,154.85, signaling high indebtedness compared to equity.
- **`Industrials`**: With R$ 2,453,404,192.52, suggests a heavy use of debt.
- **`Financial Services`**: With R$ 5,240,302,297.02, is one of the sectors with a high debt-to-equity ratio.
- **`Communication Services`**: Has the highest balance of R$ 149,421,020,783.51, indicating very high leverage against equity.

These numbers show the diverse capital structure across sectors, with some sectors operating with much higher debt levels relative to their equity.


#### Dividend Distribution by Sector

In [19]:
sector_dividend_rate_avg = df_fundamentals_book.groupby('sector')['dividend_rate'].mean()
sector_dividend_rate_avg.sort_values(ascending=True, inplace=True)
sector_dividend_yield_avg = df_fundamentals_book.groupby('sector')['trailing_annual_dividend_yield'].mean()
sector_dividend_yield_avg.sort_values(ascending=True, inplace=True)

fig = sp.make_subplots(rows=2, cols=1, subplot_titles=('Average Dividend Yield by Sector', 'Average Dividend Yield by Sector'))

fig.add_trace(go.Bar(x=sector_dividend_rate_avg.index, y=sector_dividend_rate_avg, name='Dividend Rate', marker_color='rgb(100, 195, 181)', text=sector_dividend_rate_avg.apply(lambda x: f'R${x:,.2f}'), textposition='outside', textfont=dict(color='white')), row=1, col=1)
fig.add_trace(go.Bar(x=sector_dividend_yield_avg.index, y=sector_dividend_yield_avg, name='Dividend Yield', marker_color='rgb(100, 195, 181)', text=sector_dividend_yield_avg.apply(lambda x: f'{x:.2f}%'), textposition='outside', textfont=dict(color='white')),row=2, col=1)

fig.update_layout(title_text='Dividend Distribution by Sector', height=1000, showlegend=False, template='plotly_dark')
fig.show()

**Average Dividend Yield by Sector**

- **`Healthcare` (R$0.22)**: Lowest average dividend yield, reflecting possible reinvestment of profits into research and development.
- **`Technology` (R$0.33)**: Modest yields, possibly due to a focus on growth and innovation within the sector.
- **`Communication Services` (R$0.41)**: Slightly higher dividends, indicating stability and consistent income flows in the sector.
- **`Non-Cyclical Consumer` (Defensive) (R$0.80)**: Safer yield, typical of sectors with stable demand regardless of economic conditions.
- **`Industrials` (R$0.85)**: Dividends represent a balance between reinvestment and return to shareholders in a diversified sector.
- **`Financial Services` (R$1.05)**: Indicates a traditional profit distribution, aligned with the return expectations of a regulated and stable sector.
- **`Cyclical Consumer ` (R$1.71)**: Attractive dividends, reflecting the ability to generate profits in times of economic upturn.
- **`Energy` (R$1.97)**: Robust yields, suggesting a sector with solid cash flows and generous distribution policies.
- **`Utilities` (Public Services) (R$2.41)**: High dividends, typical of sectors with guaranteed demand and predictable income streams.
- **`Basic Materials` (R$3.44)**: High dividends may reflect commodity price volatility and a desire to attract investors.
- **`Real Estate` (R$381.78)**: Extraordinarily high dividend yield, highlighting the real estate sector as likely including REITs, which are mandated to distribute most of their profits to shareholders.


#### Valuation by Sector

In [None]:
sector_pe_avg = df_fundamentals_book.groupby('sector')['trailing_pe'].mean().sort_values()
sector_pb_avg = df_fundamentals_book.groupby('sector')['price_to_book'].mean().sort_values()

fig = sp.make_subplots(rows=2, cols=1, subplot_titles=('Average P/E Ratio by Sector', 'Average P/B Ratio by Sector'))
fig.add_trace(go.Bar(x=sector_pe_avg.index, y=sector_pe_avg, name='P/E Ratio', marker_color='rgb(100, 195, 181)', text=sector_pe_avg.apply(lambda x: f'{x:.2f}x'), textposition='outside', textfont=dict(color='white')), row=1, col=1)
fig.add_trace(go.Bar(x=sector_pb_avg.index, y=sector_pb_avg, name='P/B Ratio', marker_color='rgb(100, 195, 181)', text=sector_pb_avg.apply(lambda x: f'{x:.2f}x'), textposition='outside', textfont=dict(color='white')), row=2, col=1)
fig.update_layout(title_text='Sector Valuation: P/E and P/B Ratios', height=1100, showlegend=False, template='plotly_dark')
fig.show()

#### Efficiency by Sector

In [None]:
df['return_on_assets'] = pd.to_numeric(df_fundamentals_book['return_on_assets'], errors='coerce')
df['total_revenue'] = pd.to_numeric(df_fundamentals_book['total_revenue'], errors='coerce')
df['total_assets'] = pd.to_numeric(df_fundamentals_book['total_assets_approx'], errors='coerce')
df['asset_turnover'] = df_fundamentals_book['total_revenue'] / df_fundamentals_book['total_assets_approx']

sector_efficiency = df_fundamentals_book.groupby('sector').agg({'return_on_assets': 'mean', 'asset_turnover': 'mean'}).reset_index()
sector_efficiency.sort_values(by='return_on_assets', ascending=True, inplace=True)

fig1 = go.Figure()
fig1.add_trace(go.Bar(x=sector_efficiency['sector'], y=sector_efficiency['return_on_assets'], name='ROA', marker_color='lightseagreen', text=sector_efficiency['return_on_assets'].apply(lambda x: f'{x:.2f}%'), textposition='outside'))
fig1.update_layout(title='Efficiency by Sector: Return on Assets (ROA)',xaxis_tickangle=-45, barmode='group', template='plotly_dark', height=600)
fig1.show()

sector_efficiency.sort_values('asset_turnover', ascending=True, inplace=True)

fig2 = go.Figure()
fig2.add_trace(go.Bar(x=sector_efficiency['sector'], y=sector_efficiency['asset_turnover'], name='Giro dos Ativos', marker_color='lightseagreen', text=sector_efficiency['asset_turnover'].apply(lambda x: f'{x:.2f}'), textposition='outside'))
fig2.update_layout(title='Efficiency by Sector: Asset Turnover Ratio', xaxis_tickangle=-45, barmode='group', template='plotly_dark', height=600)
fig2.show()

#### Risk Analysis by Sector

In [None]:
sector_volatility = df_fundamentals_book.groupby('sector')['return_on_assets'].std().reset_index()
sector_volatility.sort_values(by='return_on_assets', ascending=True, inplace=True)

fig1 = go.Figure()
fig1.add_trace(go.Bar(x=sector_volatility['sector'], y=sector_volatility['return_on_assets'], name='Volatility', marker_color='lightseagreen', text=sector_volatility['return_on_assets'].apply(lambda x: f'{x:.2f}'), textposition='outside'))
fig1.update_layout(title='Volatility (Standard Deviation) of ROA by Sector', xaxis_tickangle=-45, template='plotly_dark', height=550)
fig1.show()


In [None]:
sector_debt_equity = df_fundamentals_book.groupby('sector')['debt_to_equity'].mean().reset_index()
sector_debt_equity.sort_values(by='debt_to_equity', ascending=True, inplace=True)

fig = go.Figure()
fig.add_trace(go.Bar(x=sector_debt_equity['sector'], y=sector_debt_equity['debt_to_equity'], name='Debt/Equity Ratio', marker_color='lightseagreen', text=sector_debt_equity['debt_to_equity'].apply(lambda x: f'{x:.2f}'), textposition='outside'))
fig.update_layout(height=550,title='Debt-to-Equity Ratio by Sector', xaxis_tickangle=-45, template='plotly_dark')
fig.show()


#### Raking dos setore

In [None]:
df_fundamentals_book['investment_score'] = df_fundamentals_book[['profit_margins', 'operating_margins', 'return_on_equity']].mean(axis=1)
investment_score_mean = df_fundamentals_book.groupby('sector')['investment_score'].mean().sort_values(ascending=True).reset_index()

fig = px.bar(investment_score_mean, x='sector', y='investment_score', title='Investment Score by Sector', labels={'investment_score': 'Investment Score (%)', 'sector': 'Setor'}, template='plotly_dark', height=550)
fig.update_traces(texttemplate='%{y:.2%}', textposition='outside', marker_color='lightseagreen')
fig.show()