### Exploratory Data Analysis for a Monte Carlo Simulation

### Import all modules needed

In [None]:
import matplotlib.pyplot as plt
import numpy as np 
import yfinance as yf
import pandas as pd
import seaborn as sns

---

#### We will use data from *Microsoft, Apple and Nvidia*, plus LCID, QS, WBD, NU, TSLA, INTC and RIOT

In [None]:
# Single source of truth for the symbols, so the Tickers object and the
# downloaded frame can never drift out of sync.
TICKER_SYMBOLS = 'MSFT AAPL NVDA LCID QS WBD NU TSLA INTC RIOT'

tickers = yf.Tickers(TICKER_SYMBOLS)

# Download 10 years of history. group_by='ticker' yields two-level columns
# of the form (ticker, field), e.g. ('AAPL', 'Close').
all_ticker = yf.download(TICKER_SYMBOLS, period="10y", group_by='ticker')

# Keep a copy of the downloaded frame so it can be restored without
# downloading again.
backup = all_ticker.copy()

### Starting EDA properly

In [None]:
# Preview the first rows (head() defaults to 5) to check the column layout.
all_ticker.head()

In [None]:
all_ticker.info()  # Print a summary of the DataFrame (index, columns, dtypes, non-null counts)
# The data does contain NaN values, so count them per column below.

all_ticker.isna().sum()

#### OHLCV - Meaning

-   Open -> First traded price
-   High -> Highest price
-   Low -> Lowest price
-   Close -> Final traded price
-   Volume -> Total number of shares or contracts traded during the period

In [None]:
all_ticker.describe()  # Display summary statistics for every column of the DataFrame
# Pay special attention to the `std` row. NOTE: at this point it is the standard
# deviation of the raw OHLCV values, not of the returns; the returns are only
# computed further down, from the Close columns.

---

### Graphical Analysis using *Matplotlib or Seaborn*

In [None]:
all_ticker.hist(bins=50, figsize=(20,15))  # One 50-bin histogram per column, i.e. per (ticker, field) pair
plt.show()  # Render the histograms

In [None]:
# Draw one box per column of the raw downloaded data.
# NOTE(review): this places Volume and price columns on the same axis, so the
# price boxes are probably unreadable; consider plotting only the Close
# columns. Confirm the intent.
sns.boxplot(data=all_ticker)
plt.show()  # Render the boxplot

In [13]:
def calc_returns(stock_df) -> pd.DataFrame:
    """Compute simple period-over-period returns from the Close prices.

    Args:
        stock_df: Price frame. Columns may be two-level tuples in either
            order, ``('AAPL', 'Close')`` or ``('Close', 'AAPL')``, or flat
            strings containing a ``'Close'`` column.

    Returns:
        A DataFrame with one column per Close series, labelled by the
        non-``'Close'`` part of the column tuple (or ``'Close'`` for a flat
        frame). Every row where any series lacks a return is dropped, so the
        result only covers rows where all series have data.
    """
    # Match the field name exactly: for tuple labels this is tuple
    # membership, for flat labels equality, so 'Adj Close' is never picked up.
    close_columns = [
        col for col in stock_df.columns
        if (('Close' in col) if isinstance(col, tuple) else col == 'Close')
    ]
    close_prices = stock_df[close_columns].copy()

    # Rename for readability: ('AAPL', 'Close') or ('Close', 'AAPL') -> 'AAPL'.
    # Taking the non-'Close' part keeps this independent of the level order.
    close_prices.columns = [
        next((part for part in col if part != 'Close'), col[0])
        if isinstance(col, tuple) else col
        for col in close_columns
    ]

    # fill_method=None: do not forward-fill missing prices before computing
    # returns. The implicit fill is deprecated in pandas and would turn gaps
    # into fabricated 0% returns; rows with a missing return are dropped.
    return close_prices.pct_change(fill_method=None).dropna()

# Per-ticker simple returns derived from the Close columns of the downloaded data.
returns = calc_returns(all_ticker)

In [None]:
# Wide-form scatter plot of the returns frame: every column is drawn as its
# own series against the frame's index.
sns.scatterplot(data=returns)
plt.show()  # Render the scatter plot

---

### Calculate a Covariance Matrix

In [None]:
# Pairwise correlation and covariance between the return columns.
returns_corr = returns.corr()
returns_cov = returns.cov()

# Annualize the covariance by scaling with 252 business days.
annual_returns_cov = 252 * returns_cov

print(f"Matriz de Covariância:\n {returns_cov}\n Covariância Anual:\n {annual_returns_cov}\n")
print(f"Matriz de Correlação:\n {returns_corr}")