In [None]:
# ===============================
# IMPORT & CONFIG
# ===============================
%matplotlib inline
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import yfinance as yf
from datetime import datetime, timedelta
import matplotlib.dates as mdates
from IPython.display import display
import io

# ===============================
# CONFIGURATION
# ===============================
TICKER = "PYPL"  # ticker symbol to analyze, e.g. AAPL
YEARS = 8        # number of most recent years to display

# ===============================
# UTILITIES
# ===============================
# URL template for a statement page; formatted with the ticker and the statement slug.
BASE_URL = "https://discountingcashflows.com/company/{ticker}/{statement}/"

def get_table_discounting(ticker, statement):
    """Fetch a statement page and return its first usable HTML table.

    The page URL is built from ``BASE_URL`` with *ticker* and *statement*.
    Every ``<table>`` on the page is tried in document order; the first one
    that pandas can parse and that has at least two columns is returned.

    Args:
        ticker: Ticker symbol inserted into the URL.
        statement: Statement slug inserted into the URL
            (e.g. ``"income-statement"``).

    Returns:
        A ``pandas.DataFrame`` for the first readable table, or ``None`` when
        the request fails, the page has no tables, or no table is usable.
        A diagnostic message is printed in each failure case.
    """
    url = BASE_URL.format(ticker=ticker, statement=statement)
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        r = requests.get(url, headers=headers, timeout=20)
        r.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Errore nella richiesta per {statement}: {e}")
        return None

    soup = BeautifulSoup(r.text, "html.parser")
    tables = soup.find_all("table")
    if not tables:
        print(f"Nessuna tabella trovata per {statement} su {url}")
        return None

    for t in tables:
        try:
            frames = pd.read_html(io.StringIO(str(t)))
        except Exception:
            # Best effort: an unparsable table is skipped. `Exception` (rather
            # than a bare except) lets KeyboardInterrupt/SystemExit propagate.
            continue
        if frames and frames[0].shape[1] >= 2:
            return frames[0]

    print(f"Nessuna tabella leggibile trovata per {statement} su {url}")
    return None

def extract_series_from_row(df, keywords):
    """Extract the numeric values of the first row whose label matches a keyword.

    Keywords are tried in order. For each keyword the first column of *df* is
    searched (case-insensitive, pattern semantics of ``Series.str.contains``)
    and the first matching row is parsed. Thousands separators are removed and
    parentheses are read as a negative sign. Cells that are missing or do not
    parse to a finite-or-infinite number (e.g. ``"-"``, ``""``, NaN) are
    skipped together with their period label.

    Args:
        df: Table whose first column holds row labels and whose remaining
            columns are periods, or ``None``.
        keywords: Label patterns to try, in priority order.

    Returns:
        ``(values, periods)`` as two lists of equal length for the first
        keyword that yields at least one number, otherwise ``(None, None)``.
    """
    if df is None:
        return None, None

    periods = df.columns.tolist()[1:]
    row_labels = df.iloc[:, 0].astype(str)

    for kw in keywords:
        mask = row_labels.str.contains(kw, case=False, na=False)
        if not mask.any():
            continue

        row = df.loc[mask].iloc[0]
        cleaned_vals, cleaned_periods = [], []
        for period, raw in zip(periods, row.iloc[1:]):
            if pd.isna(raw):
                continue
            text = str(raw).replace(",", "").replace("(", "-").replace(")", "").strip()
            try:
                value = float(text)
            except ValueError:
                # Placeholders such as "-" or "" are not numbers.
                continue
            # float("nan") parses successfully; a stringified missing cell
            # must not be reported as a real data point.
            if np.isnan(value):
                continue
            cleaned_vals.append(value)
            cleaned_periods.append(period)

        if cleaned_vals:
            return cleaned_vals, cleaned_periods

    return None, None

# ===============================
# ESTRAZIONE FINANCIALS
# ===============================
def extract_financials(ticker):
    """Scrape the three statements for *ticker* and collect the key series.

    The income statement, balance sheet and cash-flow statement are fetched
    in that order. Period labels come from the Revenue row only. Each series
    is cut to its first ``YEARS`` entries and then reversed (intended order:
    oldest to most recent; this assumes the scraped table lists the most
    recent period first -- TODO confirm).

    Returns:
        A dict with keys ``Revenue``, ``Periods``, ``NetIncome``, ``Equity``,
        ``Debt`` and ``FCF`` (a value is ``None`` when its row was not
        found), or ``None`` when no periods could be extracted.
    """
    # Income statement
    income_df = get_table_discounting(ticker, "income-statement")
    revenue, periods = extract_series_from_row(income_df, ["Revenue", "Sales", "Total Revenue"])
    net_income, _ = extract_series_from_row(income_df, ["Net Income", "Net loss", "NetLossProfit"])

    # Balance sheet
    balance_df = get_table_discounting(ticker, "balance-sheet-statement")
    equity, _ = extract_series_from_row(
        balance_df,
        ["Total Equity", "Total shareholders' equity", "Total stockholders' equity"],
    )
    debt, _ = extract_series_from_row(
        balance_df,
        ["Total Debt", "Total liabilities", "Total Long Term Debt"],
    )

    # Cash-flow statement
    cashflow_df = get_table_discounting(ticker, "cash-flow-statement")
    fcf, _ = extract_series_from_row(
        cashflow_df,
        ["Free Cash Flow", "Free cash flow", "FreeCashFlow", "Operating Cash Flow"],
    )

    if periods is None:
        print("Could not extract periods data. Cannot proceed with financial data processing.")
        return None

    collected = {
        "Revenue": revenue,
        "Periods": periods,
        "NetIncome": net_income,
        "Equity": equity,
        "Debt": debt,
        "FCF": fcf,
    }
    # Keep the first YEARS entries of every available series, then reverse.
    return {
        name: (series if series is None else series[:YEARS][::-1])
        for name, series in collected.items()
    }
# ===============================
# CALCOLO RATIOS
# ===============================
def calculate_ratios(financials):
    """Compute profit margin, ROE and debt/equity from the extracted series.

    Args:
        financials: Mapping with the list-valued entries ``NetIncome``,
            ``Revenue``, ``Equity`` and ``Debt`` (any may be ``None`` or
            absent), or ``None``.

    Returns:
        ``None`` when *financials* is ``None``; otherwise a dict with keys
        ``ProfitMargin`` (NetIncome / Revenue * 100), ``ROE``
        (NetIncome / Equity * 100) and ``DebtEquity`` (Debt / Equity).
        A ratio is ``None`` when either input series is missing, the two
        series differ in length, or the values are not numeric. Within a
        series, a zero denominator produces NaN for that period only.
    """
    if financials is None:
        return None

    def _ratio_series(numerator_key, denominator_key, percent):
        """Element-wise ratio of two series, or None when it cannot be built."""
        numerators = financials.get(numerator_key)
        denominators = financials.get(denominator_key)
        if numerators is None or denominators is None:
            return None
        try:
            if len(numerators) != len(denominators):
                return None
            result = []
            for num, den in zip(numerators, denominators):
                if den == 0:
                    # A single zero denominator must not discard the whole series.
                    result.append(float("nan"))
                elif percent:
                    result.append(num / den * 100)
                else:
                    result.append(num / den)
            return result
        except TypeError:
            # Non-numeric or unsized data: report the ratio as unavailable.
            return None

    return {
        "ProfitMargin": _ratio_series("NetIncome", "Revenue", True),
        "ROE": _ratio_series("NetIncome", "Equity", True),
        "DebtEquity": _ratio_series("Debt", "Equity", False),
    }

# ===============================
# ESTRAZIONE STIME NET INCOME
# ===============================
def get_net_income_estimates(ticker):
    """Scrape the first two Net Income estimates for *ticker*.

    The estimates page is searched for the first table containing the text
    "Estimated Net Income". The row immediately below the row labelled
    "Estimated Net Income" is read (presumably the "Low" estimate row --
    TODO confirm against the page layout), and every cell after the first
    that parses as a number is collected in column order.

    Args:
        ticker: Ticker symbol inserted into the URL.

    Returns:
        ``(first_value, second_value)`` from the collected numbers; each
        element is ``None`` when not enough numbers were found. Returns
        ``(None, None)`` on any request or parsing failure.
    """
    url = f"https://discountingcashflows.com/company/{ticker}/estimates/"
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        # Same timeout as get_table_discounting so a stalled server cannot hang the run.
        r = requests.get(url, headers=headers, timeout=20)
        r.raise_for_status()
    except requests.exceptions.RequestException:
        return None, None

    soup = BeautifulSoup(r.text, "html.parser")
    estimates_table = next(
        (table for table in soup.find_all("table") if "Estimated Net Income" in table.text),
        None,
    )
    if estimates_table is None:
        return None, None

    try:
        df = pd.read_html(io.StringIO(str(estimates_table)), header=None)[0]
    except Exception:
        # Best effort: an unparsable table means no estimates.
        return None, None

    idx = df[df.iloc[:, 0].astype(str).str.contains("Estimated Net Income", case=False, na=False)].index
    if idx.empty:
        return None, None

    low_row_position = idx[0] + 1
    if low_row_position >= len(df):
        # The header row is the last row: there is nothing below it to read.
        return None, None
    low_row = df.iloc[low_row_position]

    numerical_cols_values = []
    for val in low_row.iloc[1:]:
        s = str(val).replace(",", "").replace("(", "-").replace(")", "").strip()
        try:
            numerical_cols_values.append(float(s))
        except ValueError:
            continue

    current_year_estimate = numerical_cols_values[0] if len(numerical_cols_values) >= 1 else None
    next_year_estimate = numerical_cols_values[1] if len(numerical_cols_values) >= 2 else None
    return current_year_estimate, next_year_estimate

# ===============================
# PLOTTAGGIO FINANCIALS + RATIOS
# ===============================
def plot_financials_ratios_price(financials, ratios, ticker):
    """Draw a two-panel figure: Revenue/Net Income on top, ratios below.

    Args:
        financials: Dict as returned by ``extract_financials`` (uses the
            ``Periods``, ``Revenue`` and ``NetIncome`` entries), or ``None``.
        ratios: Dict as returned by ``calculate_ratios`` (uses
            ``ProfitMargin``, ``ROE`` and ``DebtEquity``), or ``None``.
        ticker: Ticker symbol used in the panel titles.

    Returns:
        None. The figure is shown with ``plt.show()``. When either argument
        is ``None`` a message is printed and nothing is drawn. A panel whose
        series do not match the number of period labels shows a
        "Data Unavailable" placeholder instead of a plot.
    """
    if financials is None or ratios is None:
        print("Cannot plot due to missing financial or ratio data.")
        return

    def _as_array(values):
        # None must become an empty 1-D array: np.array(None) is a 0-d array
        # and len() on it raises TypeError instead of failing the length check.
        if values is None:
            return np.array([], dtype=float)
        return np.array(values, dtype=float)

    labels = financials.get("Periods") or []
    rev = _as_array(financials.get("Revenue"))
    ni = _as_array(financials.get("NetIncome"))

    # Rescale to millions when the largest value exceeds 1e6.
    factor = 1
    max_val = 0
    if rev.size > 0:
        max_val = max(max_val, max(rev))
    if ni.size > 0:
        max_val = max(max_val, max(ni))
    if max_val > 1e6:
        factor = 1e6
    if rev.size > 0:
        rev /= factor
    if ni.size > 0:
        ni /= factor
    y_label_fin = "Amount"
    if factor == 1e6:
        y_label_fin += " (M)"

    # Ratios: a missing key falls back to zeros, a None value to "unavailable".
    zeros = np.zeros(len(labels))
    pm = _as_array(ratios.get("ProfitMargin", zeros))
    roe = _as_array(ratios.get("ROE", zeros))
    de = _as_array(ratios.get("DebtEquity", zeros))

    fig, axs = plt.subplots(2, 1, figsize=(14, 10))

    # --- Financials
    if len(labels) == len(rev) and len(labels) == len(ni):
        axs[0].plot(labels, rev, marker='o', label="Revenue")
        axs[0].plot(labels, ni, marker='o', label="Net Income")
        axs[0].set_title(f"{ticker} Financials")
        axs[0].set_ylabel(y_label_fin)
        axs[0].legend()
        axs[0].grid(True)
    else:
        axs[0].set_title(f"{ticker} Financials (Data Unavailable)")
        axs[0].text(0.5, 0.5, "Financial data not available.", ha='center', va='center', transform=axs[0].transAxes)

    # --- Ratios
    if len(labels) == len(pm) and len(labels) == len(roe) and len(labels) == len(de):
        axs[1].plot(labels, pm, marker='o', label="Profit Margin %")
        axs[1].plot(labels, roe, marker='o', label="ROE %")
        axs[1].plot(labels, de, marker='o', label="Debt/Equity")
        axs[1].set_title(f"{ticker} Ratios")
        axs[1].set_ylabel("Ratio / %")
        axs[1].legend()
        axs[1].grid(True)

        # Annotate every point with its value; NaN points are left unlabelled.
        for i, label in enumerate(labels):
            if not np.isnan(pm[i]):
                axs[1].text(label, pm[i], f'{pm[i]:.1f}%', ha='left', va='bottom')
            if not np.isnan(roe[i]):
                axs[1].text(label, roe[i], f'{roe[i]:.1f}%', ha='left', va='bottom')
            if not np.isnan(de[i]):
                # Debt/Equity is a plain ratio, not a percentage.
                axs[1].text(label, de[i], f'{de[i]:.2f}', ha='left', va='bottom')
    else:
        axs[1].set_title(f"{ticker} Ratios (Data Unavailable)")
        axs[1].text(0.5, 0.5, "Ratio data not available.", ha='center', va='center', transform=axs[1].transAxes)

    plt.tight_layout()
    plt.show()


# Run the pipeline for TICKER: scrape the statements, derive the ratios, then plot both.
# extract_financials() may return None; the two calls below accept None.
financial_data = extract_financials(TICKER)
ratio_data = calculate_ratios(financial_data)
plot_financials_ratios_price(financial_data, ratio_data, TICKER)


# ===============================
# ===============================
# TERZO BLOCCO: NET INCOME + STOCK PRICE (assi X/Y corretti)
# ===============================

# Build the Net Income DataFrame: historical values plus scraped estimates.
# extract_financials() returns None on failure, so fall back to an empty mapping
# instead of crashing on `.get`.
_financials = financial_data if financial_data is not None else {}
historical_ni = _financials.get("NetIncome") or []
historical_periods = _financials.get("Periods") or []

historical_data_with_years = []
if historical_ni and historical_periods and len(historical_ni) == len(historical_periods):
    for period, ni in zip(historical_periods, historical_ni):
        try:
            # str() keeps an integer label such as 2023 from being read as an epoch offset.
            year = int(pd.to_datetime(str(period)).year)
        except (ValueError, TypeError):
            # Labels that are not dates are skipped.
            continue
        historical_data_with_years.append({'Year': year, 'Net Income': ni, 'Type': 'Historical'})

# Scrape the Net Income estimates.
estimate_for_2025 = None
estimate_for_2026 = None

url = f"https://discountingcashflows.com/company/{TICKER}/estimates/"
headers = {"User-Agent": "Mozilla/5.0"}
try:
    # Timeout matches get_table_discounting so a stalled server cannot hang the run.
    r = requests.get(url, headers=headers, timeout=20)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")
    tables = soup.find_all("table")

    estimates_table = None
    for table in tables:
        if "Estimated Net Income" in table.text:
            estimates_table = table
            break

    if estimates_table is not None:
        df_estimates = pd.read_html(io.StringIO(str(estimates_table)), header=None)[0]
        estimated_net_income_row_index = df_estimates[
            df_estimates.iloc[:, 0].astype(str).str.contains("Estimated Net Income", case=False, na=False)
        ].index

        if not estimated_net_income_row_index.empty:
            # The row directly below the "Estimated Net Income" row holds the values used here.
            low_row_index = estimated_net_income_row_index[0] + 1
            low_row = df_estimates.iloc[low_row_index]

            numerical_cols_values = []
            for val in low_row.iloc[1:]:
                s = str(val).replace(",", "").replace("(", "-").replace(")", "").strip()
                try:
                    numerical_cols_values.append(float(s))
                except ValueError:
                    pass

            # 5th numeric value -> 2025 estimate, 4th numeric value -> 2026 estimate.
            # NOTE(review): positions and years are hard-coded to the page layout
            # at the time of writing -- confirm before relying on them.
            if len(numerical_cols_values) > 4:
                estimate_for_2025 = numerical_cols_values[4]
            if len(numerical_cols_values) > 3:
                estimate_for_2026 = numerical_cols_values[3]

except Exception as e:
    # Best effort: the chart is still drawn without estimates, but say why they are missing.
    print(f"Could not extract Net Income estimates: {e}")

# Collect the estimates.
estimated_data = []
if estimate_for_2025 is not None:
    estimated_data.append({'Year': 2025, 'Net Income': estimate_for_2025, 'Type': 'Estimated'})
if estimate_for_2026 is not None:
    estimated_data.append({'Year': 2026, 'Net Income': estimate_for_2026, 'Type': 'Estimated'})

# Combine historical + estimated. Explicit columns keep sort_values from raising
# KeyError when there are no rows at all.
ni_df = (
    pd.DataFrame(historical_data_with_years + estimated_data, columns=['Year', 'Net Income', 'Type'])
    .sort_values(by='Year')
    .reset_index(drop=True)
)

# ===============================
# Stock Price
# ===============================
# Defaults used when the download fails or yields no usable price column.
stock_data = pd.DataFrame()
price_col = None

try:
    # Request about 10% more history than YEARS, then trim to YEARS below.
    end_date_stock = datetime.now().strftime('%Y-%m-%d')
    start_date_stock = (datetime.now() - timedelta(days=YEARS * 365 * 1.1)).strftime('%Y-%m-%d')
    stock_data = yf.download(
        TICKER,
        start=start_date_stock,
        end=end_date_stock,
        progress=False,
        auto_adjust=False,
    )

    if not stock_data.empty:
        # Prefer the adjusted close, fall back to the plain close.
        price_col = next(
            (candidate for candidate in ('Adj Close', 'Close') if candidate in stock_data.columns),
            None,
        )
        if price_col is None:
            print("Nessuna colonna Close/Adj Close trovata nei dati delle azioni.")
            # Without a price column the data is useless: reset to an empty frame.
            stock_data = pd.DataFrame()
        else:
            # Keep only the last YEARS of rows.
            start_date_filter = datetime.now() - timedelta(days=YEARS * 365)
            stock_data = stock_data[stock_data.index >= start_date_filter]

except Exception as e:
    # On failure stock_data / price_col keep whatever was assigned before the error.
    print("Errore fetching stock data:", e)

# ===============================
# PLOT
# ===============================
fig, ax3 = plt.subplots(figsize=(14, 6))


def _year_end(year):
    """Return 31 December of *year* as a pandas Timestamp."""
    return pd.to_datetime(f"{year}-12-31")


# Orange line: historical Net Income, one point per year placed at 31 December.
hist_ni = ni_df[ni_df['Type'] == 'Historical']
ax3.plot(pd.to_datetime(hist_ni['Year'].astype(str) + "-12-31"),
         hist_ni['Net Income'], marker='o', color='orange', label='Historical Net Income')

# Dashed red line: estimated Net Income.
est_ni = ni_df[ni_df['Type'] == 'Estimated']
ax3.plot(pd.to_datetime(est_ni['Year'].astype(str) + "-12-31"),
         est_ni['Net Income'], marker='o', color='red', linestyle='--', label='Estimated Net Income')

# Connector between the last historical point and the first estimate.
if not hist_ni.empty and not est_ni.empty:
    last_hist_date = _year_end(hist_ni['Year'].iloc[-1])
    last_hist_value = hist_ni['Net Income'].iloc[-1]
    first_est_date = _year_end(est_ni['Year'].iloc[0])
    first_est_value = est_ni['Net Income'].iloc[0]
    ax3.plot([last_hist_date, first_est_date], [last_hist_value, first_est_value],
             color='red', linestyle='--', alpha=0.7)

ax3.set_xlabel("Year")
ax3.set_ylabel("Net Income")
ax3.set_title(f"{TICKER} Net Income + Stock Price")
ax3.grid(True)
ax3.legend(loc='upper left')

# ===============================
# Stock price on the right-hand Y axis (sharing the Net Income X axis)
# ===============================
if not stock_data.empty and price_col in stock_data.columns:
    ax4 = ax3.twinx()
    ax4.plot(stock_data.index, stock_data[price_col], color='blue', alpha=0.5, label='Stock Price')
    ax4.set_ylabel("Stock Price")
    ax4.legend(loc='upper right')

    # Align the X range over both data sets. Either Net Income frame may be
    # empty (e.g. the estimates could not be scraped), so only non-empty
    # frames contribute a bound.
    x_min_candidates = [stock_data.index.min()]
    x_max_candidates = [stock_data.index.max()]
    if not hist_ni.empty:
        x_min_candidates.append(_year_end(hist_ni['Year'].iloc[0]))
    if not est_ni.empty:
        x_max_candidates.append(_year_end(est_ni['Year'].iloc[-1]))
    elif not hist_ni.empty:
        x_max_candidates.append(_year_end(hist_ni['Year'].iloc[-1]))
    ax3.set_xlim(min(x_min_candidates), max(x_max_candidates))

# Rotate the labels on ax3 explicitly: after twinx() the current axes is the
# twin, whose X axis is hidden, so plt.xticks(rotation=45) has no visible effect.
ax3.tick_params(axis='x', labelrotation=45)
plt.tight_layout()
plt.show()