In [14]:
import numpy as np
import pandas as pd

tickers = ["BRK","MMC","PGR","ALL","AJG","MET","PRU","TRV","AIG","FNF","MKL","FAF","HIG","BRO","AIZ","AFL","UNM"]
dataframes = {}

for ticker in tickers:
    df = pd.read_csv(f"FinancialRatios_{ticker}.csv")
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)
    dataframes[ticker] = df

    

In [18]:
dataframes["BRK"].info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 39 entries, 2014-12-31 to 2024-06-30
Data columns (total 7 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   Working Capital/Total Assets        39 non-null     float64
 1   RetainedEarnings/TotalAssets        39 non-null     float64
 2   EBIT/TotalAssets                    39 non-null     float64
 3   BookValueofEquity/TotalLiabilities  39 non-null     float64
 4   Interest Coverage                   39 non-null     float64
 5   CPI                                 39 non-null     float64
 6   IAK                                 39 non-null     float64
dtypes: float64(7)
memory usage: 2.4 KB


In [23]:

target_columns = [
    "Working Capital/Total Assets",
    "RetainedEarnings/TotalAssets",
    "EBIT/TotalAssets",
    "BookValueofEquity/TotalLiabilities"
]



In [31]:
!pip install xgboost

Defaulting to user installation because normal site-packages is not writeable
Collecting xgboost
  Downloading xgboost-2.1.3-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-2.1.3-py3-none-win_amd64.whl (124.9 MB)
   ---------------------------------------- 0.0/124.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/124.9 MB ? eta -:--:--
   ---------------------------------------- 0.1/124.9 MB 812.7 kB/s eta 0:02:34
   ---------------------------------------- 0.4/124.9 MB 2.5 MB/s eta 0:00:49
   ---------------------------------------- 0.9/124.9 MB 5.3 MB/s eta 0:00:24
    --------------------------------------- 1.8/124.9 MB 8.4 MB/s eta 0:00:15
   - -------------------------------------- 4.2/124.9 MB 15.9 MB/s eta 0:00:08
   -- ------------------------------------- 7.1/124.9 MB 22.7 MB/s eta 0:00:06
   -- ------------------------------------- 9.3/124.9 MB 27.0 MB/s eta 0:00:05
   --- ------------------------------------ 11.3/124.9 MB 50.4 MB/s eta 0:00:03
 

In [33]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Function to create lag features for time series
def create_lagged_features(df, target_columns, lags=10):
    df_lagged = df.copy()
    for target in target_columns:
        for lag in range(1, lags + 1):
            df_lagged[f"{target}_lag{lag}"] = df_lagged[target].shift(lag)
    df_lagged.dropna(inplace=True)  # Remove NaNs due to shifting
    return df_lagged

# Train and forecast for each ticker
forecast_dfs = []
for ticker, df in dataframes.items():
    # Create lag features
    df_lagged = create_lagged_features(df, target_columns)

    # Define features (X) and target variables (y)
    feature_columns = [col for col in df_lagged.columns if col not in target_columns]
    X = df_lagged[feature_columns]
    y = df_lagged[target_columns]

    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    # Train XGBoost model for each target variable
    models = {}
    predictions = {}
    
    for i, target in enumerate(target_columns):
        model = xgb.XGBRegressor(objective="reg:squarederror", n_estimators=100, learning_rate=0.05, max_depth=5)
        model.fit(X_train, y_train.iloc[:, i])
        models[target] = model
        predictions[target] = model.predict(X_test)

    # Evaluate performance
    for i, target in enumerate(target_columns):
        print(f"🔹 {ticker} - {target}")
        mae = mean_absolute_error(y_test.iloc[:, i], predictions[target])
        rmse = np.sqrt(mean_squared_error(y_test.iloc[:, i], predictions[target]))
        r2 = r2_score(y_test.iloc[:, i], predictions[target])
        print(f"📉 MAE: {mae}, RMSE: {rmse}, R²: {r2}\n")

    # Forecast next step
    last_row = X.iloc[-1:].copy()  # Get last available row as input
    for target in target_columns:
        last_row[target] = models[target].predict(last_row)

    # Prepare forecast DataFrame
    next_step_date = df.index[-1] + pd.DateOffset(months=3)  # Adjust for quarterly predictions
    forecast_df = pd.DataFrame({
        "Date": [next_step_date],
        "Ticker": [ticker],
        "Predicted_WorkingCapital_TotalAssets": [last_row["Working Capital/Total Assets"].values[0]],
        "Predicted_RetainedEarnings_TotalAssets": [last_row["RetainedEarnings/TotalAssets"].values[0]],
        "Predicted_EBIT_TotalAssets": [last_row["EBIT/TotalAssets"].values[0]],
        "Predicted_BookValueofEquity_TotalLiabilities": [last_row["BookValueofEquity/TotalLiabilities"].values[0]]
    })
    forecast_dfs.append(forecast_df)

# Combine forecasts for all tickers
all_forecasts = pd.concat(forecast_dfs, ignore_index=True)

# Display the final forecast DataFrame
print(all_forecasts)

🔹 BRK - Working Capital/Total Assets
📉 MAE: 0.031148043537861186, RMSE: 0.044210402125576205, R²: -0.05573126046246446

🔹 BRK - RetainedEarnings/TotalAssets
📉 MAE: 0.08331750074886324, RMSE: 0.11017786431390772, R²: -71.04261011619847

🔹 BRK - EBIT/TotalAssets
📉 MAE: 0.009462696010582925, RMSE: 0.011493417840477476, R²: 0.7322628338568427

🔹 BRK - BookValueofEquity/TotalLiabilities
📉 MAE: 0.14774899248154705, RMSE: 0.1647669869746001, R²: -11.925615360127715



ValueError: feature_names mismatch: ['Interest Coverage', 'CPI', 'IAK', 'Working Capital/Total Assets_lag1', 'Working Capital/Total Assets_lag2', 'Working Capital/Total Assets_lag3', 'Working Capital/Total Assets_lag4', 'Working Capital/Total Assets_lag5', 'Working Capital/Total Assets_lag6', 'Working Capital/Total Assets_lag7', 'Working Capital/Total Assets_lag8', 'Working Capital/Total Assets_lag9', 'Working Capital/Total Assets_lag10', 'RetainedEarnings/TotalAssets_lag1', 'RetainedEarnings/TotalAssets_lag2', 'RetainedEarnings/TotalAssets_lag3', 'RetainedEarnings/TotalAssets_lag4', 'RetainedEarnings/TotalAssets_lag5', 'RetainedEarnings/TotalAssets_lag6', 'RetainedEarnings/TotalAssets_lag7', 'RetainedEarnings/TotalAssets_lag8', 'RetainedEarnings/TotalAssets_lag9', 'RetainedEarnings/TotalAssets_lag10', 'EBIT/TotalAssets_lag1', 'EBIT/TotalAssets_lag2', 'EBIT/TotalAssets_lag3', 'EBIT/TotalAssets_lag4', 'EBIT/TotalAssets_lag5', 'EBIT/TotalAssets_lag6', 'EBIT/TotalAssets_lag7', 'EBIT/TotalAssets_lag8', 'EBIT/TotalAssets_lag9', 'EBIT/TotalAssets_lag10', 'BookValueofEquity/TotalLiabilities_lag1', 'BookValueofEquity/TotalLiabilities_lag2', 'BookValueofEquity/TotalLiabilities_lag3', 'BookValueofEquity/TotalLiabilities_lag4', 'BookValueofEquity/TotalLiabilities_lag5', 'BookValueofEquity/TotalLiabilities_lag6', 'BookValueofEquity/TotalLiabilities_lag7', 'BookValueofEquity/TotalLiabilities_lag8', 'BookValueofEquity/TotalLiabilities_lag9', 'BookValueofEquity/TotalLiabilities_lag10'] ['Interest Coverage', 'CPI', 'IAK', 'Working Capital/Total Assets_lag1', 'Working Capital/Total Assets_lag2', 'Working Capital/Total Assets_lag3', 'Working Capital/Total Assets_lag4', 'Working Capital/Total Assets_lag5', 'Working Capital/Total Assets_lag6', 'Working Capital/Total Assets_lag7', 'Working Capital/Total Assets_lag8', 'Working Capital/Total Assets_lag9', 'Working Capital/Total Assets_lag10', 'RetainedEarnings/TotalAssets_lag1', 'RetainedEarnings/TotalAssets_lag2', 'RetainedEarnings/TotalAssets_lag3', 'RetainedEarnings/TotalAssets_lag4', 'RetainedEarnings/TotalAssets_lag5', 'RetainedEarnings/TotalAssets_lag6', 'RetainedEarnings/TotalAssets_lag7', 'RetainedEarnings/TotalAssets_lag8', 'RetainedEarnings/TotalAssets_lag9', 'RetainedEarnings/TotalAssets_lag10', 'EBIT/TotalAssets_lag1', 'EBIT/TotalAssets_lag2', 'EBIT/TotalAssets_lag3', 'EBIT/TotalAssets_lag4', 'EBIT/TotalAssets_lag5', 'EBIT/TotalAssets_lag6', 'EBIT/TotalAssets_lag7', 'EBIT/TotalAssets_lag8', 'EBIT/TotalAssets_lag9', 'EBIT/TotalAssets_lag10', 'BookValueofEquity/TotalLiabilities_lag1', 'BookValueofEquity/TotalLiabilities_lag2', 'BookValueofEquity/TotalLiabilities_lag3', 'BookValueofEquity/TotalLiabilities_lag4', 'BookValueofEquity/TotalLiabilities_lag5', 'BookValueofEquity/TotalLiabilities_lag6', 'BookValueofEquity/TotalLiabilities_lag7', 'BookValueofEquity/TotalLiabilities_lag8', 'BookValueofEquity/TotalLiabilities_lag9', 'BookValueofEquity/TotalLiabilities_lag10', 'Working Capital/Total Assets']
training data did not have the following fields: Working Capital/Total Assets

In [None]:
forecast_dfs[0]

In [None]:
dataframes["BRK"].tail()