In [1]:
import os
import numpy as np
import pandas as pd
import scipy
import matplotlib.pyplot as plt

import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score, mean_absolute_error, explained_variance_score

from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    pipeline,
)
from datasets import Dataset

from langdetect import detect, DetectorFactory, LangDetectException

# Check GPU Availability
print("GPU available:", torch.cuda.is_available())

  from .autonotebook import tqdm as notebook_tqdm


GPU available: True


In [2]:
def calc_returns(df):
    # returns percentage difference of stock price relative to previous trading interval 
    return pd.Series(df.pct_change())

def make_labels(df):
    # returns 0 or 1 based on whether the stock price rised or dropped compared to previous trading interval
    labels = [None]
    
    prev_price = df['Close'].iloc[0]
    for i in range(1, len(df)):
        if df['Close'].iloc[i] > prev_price:
            labels.append(1)
        else:
            labels.append(0)
        prev_price = df['Close'].iloc[i]
        
    return pd.Series(labels)

# finbert scoring
def finbert_sentiment(text: str, tokenizer, model) -> tuple[float, float, float, str]:
      # check for non-string or empty input
    if not isinstance(text, str) or not text.strip():
        # fill NaN values later
        return (None, None, None, "unknown")
    
    with torch.no_grad():
        inputs = tokenizer(
            text, return_tensors="pt", padding=True, truncation=True, max_length=512
        )
        outputs = model(**inputs)
        logits = outputs.logits
        scores = {
            k: v
            for k, v in zip(
                model.config.id2label.values(),
                scipy.special.softmax(logits.numpy().squeeze()),
            )
        }
        return (scores["positive"], scores["negative"], scores["neutral"], max(scores, key=scores.get))

# applies function to pandas series
def apply_finbert(df, tokenizer, model):
    df[["finbert_pos", "finbert_neg", "finbert_neu", "finbert_sentiment"]] = (df["summary"].apply(lambda text: finbert_sentiment(text, tokenizer, model)).apply(pd.Series))
    df["finbert_score"] = df["finbert_pos"] - df["finbert_neg"]
    return df

In [3]:
"""
Functions for translating text
"""
DetectorFactory.seed = 0
torch.cuda.empty_cache()

# translation model with GPU processing
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en", device=0, batch_size=4)

# detects language of word
def detect_language(word):
    try:
        if len(word) < 5:  # Ignore very short tokens
            return "short"
        return detect(word)
    except LangDetectException:
        return "unknown"

# 
def split_by_language_with_context(text):
    words = text.split()  # Tokenise into words
    split_text = []
    current_chunk = []
    current_language = None

    # grouping text by language
    for i, word in enumerate(words):
        lang = detect_language(word)

        # If short or unknown, assign to context's majority language
        if lang in {"short", "unknown"}:
            if current_language:
                current_chunk.append(word)
            else:
                # Assign to the next word's detected language or keep as "unknown"
                next_lang = detect_language(words[i + 1]) if i + 1 < len(words) else "unknown"
                lang = next_lang if next_lang != "short" else "unknown"
        elif lang != current_language:
            if current_chunk:
                split_text.append((" ".join(current_chunk), current_language))
            # Reset for the new language
            current_chunk = [word]
            current_language = lang
        else:
            current_chunk.append(word)

    # Add the last chunk
    if current_chunk:
        split_text.append((" ".join(current_chunk), current_language))

    return split_text

def translate_mixed_languages(text):
    try:
        # Split text by language with context handling
        split_text = split_by_language_with_context(text)

        # Translate each chunk
        translations = []
        for chunk, lang in split_text:
            if lang == "en":  # Skip translation for English
                translations.append(chunk)
            elif lang not in {"unknown", "short"}:  # Only translate known non-English languages
                translated_chunk = translator(chunk, max_length=512)[0]["translation_text"]
                translations.append(translated_chunk)
            else:
                translations.append(chunk)  # Keep unknown/short as-is

        # Combine translations back
        return " ".join(translations)
    except Exception as e:
        return f"Error during translation: {e}"

def batch_translate(examples, column_name="title", max_length=512):
    # Combine all text into a batch for processing
    translations = []
    for text in examples[column_name]:
        split_text = split_by_language_with_context(text)
        chunks = [chunk for chunk, lang in split_text if lang not in {"en", "unknown", "short"}]
        if chunks:
            # Batch translation of chunks
            translated_chunks = translator(chunks, max_length=max_length)
            translated_text = " ".join([t["translation_text"] for t in translated_chunks])
            translations.append(translated_text)
        else:
            translations.append(text)
    return {f"Translated {column_name.capitalize()}": translations}



In [None]:

def prep_data(company):
    df_news = pd.read_csv(f"data/news/{company}_news.csv")
    df_stock = pd.read_csv(f"data/twelve_data/{company}.csv")
    
    # Translation currently disabled because of issues with model liscence
    
    # columns_to_translate = ['summary']
    
    # df_news_dataset = Dataset.from_pandas(df_news)
    
    # # Apply translation to each specified column
    # for column in columns_to_translate:
    #     df_news_dataset = df_news_dataset.map(
    #         lambda examples: batch_translate(examples, column_name=column),
    #         batched=True,
    #         batch_size=4
    #     )
    
    # # Convert back to pandas
    # df_news = df_news_dataset.to_pandas()
    
    tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
    model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
    
    df_news = apply_finbert(df_news, tokenizer, model)
    
    df_news.columns = df_news.columns.str.capitalize()

    df_merge = df_news.merge(df_stock, on='Date')
    
    labels = make_labels(df_merge)
    stock_returns = calc_returns(df_merge['Close'])
    
    df_merge['Label'] = labels
    df_merge['Returns'] = stock_returns
    
    return df_merge

In [5]:
# df of companies to extract data from
filepath = "data/companies.csv"
company_df = pd.read_csv(filepath)

for i in range(len(company_df)-1):
    company = company_df.iloc[i]['Company']
    
    print('Company:', company)
    # load and translate text
    df = prep_data(company.lower().replace(' ', '_'))
    
    df.drop(columns=['Title', 'Description', 'Summary', 'Unnamed: 0'], inplace=True)
    
    filepath = 'data/processed'

    if not os.path.exists(filepath):
        os.makedirs(filepath)
    
    df.to_csv(os.path.join(filepath, f"{company.lower().replace(' ', '_')}_processed.csv"))
    
    print(company, 'data saved')

Company: Intel
Intel data saved
Company: AMD
AMD data saved
Company: NVIDIA
NVIDIA data saved
Company: Qualcomm
Qualcomm data saved
Company: Texas Instruments
Texas Instruments data saved
Company: Broadcom
Broadcom data saved
Company: Micron Technology
Micron Technology data saved
Company: TSMC
TSMC data saved
Company: Samsung Electronics
Samsung Electronics data saved
Company: NXP Semiconductors
NXP Semiconductors data saved
Company: ASML
ASML data saved
Company: Lam Research
Lam Research data saved


In [None]:
"""
Functions for model training
"""

def df_to_Xy(df, window_size=10):
  df_as_np = df.to_numpy()
  X = []
  y = []
  for i in range(len(df_as_np)-window_size):
    row = [r for r in df_as_np[i:i+window_size]]
    X.append(row)
    label = df_as_np[i+window_size][3]
    y.append(label)
  return np.array(X), np.array(y)

# standardize the data with training set mean and standard deviation                 
def preprocess(X, mean, std, i):
  X[:, :, i] = (X[:, :, i] - mean) / std
  return X

class CNNModel(nn.Module):
  def __init__(self):
      super(CNNModel, self).__init__()
      # Conv1D layer
      self.conv1d = nn.Conv1d(in_channels=7, out_channels=64, kernel_size=2)
      # Flatten layer is implicit in PyTorch
      self.fc1 = nn.Linear(64 * 9, 8)  # Adjusted input size to 64 * 4 = 256
      self.relu = nn.ReLU()
      self.fc2 = nn.Linear(8, 1)  # Final output layer

  def forward(self, x):
      x = self.conv1d(x)  # Apply Conv1D
      #print(f"After Conv1D: {x.shape}")  # Debugging: Print shape after Conv1D
      x = x.view(x.size(0), -1)  # Flatten (equivalent to Flatten layer in Keras)
      #print(f"After Flatten: {x.shape}")  # Debugging: Print shape after flatten
      x = self.relu(self.fc1(x))  # Fully connected + ReLU
      x = self.fc2(x)  # Final linear layer
      return x

# Function to plot predictions
def plot_predictions(model, X, y, title, company, filename, mean, std):
    model.eval()  # Ensure the model is in evaluation mode
    with torch.no_grad():
      # Transpose X to match Conv1D input format (Batch Size, Channels, Sequence Length)
      X_tensor = torch.tensor(X, dtype=torch.float32).transpose(1, 2)
      predictions = model(X_tensor).flatten().numpy()  # Get predictions

    # Create a DataFrame to compare predictions with actuals
    df = pd.DataFrame(data={"Predictions": predictions, "Actuals": y})
    
    savefig_folder = f"plots/{company.lower().replace(' ', '_')}"
    if not os.path.exists(savefig_folder):
        os.makedirs(savefig_folder)
    
    # Plot predictions vs. actuals
    plt.figure(figsize=(10, 6))
    plt.plot(df["Predictions"], label="Predictions")
    plt.plot(df["Actuals"], label="Actuals")
    plt.ylabel("Stock Price ($)")
    plt.title(title)
    plt.legend()
    plt.savefig(os.path.join(savefig_folder, filename))
    plt.close()
    
    return df, mse(y, predictions)

In [7]:
def evaluation_metrics(model, X_test, y_test, company):
    # Convert X_test to a PyTorch Tensor and transpose for Conv1D
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32).transpose(1, 2)

    # Get predictions
    model.eval()  # Ensure the model is in evaluation mode
    with torch.no_grad():
        predictions = model(X_test_tensor).flatten().numpy()

    # R^2 Score
    r2 = r2_score(y_test, predictions)
    
    # Mean Absolute Error (MAE)
    mae = mean_absolute_error(y_test, predictions)
    
    # Mean Absolute Percentage Error (MAPE)
    mape = np.mean(np.abs((y_test - predictions) / y_test)) * 100

    # Explained Variance Score
    explained_variance = explained_variance_score(y_test, predictions)

    # Symmetric Mean Absolute Percentage Error (SMAPE)
    smape = np.mean(2 * np.abs(y_test - predictions) / (np.abs(y_test) + np.abs(predictions))) * 100

    # Save metrics to a file
    save_metrics_path = 'model_metrics/'
    if not os.path.exists(save_metrics_path):
        os.mkdir(save_metrics_path)
    
    metrics_file = os.path.join(save_metrics_path, f"{company.lower().replace(' ', '_')}_regression_metrics.txt")
    with open(metrics_file, "w") as file:
        file.write(f"R^2 Score: {r2:.4f}\n")
        file.write(f"Mean Absolute Error (MAE): {mae:.4f}\n")
        file.write(f"Mean Absolute Percentage Error (MAPE): {mape:.4f}%\n")
        file.write(f"Explained Variance: {explained_variance:.4f}\n")
        file.write(f"SMAPE (Symmetric Mean Absolute Percentage Error): {smape:.4f}%\n")

    print(f"Metrics saved to {metrics_file}")

In [None]:
filepath = "data/companies.csv"
company_df = pd.read_csv(filepath)

for i in range(len(company_df)-1):
    company = company_df.iloc[i]['Company']
    print('Training Model for - ', company)
    
    #df = pd.read_csv(f"data/finbert/{company.lower().replace(' ', '_')}_translated.csv")
    df = pd.read_csv(f"data/processed/{company.lower().replace(' ', '_')}_processed.csv")
    df = df[['Open', 'High', 'Low', 'Close', 'Volume', 'Time', 'Finbert_score']]
    
    # reverse order of dates
    df = df.iloc[::-1].reset_index(drop=True)

    df.fillna(method='ffill', inplace=True)  # Replace NaNs with the previous row's values
    df.fillna(0, inplace=True)
    
    X, y = df_to_Xy(df)
    X.shape, y.shape
    
    X_train, y_train = X[:2000], y[:2000]
    X_val, y_val = X[2000:3000], y[2000:3000]
    X_test, y_test = X[3000:], y[3000:]
    
    for i in range(2, len(X_train[0,0])):
        mean = np.mean(X_train[:, :, i]) # mean of training temp
        std = np.std(X_train[:, :, i]) # std of training temp
        if i == 3:
            mean_close = mean
            std_close = std
        preprocess(X_train, mean, std, i)
        preprocess(X_val, mean, std, i)    
        preprocess(X_test, mean, std, i)
    
    # Instantiate the model
    model = CNNModel()
    
    # Define loss function and optimizer
    loss_fn = nn.MSELoss()  # Mean Squared Error Loss
    optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer

    # Reshape the data for Conv1D input
    train_data = TensorDataset(torch.tensor(X_train, dtype=torch.float32).transpose(1, 2), 
                            torch.tensor(y_train, dtype=torch.float32))
    val_data = TensorDataset(torch.tensor(X_val, dtype=torch.float32).transpose(1, 2), 
                            torch.tensor(y_val, dtype=torch.float32))

    train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=32, shuffle=False)

    # Training loop
    best_val_loss = float("inf")  # Initialize best validation loss for checkpointing
    best_epoch = 0
    epochs = 500

    # Lists to store loss values for plotting
    train_losses = []
    val_losses = []
    
    for epoch in range(epochs):
        # Set the model to training mode
        model.train()  
        train_loss = 0.0

        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()  # Clear previous gradients
            outputs = model(X_batch).flatten()  # Forward pass
            loss = loss_fn(outputs, y_batch)  # Compute loss
            loss.backward()  # Backward pass
            optimizer.step()  # Update weights
            train_loss += loss.item()

        train_loss /= len(train_loader)
        train_losses.append(train_loss)

        # Validation phase
        model.eval() 
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                outputs = model(X_batch).flatten()
                loss = loss_fn(outputs, y_batch)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        val_losses.append(val_loss)
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_epoch = epoch
        
        print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

        # Save checkpoint for the current epoch
        save_folder = f"checkpoints/{company.lower().replace(' ', '_')}_model"
        if not os.path.exists(save_folder):
            os.makedirs(save_folder)
            
        filename = f"checkpoint_epoch_{epoch + 1}.pth"
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_loss': train_loss,
            'val_loss': val_loss,
        }, os.path.join(save_folder, filename))
        
    # Plot the training and validation loss curves
    savefig_folder = f"plots/{company.lower().replace(' ', '_')}"
    if not os.path.exists(savefig_folder):
        os.makedirs(savefig_folder)
    
    plt.figure(figsize=(10, 6))
    plt.plot(range(1, epochs + 1), train_losses, label="Training Loss")
    plt.plot(range(1, epochs + 1), val_losses, label="Validation Loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.yscale('log')
    plt.title(f"Training and Validation Loss - {company}")
    plt.legend()
    plt.grid()
    plt.savefig(os.path.join(savefig_folder, 'loss_curve.png'))
    plt.close()
        
    best_checkpoint = torch.load(os.path.join(save_folder,f"checkpoint_epoch_{best_epoch+1}.pth"))
    model.load_state_dict(best_checkpoint['model_state_dict'])
    
    evaluation_metrics(model, X_test, y_test, company)
    
    # Call the function
    plot_predictions(model, X_train, y_train, f'{company} - Training Data - Model on Epoch {best_epoch+1}', company, 'train.png', mean_close, std_close)
    plot_predictions(model, X_val, y_val, f'{company} - Validation Data - Model on Epoch {best_epoch+1}', company, 'val.png', mean_close, std_close)
    plot_predictions(model, X_test, y_test, f'{company} - Test Data - Model on Epoch {best_epoch+1}', company, 'test.png', mean_close, std_close)
    print()

Training Model for -  Intel
Epoch 1/500, Train Loss: 46.5707, Val Loss: 0.1988


  df.fillna(method='ffill', inplace=True)  # Replace NaNs with the previous row's values


Epoch 2/500, Train Loss: 0.3074, Val Loss: 0.1435
Epoch 3/500, Train Loss: 0.2705, Val Loss: 0.1238
Epoch 4/500, Train Loss: 0.2650, Val Loss: 0.1145
Epoch 5/500, Train Loss: 0.2651, Val Loss: 0.1133
Epoch 6/500, Train Loss: 0.2574, Val Loss: 0.1119
Epoch 7/500, Train Loss: 0.2526, Val Loss: 0.1504
Epoch 8/500, Train Loss: 0.2526, Val Loss: 0.1063
Epoch 9/500, Train Loss: 0.2498, Val Loss: 0.1056
Epoch 10/500, Train Loss: 0.2433, Val Loss: 0.1017
Epoch 11/500, Train Loss: 0.2330, Val Loss: 0.0996
Epoch 12/500, Train Loss: 0.2262, Val Loss: 0.1014
Epoch 13/500, Train Loss: 0.2250, Val Loss: 0.1153
Epoch 14/500, Train Loss: 0.2143, Val Loss: 0.0895
Epoch 15/500, Train Loss: 0.2065, Val Loss: 0.0913
Epoch 16/500, Train Loss: 0.1980, Val Loss: 0.0838
Epoch 17/500, Train Loss: 0.1888, Val Loss: 0.0817
Epoch 18/500, Train Loss: 0.1824, Val Loss: 0.0876
Epoch 19/500, Train Loss: 0.1782, Val Loss: 0.1568
Epoch 20/500, Train Loss: 0.1786, Val Loss: 0.0794
Epoch 21/500, Train Loss: 0.1614, Val L

  best_checkpoint = torch.load(os.path.join(save_folder,f"checkpoint_epoch_{best_epoch+1}.pth"))


Metrics saved to model_metrics/intel_regression_metrics.txt

Training Model for -  AMD
Epoch 1/500, Train Loss: 1698.4679, Val Loss: 6.5725


  df.fillna(method='ffill', inplace=True)  # Replace NaNs with the previous row's values


Epoch 2/500, Train Loss: 5.7199, Val Loss: 6.1156
Epoch 3/500, Train Loss: 5.0423, Val Loss: 6.0743
Epoch 4/500, Train Loss: 4.9448, Val Loss: 6.0788
Epoch 5/500, Train Loss: 4.9628, Val Loss: 6.1047
Epoch 6/500, Train Loss: 4.9246, Val Loss: 5.9661
Epoch 7/500, Train Loss: 4.9168, Val Loss: 6.0991
Epoch 8/500, Train Loss: 4.8840, Val Loss: 6.0490
Epoch 9/500, Train Loss: 4.8717, Val Loss: 5.8322
Epoch 10/500, Train Loss: 4.8920, Val Loss: 5.8372
Epoch 11/500, Train Loss: 4.9289, Val Loss: 5.9172
Epoch 12/500, Train Loss: 4.8871, Val Loss: 5.7711
Epoch 13/500, Train Loss: 4.8521, Val Loss: 5.7347
Epoch 14/500, Train Loss: 4.7444, Val Loss: 5.6843
Epoch 15/500, Train Loss: 4.7854, Val Loss: 5.6879
Epoch 16/500, Train Loss: 4.8202, Val Loss: 5.6081
Epoch 17/500, Train Loss: 4.7071, Val Loss: 5.7501
Epoch 18/500, Train Loss: 4.9597, Val Loss: 5.7554
Epoch 19/500, Train Loss: 4.7762, Val Loss: 5.5638
Epoch 20/500, Train Loss: 4.7205, Val Loss: 6.1184
Epoch 21/500, Train Loss: 4.7970, Val L

  best_checkpoint = torch.load(os.path.join(save_folder,f"checkpoint_epoch_{best_epoch+1}.pth"))


Metrics saved to model_metrics/amd_regression_metrics.txt

Training Model for -  NVIDIA


  df.fillna(method='ffill', inplace=True)  # Replace NaNs with the previous row's values


Epoch 1/500, Train Loss: 686.4666, Val Loss: 11.9667
Epoch 2/500, Train Loss: 5.6187, Val Loss: 4.3816
Epoch 3/500, Train Loss: 4.7055, Val Loss: 4.1802
Epoch 4/500, Train Loss: 4.6760, Val Loss: 4.1946
Epoch 5/500, Train Loss: 4.6620, Val Loss: 4.4117
Epoch 6/500, Train Loss: 4.6013, Val Loss: 4.2583
Epoch 7/500, Train Loss: 4.6005, Val Loss: 4.0126
Epoch 8/500, Train Loss: 4.5554, Val Loss: 4.1726
Epoch 9/500, Train Loss: 4.6721, Val Loss: 4.7366
Epoch 10/500, Train Loss: 4.5289, Val Loss: 3.8999
Epoch 11/500, Train Loss: 4.4015, Val Loss: 3.8616
Epoch 12/500, Train Loss: 4.3727, Val Loss: 4.1001
Epoch 13/500, Train Loss: 4.3726, Val Loss: 3.9321
Epoch 14/500, Train Loss: 4.4102, Val Loss: 3.7644
Epoch 15/500, Train Loss: 4.3173, Val Loss: 3.7768
Epoch 16/500, Train Loss: 4.1923, Val Loss: 3.7649
Epoch 17/500, Train Loss: 4.2771, Val Loss: 3.6077
Epoch 18/500, Train Loss: 4.2742, Val Loss: 3.6249
Epoch 19/500, Train Loss: 4.1839, Val Loss: 4.0856
Epoch 20/500, Train Loss: 4.0165, Val

  best_checkpoint = torch.load(os.path.join(save_folder,f"checkpoint_epoch_{best_epoch+1}.pth"))


Metrics saved to model_metrics/nvidia_regression_metrics.txt

Training Model for -  Qualcomm
Epoch 1/500, Train Loss: 2459.5760, Val Loss: 51.1491


  df.fillna(method='ffill', inplace=True)  # Replace NaNs with the previous row's values


Epoch 2/500, Train Loss: 6.0097, Val Loss: 7.1891
Epoch 3/500, Train Loss: 4.0535, Val Loss: 7.5373
Epoch 4/500, Train Loss: 4.0028, Val Loss: 7.8425
Epoch 5/500, Train Loss: 3.9041, Val Loss: 7.8037
Epoch 6/500, Train Loss: 3.8734, Val Loss: 7.9340
Epoch 7/500, Train Loss: 3.8546, Val Loss: 8.3249
Epoch 8/500, Train Loss: 3.9043, Val Loss: 8.4833
Epoch 9/500, Train Loss: 3.8414, Val Loss: 9.2719
Epoch 10/500, Train Loss: 3.8774, Val Loss: 8.3723
Epoch 11/500, Train Loss: 3.8266, Val Loss: 7.4507
Epoch 12/500, Train Loss: 3.7554, Val Loss: 8.1740
Epoch 13/500, Train Loss: 3.7181, Val Loss: 8.7358
Epoch 14/500, Train Loss: 3.7385, Val Loss: 7.1947
Epoch 15/500, Train Loss: 3.7423, Val Loss: 6.8333
Epoch 16/500, Train Loss: 3.8548, Val Loss: 7.8908
Epoch 17/500, Train Loss: 3.6202, Val Loss: 8.8764
Epoch 18/500, Train Loss: 3.5991, Val Loss: 7.1202
Epoch 19/500, Train Loss: 3.5687, Val Loss: 9.5935
Epoch 20/500, Train Loss: 3.5741, Val Loss: 8.2671
Epoch 21/500, Train Loss: 3.5508, Val L

  best_checkpoint = torch.load(os.path.join(save_folder,f"checkpoint_epoch_{best_epoch+1}.pth"))


Metrics saved to model_metrics/qualcomm_regression_metrics.txt

Training Model for -  Texas Instruments
Epoch 1/500, Train Loss: 1974.8483, Val Loss: 2.4198


  df.fillna(method='ffill', inplace=True)  # Replace NaNs with the previous row's values


Epoch 2/500, Train Loss: 5.2065, Val Loss: 1.7020
Epoch 3/500, Train Loss: 4.0249, Val Loss: 1.6205
Epoch 4/500, Train Loss: 4.1748, Val Loss: 1.8070
Epoch 5/500, Train Loss: 4.0237, Val Loss: 1.6042
Epoch 6/500, Train Loss: 4.0641, Val Loss: 1.8325
Epoch 7/500, Train Loss: 4.0340, Val Loss: 1.5980
Epoch 8/500, Train Loss: 4.0021, Val Loss: 1.6050
Epoch 9/500, Train Loss: 4.0878, Val Loss: 1.6661
Epoch 10/500, Train Loss: 4.0214, Val Loss: 1.6008
Epoch 11/500, Train Loss: 3.9225, Val Loss: 1.5612
Epoch 12/500, Train Loss: 3.8908, Val Loss: 1.5534
Epoch 13/500, Train Loss: 4.1138, Val Loss: 1.8971
Epoch 14/500, Train Loss: 4.0638, Val Loss: 1.7478
Epoch 15/500, Train Loss: 3.9202, Val Loss: 1.6481
Epoch 16/500, Train Loss: 3.7998, Val Loss: 1.8616
Epoch 17/500, Train Loss: 3.9569, Val Loss: 1.8185
Epoch 18/500, Train Loss: 3.8515, Val Loss: 1.7649
Epoch 19/500, Train Loss: 3.7959, Val Loss: 1.7132
Epoch 20/500, Train Loss: 4.3507, Val Loss: 1.5948
Epoch 21/500, Train Loss: 3.9270, Val L

  best_checkpoint = torch.load(os.path.join(save_folder,f"checkpoint_epoch_{best_epoch+1}.pth"))


Metrics saved to model_metrics/texas_instruments_regression_metrics.txt

Training Model for -  Broadcom


  df.fillna(method='ffill', inplace=True)  # Replace NaNs with the previous row's values


Epoch 1/500, Train Loss: 2865.3868, Val Loss: 16.5541
Epoch 2/500, Train Loss: 9.6058, Val Loss: 6.7888
Epoch 3/500, Train Loss: 6.8627, Val Loss: 6.7158
Epoch 4/500, Train Loss: 6.9074, Val Loss: 6.6831
Epoch 5/500, Train Loss: 6.7944, Val Loss: 6.6988
Epoch 6/500, Train Loss: 6.9626, Val Loss: 6.7349
Epoch 7/500, Train Loss: 6.8573, Val Loss: 6.5984
Epoch 8/500, Train Loss: 6.7837, Val Loss: 6.6139
Epoch 9/500, Train Loss: 6.7480, Val Loss: 6.9704
Epoch 10/500, Train Loss: 6.6787, Val Loss: 6.5021
Epoch 11/500, Train Loss: 6.6576, Val Loss: 6.4781
Epoch 12/500, Train Loss: 6.6174, Val Loss: 6.4672
Epoch 13/500, Train Loss: 6.5986, Val Loss: 6.4600
Epoch 14/500, Train Loss: 6.7301, Val Loss: 6.9264
Epoch 15/500, Train Loss: 6.5766, Val Loss: 6.6237
Epoch 16/500, Train Loss: 6.4780, Val Loss: 6.5397
Epoch 17/500, Train Loss: 6.4927, Val Loss: 6.8205
Epoch 18/500, Train Loss: 6.4308, Val Loss: 7.0596
Epoch 19/500, Train Loss: 6.5030, Val Loss: 7.7577
Epoch 20/500, Train Loss: 6.4485, Va

  best_checkpoint = torch.load(os.path.join(save_folder,f"checkpoint_epoch_{best_epoch+1}.pth"))


Metrics saved to model_metrics/broadcom_regression_metrics.txt

Training Model for -  Micron Technology


  df.fillna(method='ffill', inplace=True)  # Replace NaNs with the previous row's values


Epoch 1/500, Train Loss: 1237.7472, Val Loss: 11.2353
Epoch 2/500, Train Loss: 4.4154, Val Loss: 4.5250
Epoch 3/500, Train Loss: 3.2146, Val Loss: 4.5904
Epoch 4/500, Train Loss: 3.1717, Val Loss: 4.5609
Epoch 5/500, Train Loss: 3.1249, Val Loss: 4.3977
Epoch 6/500, Train Loss: 3.1654, Val Loss: 5.0391
Epoch 7/500, Train Loss: 3.1208, Val Loss: 5.4232
Epoch 8/500, Train Loss: 3.1122, Val Loss: 4.3542
Epoch 9/500, Train Loss: 3.0288, Val Loss: 4.3580
Epoch 10/500, Train Loss: 2.9995, Val Loss: 4.3278
Epoch 11/500, Train Loss: 3.0428, Val Loss: 4.5939
Epoch 12/500, Train Loss: 3.0039, Val Loss: 4.3429
Epoch 13/500, Train Loss: 3.0615, Val Loss: 4.4775
Epoch 14/500, Train Loss: 3.0211, Val Loss: 4.2213
Epoch 15/500, Train Loss: 2.9597, Val Loss: 4.1957
Epoch 16/500, Train Loss: 2.9974, Val Loss: 4.2920
Epoch 17/500, Train Loss: 2.9439, Val Loss: 4.4420
Epoch 18/500, Train Loss: 2.9404, Val Loss: 4.3696
Epoch 19/500, Train Loss: 2.9165, Val Loss: 4.4227
Epoch 20/500, Train Loss: 2.9023, Va

  best_checkpoint = torch.load(os.path.join(save_folder,f"checkpoint_epoch_{best_epoch+1}.pth"))


Metrics saved to model_metrics/micron_technology_regression_metrics.txt

Training Model for -  TSMC


  df.fillna(method='ffill', inplace=True)  # Replace NaNs with the previous row's values


Epoch 1/500, Train Loss: 2951.1179, Val Loss: 26.1470
Epoch 2/500, Train Loss: 7.9715, Val Loss: 4.9514
Epoch 3/500, Train Loss: 5.6741, Val Loss: 4.9882
Epoch 4/500, Train Loss: 5.6388, Val Loss: 5.0192
Epoch 5/500, Train Loss: 5.6561, Val Loss: 4.9890
Epoch 6/500, Train Loss: 5.5943, Val Loss: 5.0479
Epoch 7/500, Train Loss: 5.6102, Val Loss: 4.8836
Epoch 8/500, Train Loss: 5.6193, Val Loss: 4.8967
Epoch 9/500, Train Loss: 5.5661, Val Loss: 4.8655
Epoch 10/500, Train Loss: 5.5336, Val Loss: 4.9119
Epoch 11/500, Train Loss: 5.5177, Val Loss: 4.8451
Epoch 12/500, Train Loss: 5.6386, Val Loss: 4.8559
Epoch 13/500, Train Loss: 5.4961, Val Loss: 5.0082
Epoch 14/500, Train Loss: 5.4958, Val Loss: 5.0848
Epoch 15/500, Train Loss: 5.4791, Val Loss: 4.9791
Epoch 16/500, Train Loss: 5.5924, Val Loss: 4.9227
Epoch 17/500, Train Loss: 5.4726, Val Loss: 4.7767
Epoch 18/500, Train Loss: 5.3703, Val Loss: 4.8160
Epoch 19/500, Train Loss: 5.4262, Val Loss: 4.7737
Epoch 20/500, Train Loss: 5.3791, Va

  best_checkpoint = torch.load(os.path.join(save_folder,f"checkpoint_epoch_{best_epoch+1}.pth"))


Metrics saved to model_metrics/tsmc_regression_metrics.txt

Training Model for -  Samsung Electronics


  df.fillna(method='ffill', inplace=True)  # Replace NaNs with the previous row's values


Epoch 1/500, Train Loss: 434008974.1845, Val Loss: 5078858.3203
Epoch 2/500, Train Loss: 964921.3705, Val Loss: 605850.8875
Epoch 3/500, Train Loss: 662921.0461, Val Loss: 581855.6523
Epoch 4/500, Train Loss: 654925.0422, Val Loss: 547888.0153
Epoch 5/500, Train Loss: 680642.9995, Val Loss: 556871.9680
Epoch 6/500, Train Loss: 680537.6649, Val Loss: 645646.7388
Epoch 7/500, Train Loss: 660318.7351, Val Loss: 553014.1496
Epoch 8/500, Train Loss: 668915.8202, Val Loss: 570727.1814
Epoch 9/500, Train Loss: 659878.4361, Val Loss: 619329.4181
Epoch 10/500, Train Loss: 652975.2156, Val Loss: 600037.8127
Epoch 11/500, Train Loss: 667731.4700, Val Loss: 548110.8071
Epoch 12/500, Train Loss: 659700.9635, Val Loss: 555070.6843
Epoch 13/500, Train Loss: 679636.7639, Val Loss: 576746.0559
Epoch 14/500, Train Loss: 664234.7760, Val Loss: 587698.1925
Epoch 15/500, Train Loss: 660171.4117, Val Loss: 544980.3737
Epoch 16/500, Train Loss: 661810.4400, Val Loss: 610267.5875
Epoch 17/500, Train Loss: 654

  best_checkpoint = torch.load(os.path.join(save_folder,f"checkpoint_epoch_{best_epoch+1}.pth"))


Metrics saved to model_metrics/samsung_electronics_regression_metrics.txt

Training Model for -  NXP Semiconductors


  df.fillna(method='ffill', inplace=True)  # Replace NaNs with the previous row's values


Epoch 1/500, Train Loss: 4898.6418, Val Loss: 47.0646
Epoch 2/500, Train Loss: 11.8829, Val Loss: 8.7749
Epoch 3/500, Train Loss: 8.1229, Val Loss: 8.4988
Epoch 4/500, Train Loss: 8.1171, Val Loss: 8.3701
Epoch 5/500, Train Loss: 8.1957, Val Loss: 8.1321
Epoch 6/500, Train Loss: 8.1252, Val Loss: 8.6143
Epoch 7/500, Train Loss: 8.1016, Val Loss: 11.3796
Epoch 8/500, Train Loss: 8.1815, Val Loss: 9.4865
Epoch 9/500, Train Loss: 8.0490, Val Loss: 9.3602
Epoch 10/500, Train Loss: 8.1832, Val Loss: 9.0441
Epoch 11/500, Train Loss: 7.9676, Val Loss: 8.2621
Epoch 12/500, Train Loss: 7.8622, Val Loss: 9.4706
Epoch 13/500, Train Loss: 7.9889, Val Loss: 8.0119
Epoch 14/500, Train Loss: 7.9005, Val Loss: 8.4869
Epoch 15/500, Train Loss: 7.7847, Val Loss: 8.2771
Epoch 16/500, Train Loss: 7.8294, Val Loss: 7.7636
Epoch 17/500, Train Loss: 8.2754, Val Loss: 8.6569
Epoch 18/500, Train Loss: 7.7201, Val Loss: 7.6294
Epoch 19/500, Train Loss: 7.8814, Val Loss: 8.8909
Epoch 20/500, Train Loss: 7.8407, 

  best_checkpoint = torch.load(os.path.join(save_folder,f"checkpoint_epoch_{best_epoch+1}.pth"))


Metrics saved to model_metrics/nxp_semiconductors_regression_metrics.txt

Training Model for -  ASML


  df.fillna(method='ffill', inplace=True)  # Replace NaNs with the previous row's values


Epoch 1/500, Train Loss: 37951.2542, Val Loss: 807.2541
Epoch 2/500, Train Loss: 244.7324, Val Loss: 194.8439
Epoch 3/500, Train Loss: 212.2628, Val Loss: 180.6311
Epoch 4/500, Train Loss: 210.8737, Val Loss: 182.1981
Epoch 5/500, Train Loss: 210.4417, Val Loss: 207.4062
Epoch 6/500, Train Loss: 207.9498, Val Loss: 178.6189
Epoch 7/500, Train Loss: 208.8232, Val Loss: 212.8195
Epoch 8/500, Train Loss: 212.9645, Val Loss: 184.9016
Epoch 9/500, Train Loss: 207.4449, Val Loss: 176.3120
Epoch 10/500, Train Loss: 210.2616, Val Loss: 173.2803
Epoch 11/500, Train Loss: 213.5195, Val Loss: 191.7201
Epoch 12/500, Train Loss: 204.1907, Val Loss: 192.0469
Epoch 13/500, Train Loss: 204.8831, Val Loss: 181.7998
Epoch 14/500, Train Loss: 203.0172, Val Loss: 185.4234
Epoch 15/500, Train Loss: 206.8197, Val Loss: 166.6224
Epoch 16/500, Train Loss: 200.3759, Val Loss: 175.8755
Epoch 17/500, Train Loss: 199.3738, Val Loss: 164.6529
Epoch 18/500, Train Loss: 195.7848, Val Loss: 169.2570
Epoch 19/500, Tra

  best_checkpoint = torch.load(os.path.join(save_folder,f"checkpoint_epoch_{best_epoch+1}.pth"))


Metrics saved to model_metrics/asml_regression_metrics.txt

Training Model for -  Lam Research


  df.fillna(method='ffill', inplace=True)  # Replace NaNs with the previous row's values


Epoch 1/500, Train Loss: 809.5124, Val Loss: 1.8923
Epoch 2/500, Train Loss: 2.4022, Val Loss: 3.1739
Epoch 3/500, Train Loss: 1.6727, Val Loss: 2.6822
Epoch 4/500, Train Loss: 1.6466, Val Loss: 2.0366
Epoch 5/500, Train Loss: 1.6323, Val Loss: 1.9330
Epoch 6/500, Train Loss: 1.6527, Val Loss: 1.8096
Epoch 7/500, Train Loss: 1.6112, Val Loss: 1.7802
Epoch 8/500, Train Loss: 1.6234, Val Loss: 1.7266
Epoch 9/500, Train Loss: 1.6130, Val Loss: 1.7310
Epoch 10/500, Train Loss: 1.6276, Val Loss: 1.7875
Epoch 11/500, Train Loss: 1.5792, Val Loss: 1.7852
Epoch 12/500, Train Loss: 1.5895, Val Loss: 1.9428
Epoch 13/500, Train Loss: 1.6007, Val Loss: 1.8208
Epoch 14/500, Train Loss: 1.5730, Val Loss: 2.1047
Epoch 15/500, Train Loss: 1.5695, Val Loss: 1.7157
Epoch 16/500, Train Loss: 1.5668, Val Loss: 1.8173
Epoch 17/500, Train Loss: 1.5740, Val Loss: 1.6971
Epoch 18/500, Train Loss: 1.5298, Val Loss: 1.6389
Epoch 19/500, Train Loss: 1.5267, Val Loss: 1.7141
Epoch 20/500, Train Loss: 1.5134, Val 

  best_checkpoint = torch.load(os.path.join(save_folder,f"checkpoint_epoch_{best_epoch+1}.pth"))


Metrics saved to model_metrics/lam_research_regression_metrics.txt

