In [None]:
# Import packages
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.compose import ColumnTransformer


In [None]:
# Load the preprocessed and the raw training data

# Both CSV files are looked up one directory above the current working directory
data_dir = Path.cwd().parent
csv_file_path_pre_processed = data_dir / 'preprocessed_data.csv'
csv_file_path_raw = data_dir / 'training_data_vt2025.csv'

# Parse each CSV file into its own DataFrame
pre_processed_data = pd.read_csv(csv_file_path_pre_processed)
raw_data = pd.read_csv(csv_file_path_raw)

In [None]:
# Separate the target column (y) from the remaining feature columns (X)
target_column = 'increase_stock'
y = pre_processed_data[target_column]
X = pre_processed_data.drop(columns=[target_column])

In [None]:
# Stratified K-Fold Cross Validation (better for imbalanced classification problems)
K = 10
skf = StratifiedKFold(n_splits=K, shuffle=True, random_state=42)

# Per-fold results; each list receives one entry per fold, in fold order
accuracies = []
models = []
# Plural name on purpose: a list called `classification_report` would shadow the
# sklearn.metrics function of the same name and make it uncallable.
classification_reports = []
confusion_matrices = []

for fold, (train_index, val_index) in enumerate(skf.split(X, y)):
    X_train, X_val = X.iloc[train_index], X.iloc[val_index]
    y_train, y_val = y.iloc[train_index], y.iloc[val_index]

    # Standardize the continuous features and pass the other selected features through.
    # Columns not listed here are dropped (ColumnTransformer defaults to remainder='drop').
    ct = ColumnTransformer([
        ('scaler', StandardScaler(), ['temp', 'dew', 'windspeed']),
        ('passthrough', 'passthrough', ['weekday', 'day_of_week_sin', 'day_of_week_cos']),
    ])
    # Fit the scaler on the training fold only so validation statistics do not leak in
    X_train_scaled = ct.fit_transform(X_train)
    X_val_scaled = ct.transform(X_val)

    # Train the model
    # NOTE(review): LDA is assumed here because it is the model evaluated with
    # cross_val_score further down -- confirm this is the intended per-fold model.
    fold_model = LinearDiscriminantAnalysis()
    fold_model.fit(X_train_scaled, y_train)

    # Make predictions on the validation set
    y_pred = fold_model.predict(X_val_scaled)

    # Evaluate the model
    accuracies.append(accuracy_score(y_val, y_pred))
    models.append(fold_model)
    classification_reports.append(classification_report(y_val, y_pred))
    confusion_matrices.append(confusion_matrix(y_val, y_pred))


In [None]:
# Create an LDA classifier and cross-validate it on the stratified folds in `skf`,
# scoring each fold with the F1 metric
model = LinearDiscriminantAnalysis()
scores = cross_val_score(estimator=model, X=X, y=y, scoring='f1', cv=skf)

In [55]:
# `scores` holds per-fold F1 values (cross_val_score was called with scoring='f1'),
# so the summary is labelled as F1 rather than accuracy.
print("Cross-validation scores: ", scores)
print("Mean F1 score: ", np.mean(scores))
print("Standard deviation: ", np.std(scores))

Cross-validation scores:  [0.45454545 0.4        0.37209302 0.45454545 0.59259259 0.6
 0.54166667 0.39130435 0.47826087 0.33333333]
Mean accuracy:  0.461834174233062
Standard deviation:  0.08735029205764182
