In [80]:
import pandas as pd
import numpy as np
from dateutil.relativedelta import relativedelta
from sklearn.svm import SVR
import warnings
warnings.filterwarnings('ignore')

In [81]:
df = pd.read_csv("../preprocessed-data.csv")
df

Unnamed: 0,Date,Value
0,2010-01-01,388.91
1,2010-02-01,390.41
2,2010-03-01,391.37
3,2010-04-01,392.67
4,2010-05-01,393.21
...,...,...
179,2024-12-01,425.40
180,2025-01-01,426.65
181,2025-02-01,427.09
182,2025-03-01,428.15


In [82]:
def evaluateModel(df, predCol):
    # remove rows with NaN predictions for evaluation
    validDf = df.dropna(subset=[predCol, 'Value'])
    
    if len(validDf) == 0:
        return "No valid predictions to evaluate"
    
    actual = validDf['Value']
    predicted = validDf[predCol]
    
    # calculate metrics
    mae = np.mean(np.abs(actual - predicted))
    mse = np.mean((actual - predicted) ** 2)
    rmse = np.sqrt(mse)
    mape = np.mean(np.abs((actual - predicted) / actual)) * 100
    
    return f"MAE: {mae:.4f}, MSE: {mse:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}%"

In [None]:
def execSVM(steps=12):
	dfCopy = df.copy()
	dfCopy['Date'] = pd.to_datetime(dfCopy['Date'])

	# create time index (months since start)
	startDate = dfCopy['Date'].min()
	dfCopy['timeIndex'] = (dfCopy['Date'] - startDate).dt.days / 30.44 

	x = dfCopy['timeIndex'].values.reshape(-1, 1)
	y = dfCopy['Value'].values

	# fit linear regression model
	model = SVR(kernel='linear', C=0.1, gamma='auto')
	model.fit(x, y)
	print("SVM  Model:")
	print(f"R² Score: {model.score(x, y):.6f}")

	# make in-sample predictions
	inSamplePred = model.predict(x)

	resultDf = dfCopy.copy()
	resultDf['SVM'] = inSamplePred

	# generate future predictions
	lastTimeIndex = dfCopy['timeIndex'].iloc[-1]
	lastDate = dfCopy['Date'].iloc[-1]

	futureDates = []
	futureValues = []
	futureSvmPred = []

	for i in range(steps):
		newDate = lastDate + relativedelta(months=i+1)
		newTimeIndex = lastTimeIndex + (i + 1)
		
		# predict future value
		futurePred = model.predict([[newTimeIndex]])[0]
		
		futureDates.append(newDate)
		futureValues.append(np.nan)
		futureSvmPred.append(futurePred)

	# create future dataframe
	futureDf = pd.DataFrame({
		'Date': futureDates,
		'Value': futureValues,
		'timeIndex': [lastTimeIndex + i + 1 for i in range(steps)],
		'SVM': futureSvmPred
	})

	extendedDf = pd.concat([resultDf, futureDf], ignore_index=True)

	print("\nModel Evaluation:")
	evaluation = evaluateModel(resultDf, "SVM")
	print(evaluation, "\n")

	# format output similar to sarima
	extendedDf['Date'] = extendedDf['Date'].dt.strftime('%Y-%m-%d')
	outputDf = extendedDf[['Date', 'Value', 'SVM']].copy()

	print(outputDf.to_string())
	return outputDf

In [84]:
linearRegResults = execSVM(12)

outPath = "../SVM-results.csv"
linearRegResults.to_csv(outPath, index=False)

linearRegResults

SVM  Model:
R² Score: 0.958224

Model Evaluation:
MAE: 1.9307, MSE: 5.1784, RMSE: 2.2756, MAPE: 0.4752% 

           Date   Value         SVM
0    2010-01-01  388.91  388.517060
1    2010-02-01  390.41  388.725869
2    2010-03-01  391.37  388.914470
3    2010-04-01  392.67  389.123278
4    2010-05-01  393.21  389.325350
5    2010-06-01  392.38  389.534158
6    2010-07-01  390.41  389.736231
7    2010-08-01  388.54  389.945039
8    2010-09-01  387.03  390.153847
9    2010-10-01  387.43  390.355920
10   2010-11-01  388.87  390.564728
11   2010-12-01  389.99  390.766800
12   2011-01-01  391.50  390.975609
13   2011-02-01  392.05  391.184417
14   2011-03-01  392.80  391.373018
15   2011-04-01  393.44  391.581826
16   2011-05-01  394.41  391.783898
17   2011-06-01  393.95  391.992707
18   2011-07-01  392.72  392.194779
19   2011-08-01  390.33  392.403587
20   2011-09-01  389.28  392.612395
21   2011-10-01  389.19  392.814468
22   2011-11-01  390.48  393.023276
23   2011-12-01  392.06  393.2

Unnamed: 0,Date,Value,SVM
0,2010-01-01,388.91,388.517060
1,2010-02-01,390.41,388.725869
2,2010-03-01,391.37,388.914470
3,2010-04-01,392.67,389.123278
4,2010-05-01,393.21,389.325350
...,...,...,...
191,2025-12-01,,427.668732
192,2026-01-01,,427.873768
193,2026-02-01,,428.078804
194,2026-03-01,,428.283840
