In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, StratifiedKFold, cross_val_score
from sklearn.svm import SVC

In [2]:
# Load the raw table and set the class labels aside so they are not rescaled.
data = pd.read_csv('MFTrend_data.csv')
classes = data['Classification']

# Keep only the numeric feature columns (identifier, time and label removed).
data = data.drop(columns=['Company', 'Time', 'Classification'])

# Cast every feature column to float, logging each column name so a column
# that fails to convert can be identified from the cell output.
for col in list(data.columns):
    print(col)
    data[col] = data[col].astype(float)
    print("Done")

# Normalising the data
# NOTE(review): the scaler is fitted on every row before any train/test split,
# so the cross-validation folds used later are scaled with statistics that
# include their own test rows.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
data = pd.DataFrame(
    scaler.fit_transform(data),
    index=data.index,
    columns=data.columns,
)

# Re-attach the labels as the last column and display the result.
data['Classification'] = classes
data


Total Assets
Done
Total Liabilities
Done
Total Equity
Done
Total Shares Out. on Filing Date
Done
Book Value / Share
Done
Tangible Book Value
Done
Tangible Book Value Per Share
Done
Total Debt
Done
Net Debt
Done
Total Revenues
Done
Operating Income
Done
Revenue Per Share
Done
Basic EPS
Done
Normalized Basic EPS
Done
Dividend Per Share
Done
EBITA
Done
EBIT
Done
Normalized Net Income
Done
Net Income
Done
Cash from Operations
Done
Cash from Investing
Done
Cash from Financing
Done
Net Change in Cash
Done
Levered Free Cash Flow
Done
Unlevered Free Cash Flow
Done
Free Cash Flow / Share
Done


Unnamed: 0,Total Assets,Total Liabilities,Total Equity,Total Shares Out. on Filing Date,Book Value / Share,Tangible Book Value,Tangible Book Value Per Share,Total Debt,Net Debt,Total Revenues,...,Normalized Net Income,Net Income,Cash from Operations,Cash from Investing,Cash from Financing,Net Change in Cash,Levered Free Cash Flow,Unlevered Free Cash Flow,Free Cash Flow / Share,Classification
0,0.569312,0.673112,0.530858,0.206247,0.001261,0.501619,0.156751,0.650024,0.600770,0.561531,...,0.248183,0.489969,0.465644,0.501994,0.539354,0.605060,0.557563,0.562196,0.437724,1
1,0.557462,0.671981,0.508632,0.185579,0.001001,0.478251,0.156527,0.656607,0.606829,0.552442,...,0.232072,0.481012,0.464748,0.503420,0.539007,0.605678,0.546631,0.551502,0.437715,1
2,0.559693,0.668188,0.519284,0.206252,0.001081,0.488625,0.156567,0.650807,0.601465,0.551287,...,0.231563,0.481283,0.466684,0.502463,0.539762,0.606156,0.540990,0.545750,0.437798,1
3,0.558162,0.667093,0.517944,0.206256,0.001016,0.487961,0.156546,0.649434,0.600275,0.550652,...,0.230709,0.480863,0.464234,0.504401,0.538905,0.606264,0.540605,0.545363,0.437566,1
4,0.559790,0.668336,0.519241,0.206239,0.001041,0.489297,0.156562,0.651134,0.599691,0.552618,...,0.231555,0.481546,0.471326,0.503658,0.539965,0.608569,0.540664,0.545408,0.437819,1
5,0.563711,0.674297,0.517576,0.206171,0.001002,0.487998,0.156529,0.645178,0.598405,0.577664,...,0.234508,0.475300,0.423409,0.524292,0.526961,0.600276,0.537845,0.542641,0.437196,1
6,0.548815,0.636383,0.548551,0.204582,0.001065,0.529296,0.156586,0.609240,0.558623,0.516359,...,0.223902,0.574878,0.490716,0.508747,0.512838,0.590443,0.530680,0.535250,0.437771,1
7,0.560937,0.671061,0.517170,0.206085,0.001016,0.483707,0.156431,0.655280,0.606129,0.553771,...,0.230735,0.480820,0.473396,0.497264,0.541487,0.605175,0.541500,0.546293,0.437859,1
8,0.553189,0.668362,0.505802,0.205953,0.000529,0.481494,0.156229,0.655589,0.603764,0.549858,...,0.229720,0.478106,0.464105,0.506224,0.539213,0.607241,0.541467,0.546313,0.437818,0
9,0.560111,0.668193,0.520124,0.206200,0.001149,0.489501,0.156617,0.651134,0.601573,0.552013,...,0.230980,0.480985,0.465778,0.504763,0.538091,0.606187,0.539902,0.544669,0.437749,1


In [3]:
# Converting Data to Numpy Arrays
# `data` holds the scaled feature columns followed by 'Classification' as its
# last column, so split relative to the end of the row instead of relying on a
# hard-coded feature count.
NpMatrix = data.to_numpy()
X = NpMatrix[:, :-1]  # Feature matrix (every column except the last)
y = NpMatrix[:, -1]   # Class labels (the 'Classification' column)
print("X:", X)
print("\n")
print("Type X:", type(X))
# X.shape[1] is the per-row feature count and also works for a single-row X.
print("Length of Individual X_train Vector:", X.shape[1])
print("Total Number of Training instances:", len(X))
print("\n")
print(y)
print("\n")
print("Type y:", type(y))
# y is one-dimensional, so each entry is a scalar label rather than a vector.
print("First y value:", y[0])
print("Total number of y values", len(y))

X: [[0.56931236 0.67311195 0.5308581  ... 0.55756272 0.56219563 0.43772384]
 [0.55746247 0.67198147 0.50863228 ... 0.54663096 0.5515018  0.43771544]
 [0.55969327 0.6681881  0.51928354 ... 0.54098986 0.54574963 0.43779811]
 ...
 [0.5597835  0.66898728 0.51817709 ... 0.54043122 0.54524518 0.43777422]
 [0.56507245 0.68197519 0.50794345 ... 0.54105975 0.5456177  0.43780909]
 [0.55312547 0.6591651  0.52051989 ... 0.55138498 0.55573384 0.4377148 ]]


Type X: <class 'numpy.ndarray'>
Length of Individual X_train Vector: 26
Total Number of Training instances: 4500


[1. 1. 1. ... 1. 0. 0.]


Type y: <class 'numpy.ndarray'>
Length of Individual y_train vector 1.0
Total number of y values 4500


In [5]:
# Polynomial-kernel support vector classifier used by the evaluation cells.
SVM = SVC(
    # Kernel definition
    kernel='poly',
    degree=3,
    gamma='scale',
    coef0=0.0,
    # Regularisation and class handling
    C=10000,
    class_weight=None,
    decision_function_shape='ovr',
    probability=False,
    # Solver settings
    tol=0.001,
    max_iter=-1,
    shrinking=True,
    cache_size=200,
    random_state=None,
    verbose=False,
)

In [4]:
# 10-fold stratified splitter; shuffling is disabled, so no random state is used.
CV = StratifiedKFold(
    n_splits=10,
    shuffle=False,
    random_state=None,
)

In [5]:
from sklearn import metrics
from sklearn.base import clone

# Per-fold confusion-matrix counts, collected so they can be averaged afterwards.
tns = []
fps = []
fns = []
tps = []

for train_index, test_index in CV.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Fit a fresh, unfitted copy of the shared SVM estimator on this fold so the
    # hyperparameters are defined in exactly one place.
    clf = clone(SVM)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print("Classification Report")
    print(metrics.classification_report(y_test, y_pred, digits = 3))
    print("\n")

    # Compute the confusion matrix once per fold and reuse it. For two classes
    # the flattened 2x2 matrix is ordered TN, FP, FN, TP.
    # NOTE(review): assumes both classes occur in every fold so the matrix is 2x2.
    cm = metrics.confusion_matrix(y_test, y_pred)
    tn, fp, fn, tp = cm.ravel()

    print("Confusion Matrix")
    print(cm)
    print("True Negatives:", tn)
    tns.append(tn)
    print("False Positives:", fp)
    fps.append(fp)
    print("False Negatives:", fn)
    fns.append(fn)
    print("True Positives:", tp)
    tps.append(tp)

Classification Report
              precision    recall  f1-score   support

         0.0      0.519     0.095     0.161       147
         1.0      0.686     0.957     0.799       304

    accuracy                          0.676       451
   macro avg      0.602     0.526     0.480       451
weighted avg      0.632     0.676     0.591       451



Confusion Matrix
[[ 14 133]
 [ 13 291]]
True Negatives: 14
False Positives: 133
False Negatives: 13
True Positives: 291
Classification Report
              precision    recall  f1-score   support

         0.0      0.520     0.088     0.151       147
         1.0      0.685     0.961     0.800       304

    accuracy                          0.676       451
   macro avg      0.603     0.524     0.476       451
weighted avg      0.632     0.676     0.589       451



Confusion Matrix
[[ 13 134]
 [ 12 292]]
True Negatives: 13
False Positives: 134
False Negatives: 12
True Positives: 292
Classification Report
              precision    recall  f

In [6]:
# Mean confusion-matrix counts across the cross-validation folds.
av_tn = np.mean(tns)
av_fp = np.mean(fps)
av_fn = np.mean(fns)
av_tp = np.mean(tps)

print("Avg True Negatives:", av_tn)
print("Avg False Positives:", av_fp)
print("Avg False Negatives:", av_fn)
print("Avg True Positives:", av_tp)

# Per-class precision from the averaged counts, then their unweighted mean.
Precision_Class1 = (av_tp / (av_tp + av_fp))
Precision_Class0 = (av_tn / (av_tn + av_fn))
macro_precision = (Precision_Class0 + Precision_Class1) / 2
Overall_AvgPrecision = round(macro_precision, 5)
print("Overall Average Precision:", Overall_AvgPrecision)

# Per-class recall from the averaged counts, then their unweighted mean.
Recall_Class1 = (av_tp / (av_tp + av_fn))
Recall_Class0 = (av_tn / (av_tn + av_fp))
macro_recall = (Recall_Class0 + Recall_Class1) / 2
Overall_AvgRecall = round(macro_recall, 5)
print("Overall Average Recall:", Overall_AvgRecall)

# Harmonic mean of the averaged precision and recall. It is computed from the
# unrounded values and rounded only for display, so rounding error is not
# carried into the score. This is not the mean of the per-class F1 scores that
# classification_report shows as "macro avg".
F1_Score = round(2 * (macro_precision * macro_recall) / (macro_precision + macro_recall), 5)
print("F1-Score:", F1_Score)

Avg True Negavtives: 13.6
Avg False Positives: 132.6
Avg False Negavtives: 10.1
Avg True Positives: 293.7
Overall Average Precision: 0.6314
Overall Average Recall: 0.52989
F1-Score: 0.57621


In [6]:
# Original note: 7 minutes to run (measured with the folds fitted one at a time).
# n_jobs=-1 fits the folds in parallel on all available cores; the scores are
# unchanged because the splitter does not shuffle.
CV = StratifiedKFold(n_splits=10, random_state=None, shuffle=False)

scores = cross_val_score(SVM, X, y, cv = CV, n_jobs = -1)

print(scores)

print("Mean Accuracy:", scores.mean())
# scores.std() is the standard deviation of the per-fold scores, not the
# standard error of their mean.
print("SD of fold accuracies:", scores.std())

[0.67627494 0.67627494 0.68444444 0.67777778 0.70444444 0.68
 0.68666667 0.68       0.68151448 0.68151448]
Mean Accuracy: 0.6828912175697988
SD of the mean: 0.007844836716522379
