## Linear Regression

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn import linear_model
from sklearn.model_selection import train_test_split

In [3]:
# Read the price history from the CSV that sits next to the notebook.
# NOTE(review): the preview below shows an `Unnamed: 0` column, which suggests
# the CSV was written with its index; consider `index_col=0` if it is unwanted.
csv_path = 'AAPL.csv'
df = pd.read_csv(csv_path)

# Last expression of the cell: rich preview of the first rows.
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2015-05-28,32.965,32.987499,32.775002,32.945,30.031921,122933200
1,2015-05-29,32.807499,32.862499,32.474998,32.57,29.690079,203538000
2,2015-06-01,32.57,32.8475,32.512501,32.634998,29.749327,128451200
3,2015-06-02,32.465,32.665001,32.330002,32.490002,29.617155,134670400
4,2015-06-03,32.665001,32.735001,32.474998,32.529999,29.653614,123934000


In [100]:
def prepare_data(df, forecast_col, forecast_out, test_size, random_state=None):
    """Build train/test splits and the rows to forecast from one column of ``df``.

    The only feature is the standardised value of ``forecast_col``; the target
    is the value of the same column ``forecast_out`` rows later.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``forecast_col``. It is not modified.
    forecast_col : str
        Column used both as the feature and, shifted, as the target.
    forecast_out : int
        Number of rows ahead to predict. Must satisfy
        ``0 < forecast_out < len(df)``.
    test_size : float or int
        Forwarded unchanged to ``train_test_split``.
    random_state : int or None, optional
        Forwarded to ``train_test_split``. Pass an int to make the shuffle
        (and therefore the reported score) reproducible. Defaults to ``None``,
        which keeps the previous unseeded behaviour.

    Returns
    -------
    list
        ``[X_train, X_test, Y_train, Y_test, X_lately]``. ``X_lately`` holds the
        scaled feature values of the last ``forecast_out`` rows, i.e. the rows
        that have no target yet.

    Raises
    ------
    ValueError
        If ``forecast_out`` is not strictly between 0 and ``len(df)``.
    """
    # Guard first: with forecast_out == 0 the slices X[-0:] / X[:-0] silently
    # become "everything" / "nothing", and an out-of-range horizon leaves no
    # training rows at all.
    if not 0 < forecast_out < len(df):
        raise ValueError(
            "forecast_out must be a positive integer smaller than the number "
            "of rows in df (got forecast_out=%r, len(df)=%d)"
            % (forecast_out, len(df))
        )

    feature = df[forecast_col]
    # Target = value `forecast_out` rows ahead; the last `forecast_out` rows
    # therefore have a NaN target.
    label = feature.shift(-forecast_out)

    # Standardise the single feature column. The statistics are computed over
    # every row, including rows that later land in the test split and X_lately.
    X = preprocessing.scale(np.array(df[[forecast_col]]))

    # Rows to forecast later: the tail that has no target.
    X_lately = X[-forecast_out:]

    # Keep only rows where both the feature and its target are present. This
    # removes the NaN-target tail and, unlike dropping NaNs from the label
    # alone, keeps X and y aligned row by row when the column has gaps.
    usable = (feature.notna() & label.notna()).to_numpy()
    X = X[usable]
    y = label.to_numpy()[usable]

    X_train, X_test, Y_train, Y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    return [X_train, X_test, Y_train, Y_test, X_lately]

In [99]:
# Forecasting configuration.
forecast_col = 'Close'  # column to forecast
forecast_out = 5        # how many rows ahead to forecast
test_size = 0.2         # share of the samples held out for testing

# prepare_data returns [X_train, X_test, Y_train, Y_test, X_lately].
X_train, X_test, Y_train, Y_test, X_lately = prepare_data(
    df, forecast_col, forecast_out, test_size
)

# Create the linear regression model and train it on the training split.
learner = linear_model.LinearRegression()
learner.fit(X_train, Y_train)

# Score on the held-out split, then predict the rows that have no target yet.
score = learner.score(X_test, Y_test)
forecast = learner.predict(X_lately)

# Gather both results in a plain dict and print it.
response = {'test_score': score, 'forecast_set': forecast}
print(response)


{'test_score': 0.9839825556009695, 'forecast_set': array([80.19112226, 79.59261291, 80.10562207, 79.56243703, 79.90946423])}


## SVM

In [120]:
from sklearn.svm import SVR
from sklearn.metrics import  accuracy_score

In [121]:
# Split the data into 80% training and 20% testing.
# A fixed random_state makes the shuffle, and so the score reported below,
# reproducible across kernel restarts.
# NOTE(review): X and y are not defined in any cell visible in this notebook
# (inside prepare_data they are locals only); confirm where they come from so
# the notebook survives Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [123]:
# Support Vector Regressor with an RBF kernel; hyperparameters kept together.
svr_params = {'kernel': 'rbf', 'C': 1e3, 'gamma': 0.1}
svr_rbf = SVR(**svr_params)

# Train on the training split. This is the cell's last expression, so the
# fitted estimator's repr is displayed.
svr_rbf.fit(X_train, y_train)

SVR(C=1000.0, gamma=0.1)

In [124]:
# Evaluate the regressor on the held-out split.
# SVR.score returns the coefficient of determination (R^2), not a
# classification accuracy, so the printed label says so.
svm_confidence = svr_rbf.score(X_test, y_test)
print("Model R^2 score: ", svm_confidence)


Model Accuracy:  0.5715598332972887


## RandomForest Classifier

In [111]:
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import RandomForestClassifier

In [112]:
# Hold out 30% of the samples for testing. The split is seeded like the
# estimator below so the reported metrics are reproducible.
# NOTE(review): X and y are not defined in any cell visible in this notebook;
# confirm where they come from.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=0)

# Train the model.
rfc = RandomForestClassifier(random_state=0)
rfc = rfc.fit(X_train, y_train)

# Predict the held-out samples.
y_pred = rfc.predict(X_test)

# Report overall accuracy (fraction of correct predictions) and the
# per-class precision / recall / f1 table.
report = classification_report(y_test, y_pred)
print('Model Accuracy', accuracy_score(y_test, y_pred, normalize=True))
print(report)

Model Accuracy 0.8517520215633423
              precision    recall  f1-score   support

           0       0.85      0.85      0.85       185
           1       0.85      0.85      0.85       186

    accuracy                           0.85       371
   macro avg       0.85      0.85      0.85       371
weighted avg       0.85      0.85      0.85       371



## DecisionTree Classifier

In [113]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [114]:
# Hold out 30% of the samples for testing. Both the split and the tree are
# seeded so the confusion matrix and report below are reproducible.
# NOTE(review): X and y are not defined in any cell visible in this notebook;
# confirm where they come from.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=0)
classifier = DecisionTreeClassifier(random_state=0)

# Last expression of the cell, so the fitted estimator's repr is displayed.
classifier.fit(X_train, y_train)

DecisionTreeClassifier()

In [115]:
# Predict the held-out split with the decision tree fitted in the previous cell.
y_pred = classifier.predict(X_test)

In [116]:
# Confusion matrix of true vs. predicted labels on the held-out split.
dt_confusion = confusion_matrix(y_test, y_pred)
print(dt_confusion)

# Per-class precision / recall / f1 summary for the same predictions.
dt_report = classification_report(y_test, y_pred)
print(dt_report)

[[144  33]
 [ 33 161]]
              precision    recall  f1-score   support

           0       0.81      0.81      0.81       177
           1       0.83      0.83      0.83       194

    accuracy                           0.82       371
   macro avg       0.82      0.82      0.82       371
weighted avg       0.82      0.82      0.82       371

