In [3]:
import numpy as np
import pandas as pd
import math
import datetime
from ta import *

In [4]:
def clean_df(df):
    """Reduce a raw adjusted-price frame to the modelling feature columns.

    Two percentage features are derived from the adjusted prices:

    * ``HL_PCT``     -- (high - low) / close * 100
    * ``PCT_change`` -- (close - open) / open * 100

    Args:
        df: DataFrame containing at least the columns 'Adj. Open',
            'Adj. High', 'Adj. Low', 'Adj. Close' and 'Adj. Volume'.
            It is not modified.

    Returns:
        A new, independent DataFrame with the columns
        'Adj. Close', 'HL_PCT', 'PCT_change', 'Adj. Volume' (in that order).
        The open/high/low columns are not present in the result.

    Raises:
        KeyError: if any of the required input columns is missing.
    """
    # Pick the needed columns
    prices = df[['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume']]

    # Create new features (percent change). ``assign`` builds a new frame
    # instead of writing into the column selection above, which avoids
    # pandas' SettingWithCopyWarning.
    features = prices.assign(
        HL_PCT=(prices['Adj. High'] - prices['Adj. Low']) / prices['Adj. Close'] * 100.0,
        PCT_change=(prices['Adj. Close'] - prices['Adj. Open']) / prices['Adj. Open'] * 100.0,
    )

    # Only keep the relative values now; the explicit copy lets callers add
    # columns or edit the result in place without copy warnings.
    return features[['Adj. Close', 'HL_PCT', 'PCT_change', 'Adj. Volume']].copy()

In [5]:
### Single stock indicators util functions

# Module-level placeholders. select_indicators() assigns its own local
# high/low/close (it has no ``global`` statement), so these lists are never
# filled or read by it.
# NOTE(review): presumably leftovers -- confirm no other cell reads them
# before removing.
high = []
low = []
close = []

def select_indicators(single_stock_df):
    """Append technical-indicator columns to a single stock's price frame.

    The frame is modified IN PLACE (no copy is taken) and the same object is
    returned. It must contain 'Adj. High', 'Adj. Low' and 'Adj. Close'.

    Columns added, in this order:
        avg_directional -- trend.adx(high, low, close, n=14)
        rsi             -- momentum.rsi(close, n=14)
        wr_pct          -- momentum.wr(high, low, close, lbp=14)
        volatility_avg  -- volatility.average_true_range(high, low, close, n=14)
        bband           -- volatility.bollinger_hband(close, n=20, ndev=2)

    ``trend``, ``momentum`` and ``volatility`` are expected to be in scope
    (presumably via ``from ta import *`` -- the ``n=``/``ndev=`` keywords
    look like an older ``ta`` API; confirm the installed version).
    """
    frame = single_stock_df  # alias: the caller's frame is mutated
    highs, lows, closes = frame['Adj. High'], frame['Adj. Low'], frame['Adj. Close']

    # (column name, deferred computation) pairs; evaluated and stored one at
    # a time so columns appear in the listed order.
    indicator_specs = (
        ('avg_directional',
         lambda: trend.adx(highs, lows, closes, n=14, fillna=False)),
        ('rsi',
         lambda: momentum.rsi(closes, n=14, fillna=False)),
        ('wr_pct',
         lambda: momentum.wr(highs, lows, closes, lbp=14, fillna=False)),
        ('volatility_avg',
         lambda: volatility.average_true_range(highs, lows, closes, n=14, fillna=False)),
        ('bband',
         lambda: volatility.bollinger_hband(closes, n=20, ndev=2, fillna=False)),
    )
    for column_name, compute in indicator_specs:
        frame[column_name] = compute()

    return frame

In [6]:
### Modeling functions

def prep_naive_train_test(df, forecast=0.05):
    """Prepare scaled features and shifted labels for a naive price forecast.

    The label of each row is the 'Adj. Close' value ``forecast_out`` rows
    later, where ``forecast_out = ceil(forecast * len(df))``. The last
    ``forecast_out`` rows therefore have no label; their features are
    returned separately so they can be predicted on.

    Args:
        df: DataFrame of numeric features that includes an 'Adj. Close'
            column. It is NOT modified; all work happens on a copy.
        forecast: non-negative fraction of the rows to forecast ahead
            (default 0.05, i.e. 5% of the rows).

    Returns:
        (X, y, X_lately)
        X        -- scaled feature rows that have a label.
        y        -- labels (future 'Adj. Close') aligned with ``X``.
        X_lately -- scaled feature rows of the trailing, unlabeled period.
        Every column of ``df`` (including 'Adj. Close') is used as a
        feature, and scaling is computed over all rows before the split.

    Raises:
        ValueError: if ``forecast`` is negative.
        KeyError: if 'Adj. Close' is missing.
    """
    if forecast < 0:
        raise ValueError("forecast must be non-negative, got %r" % (forecast,))

    # This is what we really want to predict
    forecast_col = 'Adj. Close'

    # Missing data is replaced by a large negative sentinel. The non-inplace
    # fillna returns a new frame, so the caller's data is left untouched and
    # repeated calls give identical results.
    data = df.fillna(value=-99999)

    # Clamp so an over-large fraction degrades to "everything is unlabeled".
    forecast_out = min(int(math.ceil(forecast * len(data))), len(data))

    data['label'] = data[forecast_col].shift(-forecast_out)
    # Keyword ``axis`` works on every pandas version; the positional form
    # was removed in pandas 2.0.
    X = np.array(data.drop(['label'], axis=1))

    # Rescale the features onto the same scale.
    # NOTE(review): `preprocessing` is not imported in the visible cells --
    # presumably `from sklearn import preprocessing` lives elsewhere; confirm.
    X = preprocessing.scale(X)

    # Split by an explicit index rather than negative slicing: with
    # forecast_out == 0, X[-0:] would be the whole array and X[:-0] empty.
    split_at = len(X) - forecast_out
    X_lately = X[split_at:]
    X = X[:split_at]

    # Only the trailing rows have a NaN label after the fill above.
    y = np.array(data.dropna()['label'])

    return X, y, X_lately

def linear_regression(X, y, test_size=0.2, random_state=None):
    """Fit a linear regression on a random train/test split and score it.

    Args:
        X: feature matrix.
        y: target values aligned with ``X``.
        test_size: portion of the samples held out for scoring
            (default 0.2, the previously hard-coded value).
        random_state: seed forwarded to the splitter so the split can be
            reproduced; ``None`` (default) keeps the previous unseeded
            behaviour.

    Returns:
        (clf, confidence): the fitted model and its score on the held-out
        split. The score is also printed, as before.
    """
    # NOTE(review): `cross_validation` and `LinearRegression` are not imported
    # in the visible cells -- presumably scikit-learn; confirm. Newer
    # scikit-learn exposes train_test_split from `sklearn.model_selection`.
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    # Fit a linear regression model to the data
    clf = LinearRegression(n_jobs=-1)
    clf.fit(X_train, y_train)
    confidence = clf.score(X_test, y_test)
    print(confidence)

    # Hand back the model so callers can actually predict with it.
    return clf, confidence
    
def hmm():
    """Unimplemented placeholder; takes no arguments and returns None.

    NOTE(review): the name suggests a hidden Markov model step -- TODO confirm.
    """
    return None