In [2]:
import pandas as pd
import numpy as np
from datetime import date, timedelta
from scipy.stats import t
import yfinance as yf
import matplotlib.pyplot as plt
import gc; gc.enable()

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, balanced_accuracy_score, roc_auc_score
from sklearn.linear_model import LogisticRegression
from category_encoders import WOEEncoder

from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import MinMaxScaler, KBinsDiscretizer, FunctionTransformer
from sklearn.model_selection import cross_val_score
from sklearn.impute import SimpleImputer

from scipy.signal import savgol_filter, argrelmin, argrelmax

import math 
import random

import talib

import warnings
warnings.filterwarnings("ignore")

## Feature Creation

In [27]:
def create_features(stock, horizons=(3, 5, 10, 15, 30)):
    """Download a ticker's full price history and add indicator/target columns.

    Parameters
    ----------
    stock : str
        Ticker symbol handed to ``yf.Ticker``.
    horizons : iterable of int, optional
        Window lengths, in rows of the downloaded history, used both for the
        look-back indicators and for the look-ahead targets. Defaults to the
        original fixed set ``(3, 5, 10, 15, 30)``. Each value is passed to
        ``talib.TEMA`` / ``talib.RSI`` as the time period
        (NOTE(review): TA-Lib presumably rejects periods below 2 -- confirm).

    Returns
    -------
    pandas.DataFrame
        The frame returned by ``Ticker.history(period='max')`` with these
        columns added:

        * ``mom_1`` and, per horizon ``d``: ``tema_d``, ``mom_d``,
          ``delta_d`` (close ``d`` rows ahead minus current close),
          ``target_d`` (1.0 if ``delta_d > 0``, 0.0 otherwise, NaN where the
          future close is not available), ``RSI_d``, ``RSI_overbought_d``
          (RSI > 70), ``RSI_oversold_d`` (RSI < 30) and ``vol_d`` (volume
          minus volume ``d`` rows earlier).
        * ``sma50``, ``sma200``, ``sma50_below``, ``sma200_below``,
          ``daily_range`` and ``daily_range_std``.

    Notes
    -----
    ``delta_*`` and ``target_*`` look into the future and must not be used as
    model inputs.
    """
    stock_df = yf.Ticker(stock).history(period='max')
    close = stock_df['Close']
    volume = stock_df['Volume']

    stock_df['mom_1'] = talib.MOM(close, 1)

    for days in horizons:
        suffix = str(days)

        stock_df['tema_' + suffix] = talib.TEMA(close, days)
        stock_df['mom_' + suffix] = talib.MOM(close, days)

        # Forward price move over `days` rows; NaN for the last `days` rows.
        delta = close.shift(-days) - close
        stock_df['delta_' + suffix] = delta
        # Keep the label missing where the future is unknown instead of
        # silently labelling those rows as "down" (0).
        stock_df['target_' + suffix] = (delta > 0).astype(float).where(delta.notna())

        # Use the horizon as the RSI period; a constant period would make
        # every RSI_<days> column identical.
        rsi = talib.RSI(close, days)
        stock_df['RSI_' + suffix] = rsi
        # NaN RSI values (indicator warm-up) compare False and therefore map to 0.
        stock_df['RSI_overbought_' + suffix] = (rsi > 70).astype(int)
        stock_df['RSI_oversold_' + suffix] = (rsi < 30).astype(int)

        stock_df['vol_' + suffix] = volume - volume.shift(days)

    stock_df['sma50'] = close.rolling(50).mean()
    stock_df['sma200'] = close.rolling(200).mean()
    # 1 when the close sits below the moving average; rows where the average
    # is still NaN compare False and map to 0.
    stock_df['sma50_below'] = (close < stock_df['sma50']).astype(int)
    stock_df['sma200_below'] = (close < stock_df['sma200']).astype(int)

    stock_df['daily_range'] = stock_df['High'] - stock_df['Low']
    stock_df['daily_range_std'] = stock_df['daily_range'].rolling(3).std()

    # Hand the engineered frame back to the caller; without this the function
    # would return None and all of the work above would be lost.
    return stock_df
    


In [29]:
# Quick look at the raw UBER price history that yfinance returns.
# Note: `df` holds the yfinance Ticker handle, not a DataFrame; the history
# frame below is only displayed as the cell output and is not stored.
df = yf.Ticker('UBER')
df.history(period='max')

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-05-10,42.00,45.00,41.06,41.57,186322500,0,0
2019-05-13,38.79,39.24,36.08,37.10,79442400,0,0
2019-05-14,38.31,39.96,36.85,39.96,46661100,0,0
2019-05-15,39.37,41.88,38.95,41.29,36086100,0,0
2019-05-16,41.48,44.06,41.25,43.00,38115500,0,0
...,...,...,...,...,...,...,...
2020-03-02,34.05,34.08,31.74,32.85,46662000,0,0
2020-03-03,34.02,34.18,31.90,33.04,35182400,0,0
2020-03-04,33.93,35.34,32.28,34.53,44738600,0,0
2020-03-05,33.47,33.57,31.97,32.21,32764500,0,0


In [32]:
def ticker_stats(ticker, day, verbose, period="10y"):
    """Fetch price history and compute the forward ``day``-row percentage change.

    Parameters
    ----------
    ticker : str
        Symbol handed to ``yf.Ticker``.
    day : int
        Look-ahead horizon in rows of the history. The new column holds
        ``100 * (Close[t + day] - Close[t]) / Close[t]``.
    verbose : bool
        When truthy, print the non-positive / positive counts, summary
        statistics and the last ten values, and plot the series.
    period : str, optional
        History window passed to ``Ticker.history``; defaults to ``"10y"``.

    Returns
    -------
    pandas.DataFrame
        The downloaded history with an extra ``perc_change{day}`` column. The
        last ``day`` rows of that column are NaN because their future close is
        not available.
    """
    hist = yf.Ticker(ticker).history(period=period)
    col = f'perc_change{day}'
    hist[col] = 100 * hist.Close.pct_change(day).shift(-day)

    # Count only rows whose forward change is defined. NaN compares False with
    # `<= 0`, so negating that mask would count the trailing NaN rows as gains.
    changes = hist[col].dropna()
    neg_count = int((changes <= 0).sum())  # zero change is grouped with losses
    pos_count = int((changes > 0).sum())
    tot_count = len(changes)

    if verbose:
        # Avoid dividing by zero when the history has no usable rows.
        neg_share = neg_count / tot_count if tot_count else float('nan')
        pos_share = pos_count / tot_count if tot_count else float('nan')

        print("TICKER:", ticker)
        print("="*(8+len(ticker)))
        print('')
        print('neg_count=', neg_count, f'neg_count %={neg_count}/{tot_count} = {neg_share}')
        print('pos_count=', pos_count, f'pos_count %={pos_count}/{tot_count} = {pos_share}')
        print('')
        print(hist[col].describe())
        print('')
        print(hist[col].tail(10))
        print('')
        hist[col].plot(figsize=(20,8))
        plt.title(ticker+f" ({day} day % change)")
        plt.show()
        print('')

    return hist

In [33]:
# Symbol to analyse; passed to ticker_stats() in a later cell.
ticker = 'UBER'

In [34]:
# 3-row forward % change for the selected ticker; verbose=False skips the
# printed summary and the plot and just returns the frame.
hist = ticker_stats(ticker, day=3, verbose=False)

In [35]:
# Display the frame returned by ticker_stats(), including its added perc_change column.
hist

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,perc_change3
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-05-10,42.00,45.00,41.06,41.57,186322500,0,0,-0.673563
2019-05-13,38.79,39.24,36.08,37.10,79442400,0,0,15.902965
2019-05-14,38.31,39.96,36.85,39.96,46661100,0,0,4.879880
2019-05-15,39.37,41.88,38.95,41.29,36086100,0,0,0.726568
2019-05-16,41.48,44.06,41.25,43.00,38115500,0,0,-3.488372
...,...,...,...,...,...,...,...,...
2020-03-02,34.05,34.08,31.74,32.85,46662000,0,0,-1.948250
2020-03-03,34.02,34.18,31.90,33.04,35182400,0,0,-4.116223
2020-03-04,33.93,35.34,32.28,34.53,44738600,0,0,
2020-03-05,33.47,33.57,31.97,32.21,32764500,0,0,
