# Importing Essential Libraries

In [1]:
import pandas as pd
import talib

# Reading Data and removing NAs

In [2]:
# Load the raw OHLC data and keep only rows with a complete set of numeric prices.
price_columns = ['Open', 'High', 'Low', 'Close']

candlestick_data = pd.read_csv('data/candlestick.csv')
# Empty placeholder label; the labelling cell further down fills it in.
candlestick_data['Candlestick'] = ""

# Placeholders such as ' NA ' are not recognised as missing by read_csv and leave
# the price columns as strings. Coerce every price column to a number (anything
# unparseable becomes NaN), then drop rows missing any of the four prices rather
# than checking 'Open' alone.
candlestick_data[price_columns] = candlestick_data[price_columns].apply(
    pd.to_numeric, errors='coerce'
)
candlestick_data = (
    candlestick_data
    .dropna(subset=price_columns)
    .reset_index(drop=True)  # later cells rely on a contiguous 0..n-1 index
)

# Identification of candlesticks using TA-Lib

In [3]:
# Pull the four price columns out as individual Series for the TA-Lib calls below.
# NOTE: the name `open` shadows Python's built-in open() from here on; it is kept
# because the pattern-detection cell that follows reads it under this name.
open, High, Low, Close = (
    candlestick_data[column] for column in ('Open', 'High', 'Low', 'Close')
)

In [4]:
# Run each TA-Lib pattern recogniser over the same open/high/low/close inputs.
# The labelling cell treats any non-zero entry in a result as a match.
ohlc = (open, High, Low, Close)

marubozu = talib.CDLMARUBOZU(*ohlc)
doji = talib.CDLDOJI(*ohlc)
dragonfly = talib.CDLDRAGONFLYDOJI(*ohlc)
gravestone = talib.CDLGRAVESTONEDOJI(*ohlc)
spinning = talib.CDLSPINNINGTOP(*ohlc)

# Adding labels to the data

### Candlesticks that cannot be grouped into any of the given categories are labeled as "Undefined".

In [5]:
# Write a pattern name into the 'Candlestick' column for every row a detector flagged.
# The list is ordered by increasing priority: when a row matches several patterns,
# the label applied last wins (e.g. a Gravestone Doji overrides a plain Doji).
pattern_labels = [
    (spinning, 'Spinning Top'),
    (marubozu, 'Marubozu'),
    (doji, 'Doji'),
    (dragonfly, 'Dragonfly Doji'),
    (gravestone, 'Gravestone Doji'),
]

for signal, label in pattern_labels:
    # A non-zero detector output marks a match.
    candlestick_data.loc[signal[signal != 0].index, 'Candlestick'] = label

# Every row still holding the empty placeholder matched no pattern. One masked
# assignment replaces the row-by-row loop and does not depend on the index
# being exactly 0..n-1.
candlestick_data.loc[candlestick_data['Candlestick'] == "", 'Candlestick'] = "Undefined"

In [6]:
# Preview the first rows to sanity-check the new 'Candlestick' label column.
candlestick_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Candlestick
0,1/2/2007,117.94,118.88,117.28,118.28,Undefined
1,1/3/2007,117.94,119.82,116.72,119.34,Undefined
2,1/4/2007,119.83,120.77,116.74,117.28,Undefined
3,1/5/2007,117.09,118.68,116.35,117.38,Undefined
4,1/8/2007,117.83,117.83,113.89,114.51,Undefined


# Counts of data in each category

In [7]:
# Number of rows carrying each candlestick label; the pie and bar charts below reuse it.
Candlestick_frequency = candlestick_data['Candlestick'].value_counts()
Candlestick_frequency

Undefined          2128
Doji                332
Spinning Top        266
Marubozu             73
Gravestone Doji      25
Dragonfly Doji       21
Name: Candlestick, dtype: int64

### For visualization I have used the Plotly API, which needs a username and an API key to run. Please visit https://plot.ly/settings/api#/ and generate your own 'username' and 'API Key'; otherwise the calls may fail once the account crosses its usage threshold.

In [8]:
import os

import plotly.plotly as py
import plotly
import plotly.graph_objs as go

# Never commit real credentials: the username and API key are read from the
# PLOTLY_USERNAME and PLOTLY_API_KEY environment variables instead of being
# hard-coded here. Any key that was previously committed should be regenerated.
plotly_username = os.environ.get('PLOTLY_USERNAME')
plotly_api_key = os.environ.get('PLOTLY_API_KEY')
if plotly_username and plotly_api_key:
    plotly.tools.set_credentials_file(username=plotly_username, api_key=plotly_api_key)
# If either variable is unset, nothing is written and plotly is left to use
# whatever credentials were stored by an earlier set_credentials_file call.

# Pie chart: share of rows per candlestick label.
labels = list(Candlestick_frequency.index)
values = list(Candlestick_frequency.values)

trace = go.Pie(labels=labels, values=values)

layout = go.Layout(
    title='Proportion of candlesticks',
)

fig = go.Figure(data=[trace], layout=layout)

py.iplot(fig, filename='Proportion of candlesticks in unlabled data')

In [9]:
# Bar chart: absolute number of rows per candlestick label.
category_names = list(Candlestick_frequency.index)
category_counts = list(Candlestick_frequency.values)

data = [go.Bar(x=category_names, y=category_counts)]
layout = go.Layout(title='Number of candlesticks for every category')
fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='Number of candlesticks for every category in unlabled data')

# Candlestick chart

In [10]:
# Candlestick chart of the complete (cleaned) series, plotted against the Date column.
df = candlestick_data

# Maps each price column onto the matching go.Candlestick keyword
# (open/high/low/close).
price_kwargs = {column.lower(): df[column] for column in ('Open', 'High', 'Low', 'Close')}

trace = go.Candlestick(x=df['Date'], **price_kwargs)
data = [trace]
layout = go.Layout(title='Candlestick Chart')
fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='Candlestick Chart in predicted data')

# Plotting to check the identified candlesticks

# Dragonfly Doji

In [11]:
# Chart only the rows where the Dragonfly Doji detector returned a non-zero value.
dragonfly_rows = dragonfly[dragonfly != 0].index
df = candlestick_data.loc[dragonfly_rows, :]

# Maps each price column onto the matching go.Candlestick keyword.
price_kwargs = {column.lower(): df[column] for column in ('Open', 'High', 'Low', 'Close')}

trace = go.Candlestick(x=df['Date'], **price_kwargs)
data = [trace]
layout = go.Layout(title='Dragonfly Doji candlesticks')
fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='Dragonfly Doji candlesticks in unlabled data')

# Doji

In [12]:
# Chart only the rows where the Doji detector returned a non-zero value.
doji_rows = doji[doji != 0].index
df = candlestick_data.loc[doji_rows, :]

# Maps each price column onto the matching go.Candlestick keyword.
price_kwargs = {column.lower(): df[column] for column in ('Open', 'High', 'Low', 'Close')}

trace = go.Candlestick(x=df['Date'], **price_kwargs)
data = [trace]
layout = go.Layout(title='Doji candlesticks')
fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='Doji candlesticks in unlabled data')

# Marubozu

In [13]:
# Chart only the rows where the Marubozu detector returned a non-zero value.
marubozu_rows = marubozu[marubozu != 0].index
df = candlestick_data.loc[marubozu_rows, :]

# Maps each price column onto the matching go.Candlestick keyword.
price_kwargs = {column.lower(): df[column] for column in ('Open', 'High', 'Low', 'Close')}

trace = go.Candlestick(x=df['Date'], **price_kwargs)
data = [trace]
layout = go.Layout(title='Marubozu candlesticks')
fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='Marubozu in unlabled data')

# Gravestone Doji

In [14]:
# Chart only the rows where the Gravestone Doji detector returned a non-zero value.
gravestone_rows = gravestone[gravestone != 0].index
df = candlestick_data.loc[gravestone_rows, :]

# Maps each price column onto the matching go.Candlestick keyword.
price_kwargs = {column.lower(): df[column] for column in ('Open', 'High', 'Low', 'Close')}

trace = go.Candlestick(x=df['Date'], **price_kwargs)
data = [trace]
layout = go.Layout(title='Gravestone Doji candlesticks')
fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='Gravestone Doji in unlabled data')

# Spinning Top

In [15]:
# Chart only the rows where the Spinning Top detector returned a non-zero value.
spinning_rows = spinning[spinning != 0].index
df = candlestick_data.loc[spinning_rows, :]

# Maps each price column onto the matching go.Candlestick keyword.
price_kwargs = {column.lower(): df[column] for column in ('Open', 'High', 'Low', 'Close')}

trace = go.Candlestick(x=df['Date'], **price_kwargs)
data = [trace]
layout = go.Layout(title='Spinning Top candlesticks')
fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='Spinning Top  in unlabled data')

# Candlesticks that cannot be grouped into the above categories

In [16]:
# Chart the rows that matched none of the patterns. Unlike the charts above,
# the x-axis is the re-numbered row position rather than the Date column.
is_undefined = candlestick_data['Candlestick'] == 'Undefined'
df = candlestick_data[is_undefined].reset_index(drop=True)

# Maps each price column onto the matching go.Candlestick keyword.
price_kwargs = {column.lower(): df[column] for column in ('Open', 'High', 'Low', 'Close')}

trace = go.Candlestick(x=df.index, **price_kwargs)
data = [trace]
layout = go.Layout(title='Undefined candlesticks')
fig = go.Figure(data=data, layout=layout)

py.iplot(fig, filename='Undefined  in unlabled data')

# Exporting the labeled data to a CSV file

In [17]:
# Persist the labelled frame; the DataFrame index is left out of the file.
labelled_csv_path = 'data/labled_candlesticks.csv'
candlestick_data.to_csv(labelled_csv_path, index=False)