In [1]:
import os
from datetime import datetime
import numpy as np
import pandas as pd
from ta.volume import VolumeWeightedAveragePrice
from ta.momentum import StochRSIIndicator
from ta.trend import EMAIndicator
import yfinance as yf
import matplotlib.pyplot as plt

### Candle Colour Totals

In [2]:
def total_colours(df):

    # Label each row / candle with colour
    df.loc[df["open"] > df["close"], "colour"] = "red"
    df.loc[df["open"] < df["close"], "colour"] = "green"
    df.loc[df["open"] == df["close"], "colour"] = "yellow"
    
    # Calculate colour percents
    green_count = df['colour'].value_counts()["green"]
    red_count = df['colour'].value_counts()["red"]
    yellow_count = df['colour'].value_counts()["yellow"]
    green_percent = round(green_count / (green_count + red_count + yellow_count) * 100,2)
    red_percent = round(red_count / (green_count + red_count + yellow_count) * 100,2)
    yellow_percent = round(yellow_count / (green_count + red_count + yellow_count) * 100,2)
    
    print('Green count: {}%, Red count: {}%, Yellow count: {}%'.format(green_percent, red_percent, yellow_percent))


# Files with different data
datasets = ["spy-1m.csv","spy-5m.csv","spy-1h.csv","spy-1d.csv"]

# Calculate colour stats for each file
for dataset in datasets:
    print("=======================================")
    df = pd.read_csv(dataset).dropna()
    print("Dataset: " + dataset + " Size: " + str(len(df.index)))
    print("")
    total_colours(df)

Dataset: spy-1m.csv Size: 10327

Green count: 48.89%, Red count: 48.47%, Yellow count: 2.64%
Dataset: spy-5m.csv Size: 9837

Green count: 50.45%, Red count: 48.56%, Yellow count: 0.99%
Dataset: spy-1h.csv Size: 10439

Green count: 51.72%, Red count: 47.49%, Yellow count: 0.79%
Dataset: spy-1d.csv Size: 7346

Green count: 52.41%, Red count: 46.72%, Yellow count: 0.87%


### Candle Colour Totals with EMAs

In [11]:
def add_indicators(df):

    # Add EMA to DF
    emas = [9,21,200]
    for ema in emas:
        ema_values = EMAIndicator(close=df["close"], window=ema, fillna=False)
        df["ema"+str(ema)] = ema_values.ema_indicator()

    df.dropna(inplace=True)

    return df

def ema_colours(df):
    
    # For Tradingview indicators
    df.rename(columns={'EMA': 'ema200', 'EMA.1': 'ema21','EMA.2': 'ema9'}, inplace=True)
    df.drop(columns=['Smoothing Line','Smoothing Line.1','Smoothing Line.2'], inplace=True)

    # For local indicators
    # df = add_indicators(df)
    
    # Label our rows with green or red
    df.loc[df["close"] > df["open"], "colour"] = "green"
    df.loc[df["close"] < df["open"] , "colour"] = "red"
    df.loc[df["open"] == df["close"], "colour"] = "yellow"

    # Label our rows with over/under ema
    emas = ["9","21","200"]
    for ema in emas:
        df["over"+ema] = np.where((df["close"] > df["ema"+ema]), "true", "false")

    #print(df.head(5))
    
    # Calculate the percent of green/red candles over/under each EMA
    for ema in emas:
        over_count = df["over"+ema].value_counts()["true"]
        over_green = len(df[(df["over"+ema]=="true") & (df["colour"]=="green")])
        over_green_percent = round(over_green / over_count * 100,2)
        over_red_percent = round(100 - over_green_percent,2)

        under_count = df["over"+ema].value_counts()["false"]
        under_green = len(df[(df["over"+ema]=="false") & (df["colour"]=="green")])
        under_green_percent = round(under_green / under_count * 100,2)
        under_red_percent = round(100 - under_green_percent,2)
            
        print('{} EMA Colours || Over_Green: {}%, Over_Red: {}% || Under_Green: {}%, Under_Red: {}%'.format(ema, over_green_percent, over_red_percent, under_green_percent, under_red_percent))


datasets = ["spy-1m.csv","spy-5m.csv","spy-1h.csv","spy-1d.csv"]
#datasets = ["spy-5m.csv","aapl-5m.csv"]

for dataset in datasets:
    print("=======================================")
    df = pd.read_csv(dataset).dropna()
    print("Dataset: " + dataset + " Size: " + str(len(df.index)))
    print("")
    ema_colours(df)



Dataset: spy-1m.csv Size: 10327

9 EMA Colours || Over_Green: 66.87%, Over_Red: 33.13% || Under_Green: 29.81%, Under_Red: 70.19%
21 EMA Colours || Over_Green: 60.56%, Over_Red: 39.44% || Under_Green: 36.86%, Under_Red: 63.14%
200 EMA Colours || Over_Green: 51.88%, Over_Red: 48.12% || Under_Green: 45.3%, Under_Red: 54.7%
Dataset: spy-5m.csv Size: 9837

9 EMA Colours || Over_Green: 67.79%, Over_Red: 32.21% || Under_Green: 31.64%, Under_Red: 68.36%
21 EMA Colours || Over_Green: 61.62%, Over_Red: 38.38% || Under_Green: 38.54%, Under_Red: 61.46%
200 EMA Colours || Over_Green: 55.11%, Over_Red: 44.89% || Under_Green: 45.55%, Under_Red: 54.45%
Dataset: spy-1h.csv Size: 10439

9 EMA Colours || Over_Green: 65.47%, Over_Red: 34.53% || Under_Green: 32.48%, Under_Red: 67.52%
21 EMA Colours || Over_Green: 60.51%, Over_Red: 39.49% || Under_Green: 37.69%, Under_Red: 62.31%
200 EMA Colours || Over_Green: 54.63%, Over_Red: 45.37% || Under_Green: 44.94%, Under_Red: 55.06%
Dataset: spy-1d.csv Size: 7346
