# Hamming Distance

## The Hamming distance between two equal-length vectors is the number of positions at which their corresponding elements differ.

In [1]:
import warnings

import numpy as np
import yfinance as yf  # yfinance is used to fetch data
from scipy.spatial.distance import hamming

# Silence library warnings so the rendered outputs stay readable.
warnings.filterwarnings("ignore")

# pdr_override() only patches pandas_datareader, which this notebook does not
# use, and it has been deprecated/removed in newer yfinance releases. Call it
# only when the installed version still provides it so the cell never fails.
if hasattr(yf, "pdr_override"):
    yf.pdr_override()

In [2]:
# Ticker and date range requested from yfinance.
symbol = '^NSEI'

start = '2020-01-01'
end = '2023-01-01'

# Read data.
# auto_adjust=False is pinned explicitly: newer yfinance releases default to
# auto-adjusted prices and then omit the 'Adj Close' column that a later cell
# drops, so relying on the default makes the notebook version-dependent.
dataset = yf.download(symbol, start=start, end=end, auto_adjust=False)

# View Columns
dataset.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-01 00:00:00+05:30,12202.150391,12222.200195,12165.299805,12182.5,12182.5,304100
2020-01-02 00:00:00+05:30,12198.549805,12289.900391,12195.25,12282.200195,12282.200195,407700
2020-01-03 00:00:00+05:30,12261.099609,12265.599609,12191.349609,12226.650391,12226.650391,428800
2020-01-06 00:00:00+05:30,12170.599609,12179.099609,11974.200195,11993.049805,11993.049805,396500
2020-01-07 00:00:00+05:30,12079.099609,12152.150391,12005.349609,12052.950195,12052.950195,447800


In [3]:
# Show the last five rows of the downloaded frame.
dataset.tail(n=5)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-12-26 00:00:00+05:30,17830.400391,18084.099609,17774.25,18014.599609,18014.599609,176700
2022-12-27 00:00:00+05:30,18089.800781,18149.25,17967.449219,18132.300781,18132.300781,214300
2022-12-28 00:00:00+05:30,18084.75,18173.099609,18068.349609,18122.5,18122.5,193900
2022-12-29 00:00:00+05:30,18045.699219,18229.699219,17992.800781,18191.0,18191.0,281100
2022-12-30 00:00:00+05:30,18259.099609,18265.25,18080.300781,18105.300781,18105.300781,192000


In [4]:
# Remove the columns that are not needed below. `dataset` is rebound to the
# trimmed frame, so errors='ignore' keeps this cell idempotent: re-running it
# (or running it when a column is already absent) no longer raises KeyError.
dataset = dataset.drop(columns=['Adj Close', 'Volume'], errors='ignore')
dataset.head()

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-01 00:00:00+05:30,12202.150391,12222.200195,12165.299805,12182.5
2020-01-02 00:00:00+05:30,12198.549805,12289.900391,12195.25,12282.200195
2020-01-03 00:00:00+05:30,12261.099609,12265.599609,12191.349609,12226.650391
2020-01-06 00:00:00+05:30,12170.599609,12179.099609,11974.200195,11993.049805
2020-01-07 00:00:00+05:30,12079.099609,12152.150391,12005.349609,12052.950195


In [5]:
def hamming_distance(x, y):
    """Return the number of positions at which ``x`` and ``y`` differ.

    Parameters
    ----------
    x, y : array-like
        Sequences of equal shape to compare element by element.

    Returns
    -------
    float
        Count of differing positions, returned as a float (e.g. ``2.0``).
        Empty inputs yield ``0.0``.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same shape.
    """
    x_arr = np.asarray(x)
    y_arr = np.asarray(y)

    # Fail loudly on mismatched inputs instead of letting NumPy broadcast them.
    if x_arr.shape != y_arr.shape:
        raise ValueError("x and y must have equal lengths.")

    # Count mismatches directly rather than computing a fraction and scaling
    # it back by len(x): this avoids a float round trip and NaN on empty input.
    return float(np.count_nonzero(x_arr != y_arr))

In [6]:
# Copy the 'Open' column out of the frame as a standalone NumPy array.
Open = dataset['Open'].to_numpy(copy=True)

In [7]:
# Copy the 'Close' column out of the frame as a standalone NumPy array.
Close = dataset['Close'].to_numpy(copy=True)

In [8]:
# Display the array built from the 'Open' column.
Open

array([12202.15039062, 12198.54980469, 12261.09960938, 12170.59960938,
       12079.09960938, 11939.09960938, 12153.15039062, 12271.        ,
       12296.70019531, 12333.09960938, 12349.40039062, 12347.09960938,
       12328.40039062, 12430.5       , 12195.29980469, 12218.34960938,
       12123.75      , 12174.54980469, 12197.09960938, 12148.09960938,
       12114.90039062, 12147.75      , 12100.40039062, 11627.45019531,
       11786.25      , 12005.84960938, 12120.        , 12151.15039062,
       12102.34960938, 12108.40039062, 12151.        , 12219.54980469,
       12190.15039062, 12131.79980469, 12028.25      , 12090.59960938,
       12119.        , 12012.54980469, 11877.5       , 11738.54980469,
       11661.25      , 11382.        , 11387.34960938, 11217.54980469,
       11351.34960938, 11306.04980469, 10942.65039062, 10742.04980469,
       10334.29980469, 10039.95019531,  9107.59960938,  9587.79980469,
        9285.40039062,  9088.45019531,  8063.29980469,  8284.45019531,
      

In [9]:
# Display the array built from the 'Close' column.
Close

array([12182.5       , 12282.20019531, 12226.65039062, 11993.04980469,
       12052.95019531, 12025.34960938, 12215.90039062, 12256.79980469,
       12329.54980469, 12362.29980469, 12343.29980469, 12355.5       ,
       12352.34960938, 12224.54980469, 12169.84960938, 12106.90039062,
       12180.34960938, 12248.25      , 12119.        , 12055.79980469,
       12129.5       , 12035.79980469, 11962.09960938, 11707.90039062,
       11979.65039062, 12089.15039062, 12137.95019531, 12098.34960938,
       12031.5       , 12107.90039062, 12201.20019531, 12174.65039062,
       12113.45019531, 12045.79980469, 11992.5       , 12125.90039062,
       12080.84960938, 11829.40039062, 11797.90039062, 11678.5       ,
       11633.29980469, 11201.75      , 11132.75      , 11303.29980469,
       11251.        , 11269.        , 10989.45019531, 10451.45019531,
       10458.40039062,  9590.15039062,  9955.20019531,  9197.40039062,
        8967.04980469,  8468.79980469,  8263.45019531,  8745.45019531,
      

In [10]:
# Count the rows on which the Open and Close values differ.
hamming_distance(x=Open, y=Close)

746.0