In [1]:
# In this section we adopt K-Means Clustering to get similar stocks for recommendation

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans


In [5]:

# First load the dataset on stock basics
dataset = pd.read_csv('./data/stock_basics.csv')
# The first CSV column supplies the identifier of each row (the stock symbol).
stocks = dataset.iloc[:,0].values
print("Below is stock list in dataset:")
print(stocks)
print("-------------------------------------------------------------")

# Every remaining CSV column is treated as a feature. The six names below are
# assigned by position, so the file must provide exactly six feature columns
# in this order (pandas raises on a length mismatch).
features = pd.DataFrame(dataset.iloc[:,1:].values)
features.columns = ["Price", "Volume", "Market Cap", "Beta", "PE Ratio", "EPS"]
cols = features.columns
# Values that cannot be parsed as numbers become NaN and are imputed below.
features = features.apply(pd.to_numeric, errors='coerce')
print("Below is the data of stock features: ")
print(features)

# Second we eliminate null values in the dataset
# Missing entries are replaced by the mean of their column. The mean is used
# as-is: truncating it with int() would distort columns whose values are
# fractional, and int() raises on the NaN mean of an entirely empty column.
# A column with no valid value at all stays NaN here.
features = features.fillna(features.mean())

Below is stock list in dataset:
['AARTIIND.NS' 'ABBOTINDIA.NS' 'ACC.NS' 'ADANIENT.NS' 'ADANIPORTS.NS'
 'ABFRL.NS' 'APLLTD.NS' 'ALKEM.NS' 'AMARAJABAT.NS' 'AMBUJACEM.NS'
 'APOLLOHOSP.NS' 'APOLLOTYRE.NS' 'ASHOKLEY.NS' 'ASIANPAINT.NS' 'ASTRAL.NS'
 'AUBANK.NS' 'AUROPHARMA.NS' 'AXISBANK.NS' 'BAJAJ-AUTO.NS' 'BAJFINANCE.NS'
 'BAJAJFINSV.NS' 'BALKRISIND.NS' 'BANDHANBNK.NS' 'BANKBARODA.NS'
 'BATAINDIA.NS' 'BERGEPAINT.NS' 'BEL.NS' 'BHARATFORG.NS' 'BPCL.NS'
 'BHARTIARTL.NS' 'BHEL.NS' 'BIOCON.NS' 'BOSCHLTD.NS' 'BRITANNIA.NS'
 'CADILAHC.NS' 'CANFINHOME.NS' 'CANBK.NS' 'CHOLAFIN.NS' 'CIPLA.NS'
 'CUB.NS' 'COALINDIA.NS' 'COFORGE.NS' 'COLPAL.NS' 'CONCOR.NS'
 'COROMANDEL.NS' 'CROMPTON.NS' 'CUMMINSIND.NS' 'DABUR.NS' 'DALBHARAT.NS'
 'DEEPAKNTR.NS' 'DELTACORP.NS' 'DIVISLAB.NS' 'DIXON.NS' 'DLF.NS'
 'LALPATHLAB.NS' 'DRREDDY.NS' 'EICHERMOT.NS' 'ESCORTS.NS' 'EXIDEIND.NS'
 'FEDERALBNK.NS' 'GAIL.NS' 'GLENMARK.NS' 'GMRINFRA.NS' 'GODREJCP.NS'
 'GODREJPROP.NS' 'GRANULES.NS' 'GRASIM.NS' 'GUJGASLTD.NS' 'HAVELLS.NS'
 'H

In [6]:
def gen_clusters(features, columns, clusterNum, clusterPrint, stock_names=None):
    """Group stocks into K-Means clusters computed on the selected columns.

    Parameters
    ----------
    features : table indexable by a list of column names (``features[columns]``)
        One row per stock.
    columns : list
        Names of the columns of ``features`` to cluster on.
    clusterNum : int
        Number of clusters passed to ``KMeans``.
    clusterPrint : int
        When equal to 1, every generated cluster is printed.
    stock_names : sequence, optional
        Label of each row of ``features``, in row order. Defaults to the
        notebook-level ``stocks`` array, which is what this function always
        used before the parameter existed.

    Returns
    -------
    list of list
        ``clusterNum`` lists; list ``k`` holds the labels of the rows that
        K-Means assigned to cluster ``k``. A list may be empty.

    Raises
    ------
    ValueError
        If the number of labels differs from the number of rows.
    """
    print("Creating clusters:")
    print( columns, clusterNum, clusterPrint)

    if stock_names is None:
        # Backward-compatible default: fall back to the global defined by the
        # data-loading cell.
        stock_names = stocks

    # Labels and rows are paired purely by position, so a length mismatch
    # would silently attach the wrong symbols to clusters.
    if len(stock_names) != len(features):
        raise ValueError(
            "Got " + str(len(stock_names)) + " stock names for "
            + str(len(features)) + " feature rows"
        )

    # random_state is fixed so repeated runs produce the same clusters.
    kmeans = KMeans(n_clusters=clusterNum,init='k-means++',max_iter=300,n_init=10,random_state=0)
    clusterIds = kmeans.fit_predict(features[columns])

    # One (initially empty) member list per cluster id.
    clusters = [[] for _ in range(clusterNum)]
    for stock, clusterId in zip(stock_names, clusterIds):
        clusters[clusterId].append(stock)

    # Print out cluster (numbered from 1 for display)
    if clusterPrint == 1:
        print("Here are generated clusters:\n")
        for number, members in enumerate(clusters, start=1):
            print("cluster-" + str(number) + ": " + ",".join(members))
    return clusters

In [8]:
# Now we start to recommend stocks
# First set the stock symbol (here PAGEIND.NS) and the stock features you care about; the system will return a list of stocks in the same cluster
# You are free to change the stock symbol and the concerned features to test
# 
# Available feature ID and name pairs are listed below:
# 1: "Price", 
# 2: "Volume", 
# 3: "Market Cap", 
# 4: "Beta", 
# 5: "PE Ratio"
# 6: "EPS"

# Here the user wants recommendations for PAGEIND.NS based on price and volume.
stock_input = "PAGEIND.NS"
fids_concerned = [1,2]

# Number of K-Means clusters to build; clusterPrint = 1 would also print every cluster.
# NOTE(review): the earlier comment here spoke of 100 clusters with about 15
# stocks each, which does not match the value 80 - confirm the intended setting.
clusterNum = 80
clusterPrint = 0

# Translate the 1-based feature IDs into column names. IDs outside the table
# above are rejected explicitly: an ID of 0 or a negative ID would otherwise
# silently select a column counted from the end.
columns = []
for fid in fids_concerned:
    if not 1 <= fid <= len(features.columns):
        raise ValueError("Unknown feature ID: " + str(fid))
    columns.append(features.columns[fid-1])
clusters = gen_clusters(features, columns, clusterNum, clusterPrint)

print("Stock you have input: " + stock_input)
print("Features you are concerned about: " + ', '.join(columns))
print("---------------------------------------------------")
count = 0
isfound = False
for cluster in clusters:
    # Only a cluster that contains the input stock AND at least one other
    # stock can yield a recommendation.
    if stock_input not in cluster or len(cluster) <= 1:
        continue
    isfound = True
    print("Here are a list of stocks you may be interested in:\n")
    for cluster_stock in cluster:
        if cluster_stock != stock_input:
            count += 1
            print("Stock-" + str(count) + ": " + cluster_stock)
            print("=====================================")
    # Stop at the first matching cluster.
    break
if not isfound:
    print("Sorry, we can not make any recommendation based on your input")

Creating clusters:
['Price', 'Volume'] 80 0
Stock you have input: PAGEIND.NS
Features you are concerned about: Price, Volume
---------------------------------------------------
Here are a list of stocks you may be interested in:

Stock-1: MRF.NS


['AARTIIND.NS',
 'ABBOTINDIA.NS',
 'ACC.NS',
 'ADANIENT.NS',
 'ADANIPORTS.NS',
 'ABFRL.NS',
 'APLLTD.NS',
 'ALKEM.NS',
 'AMARAJABAT.NS',
 'AMBUJACEM.NS',
 'APOLLOHOSP.NS',
 'APOLLOTYRE.NS',
 'ASHOKLEY.NS',
 'ASIANPAINT.NS',
 'ASTRAL.NS',
 'AUBANK.NS',
 'AUROPHARMA.NS',
 'AXISBANK.NS',
 'BAJAJ-AUTO.NS',
 'BAJFINANCE.NS',
 'BAJAJFINSV.NS',
 'BALKRISIND.NS',
 'BANDHANBNK.NS',
 'BANKBARODA.NS',
 'BATAINDIA.NS',
 'BERGEPAINT.NS',
 'BEL.NS',
 'BHARATFORG.NS',
 'BPCL.NS',
 'BHARTIARTL.NS',
 'BHEL.NS',
 'BIOCON.NS',
 'BOSCHLTD.NS',
 'BRITANNIA.NS',
 'CADILAHC.NS',
 'CANFINHOME.NS',
 'CANBK.NS',
 'CHOLAFIN.NS',
 'CIPLA.NS',
 'CUB.NS',
 'COALINDIA.NS',
 'COFORGE.NS',
 'COLPAL.NS',
 'CONCOR.NS',
 'COROMANDEL.NS',
 'CROMPTON.NS',
 'CUMMINSIND.NS',
 'DABUR.NS',
 'DALBHARAT.NS',
 'DEEPAKNTR.NS',
 'DELTACORP.NS',
 'DIVISLAB.NS',
 'DIXON.NS',
 'DLF.NS',
 'LALPATHLAB.NS',
 'DRREDDY.NS',
 'EICHERMOT.NS',
 'ESCORTS.NS',
 'EXIDEIND.NS',
 'FEDERALBNK.NS',
 'GAIL.NS',
 'GLENMARK.NS',
 'GMRINFRA.NS',
 'GODRE

In [42]:
# Download the Yahoo Finance quote page for ABBOTINDIA.NS and parse it.
# - timeout: without one, requests can wait indefinitely on a stalled connection.
# - raise_for_status: fail loudly on an HTTP error status instead of parsing an
#   error page as if it were the quote page.
# NOTE(review): the saved output of this request is Yahoo's generic error page.
# Yahoo is reported to reject the default python-requests User-Agent, so a
# browser-like one is sent - confirm this yields the real quote page.
html = requests.get(
    'https://finance.yahoo.com/quote/ABBOTINDIA.NS?p=ABBOTINDIA.NS',
    headers={'User-Agent': 'Mozilla/5.0'},
    timeout=30,
)
html.raise_for_status()
soup = BeautifulSoup(html.text, features="lxml")

In [43]:
# Display the parsed document to inspect what the server actually returned.
soup

<!DOCTYPE html>
<html lang="en-us"><head>
<meta content="text/html; charset=utf-8" http-equiv="content-type"/>
<meta charset="utf-8"/>
<title>Yahoo</title>
<meta content="width=device-width,initial-scale=1,minimal-ui" name="viewport"/>
<meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
<style>
  html {
      height: 100%;
  }
  body {
      background: #fafafc url(https://s.yimg.com/nn/img/sad-panda-201402200631.png) 50% 50%;
      background-size: cover;
      height: 100%;
      text-align: center;
      font: 300 18px "helvetica neue", helvetica, verdana, tahoma, arial, sans-serif;
  }
  table {
      height: 100%;
      width: 100%;
      table-layout: fixed;
      border-collapse: collapse;
      border-spacing: 0;
      border: none;
  }
  h1 {
      font-size: 42px;
      font-weight: 400;
      color: #400090;
  }
  p {
      color: #1A1A1A;
  }
  #message-1 {
      font-weight: bold;
      margin: 0;
  }
  #message-2 {
      display: inline-block;
      *display: 