In [1]:
import os
import pathlib
import sys
from collections import defaultdict, OrderedDict
from datetime import datetime

import numpy as np
import pymongo
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.cluster import DBSCAN, KMeans, AgglomerativeClustering

In [2]:
PROJECT_ROOT = pathlib.Path.cwd().parent
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# scraper_path = str(PROJECT_ROOT / "scrapy_projects" / "etoro" / "etoro")
# command = f"cd {scraper_path} && scrapy crawl etoro_dashboard && scrapy crawl etoro_investor"
# print(command)
# os.system(command)

In [3]:
class Settings:
    MONGODB_SERVER = "localhost"
    MONGODB_PORT = 27017
    MONGODB_DB = "beautiful_creature"
    MONGODB_INVESTOR_COLLECTION = "investors"
    MONGODB_PORTFOLIO_COLLECTION = "portfolios"

    def __getitem__(self, key):
        return getattr(self, key)


timestamp = datetime.now().strftime("%d-%m-%y")
settings = Settings()
connection = pymongo.MongoClient(
    settings['MONGODB_SERVER'],
    settings['MONGODB_PORT']
)
investor_collection_name = f"{settings['MONGODB_INVESTOR_COLLECTION']}_{timestamp}"
portfolio_collection_name = f"{settings['MONGODB_PORTFOLIO_COLLECTION']}_{timestamp}"
db = connection[settings['MONGODB_DB']]
investor_collection = db[investor_collection_name]
portfolio_collection = db[portfolio_collection_name]
investors = list(investor_collection.find({}))
investors = {i["UserName"]: i for i in investors}

# SIMPLE PORTOFOLIO

In [4]:
portfols = list(portfolio_collection.find({}))

In [5]:
items = defaultdict(list)

In [6]:
for p in portfols:
    for i in p["items"]:
        if i["type"] == "Buying":
            try:
                items[i["company_ticker"]].append(round(float(i["invested"].replace("%", "")), 3))
            except ValueError as e:
                if "<0.01" in str(e):
                    pass
                else:
                    raise (e)

In [7]:

ticker_to_name_mapping = defaultdict(str)
for p in portfols:
    for i in p["items"]:
        ticker_to_name_mapping[i["company_ticker"]] = i.get("company_name", None)

In [8]:
# basic portfolio settings
TOP_N_BY_COUNTS = 100
N_PORTFOLIO_ITEMS = 20

In [9]:
stats = []
for item in items:
    item_stat = {"item_name": item, "count": len(items[item]),
                 "mean": np.mean(items[item]), "std": np.std(items[item])}
    stats.append(item_stat)

df = pd.DataFrame.from_records(stats)
df["std/mean"] = df["std"] / df["mean"]
df = df.set_index("item_name")
df = df.sort_values("count", ascending=False).head(TOP_N_BY_COUNTS)
df.head()

Unnamed: 0_level_0,count,mean,std,std/mean
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AMZN,9,4.125556,2.008151,0.486759
MSFT,9,2.777778,1.541138,0.55481
FB,9,2.978889,1.535654,0.515512
GOOG,8,2.4175,1.319382,0.545763
BABA,7,2.458571,1.338202,0.5443


In [10]:
portfol_basic = df.sort_values("count", ascending=False).sort_values("std/mean").head(N_PORTFOLIO_ITEMS)
portfol_basic["portfol_val"] = portfol_basic["mean"] * 100 / portfol_basic["mean"].sum()
portfol_basic.sort_values("portfol_val", ascending=False)
portfol_basic["company_name"] = portfol_basic.index.map(ticker_to_name_mapping)
portfol_basic.reset_index().sort_values("portfol_val", ascending=False).drop(["count", "mean", "std", "std/mean"],axis=1)

Unnamed: 0,item_name,portfol_val,company_name
16,ISF.L,10.89926,
7,FEYE,8.167331,FireEye
4,VEEV,7.968127,Veeva Systems Inc A
5,MDB,6.146841,MongoDB Inc
8,BA,6.089926,Boeing
15,AIR.PA,6.061468,AIRBUS GROUP
14,ESTC,6.061468,Elastic NV
10,OTLY,5.463859,Oatly Group AB
18,CYBR,5.463859,CyberArk
9,BYND,5.463859,Beyond Meat Inc.


# WEIGHTED PORTFOLIO

In [11]:
# weighted portfolio settings
N_PORTFOLIO_ITEMS = 20
N_CLUSTERS = 5
TOP_INVESTORS_WITHIN_CLUSTER = 1

In [12]:
a_portfols = []
for p in portfols:
    for i in p["items"]:
        if i["type"] == "Buying":
            try:
                a_portfols.append((p["investor_name"], float(i["value"].replace("%", "")), i["company_ticker"]))
            except ValueError as e:
                if "<0.01" in str(e):
                    pass
                else:
                    raise (e)

In [13]:
a_df = pd.DataFrame(data=a_portfols, columns=["investor", "percent", "ticker"])
a_df = a_df.pivot_table(index="investor", columns="ticker").fillna(0)
a_df["cluster"] = KMeans(n_clusters=N_CLUSTERS).fit_predict(a_df)
fields = ["Copiers", "WeeksSinceRegistration", "DailyDD", "WeeklyDD", "RiskScore", "Gain"]

for investor_name in a_df.index:
    for field in fields:
        a_df.loc[investor_name, field] = investors[investor_name][field]
a_df_short = a_df.drop("percent", axis=1)
a_df_short

Unnamed: 0_level_0,cluster,Copiers,WeeksSinceRegistration,DailyDD,WeeklyDD,RiskScore,Gain
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
investor,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
CPHequities,3,20037.0,223.0,-4.52,-11.12,4.0,92.93
GreenbullInvest,3,8135.0,109.0,-3.93,-6.85,3.0,64.1
JeppeKirkBonde,3,27683.0,416.0,-2.85,-5.61,4.0,56.56
MarianoPardo,3,9277.0,403.0,-4.34,-8.02,5.0,72.24
Miyoshi,4,8451.0,131.0,-2.73,-5.59,4.0,26.93
Richardstroud,1,17219.0,221.0,-2.48,-4.39,4.0,31.91
SparkLiang,2,6956.0,174.0,-2.48,-4.15,4.0,29.63
Wesl3y,0,20011.0,328.0,-3.3,-6.01,5.0,49.71
eddyb123,3,10780.0,266.0,-3.47,-7.65,4.0,44.85
rubymza,3,20552.0,265.0,-4.64,-6.12,5.0,48.94


In [14]:
top_investors = a_df_short.groupby("cluster")["Gain"].nlargest(TOP_INVESTORS_WITHIN_CLUSTER).reset_index()["investor"]
a_df_short.loc[top_investors]

Unnamed: 0_level_0,cluster,Copiers,WeeksSinceRegistration,DailyDD,WeeklyDD,RiskScore,Gain
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
investor,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Wesl3y,0,20011.0,328.0,-3.3,-6.01,5.0,49.71
Richardstroud,1,17219.0,221.0,-2.48,-4.39,4.0,31.91
SparkLiang,2,6956.0,174.0,-2.48,-4.15,4.0,29.63
CPHequities,3,20037.0,223.0,-4.52,-11.12,4.0,92.93
Miyoshi,4,8451.0,131.0,-2.73,-5.59,4.0,26.93


In [15]:
portfol_weight = a_df.loc[top_investors]["percent"].sum(axis=0)
portfol_weight = portfol_weight[portfol_weight != 0]
portfol_weight = portfol_weight * 100 / portfol_weight.sum()
portfol_weight = portfol_weight.sort_values(ascending=False)[:N_PORTFOLIO_ITEMS]

In [16]:

portfol_weight_new = portfol_weight.to_frame("portfol_val")
portfol_weight_new["company_name"] = portfol_weight_new.index.map(ticker_to_name_mapping)
portfol_weight_new = portfol_weight_new.reset_index()
portfol_weight_new

Unnamed: 0,ticker,portfol_val,company_name
0,AMZN,5.173601,Amazon
1,SHOP,5.070326,Shopify Inc.
2,FB,4.312973,Facebook
3,DIS,3.555621,Walt Disney
4,9988.HK,3.147438,Alibaba Group Holding Ltd (Hong Kong)
5,MSFT,2.763844,Microsoft
6,AAPL,2.665486,Apple
7,SPY5.L,2.338448,
8,AMD,2.3114,Advanced Micro Devices Inc
9,BABA,2.215501,Alibaba
