In [1]:
import os
import pathlib
import sys
from collections import defaultdict, OrderedDict
from datetime import datetime

import numpy as np
import pymongo
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.cluster import DBSCAN, KMeans, AgglomerativeClustering

In [2]:
PROJECT_ROOT = pathlib.Path.cwd().parent
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# scraper_path = str(PROJECT_ROOT / "scrapy_projects" / "etoro" / "etoro")
# command = f"cd {scraper_path} && scrapy crawl etoro_dashboard && scrapy crawl etoro_investor"
# print(command)
# os.system(command)

In [3]:
class Settings:
    MONGODB_SERVER = "localhost"
    MONGODB_PORT = 27017
    MONGODB_DB = "beautiful_creature"
    MONGODB_INVESTOR_COLLECTION = "investors"
    MONGODB_PORTFOLIO_COLLECTION = "portfolios"
    MONGODB_GOOGLE_TRENDS_COLLECTION = "google_trends"

    def __getitem__(self, key):
        return getattr(self, key)

settings = Settings()
connection = pymongo.MongoClient(
    settings['MONGODB_SERVER'],
    settings['MONGODB_PORT']
)
investor_collection_name = f"{settings['MONGODB_INVESTOR_COLLECTION']}"
portfolio_collection_name = f"{settings['MONGODB_PORTFOLIO_COLLECTION']}"
google_trends_collection = settings['MONGODB_GOOGLE_TRENDS_COLLECTION']
db = connection[settings['MONGODB_DB']]
investor_collection = db[investor_collection_name]
portfolio_collection = db[portfolio_collection_name]
investors = list(investor_collection.find({}))
investors = {i["UserName"]: i for i in investors}

# SIMPLE PORTOFOLIO

In [4]:
latest_timestamp = list(portfolio_collection.find().sort("timestamp", -1).limit(1))[0]["timestamp"]
portfols = list(portfolio_collection.find({"timestamp": latest_timestamp}))

In [5]:
items = defaultdict(list)

In [6]:
for p in portfols:
    for i in p["items"]:
        if i["type"] == "Buying":
            try:
                items[i["company_ticker"]].append(round(float(i["invested"].replace("%", "")), 3))
            except ValueError as e:
                if "<0.01" in str(e):
                    pass
                else:
                    raise (e)

In [7]:
ticker_to_name_mapping = defaultdict(str)
for p in portfols:
    for i in p["items"]:
        ticker_to_name_mapping[i["company_ticker"]] = i.get("company_name", None)

In [8]:
# basic portfolio settings
TOP_N_BY_COUNTS = 50
N_PORTFOLIO_ITEMS = 20

In [9]:
stats = []
for item in items:
    item_stat = {"item_name": item, "count": len(items[item]),
                 "mean": np.mean(items[item]), "std": np.std(items[item])}
    stats.append(item_stat)

df = pd.DataFrame.from_records(stats)
df["std/mean"] = df["std"] / df["mean"]
df = df.set_index("item_name")
df = df.sort_values("count", ascending=False).head(TOP_N_BY_COUNTS)
df.head()

Unnamed: 0_level_0,count,mean,std,std/mean
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AMZN,9,4.404444,2.117657,0.4808
MSFT,9,3.092222,1.882987,0.608943
FB,9,3.278889,1.938272,0.591137
GOOG,8,2.755,1.918476,0.696362
BABA,7,2.458571,1.338202,0.5443


In [10]:
portfol_basic = df.sort_values("count", ascending=False).sort_values("std/mean").head(N_PORTFOLIO_ITEMS)
portfol_basic["portfol_val"] = portfol_basic["mean"] * 100 / portfol_basic["mean"].sum()
portfol_basic.sort_values("portfol_val", ascending=False)
portfol_basic["company_name"] = portfol_basic.index.map(ticker_to_name_mapping)
portfol_basic.reset_index().sort_values("portfol_val", ascending=False).drop(["count", "mean", "std", "std/mean"],axis=1)

Unnamed: 0,item_name,portfol_val,company_name
1,MA,9.159392,Mastercard
15,AMD,8.784319,Advanced Micro Devices Inc
19,GLD,8.765294,
17,DIS,8.502562,Walt Disney
14,FSLR,5.952246,"First Solar, Inc."
18,TDOC,5.81635,Teladoc Health Inc
4,NET,5.617036,Cloudflare
10,FXPO.L,5.327124,Ferrexpo PLC
7,PYPL,4.982854,PayPal Holdings
11,SEDG,4.511748,SolarEdge Technologies


# WEIGHTED PORTFOLIO

In [11]:
# weighted portfolio settings
N_PORTFOLIO_ITEMS = 20
N_CLUSTERS = 5
TOP_INVESTORS_WITHIN_CLUSTER = 1

In [12]:
a_portfols = []
for p in portfols:
    for i in p["items"]:
        if i["type"] == "Buying":
            try:
                a_portfols.append((p["investor_name"], float(i["value"].replace("%", "")), i["company_ticker"]))
            except ValueError as e:
                if "<0.01" in str(e):
                    pass
                else:
                    raise (e)

In [13]:
a_df = pd.DataFrame(data=a_portfols, columns=["investor", "percent", "ticker"])
a_df = a_df.pivot_table(index="investor", columns="ticker").fillna(0)
a_df["cluster"] = KMeans(n_clusters=N_CLUSTERS).fit_predict(a_df)
fields = ["Copiers", "WeeksSinceRegistration", "DailyDD", "WeeklyDD", "RiskScore", "Gain"]

for investor_name in a_df.index:
    for field in fields:
        a_df.loc[investor_name, field] = investors[investor_name][field]
a_df_short = a_df.drop("percent", axis=1)
a_df_short

Unnamed: 0_level_0,cluster,Copiers,WeeksSinceRegistration,DailyDD,WeeklyDD,RiskScore,Gain
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
investor,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
CPHequities,3,20016.0,223.0,-4.52,-11.12,4.0,90.49
GreenbullInvest,0,8138.0,109.0,-3.93,-6.85,3.0,62.76
JeppeKirkBonde,0,27681.0,416.0,-2.85,-5.61,4.0,54.43
MarianoPardo,0,9281.0,403.0,-4.34,-8.02,5.0,70.49
Miyoshi,2,8499.0,131.0,-2.73,-5.59,4.0,25.68
Richardstroud,4,17220.0,221.0,-2.48,-4.39,4.0,30.24
SparkLiang,1,6966.0,174.0,-2.48,-4.15,4.0,27.01
Wesl3y,0,20012.0,328.0,-3.3,-6.01,5.0,47.41
eddyb123,0,10793.0,266.0,-3.47,-7.65,4.0,43.48
rubymza,0,20550.0,265.0,-4.64,-6.12,5.0,47.41


In [20]:
top_investors = a_df_short.groupby("cluster")["Gain"].nlargest(TOP_INVESTORS_WITHIN_CLUSTER).reset_index()["investor"]
a_df_short.loc[top_investors]

Unnamed: 0_level_0,cluster,Copiers,WeeksSinceRegistration,DailyDD,WeeklyDD,RiskScore,Gain
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
investor,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
MarianoPardo,0,9281.0,403.0,-4.34,-8.02,5.0,70.49
SparkLiang,1,6966.0,174.0,-2.48,-4.15,4.0,27.01
Miyoshi,2,8499.0,131.0,-2.73,-5.59,4.0,25.68
CPHequities,3,20016.0,223.0,-4.52,-11.12,4.0,90.49
Richardstroud,4,17220.0,221.0,-2.48,-4.39,4.0,30.24


In [21]:
portfol_weight = a_df.loc[top_investors]["percent"].sum(axis=0)
portfol_weight = portfol_weight[portfol_weight != 0]
portfol_weight = portfol_weight * 100 / portfol_weight.sum()
portfol_weight = portfol_weight.sort_values(ascending=False)[:N_PORTFOLIO_ITEMS]

In [22]:

portfol_weight_new = portfol_weight.to_frame("portfol_val")
portfol_weight_new["company_name"] = portfol_weight_new.index.map(ticker_to_name_mapping)
portfol_weight_new = portfol_weight_new.reset_index()
portfol_weight_new

Unnamed: 0,ticker,portfol_val,company_name
0,AMZN,5.3437,Amazon
1,SHOP,5.222355,Shopify Inc.
2,MSFT,4.222377,Microsoft
3,FB,4.087549,Facebook
4,AAPL,3.510033,Apple
5,DIS,3.337004,Walt Disney
6,9988.HK,2.853868,Alibaba Group Holding Ltd (Hong Kong)
7,SQ,2.748253,"Square, Inc."
8,GOOG,2.615672,Alphabet
9,PYPL,2.40444,PayPal Holdings
