In [1]:
import os
import pathlib
import sys
from collections import defaultdict, OrderedDict
from datetime import datetime

import numpy as np
import pymongo
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.cluster import DBSCAN, KMeans, AgglomerativeClustering

In [2]:
PROJECT_ROOT = pathlib.Path.cwd().parent
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# scraper_path = str(PROJECT_ROOT / "scrapy_projects" / "etoro" / "etoro")
# command = f"cd {scraper_path} && scrapy crawl etoro_dashboard && scrapy crawl etoro_investor"
# print(command)
# os.system(command)

In [3]:
class Settings:
    MONGODB_SERVER = "localhost"
    MONGODB_PORT = 27017
    MONGODB_DB = "beautiful_creature"
    MONGODB_INVESTOR_COLLECTION = "investors"
    MONGODB_PORTFOLIO_COLLECTION = "portfolios"
    MONGODB_GOOGLE_TRENDS_COLLECTION = "google_trends"

    def __getitem__(self, key):
        return getattr(self, key)

settings = Settings()
connection = pymongo.MongoClient(
    settings['MONGODB_SERVER'],
    settings['MONGODB_PORT']
)
investor_collection_name = f"{settings['MONGODB_INVESTOR_COLLECTION']}"
portfolio_collection_name = f"{settings['MONGODB_PORTFOLIO_COLLECTION']}"
google_trends_collection = settings['MONGODB_GOOGLE_TRENDS_COLLECTION']
db = connection[settings['MONGODB_DB']]
investor_collection = db[investor_collection_name]
portfolio_collection = db[portfolio_collection_name]
investors = list(investor_collection.find({}))
investors = {i["UserName"]: i for i in investors}

# SIMPLE PORTOFOLIO

In [4]:
latest_timestamp = list(portfolio_collection.find().sort("timestamp", -1).limit(1))[0]["timestamp"]
portfols = list(portfolio_collection.find({"timestamp": latest_timestamp}))

In [5]:
items = defaultdict(list)

In [6]:
for p in portfols:
    for i in p["items"]:
        if i["type"] == "Buying":
            try:
                items[i["company_ticker"]].append(round(float(i["invested"].replace("%", "")), 3))
            except ValueError as e:
                if "<0.01" in str(e):
                    pass
                else:
                    raise (e)

In [7]:
ticker_to_name_mapping = defaultdict(str)
for p in portfols:
    for i in p["items"]:
        ticker_to_name_mapping[i["company_ticker"]] = i.get("company_name", None)

In [20]:
# basic portfolio settings
TOP_N_BY_COUNTS = 200
N_PORTFOLIO_ITEMS = 20

In [21]:
stats = []
for item in items:
    item_stat = {"item_name": item, "count": len(items[item]),
                 "mean": np.mean(items[item]), "std": np.std(items[item])}
    stats.append(item_stat)

df = pd.DataFrame.from_records(stats)
df["std/mean"] = df["std"] / df["mean"]
df = df.set_index("item_name")
df = df.sort_values("count", ascending=False).head(TOP_N_BY_COUNTS)
df.head()

Unnamed: 0_level_0,count,mean,std,std/mean
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AMZN,8,4.48,2.848807,0.635894
MSFT,7,3.138571,1.913101,0.609545
FB,7,2.66,1.879977,0.706758
BABA,6,2.455,1.05847,0.431149
GOOG,6,2.881667,2.104752,0.730394


In [22]:
portfol_basic = df.sort_values("count", ascending=False).sort_values("std/mean").head(N_PORTFOLIO_ITEMS)
portfol_basic["portfol_val"] = portfol_basic["mean"] * 100 / portfol_basic["mean"].sum()
portfol_basic.sort_values("portfol_val", ascending=False)
portfol_basic["company_name"] = portfol_basic.index.map(ticker_to_name_mapping)
portfol_basic.reset_index().sort_values("portfol_val", ascending=False).drop(["count", "mean", "std", "std/mean"],axis=1)

Unnamed: 0,item_name,portfol_val,company_name
18,9618.HK,21.247563,JD.com Inc
5,NTDOY,11.013645,Nintendo CO Ltd
2,PETS.L,7.764782,Pets at Home Group Plc
19,AIR.PA,6.920078,AIRBUS GROUP
13,OTLY,6.237817,Oatly Group AB
0,CHKP,6.237817,Check Point Software Technologies
1,USB,5.035737,US Bancorp
7,SUMO,4.97076,Sumo Logic Inc.
8,SPOT,4.353476,Spotify
9,CRBP,3.736192,Corbus Pharmaceuticals Holding


# WEIGHTED PORTFOLIO

In [29]:
# weighted portfolio settings
N_PORTFOLIO_ITEMS = 20
N_CLUSTERS = 2
TOP_INVESTORS_WITHIN_CLUSTER = 2

In [30]:
a_portfols = []
for p in portfols:
    for i in p["items"]:
        if i["type"] == "Buying":
            try:
                a_portfols.append((p["investor_name"], float(i["value"].replace("%", "")), i["company_ticker"]))
            except ValueError as e:
                if "<0.01" in str(e):
                    pass
                else:
                    raise (e)

In [31]:
a_df = pd.DataFrame(data=a_portfols, columns=["investor", "percent", "ticker"])
a_df = a_df.pivot_table(index="investor", columns="ticker").fillna(0)
a_df["cluster"] = KMeans(n_clusters=N_CLUSTERS).fit_predict(a_df)
fields = ["Copiers", "WeeksSinceRegistration", "DailyDD", "WeeklyDD", "RiskScore", "Gain"]

for investor_name in a_df.index:
    for field in fields:
        a_df.loc[investor_name, field] = investors[investor_name][field]
a_df_short = a_df.drop("percent", axis=1)
a_df_short

Unnamed: 0_level_0,cluster,Copiers,WeeksSinceRegistration,DailyDD,WeeklyDD,RiskScore,Gain
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
investor,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
CPHequities,1,20051.0,225.0,-4.52,-11.12,4.0,94.27
GreenbullInvest,0,8067.0,111.0,-3.93,-6.85,3.0,63.02
MarianoPardo,0,9450.0,405.0,-4.34,-8.02,5.0,71.73
Miyoshi,0,9940.0,133.0,-2.73,-5.59,4.0,27.3
Richardstroud,0,17494.0,223.0,-2.48,-4.39,3.0,31.41
Wesl3y,0,20007.0,330.0,-3.3,-6.01,5.0,44.94
eddyb123,0,10723.0,268.0,-3.47,-7.65,4.0,41.22
rubymza,0,20480.0,267.0,-4.64,-6.12,5.0,47.9


In [32]:
top_investors = a_df_short.groupby("cluster")["Gain"].nlargest(TOP_INVESTORS_WITHIN_CLUSTER).reset_index()["investor"]
a_df_short.loc[top_investors]

Unnamed: 0_level_0,cluster,Copiers,WeeksSinceRegistration,DailyDD,WeeklyDD,RiskScore,Gain
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
investor,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
MarianoPardo,0,9450.0,405.0,-4.34,-8.02,5.0,71.73
GreenbullInvest,0,8067.0,111.0,-3.93,-6.85,3.0,63.02
CPHequities,1,20051.0,225.0,-4.52,-11.12,4.0,94.27


In [33]:
portfol_weight = a_df.loc[top_investors]["percent"].sum(axis=0)
portfol_weight = portfol_weight[portfol_weight != 0]
portfol_weight = portfol_weight * 100 / portfol_weight.sum()
portfol_weight = portfol_weight.sort_values(ascending=False)[:N_PORTFOLIO_ITEMS]

In [34]:

portfol_weight_new = portfol_weight.to_frame("portfol_val")
portfol_weight_new["company_name"] = portfol_weight_new.index.map(ticker_to_name_mapping)
portfol_weight_new = portfol_weight_new.reset_index()
portfol_weight_new

Unnamed: 0,ticker,portfol_val,company_name
0,MSFT,6.344861,Microsoft
1,AMZN,6.026813,Amazon
2,GOOG,5.157212,Alphabet
3,FB,4.17086,Facebook
4,MELI,3.144249,MercadoLibre
5,MA,2.781916,Mastercard
6,BRK.B,2.765812,Berkshire Hathaway Inc
7,AAPL,2.75776,Apple
8,NET,2.56049,Cloudflare
9,SE,2.528282,Sea Ltd
