In [1]:
from collections import defaultdict, OrderedDict
from datetime import datetime

import numpy as np
import pymongo
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.cluster import DBSCAN, KMeans, AgglomerativeClustering

In [2]:


class Settings:
    MONGODB_SERVER = "localhost"
    MONGODB_PORT = 27017
    MONGODB_DB = "beautiful_creature"
    MONGODB_INVESTOR_COLLECTION = "investors"
    MONGODB_PORTFOLIO_COLLECTION = "portfolios"

    def __getitem__(self, key):
        return getattr(self, key)


timestamp = datetime.now().strftime("%d-%m-%y")
settings = Settings()
connection = pymongo.MongoClient(
    settings['MONGODB_SERVER'],
    settings['MONGODB_PORT']
)
investor_collection_name = f"{settings['MONGODB_INVESTOR_COLLECTION']}_{timestamp}"
portfolio_collection_name = f"{settings['MONGODB_PORTFOLIO_COLLECTION']}_{timestamp}"
db = connection[settings['MONGODB_DB']]
investor_collection = db[investor_collection_name]
portfolio_collection = db[portfolio_collection_name]
investors = list(investor_collection.find({}))
investors = {i["UserName"]: i for i in investors}

# SIMPLE PORTOFOLIO

In [23]:
portfols = list(portfolio_collection.find({}))

In [24]:
items = defaultdict(list)

In [25]:
for p in portfols:
    for i in p["items"]:
        if i["type"] == "Buying":
            try:
                items[i["company_ticker"]].append(round(float(i["invested"].replace("%", "")), 3))
            except ValueError as e:
                if "<0.01" in str(e):
                    pass
                else:
                    raise (e)

In [26]:

ticker_to_name_mapping = defaultdict(str)
for p in portfols:
    for i in p["items"]:
        ticker_to_name_mapping[i["company_ticker"]] = i.get("company_name", None)

In [27]:
# basic portfolio settings
TOP_N_BY_COUNTS = 50
TOP_N_BY_RATIO = 15


In [28]:
stats = []
for item in items:
    item_stat = {"item_name": item, "count": len(items[item]),
                 "mean": np.mean(items[item]), "std": np.std(items[item])}
    stats.append(item_stat)

df = pd.DataFrame.from_records(stats)
df["std/mean"] = df["std"] / df["mean"]
df = df.set_index("item_name")
df = df.sort_values("count", ascending=False).head(TOP_N_BY_COUNTS)

In [29]:
portfol_basic = df.sort_values("count", ascending=False).sort_values("std/mean").head(TOP_N_BY_RATIO)
portfol_basic["portfol_val"] = portfol_basic["mean"] * 100 / portfol_basic["mean"].sum()
portfol_basic.sort_values("portfol_val", ascending=False)
portfol_basic["company_name"] = portfol_basic.index.map(ticker_to_name_mapping)
portfol_basic.reset_index().sort_values("portfol_val", ascending=False).drop(["count", "mean", "std", "std/mean"],axis=1)

Unnamed: 0,item_name,portfol_val,company_name
3,AMD,8.682508,Advanced Micro Devices Inc
4,1810.HK,7.986337,Xiaomi Corp
1,CSIQ,7.517029,Canadian Solar Inc.
7,DOYU,7.421353,Douyu
14,JD.US,7.326327,JD.com
8,NVDA,7.173672,NVIDIA Corporation
6,NIO,7.153835,Nio Inc.
5,BYND,6.979456,Beyond Meat Inc.
9,TSLA,6.934937,"Tesla Motors, Inc."
13,DIS,6.823574,Walt Disney


# WEIGHTED PORTFOLIO

In [10]:
# weighted portfolio settings
TOP_N_BY_VALUE = 15
N_CLUSTERS = 7
TOP_INVESTORS_WITHIN_CLUSTER = 3

In [11]:
a_portfols = []
for p in portfols:
    for i in p["items"]:
        if i["type"] == "Buying":
            try:
                a_portfols.append((p["investor_name"], float(i["value"].replace("%", "")), i["company_ticker"]))
            except ValueError as e:
                if "<0.01" in str(e):
                    pass
                else:
                    raise (e)

In [12]:
a_df = pd.DataFrame(data=a_portfols, columns=["investor", "percent", "ticker"])
a_df = a_df.pivot_table(index="investor", columns="ticker").fillna(0)
a_df["cluster"] = KMeans(n_clusters=N_CLUSTERS).fit_predict(a_df)
fields = ["Copiers", "WeeksSinceRegistration", "DailyDD", "WeeklyDD", "RiskScore", "Gain"]

for investor_name in a_df.index:
    for field in fields:
        a_df.loc[investor_name, field] = investors[investor_name][field]
a_df_short = a_df.drop("percent", axis=1)
a_df_short

Unnamed: 0_level_0,cluster,Copiers,WeeksSinceRegistration,DailyDD,WeeklyDD,RiskScore,Gain
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
investor,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2BSmart,0,323.0,226.0,-4.38,-7.63,4.0,37.65
ABDUCT,0,584.0,95.0,-3.12,-3.12,3.0,26.57
ALnayef,0,1708.0,263.0,-2.21,-3.19,3.0,15.60
Abbroush,0,514.0,63.0,-1.32,-1.93,3.0,16.17
Aguero1010,0,1101.0,63.0,-4.26,-5.58,4.0,80.07
...,...,...,...,...,...,...,...
traderengeng,0,116.0,67.0,-3.63,-5.55,3.0,47.27
vidinho,0,133.0,627.0,-3.51,-4.45,4.0,27.47
viveredidividend,0,564.0,66.0,-3.39,-4.60,4.0,31.45
willpetch1989,0,397.0,146.0,-4.15,-9.49,4.0,23.85


In [13]:
top_investors = a_df_short.groupby("cluster")["Gain"].nlargest(TOP_INVESTORS_WITHIN_CLUSTER).reset_index()["investor"]
a_df_short.loc[top_investors]

Unnamed: 0_level_0,cluster,Copiers,WeeksSinceRegistration,DailyDD,WeeklyDD,RiskScore,Gain
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
investor,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Isbelle,0,643.0,212.0,-3.72,-7.69,4.0,177.72
sgstjc,0,1440.0,85.0,-4.88,-8.51,4.0,146.61
VidovM,0,214.0,69.0,-3.82,-5.03,4.0,114.65
Samosaking,1,599.0,60.0,-4.34,-6.13,3.0,157.15
myhungetoro,2,388.0,98.0,-2.09,-3.29,3.0,5.48
Slow_and_Steady,3,3095.0,123.0,-1.69,-2.52,3.0,12.67
balticseal,4,1341.0,99.0,-2.4,-2.99,3.0,17.15
chiay0327,4,762.0,236.0,-1.9,-2.52,3.0,10.05
EliteVol,5,176.0,97.0,-4.15,-10.59,5.0,22.25
SparkLiang,6,6887.0,174.0,-2.48,-4.15,4.0,29.49


In [16]:
portfol_weight = a_df.loc[top_investors]["percent"].sum(axis=0)
portfol_weight = portfol_weight[portfol_weight != 0]
portfol_weight = portfol_weight * 100 / portfol_weight.sum()
portfol_weight = portfol_weight.sort_values(ascending=False)[:TOP_N_BY_VALUE]

In [17]:

portfol_weight_new = portfol_weight.to_frame("portfol_val")
portfol_weight_new["company_name"] = portfol_weight_new.index.map(ticker_to_name_mapping)
portfol_weight_new = portfol_weight_new.reset_index()
portfol_weight_new

Unnamed: 0,ticker,portfol_val,company_name
0,TLT,12.917218,
1,SVXY,7.141316,
2,SHV,5.384203,
3,VTI,4.953776,
4,IVV,4.034279,
5,JNK,2.630138,
6,QQQ,2.367898,
7,SHOP,2.246184,Shopify Inc.
8,SOXX,2.236226,
9,BABA,2.176475,Alibaba
