In [11]:
from collections import defaultdict, OrderedDict
from datetime import datetime

import numpy as np
import pymongo
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.cluster import DBSCAN, KMeans, AgglomerativeClustering

In [12]:


class Settings:
    """MongoDB connection settings, readable as attributes or by subscript."""

    # Host and port of the MongoDB server.
    MONGODB_SERVER = "localhost"
    MONGODB_PORT = 27017

    # Database name and the base names of the two collections.
    MONGODB_DB = "beautiful_creature"
    MONGODB_INVESTOR_COLLECTION = "investors"
    MONGODB_PORTFOLIO_COLLECTION = "portfolios"

    def __getitem__(self, key: str):
        """Return the attribute called ``key`` so ``settings["NAME"]`` works.

        The lookup is a plain ``getattr``; an unknown name therefore raises
        ``AttributeError`` rather than ``KeyError``.
        """
        return getattr(self, key)


# Collection names carry a dd-mm-yy suffix built from the current date, so the
# names (and therefore the data read below) change with the day this cell runs.
timestamp = datetime.now().strftime("%d-%m-%y")

settings = Settings()
connection = pymongo.MongoClient(settings['MONGODB_SERVER'], settings['MONGODB_PORT'])
db = connection[settings['MONGODB_DB']]

# Dated collection names, then the collection handles themselves.
investor_collection_name = f"{settings['MONGODB_INVESTOR_COLLECTION']}_{timestamp}"
portfolio_collection_name = f"{settings['MONGODB_PORTFOLIO_COLLECTION']}_{timestamp}"
investor_collection = db[investor_collection_name]
portfolio_collection = db[portfolio_collection_name]

# Index the investor documents by their "UserName" field for direct lookup.
investors = {doc["UserName"]: doc for doc in investor_collection.find({})}

In [13]:
# Read the portfolio documents (empty filter) into a list; the list is
# iterated by more than one later cell.
portfols = [doc for doc in portfolio_collection.find({})]

In [14]:
# Accumulator keyed by company ticker; each value is the list of "invested"
# percentages appended by the following cell. Missing keys start as [].
items = defaultdict(list)

In [15]:
# Collect, per company ticker, the "invested" percentage of every "Buying"
# position across all portfolios.
#
# `items` is rebuilt here instead of relying on the separate cell that creates
# it: otherwise re-running this cell appends every position a second time and
# inflates the per-ticker counts.
items = defaultdict(list)
for portfolio in portfols:
    for position in portfolio["items"]:
        if position["type"] != "Buying":
            continue
        # Strip any "%" sign before parsing the number.
        invested_pct = float(position["invested"].replace("%", ""))
        items[position["company_ticker"]].append(round(invested_pct, 3))

In [16]:
# One record per ticker: how many positions were collected, plus the mean and
# standard deviation (np.std) of their invested percentages.
stats = [
    {
        "item_name": ticker,
        "count": len(weights),
        "mean": np.mean(weights),
        "std": np.std(weights),
    }
    for ticker, weights in items.items()
]

df = pd.DataFrame.from_records(stats)
# Relative spread of the weights: standard deviation divided by the mean.
df["std/mean"] = df["std"] / df["mean"]
# Index by ticker and keep the 50 tickers with the most positions.
df = df.set_index("item_name").sort_values("count", ascending=False).head(50)

# Disabled experiment: score the rows with an IsolationForest.
# i_forest = IsolationForest()
# df["score"] = i_forest.fit(df).score_samples(df)
# df.sort_values("score").head(10)

In [17]:
# Order by position count, then by relative spread, and keep the 20 rows with
# the smallest std/mean.
df = (
    df.sort_values("count", ascending=False)
    .sort_values("std/mean")
    .head(20)
)
# Rescale the mean weights of the kept rows so that they add up to 100.
df["portfol_val"] = df["mean"] * 100 / df["mean"].sum()
# Display with the largest rescaled weight first.
df.sort_values("portfol_val", ascending=False)

Unnamed: 0_level_0,count,mean,std,std/mean,portfol_val
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AMZN,49,3.425918,2.460996,0.718346,7.512148
AAPL,41,2.883902,1.555973,0.539537,6.323648
AMD,36,2.846944,1.569662,0.55135,6.242609
1810.HK,21,2.72,1.554327,0.571444,5.964252
JD.US,24,2.412917,1.679081,0.695872,5.290899
DOYU,18,2.311667,1.454225,0.629081,5.068884
NVDA,28,2.287143,1.272486,0.556365,5.015109
DIS,29,2.282414,1.482618,0.649583,5.00474
TSLA,23,2.261739,1.637595,0.724043,4.959406
SQ,20,2.244,1.606868,0.716073,4.920508


In [18]:
# One (investor name, percent, ticker) tuple per "Buying" position.
# NOTE(review): this cell reads each position's "value" field, whereas the
# per-ticker statistics cell reads "invested" -- confirm both are intended.
a_portfols = [
    (
        portfolio["investor_name"],
        float(position["value"].replace("%", "")),
        position["company_ticker"],
    )
    for portfolio in portfols
    for position in portfolio["items"]
    if position["type"] == "Buying"
]

In [19]:
# Build an investor x ticker matrix of position percentages, cluster the
# investors on it, then attach a few investor-level fields to each row.
a_df = pd.DataFrame(data=a_portfols, columns=["investor", "percent", "ticker"])
# No `values=`/`aggfunc=` is passed, so pivot_table applies its default
# aggregation to repeated (investor, ticker) pairs; the ticker columns end up
# grouped under a top-level "percent" label. Absent positions become 0.
a_df = a_df.pivot_table(index="investor", columns="ticker").fillna(0)
# A fixed random_state makes the cluster labels repeatable between runs;
# without it the assignment changes every time the cell is executed.
a_df["cluster"] = KMeans(n_clusters=5, random_state=0).fit_predict(a_df)
fields = ["Copiers","ActiveWeeks","WeeksSinceRegistration","DailyDD","WeeklyDD","RiskScore","Gain"]

# Copy the selected fields from the investor documents onto the matching rows.
# An investor name missing from `investors` raises KeyError.
for investor_name in a_df.index:
    investor = investors[investor_name]  # one lookup per investor, not per field
    for field in fields:
        a_df.loc[investor_name, field] = investor[field]
# Drop the per-ticker percentage columns; the cluster label and investor
# fields remain.
a_df = a_df.drop("percent",axis=1)

In [23]:
# Alternative view, left disabled: rank by number of copiers instead.
# a_df.sort_values("Copiers",ascending=False).head(30)

# Show the first 30 investors ordered by cluster label, then by gain, both
# descending.
a_df.sort_values(by=["cluster", "Gain"], ascending=False).head(30)

Unnamed: 0_level_0,cluster,Copiers,ActiveWeeks,WeeksSinceRegistration,DailyDD,WeeklyDD,RiskScore,Gain
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
investor,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
balticseal,4,1340.0,53.0,99.0,-2.4,-2.99,3.0,17.15
chiay0327,4,760.0,53.0,236.0,-1.9,-2.52,3.0,10.05
myhungetoro,3,388.0,53.0,98.0,-2.09,-3.29,3.0,5.48
Isbelle,2,643.0,53.0,212.0,-3.72,-7.69,4.0,177.72
Samosaking,2,599.0,53.0,60.0,-4.34,-6.13,3.0,157.15
sgstjc,2,1431.0,53.0,85.0,-4.88,-8.51,4.0,146.61
fastrading,2,326.0,53.0,316.0,-4.09,-8.16,5.0,111.93
Conhoulihan,2,1125.0,53.0,273.0,-4.41,-8.43,6.0,105.79
MattewL,2,460.0,53.0,116.0,-4.82,-10.65,5.0,94.87
campervans,2,2155.0,53.0,589.0,-4.75,-6.93,5.0,93.27
