In [None]:
import pandas as pd
import numpy as np
import random

import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by libraries
# (e.g. pandas), so API removals go unnoticed until the code breaks.
warnings.filterwarnings('ignore')

# I. Входные параметры

In [None]:
# Number of forecast slots per term type (one row per term, single 'slots' column).
df_bets = pd.DataFrame(
    {'slots': [33, 7]},
    index=['short_term', 'medium_term'],
)
df_bets

Unnamed: 0,slots
short_term,33
medium_term,7


In [None]:
# Allowed forecast horizons: hours for short-term forecasts,
# days for medium-term forecasts.
short_lst = [1, 4, 12, 24]
medium_lst = [3, 7, 14]

# Available tokens
crt_curr = ['BTC', 'ETH', 'BNB', 'SOL', 'XRP', 'TON', 'DOT']

# Selection probabilities.
# Each short-term horizon is weighted by 24 / hours and the weights are
# normalised by their sum (24 + 6 + 2 + 1 = 33), which reproduces the
# original hard-coded 24/h/33 values while staying valid if short_lst changes.
_short_weights = [24 / hours for hours in short_lst]
_short_total = sum(_short_weights)
short_prob = [weight / _short_total for weight in _short_weights]
medium_prob = [0.5, 0.3, 0.2]
curr_prob = [0.4, 0.3, 0.1, 0.05, 0.05, 0.05, 0.05]

# np.random.choice needs one probability per choice and a total of 1;
# fail here rather than deep inside the simulation loop.
for _choices, _probs in ((short_lst, short_prob),
                         (medium_lst, medium_prob),
                         (crt_curr, curr_prob)):
    assert len(_choices) == len(_probs), 'one probability per choice is required'
    assert np.isclose(sum(_probs), 1.0), 'probabilities must sum to 1'

# Point cap per short-term horizon (columns), repeated for every coin (rows).
# A dict of scalars plus an explicit index is broadcast by the DataFrame
# constructor; the previous one-row list with a seven-label index relied on
# implicit repetition of a single row, which is not dependable across
# pandas versions.
MAX_POINT_BY_COIN_DF = pd.DataFrame(
    {1: 2000, 4: 6000, 12: 10000, 24: 14000},
    index=crt_curr,
)

In [None]:
# Base number of players for the forecast database. The generation loop
# iterates over range(n + day), so day 0 has n players and every later day
# adds one more.
# NOTE(review): the original comment said "100 players in total", which does
# not match n = 10.
n = 10

In [None]:
from datetime import datetime, timedelta

In [None]:
# Build the simulated forecast database: one row per short-term forecast.
# NOTE(review): only 'short' forecasts are generated here; medium_lst and
# medium_prob are not used by this cell.
_player = []
_dt = []
_term = []
_coin = []
_time_frame = []
_err = []

# One reference timestamp for the whole simulation. Calling datetime.now()
# for every row made the timestamps depend on how fast the loop ran.
base_dt = datetime.now()

# Short-term slot limit and the derived lower bound on forecasts per player
# per day; both are loop-invariant.
max_border = df_bets.iloc[0, 0]
min_bets = int(0.1 * max_border)

# Activity coefficient selected by player number modulo 3; it sets the
# (exclusive) upper bound on forecasts per day as a share of max_border.
coef_by_group = (0.8, 0.3, 0.15)

for day in range(0, 180):
    # Re-seed per simulated day so each day's draws are reproducible.
    np.random.seed(day)

    # The player pool grows by one player per day.
    for plr in range(n + day):
        coef = coef_by_group[plr % 3]

        # The order of the random draws below (seconds, coin, time frame,
        # error) is kept unchanged so the seeded sequence is consumed the
        # same way as before.
        for _bet in range(np.random.randint(min_bets, int(coef * max_border))):
            offset_seconds = int(np.random.randint(180))
            _dt.append(base_dt + timedelta(days=day, seconds=offset_seconds))
            _player.append('player_' + str(plr))
            _term.append('short')
            _coin.append(np.random.choice(crt_curr, p=curr_prob))
            _time_frame.append(np.random.choice(short_lst, p=short_prob))
            _err.append(np.random.random())


data = pd.DataFrame({'player': _player,
                     'datetime': _dt,
                     'term': _term, 'coin': _coin,
                     'time_frame': _time_frame, 'err': _err,})

In [None]:
# First 25 forecasts of player_0 in chronological order, re-indexed from 0.
(
    data[data['player'] == 'player_0']
    .sort_values(by='datetime')
    .reset_index(drop=True)
    .head(25)
)

Unnamed: 0,player,datetime,term,coin,time_frame,err
0,player_0,2024-01-01 19:31:19.833013,short,ETH,1,0.297535
1,player_0,2024-01-01 19:31:19.833378,short,ETH,1,0.957155
2,player_0,2024-01-01 19:31:19.833632,short,ETH,4,0.105908
3,player_0,2024-01-01 19:31:27.833886,short,TON,1,0.359508
4,player_0,2024-01-01 19:31:39.833551,short,BNB,1,0.639921
5,player_0,2024-01-01 19:31:44.833807,short,BTC,1,0.149675
6,player_0,2024-01-01 19:31:57.827685,short,BNB,1,0.544883
7,player_0,2024-01-01 19:32:18.834213,short,DOT,1,0.635059
8,player_0,2024-01-01 19:32:29.833467,short,XRP,24,0.799159
9,player_0,2024-01-01 19:32:38.833163,short,BTC,1,0.812169


In [None]:
# Total number of forecasts (per-player counts summed) divided by 6 and by 102.
# NOTE(review): the divisors 6 and 102 are unexplained magic numbers — confirm
# what they represent and move them to named constants.
data.groupby('player')['coin'].count().sum() / 6 / 102

219.33660130718954

In [None]:
# Size of the simulated dataset as (rows, columns).
data.shape

(134234, 6)

In [None]:
# Base scoring parameters.
# max_points mirrors min_points around average_points, so the linear ramp
# from min_points to max_points is centred on average_points.
average_points, min_points = 2, 1
max_points = 2 * average_points - min_points
# Share of rows in each coin / time_frame group that is awarded points.
target_perc = 0.1

# II. Механика расчетов

In [None]:
# Number of forecasts per coin (rows) and time_frame (columns).
(
    data.groupby(['time_frame', 'coin'])['player']
    .count()
    .unstack('time_frame')
)

time_frame,1,4,12,24
coin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BNB,9689,2358,806,436
BTC,39100,9782,3232,1689
DOT,4875,1247,377,203
ETH,29430,7327,2410,1256
SOL,4899,1238,418,225
TON,4863,1250,393,171
XRP,4786,1208,364,202


In [None]:
# Target for every coin / time_frame pair: the coin's share of all forecasts
# within a time_frame column, multiplied by that time_frame's point cap and
# rounded to whole points.
trg = (
    data.groupby(['time_frame', 'coin'])['player']
    .count()
    .unstack(0)
    .pipe(lambda counts: counts / counts.sum())
    .mul(MAX_POINT_BY_COIN_DF, axis=1)
    .round()
)

trg

time_frame,1,4,12,24
BNB,198.0,580.0,1008.0,1460.0
BTC,801.0,2404.0,4040.0,5654.0
DOT,100.0,307.0,471.0,680.0
ETH,603.0,1801.0,3012.0,4205.0
SOL,100.0,304.0,522.0,753.0
TON,100.0,307.0,491.0,572.0
XRP,98.0,297.0,455.0,676.0


In [None]:
# Actual awarded headcount for every coin / time_frame pair:
# target_perc of the forecasts in the pair, rounded.
fact = (
    data.groupby(['time_frame', 'coin'])['player']
    .count()
    .unstack(0)
    .mul(target_perc)
    .round()
)

fact

time_frame,1,4,12,24
coin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BNB,969.0,236.0,81.0,44.0
BTC,3910.0,978.0,323.0,169.0
DOT,488.0,125.0,38.0,20.0
ETH,2943.0,733.0,241.0,126.0
SOL,490.0,124.0,42.0,22.0
TON,486.0,125.0,39.0,17.0
XRP,479.0,121.0,36.0,20.0


In [None]:
# Award the minimum number of points

def calc_min_points(group, share=None, points_value=None):
  """Give a flat award to the top share of a group ranked by descending `err`.

  Parameters
  ----------
  group : pandas.DataFrame
      Rows of one coin; must contain an `err` column. It is not modified.
  share : float, optional
      Fraction of rows to award. Defaults to the notebook-level `target_perc`.
  points_value : number, optional
      Points given to each awarded row. Defaults to the notebook-level
      `min_points`.

  Returns
  -------
  pandas.Series
      Series named 'points', indexed by the group's own row labels (in ranked
      order): `points_value` for the awarded rows and 0 for the rest.

  NOTE(review): rows are ranked by descending `err`, so the largest `err`
  values are the ones rewarded — confirm this is the intended direction.
  """
  share = target_perc if share is None else share
  points_value = min_points if points_value is None else points_value

  # Sort into a new frame: mutating the object handed over by groupby.apply
  # (the previous in-place sort) is not supported by pandas.
  ranked = group.sort_values('err', ascending=False)

  # Round the headcount up, but never beyond the group size.
  awarded_size = min(int(np.ceil(len(ranked) * share)), len(ranked))

  values = [points_value] * awarded_size + [0] * (len(ranked) - awarded_size)
  return pd.Series(values, index=ranked.index, name='points').round()


# Pairs whose awarded headcount in `fact` is below 1.
mask = fact.unstack() < 1

# time_frame -> list of coins selected by the mask
grouped = fact.unstack()[mask].reset_index().groupby('time_frame')['coin'].agg(list)

# Float NaN marks "not scored yet". A pd.NA placeholder turned the column
# into object dtype, which leaks into every later aggregation.
data['points'] = np.nan

# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
for time_frame, coins in grouped.items():
    print(time_frame, coins)
    pair_fltr = (data['time_frame'] == time_frame) & (data['coin'].isin(coins))

    # The returned Series keeps the original row labels, so the assignment
    # aligns on index regardless of the order inside each group.
    data.loc[pair_fltr, 'points'] = (
        data.loc[pair_fltr]
        .groupby('coin', group_keys=False)
        .apply(calc_min_points)
    )

In [None]:
# Rows that already have points, highest points (then longest time_frame) first.
data[data['points'].notna()].sort_values(by=['points', 'time_frame'], ascending=False)

Unnamed: 0,player,datetime,term,coin,time_frame,err,points


In [None]:
# Award pairs where fact < target

def calc_cum_points2(group, share=None, low=None, high=None):
  """Award a linear ramp of points to the top share of a group ranked by `err`.

  Parameters
  ----------
  group : pandas.DataFrame
      Rows of one coin; must contain an `err` column. It is not modified.
  share : float, optional
      Fraction of rows to award. Defaults to the notebook-level `target_perc`.
  low, high : number, optional
      First and last value of the ramp. Default to the notebook-level
      `min_points` and `max_points`.

  Returns
  -------
  pandas.Series
      Series named 'points', indexed by the group's own row labels (in ranked
      order), rounded: the awarded rows get `low` ... `high` in equal steps,
      all other rows get 0.

  NOTE(review): rows are ranked by descending `err` and the ramp rises along
  that ranking, so the largest `err` receives `low` and the last awarded row
  receives `high` — confirm the intended direction of both.
  """
  share = target_perc if share is None else share
  low = min_points if low is None else low
  high = max_points if high is None else high

  # Sort into a new frame instead of mutating the group passed by groupby.apply.
  ranked = group.sort_values('err', ascending=False)

  # Round the headcount up, but never beyond the group size.
  awarded_size = min(int(np.ceil(len(ranked) * share)), len(ranked))

  # The step is derived from the actual number of awarded rows. Using the
  # un-rounded len * share - 1 made the last awarded row overshoot `high`
  # (e.g. 25 rows at 10% produced 1, 2, 4 after rounding for a 1..3 ramp).
  step = (high - low) / max(awarded_size - 1, 1)

  values = [low + step * i for i in range(awarded_size)] + \
           [0] * (len(ranked) - awarded_size)

  return pd.Series(values, index=ranked.index, name='points').round()


# Pairs whose awarded headcount (`fact`) is below the point target (`trg`).
mask = fact.unstack() < trg.unstack()

# time_frame -> list of coins selected by the mask
grouped = fact.unstack()[mask].reset_index().groupby('time_frame')['coin'].agg(list)

# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
for time_frame, coins in grouped.items():
    print(time_frame, coins)
    tf_fltr = data['time_frame'] == time_frame
    c_fltr = data['coin'].isin(coins)
    # Only rows that no earlier scoring step has filled in.
    na_fltr = data['points'].isna()
    todo_fltr = tf_fltr & c_fltr & na_fltr

    # The returned Series keeps the original row labels, so the assignment
    # aligns on index.
    data.loc[todo_fltr, 'points'] = (
        data.loc[todo_fltr]
        .groupby('coin', group_keys=False)
        .apply(calc_cum_points2)
    )

4 ['BNB', 'BTC', 'DOT', 'ETH', 'SOL', 'TON', 'XRP']
12 ['BNB', 'BTC', 'DOT', 'ETH', 'SOL', 'TON', 'XRP']
24 ['BNB', 'BTC', 'DOT', 'ETH', 'SOL', 'TON', 'XRP']


In [None]:
# First 1000 scored rows, highest points (then longest time_frame) first.
(
    data[data['points'].notna()]
    .sort_values(by=['points', 'time_frame'], ascending=False)
    .head(1000)
)

Unnamed: 0,player,datetime,term,coin,time_frame,err,points
549,player_9,2023-12-25 20:35:32.401754,short,BNB,24,0.914375,3.0
1203,player_16,2023-12-29 20:33:12.494260,short,TON,24,0.897325,3.0
1396,player_18,2023-12-30 20:35:56.505581,short,TON,24,0.904479,3.0
2174,player_10,2024-01-04 20:34:07.552778,short,BTC,24,0.900719,3.0
3504,player_12,2024-01-10 20:35:41.633718,short,XRP,24,0.918345,3.0
...,...,...,...,...,...,...,...
41462,player_21,2024-03-24 20:35:20.352825,short,BTC,24,0.934608,2.0
42329,player_36,2024-03-25 20:34:42.425820,short,BNB,24,0.970022,2.0
42547,player_60,2024-03-25 20:34:09.448861,short,XRP,24,0.945324,2.0
42890,player_0,2024-03-26 20:33:05.486294,short,BTC,24,0.929152,2.0


In [None]:
# Award pairs where fact > target.
# Same approach as above, except that the base parameters are set again first.

average_points, min_points = 2, 1
max_points = 2 * average_points - min_points


def calc_cum_points3(group, total_points, share=None, low=None, high=None):
  """Award points to the top share of a group, scaled by a point budget.

  Parameters
  ----------
  group : pandas.DataFrame
      Rows of one coin; must contain an `err` column. It is not modified.
  total_points : number
      Point budget used to derive the starting value of the ramp; it also
      caps the number of awarded rows.
  share : float, optional
      Fraction of rows to award. Defaults to the notebook-level `target_perc`.
  low, high : number, optional
      Only their difference is used, as the total rise of the ramp. Default
      to the notebook-level `min_points` and `max_points`.

  Returns
  -------
  pandas.Series
      Series named 'points', indexed by the group's own row labels (in ranked
      order): the awarded rows get `base + step * rank`, the rest get 0.
      Values are not rounded.

  NOTE(review): the ramp starts at the per-row budget share, so the points of
  a group add up to more than `total_points` — confirm whether that share is
  meant to be the minimum or the average award.
  NOTE(review): rows are ranked by descending `err`, matching the other
  scoring helpers — confirm the intended direction.
  """
  share = target_perc if share is None else share
  low = min_points if low is None else low
  high = max_points if high is None else high

  # Rank by err first. Without this the award went to whichever rows came
  # first in the frame, i.e. to the earliest generated forecasts.
  ranked = group.sort_values('err', ascending=False)

  awarded_size = int(np.ceil(len(ranked) * share))
  if awarded_size <= 0:
    # Nothing to award; this also avoids dividing by zero below.
    return pd.Series(0, index=ranked.index, name='points')

  # Budget per awarded row, floored at 1. This is the algebraic form of
  # average_points / (awarded_size / (total_points / average_points)).
  base = max(total_points / awarded_size, 1)

  # Cap the awarded headcount by the budget and by the group size.
  awarded_size = max(0, min(awarded_size, int(total_points), len(ranked)))

  # Step derived from the actual number of awarded rows so the ramp rises by
  # exactly (high - low) from the first awarded row to the last.
  step = (high - low) / max(awarded_size - 1, 1)

  values = [base + step * i for i in range(awarded_size)] + \
           [0] * (len(ranked) - awarded_size)

  return pd.Series(values, index=ranked.index, name='points')


# Pairs whose awarded headcount (10% of the group) exceeds the point target.
# NOTE: an earlier comment claimed there are no such groups, but the recorded
# run of this cell printed all seven coins for time_frame 1.
mask = fact.unstack() > trg.unstack()

# time_frame -> list of coins selected by the mask
grouped = fact.unstack()[mask].reset_index().groupby('time_frame')['coin'].agg(list)

# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
for time_frame, coins in grouped.items():
    print(time_frame, coins)
    tf_fltr = data['time_frame'] == time_frame
    c_fltr = data['coin'].isin(coins)
    # Only rows that no earlier scoring step has filled in.
    na_fltr = data['points'].isna()
    todo_fltr = tf_fltr & c_fltr & na_fltr

    # Point cap of this time_frame, read from the first row of the cap table.
    # NOTE(review): every coin gets the full time_frame cap here rather than
    # its own value from `trg` — confirm this is intended.
    total_points = MAX_POINT_BY_COIN_DF[time_frame].values[0]

    data.loc[todo_fltr, 'points'] = (
        data.loc[todo_fltr]
        .groupby('coin', group_keys=False)
        .apply(calc_cum_points3, total_points=total_points)
    )

1 ['BNB', 'BTC', 'DOT', 'ETH', 'SOL', 'TON', 'XRP']


# III. Результаты

In [None]:
# Distribution of awarded points, including the upper-tail percentiles.
(
    data['points']
    .astype('float64')
    .describe(percentiles=[0.99, 0.95, 0.9])
)

count    134234.000000
mean          0.198493
std           0.786000
min           0.000000
50%           0.000000
90%           0.000000
95%           1.861418
99%           4.732904
max           6.177040
Name: points, dtype: float64

In [None]:
# The 25 players with the highest total points.
(
    data.groupby('player')['points']
    .sum()
    .sort_values(ascending=False)
    .iloc[:25]
)

player
player_9     1290.368912
player_15     1246.39896
player_18     1130.62673
player_3     1130.593247
player_12     1127.00422
player_6     1113.247044
player_0      1090.78179
player_21     966.432853
player_24     830.881508
player_27     806.242687
player_30     658.612701
player_36     619.442164
player_33     608.774332
player_39      580.03994
player_42     486.431567
player_10     485.993064
player_7      478.390812
player_4      442.829196
player_1      415.509022
player_16     413.168105
player_22     410.305352
player_19     405.098464
player_13     404.937988
player_25     354.988803
player_45     335.901361
Name: points, dtype: object

In [None]:
# The 25 players with the lowest total points (still ordered high to low).
(
    data.groupby('player')['points']
    .sum()
    .sort_values(ascending=False)
    .iloc[-25:]
)

player
player_131    4.0
player_137    4.0
player_157    3.0
player_176    3.0
player_140    3.0
player_143    3.0
player_169    3.0
player_184    3.0
player_175    3.0
player_181    2.0
player_179    2.0
player_125    1.0
player_149    1.0
player_152    1.0
player_134    1.0
player_161    0.0
player_167    0.0
player_173    0.0
player_182    0.0
player_178    0.0
player_185    0.0
player_186    0.0
player_187    0.0
player_188    0.0
player_183    0.0
Name: points, dtype: object

In [None]:
# Last 20 rows when ordered by points from high to low.
(
    data
    .sort_values(by='points', ascending=False)
    .tail(20)
)

Unnamed: 0,player,datetime,term,coin,time_frame,err,points
50121,player_63,2024-04-03 20:35:02.264293,short,ETH,1,0.621719,0.0
50120,player_63,2024-04-03 20:35:59.264216,short,ETH,1,0.730592,0.0
50119,player_63,2024-04-03 20:33:45.264140,short,BTC,1,0.764223,0.0
50118,player_63,2024-04-03 20:35:36.264063,short,BTC,1,0.823171,0.0
50117,player_63,2024-04-03 20:35:18.263965,short,BTC,1,0.835211,0.0
50115,player_63,2024-04-03 20:33:36.263811,short,XRP,1,0.006249,0.0
50102,player_60,2024-04-03 20:35:07.262508,short,BTC,1,0.573549,0.0
50114,player_63,2024-04-03 20:33:25.263734,short,ETH,1,0.352713,0.0
50113,player_63,2024-04-03 20:33:27.263656,short,BTC,1,0.063132,0.0
50112,player_63,2024-04-03 20:34:58.263577,short,BTC,1,0.827936,0.0
