In [1]:
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
from itertools import combinations
import json
import requests
import math
from typing import Optional, List, Dict

from pydantic import BaseModel
from typing import Mapping
import numpy as np
import polars as pl

from shared.utils import formalize_data, generate_id
from shared.random_rules import RandomRules

In [2]:
# Smoke check for formalize_data: per the recorded output below, a space-separated
# draw string comes back as (zero-padded sorted id string, sorted tuple of ints).
a = formalize_data("09 12 27 33 24 30")
print(a)

('091224273033', (9, 12, 24, 27, 30, 33))


In [3]:
# Rule set used by every later cell (rr.unijump, rr.is_rare_combination,
# rr.calculate_left_scores / rr.calculate_right_scores).
# TOTAL_NUMBERS and CHOOSE also drive the combination space built later on:
# combinations(range(1, TOTAL_NUMBERS + 1), CHOOSE).
rr = RandomRules(
  TOTAL_NUMBERS = 35,
  CHOOSE = 5,
  MAX_PRIMES = 3,
  N_VALUES=["1", "2", "3"],
  # Value ranges of the balls
  # NOTE(review): presumably one min/max bound per ball position (b1..b5) — confirm in RandomRules.
  MIN_B_VALUES = [1, 2, 3, 4, 5],
  MAX_B_VALUES = [31, 32, 33, 34, 35]
)



In [4]:
# Load the unijump allow-list. It is kept as a set because the generation loop
# does one membership test per candidate combination.
# The encoding is given explicitly so the read does not depend on the platform's
# default locale encoding.
with open('./unijump_gn.json', 'r', encoding='utf-8') as file:
  JSON_UNIJUMP = set(json.load(file))

# Preview the five smallest values. Sorting first makes the preview deterministic,
# whereas slicing list(set) depends on the set's arbitrary iteration order.
print(sorted(JSON_UNIJUMP)[0:5])

[0, 1, 2, 3, 4]


In [5]:
# Build one row per candidate combination that survives all three filters,
# then materialise the rows as a polars DataFrame (dfRows).

# Reason codes returned by rr.is_rare_combination that are kept.
# NOTE(review): the meaning of codes 0 and 16 is defined inside RandomRules and is
# not visible here; in the recorded output, 0 goes with is_rare=False and 16 with True.
ALLOWED_REASONS = (0, 16)
# A combination is dropped when its left/right scores share more than this many values.
MAX_SHARED_VALUES = 2

# Every CHOOSE-sized combination of the balls 1..TOTAL_NUMBERS, in lexicographic order.
# (For a quick smoke run, temporarily shrink the range, e.g. range(1, 8).)
bolillas = range(1, rr.TOTAL_NUMBERS + 1)
combos = combinations(bolillas, rr.CHOOSE)

lst = []
for bolillas_int in combos:
  # Filter 1: the combination's unijump value must be in the allow-list.
  int_unijump = rr.unijump(bolillas_int)
  if int_unijump not in JSON_UNIJUMP:
    continue

  # Named combo_id (not `id`) so the builtin id() is not shadowed in the notebook namespace.
  combo_id = generate_id(bolillas_int)

  # Filter 2: keep only the accepted rarity reasons.
  is_rare, reason = rr.is_rare_combination(bolillas_int, comboid=combo_id)
  if reason not in ALLOWED_REASONS:
    continue

  left_scores = [round(score, 4) for score in rr.calculate_left_scores(bolillas_int)]
  right_scores = [round(score, 4) for score in rr.calculate_right_scores(bolillas_int)]

  # Filter 3: values present both in the left scores and in the right scores from
  # position 1 onward. The scores are already rounded to 4 decimals, so the
  # intersection needs no further rounding.
  # NOTE(review): with five scores the [1:6] slice yields only four values; confirm
  # that skipping the first right score is intended and not a leftover from a 6-ball variant.
  shared_values = list(set(left_scores[0:5]) & set(right_scores[1:6]))
  if len(shared_values) > MAX_SHARED_VALUES:
    continue

  # Row score: sum of the per-ball (left + right) scores, each rounded to 4 decimals
  # before summing, then rounded again.
  lr_scores = [round(left + right, 4) for left, right in zip(left_scores, right_scores)]
  row_score = round(sum(lr_scores), 4)

  # Column order must match the `schema` list passed to pl.DataFrame below.
  lst.append([
    combo_id,
    bolillas_int,
    *bolillas_int,
    is_rare,
    reason,
    *left_scores,
    *right_scores,
    shared_values,
    len(shared_values),
    row_score,
    int_unijump,
  ])


dfRows = pl.DataFrame(
  lst,
  schema=[
    'id', 'combo', 'b1', 'b2', 'b3', 'b4', 'b5',
    'is_rare_combination', 'reason',
    "score_left_b1", "score_left_b2", "score_left_b3", "score_left_b4", "score_left_b5",
    "score_r8_b1", "score_r8_b2", "score_r8_b3", "score_r8_b4", "score_r8_b5",
    "shared_values", "shared_values_count",
    "score_row",
    "int_unijump"
  ],
  # Compact integer dtypes for the ball columns. The former 'b6' override was
  # removed because the schema has no such column.
  schema_overrides={
    'combo': pl.Array(pl.UInt8, 5),
    'b1': pl.UInt8,
    'b2': pl.UInt8,
    'b3': pl.UInt8,
    'b4': pl.UInt8,
    'b5': pl.UInt8,
    'shared_values_count': pl.UInt8
  },
  orient='row'
)

display(dfRows.head(10))

id,combo,b1,b2,b3,b4,b5,is_rare_combination,reason,score_left_b1,score_left_b2,score_left_b3,score_left_b4,score_left_b5,score_r8_b1,score_r8_b2,score_r8_b3,score_r8_b4,score_r8_b5,shared_values,shared_values_count,score_row,int_unijump
str,"array[u8, 5]",u8,u8,u8,u8,u8,bool,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,list[f64],u8,f64,i64
"""0102040507""","[1, 2, … 7]",1,2,4,5,7,True,16,1.0,0.0323,0.0323,0.0333,0.0333,1.0,0.5,0.5,0.3333,0.0333,[0.0333],1,3.4978,4
"""0102040508""","[1, 2, … 8]",1,2,4,5,8,True,16,1.0,0.0323,0.0323,0.0333,0.0333,1.0,0.5,0.5,0.25,0.0333,[0.0333],1,3.4145,5
"""0102040509""","[1, 2, … 9]",1,2,4,5,9,True,16,1.0,0.0323,0.0323,0.0333,0.0333,1.0,0.5,0.5,0.2,0.0333,[0.0333],1,3.3645,6
"""0102040510""","[1, 2, … 10]",1,2,4,5,10,True,16,1.0,0.0323,0.0323,0.0333,0.0333,1.0,0.5,0.5,0.1667,0.0333,[0.0333],1,3.3312,7
"""0102040511""","[1, 2, … 11]",1,2,4,5,11,True,16,1.0,0.0323,0.0323,0.0333,0.0333,1.0,0.5,0.5,0.1429,0.0333,[0.0333],1,3.3074,8
"""0102040512""","[1, 2, … 12]",1,2,4,5,12,True,16,1.0,0.0323,0.0323,0.0333,0.0333,1.0,0.5,0.5,0.125,0.0333,[0.0333],1,3.2895,9
"""0102040513""","[1, 2, … 13]",1,2,4,5,13,False,0,1.0,0.0323,0.0323,0.0333,0.0333,1.0,0.5,0.5,0.1111,0.0333,[0.0333],1,3.2756,10
"""0102040514""","[1, 2, … 14]",1,2,4,5,14,True,16,1.0,0.0323,0.0323,0.0333,0.0333,1.0,0.5,0.5,0.1,0.0333,[0.0333],1,3.2645,11
"""0102040515""","[1, 2, … 15]",1,2,4,5,15,True,16,1.0,0.0323,0.0323,0.0333,0.0333,1.0,0.5,0.5,0.0909,0.0333,[0.0333],1,3.2554,12
"""0102040516""","[1, 2, … 16]",1,2,4,5,16,True,16,1.0,0.0323,0.0323,0.0333,0.0333,1.0,0.5,0.5,0.0833,0.0333,[0.0333],1,3.2478,13


In [6]:
# Number of combinations that survived all filters (row count of the frame).
dfRows.height

209702

In [8]:
# Persist the filtered combinations to a Parquet file in the working directory
# (relative path, so the target depends on where the kernel was started).
dfRows.write_parquet("./saved_bolillas_gn.parquet")