In [2]:
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
from itertools import combinations
import json
import requests
import math
from typing import Optional, List, Dict

from pydantic import BaseModel
from typing import Mapping
import numpy as np
import polars as pl

In [3]:
def formalize_data(bolillas_tuple: tuple[int, ...]) -> str:
  """Build a fixed-width string id from a sequence of ball numbers.

  Every number is written in decimal, left-padded with zeros to at least two
  characters, and the pieces are concatenated in the order given.

  Args:
    bolillas_tuple: Ball numbers, in the order they should appear in the id.

  Returns:
    The concatenated id, e.g. ``(1, 2, 3, 4, 5, 6)`` -> ``"010203040506"``.
    An empty input yields an empty string.
  """
  # Named `combo_id` (not `id`) so the builtin `id()` is never shadowed.
  combo_id = "".join(str(b).zfill(2) for b in bolillas_tuple)
  return combo_id

# Quick sanity check of the id format; prints 010203040506.
data = (1, 2, 3, 4, 5, 6)
a = formalize_data(data)
print(a)

010203040506


In [4]:
from enum import Enum

class RARE_REASONS(Enum):
  """Reason codes explaining why a combination is flagged as rare.

  Every value is a distinct power of two, so several reasons can be added
  into a single integer and still be told apart.
  """
  MAX_PRIMES = 1 << 0    # the combination holds more than three primes
  HAVENT_EVENS = 1 << 1  # the combination holds no even number
  HAVENT_ODDS = 1 << 2   # the combination holds no odd number

# Precomputed table of the prime numbers between 1 and 50.
# NOTE: this table is hand-written; extend it if TOTAL_NUMBERS is ever raised.
PRIME_NUMBERS = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47}

# Configuration: size of the number pool and how many numbers form a combination.
TOTAL_NUMBERS = 50
CHOOSE = 6

# Value ranges of balls b1..b6, derived from the configuration so they cannot
# drift apart from it. With the values above these are [1, 2, 3, 4, 5, 6] and
# [45, 46, 47, 48, 49, 50].
MIN_B_VALUES = list(range(1, CHOOSE + 1))
MAX_B_VALUES = list(range(TOTAL_NUMBERS - CHOOSE + 1, TOTAL_NUMBERS + 1))

# Check whether a number is prime.
# Uses a precomputed table for efficiency.
def is_prime(num):
  """Return True when ``num`` is a member of the ``PRIME_NUMBERS`` table.

  This is a table lookup, not a primality test: any value that is not listed
  in ``PRIME_NUMBERS`` is reported as not prime.
  """
  return num in PRIME_NUMBERS

def is_rare_combination(combo) -> tuple[bool, int]:
  """Decide whether a combination is "rare" and report which rules it breaks.

  Rules checked:
    1. The combination holds more than three prime numbers.
    2. The combination holds no even number.
    3. The combination holds no odd number.

  Returns:
    ``(is_rare, reasons)``, where ``reasons`` is the sum of the
    ``RARE_REASONS`` values of every broken rule (0 when none is broken) and
    ``is_rare`` is ``reasons > 0``.
  """
  broken_rules = []

  # Rule 1: at most three primes (is_prime yields a bool, so the sum is a count).
  if sum(map(is_prime, combo)) > 3:
    broken_rules.append(RARE_REASONS.MAX_PRIMES)

  # Rule 2: at least one even number.
  if all(n % 2 != 0 for n in combo):
    broken_rules.append(RARE_REASONS.HAVENT_EVENS)

  # Rule 3: at least one odd number.
  if all(n % 2 != 1 for n in combo):
    broken_rules.append(RARE_REASONS.HAVENT_ODDS)

  reasons = sum(rule.value for rule in broken_rules)
  return reasons > 0, reasons


def calculate_left_scores(combo, max_values=None):
  """Score every position of a combination, reading it from left to right.

  The score of a position is ``1 / n``, where ``n`` is a count of candidate
  values:
    * first position: ``combo[1] - 1``, i.e. the integers below the second
      value;
    * position ``i > 0``: ``max_values[i] - combo[i - 1]``, i.e. the integers
      above the previous value, up to that position's upper bound.

  Args:
    combo: Ball values, expected to be strictly increasing and to hold at
      least two entries.
    max_values: Upper bound per position. Defaults to the module-level
      ``MAX_B_VALUES``.

  Returns:
    A list of floats, one score per position (empty for an empty combo).

  Raises:
    IndexError: If ``combo`` has exactly one value, or more values than
      ``max_values``.
    ZeroDivisionError: If a candidate count is zero (e.g. ``combo[1] == 1``).
  """
  upper_bounds = MAX_B_VALUES if max_values is None else max_values

  left_scores = []
  for i in range(len(combo)):
    if i == 0:
      possible_values = combo[1] - 1
    else:
      possible_values = upper_bounds[i] - combo[i - 1]
    left_scores.append(1 / possible_values)

  return left_scores

def calculate_right_scores(combo, min_values=None, max_values=None):
  """Score every position of a combination, reading it from right to left.

  The score of a position is ``1 / n``, where ``n`` is a count of candidate
  values:
    * last position: ``max_values[i] - combo[i - 1]``, i.e. the integers above
      the previous value, up to the last position's upper bound;
    * any other position ``i``: ``combo[i + 1] - min_values[i]``, i.e. the
      integers from that position's lower bound up to just below the next
      value.

  The last position is taken from ``len(combo)`` rather than from the global
  ``CHOOSE``, so the function also works for combinations of another size
  when matching bounds are supplied.

  Args:
    combo: Ball values, expected to be strictly increasing and to hold at
      least two entries.
    min_values: Lower bound per position. Defaults to ``MIN_B_VALUES``.
    max_values: Upper bound per position. Defaults to ``MAX_B_VALUES``.

  Returns:
    A list of floats, one score per position (empty for an empty combo).

  Raises:
    IndexError: If ``combo`` has more values than the bounds provide.
    ZeroDivisionError: If a candidate count is zero.
  """
  lower_bounds = MIN_B_VALUES if min_values is None else min_values
  upper_bounds = MAX_B_VALUES if max_values is None else max_values
  last = len(combo) - 1

  right_scores = []
  for i in range(len(combo)):
    if i == last:
      possible_values = upper_bounds[i] - combo[i - 1]
    else:
      possible_values = combo[i + 1] - lower_bounds[i]
    right_scores.append(1 / possible_values)

  return right_scores


def jumps_map(combo: tuple[int, ...]) -> str:
  """Encode the gaps between consecutive values of a combination.

  Each gap ``combo[i + 1] - combo[i]`` is zero-padded to at least two
  characters and the gaps are joined with ``_`` in their original order
  (they are not sorted).

  The gaps are taken over the combination's own length instead of the global
  ``CHOOSE``, so a combination of any size is encoded completely.

  Args:
    combo: Ball values, in the order they should be compared.

  Returns:
    The encoded gaps, e.g. ``(10, 11, 12, 13, 15, 16)`` ->
    ``"01_01_01_02_01"``. Fewer than two values yield an empty string.
  """
  differences = [nxt - cur for cur, nxt in zip(combo, combo[1:])]
  return "_".join(str(d).zfill(2) for d in differences)



In [5]:
# Hand-written example lists of six scores each, written with 4 decimals.
# NOTE(review): presumably sample left/right inputs for check_shared_values;
# nothing in the visible cells reads them - confirm before removing.
l = [0.0588, 0.0303, 0.0345, 0.0455, 0.0526, 0.0588]
r = [0.0588, 0.0417, 0.0370, 0.0345, 0.0333, 0.0588]

def check_shared_values (left_arr: List[float], right_arr: List[float]):
  """Collect the scores that occur in both the left and the right score list.

  Comparisons performed (exact float equality, so round the inputs first):
    * first left value against last right value;
    * last left value against first right value;
    * every left value against every interior right value (all but the first
      and the last).
  The same-position end pairs (first/first and last/last) are never compared.

  The ends are addressed relative to the lists themselves rather than through
  the global ``CHOOSE``, matching the ``len()``-based loops.

  Args:
    left_arr: Left-to-right scores.
    right_arr: Right-to-left scores.

  Returns:
    The matching values, one entry per matching pair (so a value may appear
    more than once). Empty when either list is empty.
  """
  if len(left_arr) == 0 or len(right_arr) == 0:
    return []

  shared_values = []
  if left_arr[0] == right_arr[-1]:
    shared_values.append(left_arr[0])
  if left_arr[-1] == right_arr[0]:
    shared_values.append(left_arr[-1])

  interior_right = right_arr[1:-1]
  for left_value in left_arr:
    for right_value in interior_right:
      if left_value == right_value:
        shared_values.append(left_value)
  return shared_values

# start the comparison from here

In [6]:
# Load the jump patterns that the generation loop skips. They are kept in a
# set for fast membership tests. The encoding is given explicitly so the read
# does not depend on the platform's default locale.
with open('./jumps_map.json', 'r', encoding='utf-8') as file:
  JSON_JUMPS_MAP = set(json.load(file))

# Peek at a few entries; a set has no defined order, so this sample may differ
# from one run to the next.
print(list(JSON_JUMPS_MAP)[20:25])

['04_02_16_02_13', '05_05_10_11_01', '05_04_13_01_06', '02_15_04_13_05', '12_05_04_04_11']


In [7]:
bolillas = range(1, TOTAL_NUMBERS + 1)
# bolillas = range(1, 8)  # tiny pool for quick experiments
combos = combinations(bolillas, CHOOSE)

# Reason codes that are still accepted: no rule broken, or only the
# prime-count rule broken.
ACCEPTED_REASONS = (0, RARE_REASONS.MAX_PRIMES.value)
# A combination is dropped when it has more shared scores than this.
MAX_SHARED_VALUES = 2


def _build_row(bolillas_int: tuple[int, ...]) -> Optional[list]:
  """Build the output row for one combination, or None when it is filtered out.

  A combination is rejected when:
    * its jump pattern is listed in ``JSON_JUMPS_MAP``;
    * its rarity reason code is not in ``ACCEPTED_REASONS``;
    * more than ``MAX_SHARED_VALUES`` distinct scores are shared between the
      left scores (last one excluded) and the right scores (first one
      excluded).

  Keeping the per-combination temporaries local to this function stops them
  from leaking into the notebook namespace; the original loop assigned a
  global named ``id``, which replaced the builtin ``id()`` for the rest of
  the session.

  Returns:
    The row values in column order: id, combo, b1..b6, is_rare, reason,
    six left scores, six right scores, shared values, their count, the row
    score and the jump pattern.
  """
  str_jumps_map = jumps_map(bolillas_int)
  if str_jumps_map in JSON_JUMPS_MAP:
    return None

  is_rare, reason = is_rare_combination(bolillas_int)
  if reason not in ACCEPTED_REASONS:
    return None

  left_scores = [round(s, 4) for s in calculate_left_scores(bolillas_int)]
  right_scores = [round(s, 4) for s in calculate_right_scores(bolillas_int)]

  # Both lists are already rounded, so the shared values need no re-rounding.
  shared_values = list(set(left_scores[:-1]) & set(right_scores[1:]))
  if len(shared_values) > MAX_SHARED_VALUES:
    return None

  lr_scores = [round(left + right, 4) for left, right in zip(left_scores, right_scores)]
  row_score = round(sum(lr_scores), 4)

  return [
    formalize_data(bolillas_int),
    bolillas_int,
    *bolillas_int,
    is_rare,
    reason,
    *left_scores,
    *right_scores,
    # *lr_scores,  # per-position left+right scores are not stored
    shared_values,
    len(shared_values),
    row_score,
    str_jumps_map,
  ]


# One list per surviving combination, in generation order.
lst = [row for row in map(_build_row, combos) if row is not None]



# Column layout of one row, in the same order the values were placed in each
# entry of `lst`.
id_columns = ['id', 'combo', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6']
flag_columns = ['is_rare_combination', 'reason']
left_score_columns = [
  "score_left_b1", "score_left_b2", "score_left_b3",
  "score_left_b4", "score_left_b5", "score_left_b6",
]
right_score_columns = [
  "score_r8_b1", "score_r8_b2", "score_r8_b3",
  "score_r8_b4", "score_r8_b5", "score_r8_b6",
]
# The per-position left+right scores ("score_lr_b1" .. "score_lr_b6") are not stored.
summary_columns = ["shared_values", "shared_values_count", "score_row", "jumps_map"]

# Explicit dtypes: ball values and the shared-value count are stored as UInt8,
# and the whole combination as a fixed-size UInt8 array.
narrow_dtypes = {
  name: pl.UInt8
  for name in ('b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'shared_values_count')
}
narrow_dtypes['combo'] = pl.Array(pl.UInt8, 6)

dfRows = pl.DataFrame(
  lst,
  schema=(
    id_columns
    + flag_columns
    + left_score_columns
    + right_score_columns
    + summary_columns
  ),
  schema_overrides=narrow_dtypes,
  orient='row',
)

display(dfRows.head(10))

id,combo,b1,b2,b3,b4,b5,b6,is_rare_combination,reason,score_left_b1,score_left_b2,score_left_b3,score_left_b4,score_left_b5,score_left_b6,score_r8_b1,score_r8_b2,score_r8_b3,score_r8_b4,score_r8_b5,score_r8_b6,shared_values,shared_values_count,score_row,jumps_map
str,"array[u8, 6]",u8,u8,u8,u8,u8,u8,bool,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,list[f64],u8,f64,str
"""010203040506""","[1, 2, … 6]",1,2,3,4,5,6,False,0,1.0,0.0222,0.0222,0.0222,0.0222,0.0222,1.0,1.0,1.0,1.0,1.0,0.0222,"[0.0222, 1.0]",2,6.1332,"""01_01_01_01_01"""
"""010203040507""","[1, 2, … 7]",1,2,3,4,5,7,True,1,1.0,0.0222,0.0222,0.0222,0.0222,0.0222,1.0,1.0,1.0,1.0,0.5,0.0222,"[0.0222, 1.0]",2,5.6332,"""01_01_01_01_02"""
"""010203040508""","[1, 2, … 8]",1,2,3,4,5,8,False,0,1.0,0.0222,0.0222,0.0222,0.0222,0.0222,1.0,1.0,1.0,1.0,0.3333,0.0222,"[0.0222, 1.0]",2,5.4665,"""01_01_01_01_03"""
"""010203040509""","[1, 2, … 9]",1,2,3,4,5,9,False,0,1.0,0.0222,0.0222,0.0222,0.0222,0.0222,1.0,1.0,1.0,1.0,0.25,0.0222,"[0.0222, 1.0]",2,5.3832,"""01_01_01_01_04"""
"""010203040510""","[1, 2, … 10]",1,2,3,4,5,10,False,0,1.0,0.0222,0.0222,0.0222,0.0222,0.0222,1.0,1.0,1.0,1.0,0.2,0.0222,"[0.0222, 1.0]",2,5.3332,"""01_01_01_01_05"""
"""010203040511""","[1, 2, … 11]",1,2,3,4,5,11,True,1,1.0,0.0222,0.0222,0.0222,0.0222,0.0222,1.0,1.0,1.0,1.0,0.1667,0.0222,"[0.0222, 1.0]",2,5.2999,"""01_01_01_01_06"""
"""010203040512""","[1, 2, … 12]",1,2,3,4,5,12,False,0,1.0,0.0222,0.0222,0.0222,0.0222,0.0222,1.0,1.0,1.0,1.0,0.1429,0.0222,"[0.0222, 1.0]",2,5.2761,"""01_01_01_01_07"""
"""010203040513""","[1, 2, … 13]",1,2,3,4,5,13,True,1,1.0,0.0222,0.0222,0.0222,0.0222,0.0222,1.0,1.0,1.0,1.0,0.125,0.0222,"[0.0222, 1.0]",2,5.2582,"""01_01_01_01_08"""
"""010203040514""","[1, 2, … 14]",1,2,3,4,5,14,False,0,1.0,0.0222,0.0222,0.0222,0.0222,0.0222,1.0,1.0,1.0,1.0,0.1111,0.0222,"[0.0222, 1.0]",2,5.2443,"""01_01_01_01_09"""
"""010203040515""","[1, 2, … 15]",1,2,3,4,5,15,False,0,1.0,0.0222,0.0222,0.0222,0.0222,0.0222,1.0,1.0,1.0,1.0,0.1,0.0222,"[0.0222, 1.0]",2,5.2332,"""01_01_01_01_10"""


In [8]:
# Number of combinations that survived the generation-time filters.
len(dfRows)

15305549

In [9]:
# Narrow the generated combinations down to the candidate pool.
# Every predicate in the list must hold for a row to be kept.
df_filtered = dfRows.filter([
  # Right-to-left score of b3 must lie in (0.0256, 0.1667].
  pl.col('score_r8_b3') > 0.0256,
  pl.col('score_r8_b3') <= 0.1667,

  # No rarity reason at all, and at most one shared score.
  pl.col('reason') == 0,
  pl.col('shared_values_count').is_in((0, 1)),

  # Left-to-right score of b3 must lie in [0.0222, 0.0556].
  pl.col('score_left_b3') >= 0.0222,
  pl.col('score_left_b3') <= 0.0556,
  pl.col('b1') >= 10,

  # Exclude one specific value per position. The original wrote
  # `~col.is_in((32))`, but `(32)` is the plain int 32, not a one-element
  # tuple, so `is_in` received a scalar; a direct inequality states the
  # intent and does not rely on that behaviour.
  pl.col('b2') != 32,
  pl.col('b3') != 34,
  pl.col('b4') != 37,
  pl.col('b5') != 41,
  pl.col('b6') != 42,
  ]
)

In [10]:
# Print how many rows passed the filter, then show the filtered frame as the cell output.
print(len(df_filtered))
df_filtered

2509299


id,combo,b1,b2,b3,b4,b5,b6,is_rare_combination,reason,score_left_b1,score_left_b2,score_left_b3,score_left_b4,score_left_b5,score_left_b6,score_r8_b1,score_r8_b2,score_r8_b3,score_r8_b4,score_r8_b5,score_r8_b6,shared_values,shared_values_count,score_row,jumps_map
str,"array[u8, 6]",u8,u8,u8,u8,u8,u8,bool,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,list[f64],u8,f64,str
"""101112131516""","[10, 11, … 16]",10,11,12,13,15,16,false,0,0.1,0.0278,0.0278,0.0278,0.0278,0.0286,0.1,0.1,0.1,0.0909,0.0909,0.0286,[0.1],1,0.7502,"""01_01_01_02_01"""
"""101112131517""","[10, 11, … 17]",10,11,12,13,15,17,false,0,0.1,0.0278,0.0278,0.0278,0.0278,0.0286,0.1,0.1,0.1,0.0909,0.0833,0.0286,[0.1],1,0.7426,"""01_01_01_02_02"""
"""101112131518""","[10, 11, … 18]",10,11,12,13,15,18,false,0,0.1,0.0278,0.0278,0.0278,0.0278,0.0286,0.1,0.1,0.1,0.0909,0.0769,0.0286,[0.1],1,0.7362,"""01_01_01_02_03"""
"""101112131519""","[10, 11, … 19]",10,11,12,13,15,19,false,0,0.1,0.0278,0.0278,0.0278,0.0278,0.0286,0.1,0.1,0.1,0.0909,0.0714,0.0286,[0.1],1,0.7307,"""01_01_01_02_04"""
"""101112131520""","[10, 11, … 20]",10,11,12,13,15,20,false,0,0.1,0.0278,0.0278,0.0278,0.0278,0.0286,0.1,0.1,0.1,0.0909,0.0667,0.0286,[0.1],1,0.726,"""01_01_01_02_05"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""282940414749""","[28, 29, … 49]",28,29,40,41,47,49,false,0,0.0357,0.0556,0.0556,0.125,0.125,0.3333,0.0357,0.0263,0.0263,0.0233,0.0227,0.3333,[],0,1.1978,"""01_11_01_06_02"""
"""282940414750""","[28, 29, … 50]",28,29,40,41,47,50,false,0,0.0357,0.0556,0.0556,0.125,0.125,0.3333,0.0357,0.0263,0.0263,0.0233,0.0222,0.3333,[],0,1.1973,"""01_11_01_06_03"""
"""282940414849""","[28, 29, … 49]",28,29,40,41,48,49,false,0,0.0357,0.0556,0.0556,0.125,0.125,0.5,0.0357,0.0263,0.0263,0.0227,0.0227,0.5,[],0,1.5306,"""01_11_01_07_01"""
"""282940414850""","[28, 29, … 50]",28,29,40,41,48,50,false,0,0.0357,0.0556,0.0556,0.125,0.125,0.5,0.0357,0.0263,0.0263,0.0227,0.0222,0.5,[],0,1.5301,"""01_11_01_07_02"""


In [11]:
# Draw one row at random from the filtered pool. No seed is passed, so a re-run may return a different row.
df_filtered.sample(n=1)  

id,combo,b1,b2,b3,b4,b5,b6,is_rare_combination,reason,score_left_b1,score_left_b2,score_left_b3,score_left_b4,score_left_b5,score_left_b6,score_r8_b1,score_r8_b2,score_r8_b3,score_r8_b4,score_r8_b5,score_r8_b6,shared_values,shared_values_count,score_row,jumps_map
str,"array[u8, 6]",u8,u8,u8,u8,u8,u8,bool,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,list[f64],u8,f64,str
"""122023304346""","[12, 20, … 46]",12,20,23,30,43,46,False,0,0.0526,0.0294,0.037,0.04,0.0526,0.1429,0.0526,0.0476,0.037,0.0256,0.0244,0.1429,[0.037],1,0.6846,"""08_03_07_13_03"""


In [12]:
# Persist the full generated table (dfRows, not the filtered subset) as Parquet.
dfRows.write_parquet("./saved_bolillas.parquet")