In [22]:
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
from itertools import combinations
import json
import requests
import math
from typing import Optional, List, Dict

from pydantic import BaseModel
from typing import Mapping
import numpy as np
import polars as pl

In [23]:
def formalize_data(bolillas_tuple: tuple[int, ...]) -> str:
  """Build a fixed-width string id for a combination of ball numbers.

  Each number is written in decimal, left-padded with zeros to at least two
  characters, and the pieces are concatenated in the order given, e.g.
  ``(1, 2, 3, 4, 5, 6)`` -> ``"010203040506"``.

  Args:
    bolillas_tuple: Ball numbers of one combination (any length), in the
      order they should appear in the id.

  Returns:
    The concatenated zero-padded numbers; ``""`` for an empty input.
  """
  # Named ``combo_id`` (not ``id``) so the builtin ``id`` is not shadowed.
  combo_id = "".join(str(b).zfill(2) for b in bolillas_tuple)
  return combo_id

# Smoke test: format one six-ball combination and print the resulting id.
data = (1, 2, 3, 4, 5, 6)
a = formalize_data(data)
print(a)

010203040506


In [24]:
# Prime numbers between 1 and 50, kept as a set for fast membership tests.
# NOTE: this table is literal; extend it by hand if TOTAL_NUMBERS grows past 50.
PRIME_NUMBERS = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47}

# Configuration: combinations of CHOOSE numbers are taken from 1..TOTAL_NUMBERS.
TOTAL_NUMBERS = 50
CHOOSE = 6

# Past combinations (each one a list, because lookups compare against list(combo)).
past = [
  [1, 2, 3, 4, 5, 6],
  [1, 2, 3, 4, 5, 7],
  [1, 2, 3, 4, 5, 8]
]

# Value range of each ball b1..b6 in an ascending combination.
# Derived from the configuration so the bounds cannot drift out of sync with it:
# position i (0-based) can hold at least i + 1 and at most
# TOTAL_NUMBERS - CHOOSE + i + 1.
MIN_B_VALUES = list(range(1, CHOOSE + 1))
MAX_B_VALUES = list(range(TOTAL_NUMBERS - CHOOSE + 1, TOTAL_NUMBERS + 1))

# Check whether a number is prime.
# Uses the precomputed PRIME_NUMBERS table instead of computing primality.
def is_prime(num):
  """Return True if ``num`` is a member of ``PRIME_NUMBERS``, otherwise False."""
  return num in PRIME_NUMBERS

def is_rare_combination(combo) -> tuple[bool, str]:
  """Decide whether a combination is "rare" and name the rule that fired.

  Rules are checked in this order and the first match wins:

  1. ``CONSECUTIVES``  - every number is exactly one more than its predecessor.
  2. ``REGULAR_SKIPS`` - all gaps between neighbouring numbers are equal.
  3. ``REGULAR_JUMPS`` - at least two adjacent gaps are equal.
  4. ``MAX_PRIMES``    - more than three numbers are prime according to ``is_prime``.
  5. ``HAVENT_EVENS``  - the combination contains no even number.
  6. ``PAST``          - the combination, converted to a list, appears in ``past``.

  Args:
    combo: Sequence of integers (a tuple or list); neighbours are compared in
      the order given.

  Returns:
    ``(True, reason)`` for the first matching rule, otherwise ``(False, "")``.
  """
  neighbours = list(zip(combo, combo[1:]))

  # Rule 1: all numbers are consecutive.
  if all(low + 1 == high for low, high in neighbours):
    return True, "CONSECUTIVES"

  # Rule 2: regular skips (one constant gap across the whole combination).
  gaps = [high - low for low, high in neighbours]
  if len(set(gaps)) == 1:
    return True, "REGULAR_SKIPS"

  # Rule 3: two adjacent gaps of the same size.
  if any(first == second for first, second in zip(gaps, gaps[1:])):
    return True, "REGULAR_JUMPS"

  # Rule 4: at most 3 primes are allowed.
  primes_found = len([num for num in combo if is_prime(num)])
  if primes_found > 3:
    return True, "MAX_PRIMES"

  # Rule 5: at least one even number is required.
  has_even = any(num % 2 == 0 for num in combo)
  if not has_even:
    return True, "HAVENT_EVENS"

  # Rule 6: must not be one of the past combinations.
  if list(combo) in past:
    return True, "PAST"

  return False, ""


def calculate_left_scores(combo):
  """Compute the left-to-right score of every position in a combination.

  The score of a position is ``1 / n``, where ``n`` is:

  * position 0: ``combo[1] - 1``;
  * position i > 0: ``MAX_B_VALUES[i] - combo[i - 1]``.

  Args:
    combo: Indexable sequence of integer ball numbers.

  Returns:
    A list with one float score per position (empty for an empty ``combo``).
  """
  def candidates(position):
    # Count of candidate values used as the denominator for this position.
    if position == 0:
      return combo[1] - 1
    return MAX_B_VALUES[position] - combo[position - 1]

  return [1 / candidates(position) for position in range(len(combo))]

def calculate_right_scores(combo):
  """Compute the right-to-left score of every position in a combination.

  The score of a position is ``1 / n``, where ``n`` is:

  * position ``CHOOSE - 1``: ``MAX_B_VALUES[i] - combo[i - 1]``;
  * any other position i: ``combo[i + 1] - MIN_B_VALUES[i]``.

  Args:
    combo: Indexable sequence of integer ball numbers.

  Returns:
    A list with one float score per position (empty for an empty ``combo``).
  """
  def candidates(position):
    # Count of candidate values used as the denominator for this position.
    if position == CHOOSE - 1:
      return MAX_B_VALUES[position] - combo[position - 1]
    return combo[position + 1] - MIN_B_VALUES[position]

  return [1 / candidates(position) for position in range(len(combo))]




In [25]:
# Two hard-coded six-element float lists; presumably sample left (l) and
# right (r) score vectors for trying out check_shared_values — TODO confirm.
l = [0.0588, 0.0303, 0.0345, 0.0455, 0.0526, 0.0588]
r = [0.0588, 0.0417, 0.0370, 0.0345, 0.0333, 0.0588]

def check_shared_values(left_arr: List[float], right_arr: List[float]):
  """Collect the values that the left and right score lists have in common.

  Two kinds of comparison are made, in this order:

  1. Opposite ends: ``left_arr[0]`` against ``right_arr[CHOOSE - 1]``, then
     ``left_arr[CHOOSE - 1]`` against ``right_arr[0]``.
  2. Every element of ``left_arr`` against every interior element of
     ``right_arr`` (its first and last elements are skipped).

  Args:
    left_arr: Left-to-right scores; must have at least ``CHOOSE`` elements.
    right_arr: Right-to-left scores; must have at least ``CHOOSE`` elements.

  Returns:
    The matching values taken from ``left_arr``, one entry per matching pair,
    so a value can appear more than once.
  """
  last = CHOOSE - 1
  matches = []

  # Opposite ends: first-left vs last-right, then last-left vs first-right.
  for left_pos, right_pos in ((0, last), (last, 0)):
    if left_arr[left_pos] == right_arr[right_pos]:
      matches.append(left_arr[left_pos])

  # Every left value against the interior of the right list.
  interior = right_arr[1:-1]
  matches.extend(
    left_value
    for left_value in left_arr
    for right_value in interior
    if left_value == right_value
  )
  return matches

# Start the comparison from here

In [40]:
# Pool of ball numbers and every CHOOSE-sized combination drawn from it.
# For a quick smoke test, shrink the pool instead, e.g. ``range(1, 8)``.
bolillas = range(1, TOTAL_NUMBERS + 1)
combos = combinations(bolillas, CHOOSE)

# Rarity reasons that are still allowed into the table ('' means "not rare");
# combinations flagged with any other reason are dropped.
KEPT_REASONS = ('', 'REGULAR_JUMPS')
# Rows sharing more than this many distinct score values are dropped.
MAX_SHARED_VALUES = 2

lst = []
for bolillas_int in combos:
  is_rare, reason = is_rare_combination(bolillas_int)
  if reason not in KEPT_REASONS:
    continue

  left_scores = [round(score, 4) for score in calculate_left_scores(bolillas_int)]
  right_scores = [round(score, 4) for score in calculate_right_scores(bolillas_int)]

  # Distinct rounded scores found both in the left scores of every ball but
  # the last and in the right scores of every ball but the first. The inputs
  # are already rounded to 4 decimals, so no second rounding is needed.
  shared_values = list(set(left_scores[:CHOOSE - 1]) & set(right_scores[1:CHOOSE]))
  if len(shared_values) > MAX_SHARED_VALUES:
    continue

  # Per-ball left+right sums only feed the row total; they are not stored as
  # columns. Computed after the filter above so discarded rows cost nothing.
  lr_scores = [round(left + right, 4) for left, right in zip(left_scores, right_scores)]
  row_score = round(sum(lr_scores), 4)

  # ``combo_id`` (not ``id``): a global named ``id`` would shadow the builtin
  # for every later cell of the notebook.
  combo_id = formalize_data(bolillas_int)

  # Field order must match the ``schema`` list passed to pl.DataFrame below.
  lst.append([
    combo_id,
    bolillas_int,
    *bolillas_int,
    is_rare,
    reason,
    *left_scores,
    *right_scores,
    shared_values,
    len(shared_values),
    row_score,
  ])

dfRows = pl.DataFrame(
  lst,
  schema=[
    'id', 'combo', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6',
    'is_rare_combination', 'reason',
    "score_left_b1", "score_left_b2", "score_left_b3", "score_left_b4", "score_left_b5", "score_left_b6",
    "score_r8_b1", "score_r8_b2", "score_r8_b3", "score_r8_b4", "score_r8_b5", "score_r8_b6",
    "shared_values", "shared_values_count",
    "score_row"
  ],
  # Ball numbers and the shared-value count are small non-negative integers,
  # so they are stored as unsigned 8-bit values.
  schema_overrides={
    'combo': pl.Array(pl.UInt8, 6),
    'b1': pl.UInt8,
    'b2': pl.UInt8,
    'b3': pl.UInt8,
    'b4': pl.UInt8,
    'b5': pl.UInt8,
    'b6': pl.UInt8,
    'shared_values_count': pl.UInt8
  },
  orient='row'
)

display(dfRows.head(10))

id,combo,b1,b2,b3,b4,b5,b6,is_rare_combination,reason,score_left_b1,score_left_b2,score_left_b3,score_left_b4,score_left_b5,score_left_b6,score_r8_b1,score_r8_b2,score_r8_b3,score_r8_b4,score_r8_b5,score_r8_b6,shared_values,shared_values_count,score_row
str,"array[u8, 6]",u8,u8,u8,u8,u8,u8,bool,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,list[f64],u8,f64
"""010203040507""","[1, 2, … 7]",1,2,3,4,5,7,True,"""REGULAR_JUMPS""",1.0,0.0222,0.0222,0.0222,0.0222,0.0222,1.0,1.0,1.0,1.0,0.5,0.0222,"[0.0222, 1.0]",2,5.6332
"""010203040508""","[1, 2, … 8]",1,2,3,4,5,8,True,"""REGULAR_JUMPS""",1.0,0.0222,0.0222,0.0222,0.0222,0.0222,1.0,1.0,1.0,1.0,0.3333,0.0222,"[0.0222, 1.0]",2,5.4665
"""010203040509""","[1, 2, … 9]",1,2,3,4,5,9,True,"""REGULAR_JUMPS""",1.0,0.0222,0.0222,0.0222,0.0222,0.0222,1.0,1.0,1.0,1.0,0.25,0.0222,"[0.0222, 1.0]",2,5.3832
"""010203040510""","[1, 2, … 10]",1,2,3,4,5,10,True,"""REGULAR_JUMPS""",1.0,0.0222,0.0222,0.0222,0.0222,0.0222,1.0,1.0,1.0,1.0,0.2,0.0222,"[0.0222, 1.0]",2,5.3332
"""010203040511""","[1, 2, … 11]",1,2,3,4,5,11,True,"""REGULAR_JUMPS""",1.0,0.0222,0.0222,0.0222,0.0222,0.0222,1.0,1.0,1.0,1.0,0.1667,0.0222,"[0.0222, 1.0]",2,5.2999
"""010203040512""","[1, 2, … 12]",1,2,3,4,5,12,True,"""REGULAR_JUMPS""",1.0,0.0222,0.0222,0.0222,0.0222,0.0222,1.0,1.0,1.0,1.0,0.1429,0.0222,"[0.0222, 1.0]",2,5.2761
"""010203040513""","[1, 2, … 13]",1,2,3,4,5,13,True,"""REGULAR_JUMPS""",1.0,0.0222,0.0222,0.0222,0.0222,0.0222,1.0,1.0,1.0,1.0,0.125,0.0222,"[0.0222, 1.0]",2,5.2582
"""010203040514""","[1, 2, … 14]",1,2,3,4,5,14,True,"""REGULAR_JUMPS""",1.0,0.0222,0.0222,0.0222,0.0222,0.0222,1.0,1.0,1.0,1.0,0.1111,0.0222,"[0.0222, 1.0]",2,5.2443
"""010203040515""","[1, 2, … 15]",1,2,3,4,5,15,True,"""REGULAR_JUMPS""",1.0,0.0222,0.0222,0.0222,0.0222,0.0222,1.0,1.0,1.0,1.0,0.1,0.0222,"[0.0222, 1.0]",2,5.2332
"""010203040516""","[1, 2, … 16]",1,2,3,4,5,16,True,"""REGULAR_JUMPS""",1.0,0.0222,0.0222,0.0222,0.0222,0.0222,1.0,1.0,1.0,1.0,0.0909,0.0222,"[0.0222, 1.0]",2,5.2241


In [41]:
# Number of rows in the combination table.
dfRows.height

14963876

In [75]:
# Keep only the rows that satisfy every one of the conditions below
# (all predicates passed to ``filter`` are AND-ed together).
df_filtered = dfRows.filter(
  # Score windows for the third ball: (0.0256, 0.1667] from the right,
  # [0.0222, 0.0556] from the left.
  pl.col('score_r8_b3').is_between(0.0256, 0.1667, closed='right'),
  pl.col('score_left_b3').is_between(0.0222, 0.0556),

  # Allowed values per ball.
  pl.col('b1').is_between(3, 9),
  pl.col('b2') <= 20,
  pl.col('b3') <= 30,
  pl.col('b4') >= 30,
  pl.col('b5') >= 35,
  ~pl.col('b5').is_in((48, 49)),
  pl.col('b6') >= 35,
  ~pl.col('b6').is_in((49, 50)),
)

In [76]:
# Show the filtered frame as this cell's output.
df_filtered

id,combo,b1,b2,b3,b4,b5,b6,is_rare_combination,reason,score_left_b1,score_left_b2,score_left_b3,score_left_b4,score_left_b5,score_left_b6,score_r8_b1,score_r8_b2,score_r8_b3,score_r8_b4,score_r8_b5,score_r8_b6,shared_values,shared_values_count,score_row
str,"array[u8, 6]",u8,u8,u8,u8,u8,u8,bool,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,list[f64],u8,f64
"""030405303536""","[3, 4, … 36]",3,4,5,30,35,36,true,"""REGULAR_JUMPS""",0.3333,0.0233,0.0233,0.0233,0.0526,0.0667,0.3333,0.3333,0.037,0.0323,0.0323,0.0667,[0.3333],1,1.3574
"""030405303537""","[3, 4, … 37]",3,4,5,30,35,37,true,"""REGULAR_JUMPS""",0.3333,0.0233,0.0233,0.0233,0.0526,0.0667,0.3333,0.3333,0.037,0.0323,0.0312,0.0667,[0.3333],1,1.3563
"""030405303538""","[3, 4, … 38]",3,4,5,30,35,38,true,"""REGULAR_JUMPS""",0.3333,0.0233,0.0233,0.0233,0.0526,0.0667,0.3333,0.3333,0.037,0.0323,0.0303,0.0667,[0.3333],1,1.3554
"""030405303539""","[3, 4, … 39]",3,4,5,30,35,39,true,"""REGULAR_JUMPS""",0.3333,0.0233,0.0233,0.0233,0.0526,0.0667,0.3333,0.3333,0.037,0.0323,0.0294,0.0667,[0.3333],1,1.3545
"""030405303540""","[3, 4, … 40]",3,4,5,30,35,40,true,"""REGULAR_JUMPS""",0.3333,0.0233,0.0233,0.0233,0.0526,0.0667,0.3333,0.3333,0.037,0.0323,0.0286,0.0667,[0.3333],1,1.3537
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""092030414547""","[9, 20, … 47]",9,20,30,41,45,47,false,"""""",0.0526,0.027,0.037,0.0556,0.125,0.2,0.0526,0.0357,0.0263,0.0244,0.0238,0.2,[],0,0.86
"""092030414548""","[9, 20, … 48]",9,20,30,41,45,48,false,"""""",0.0526,0.027,0.037,0.0556,0.125,0.2,0.0526,0.0357,0.0263,0.0244,0.0233,0.2,[],0,0.8595
"""092030414647""","[9, 20, … 47]",9,20,30,41,46,47,false,"""""",0.0526,0.027,0.037,0.0556,0.125,0.25,0.0526,0.0357,0.0263,0.0238,0.0238,0.25,[],0,0.9594
"""092030414648""","[9, 20, … 48]",9,20,30,41,46,48,false,"""""",0.0526,0.027,0.037,0.0556,0.125,0.25,0.0526,0.0357,0.0263,0.0238,0.0233,0.25,[],0,0.9589


In [109]:
# Draw a single row from the filtered frame. No seed is passed, so the row
# is not pinned by this call and may differ from one run to the next.
df_filtered.sample(n=1)  

id,combo,b1,b2,b3,b4,b5,b6,is_rare_combination,reason,score_left_b1,score_left_b2,score_left_b3,score_left_b4,score_left_b5,score_left_b6,score_r8_b1,score_r8_b2,score_r8_b3,score_r8_b4,score_r8_b5,score_r8_b6,shared_values,shared_values_count,score_row
str,"array[u8, 6]",u8,u8,u8,u8,u8,u8,bool,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,list[f64],u8,f64
"""071025324445""","[7, 10, … 45]",7,10,25,32,44,45,False,"""""",0.1111,0.0256,0.027,0.0435,0.0588,0.1667,0.1111,0.0435,0.0345,0.025,0.025,0.1667,[0.0435],1,0.8385


In [110]:
# Persist the full dfRows table (not the df_filtered subset) as a Parquet
# file, at a path relative to the current working directory.
dfRows.write_parquet("./saved_bolillas.parquet")