In [1]:
from bs4 import BeautifulSoup
# import pandas as pd
from datetime import datetime, timedelta
from itertools import combinations
import json
import requests
import math
from typing import Optional, List, Dict

from pydantic import BaseModel
from typing import Mapping
import numpy as np
import polars as pl
import math

from shared.utils import formalize_data
from shared.random_rules import RandomRules


In [2]:
a = formalize_data("09 12 27 33 24 30")
print(a)

('091224273033', (9, 12, 24, 27, 30, 33))


In [3]:
rr = RandomRules(
  TOTAL_NUMBERS = 35,
  CHOOSE = 5,
  MAX_PRIMES = 3,
  N_VALUES=["1", "2", "3"],
  # Rangos de valores de las bolillas
  MIN_B_VALUES = [1, 2, 3, 4, 5],
  MAX_B_VALUES = [31, 32, 33, 34, 35]
)



In [4]:
url = 'https://resultados.latinka.com.pe/i.do?m=historico&t=0&s=164'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

tables = soup.select('table')
rows = tables[1].find_all('tr')





In [5]:
lst = []
for row in rows:
  cols = [data.text.strip() for data in row.find_all('td')]
  
  id, bolillas_int = formalize_data(bolillas_str=cols[2])
  cols.append(id)
  cols.append(bolillas_int)
  cols.extend([b for b in bolillas_int])
  
  is_rare, reason = rr.is_rare_combination(bolillas_int, id)
  cols.append(is_rare)
  cols.append(reason)

  left_scores = [round(b, 4) for b in rr.calculate_left_scores(bolillas_int)]
  right_scores = [round(b, 4) for b in rr.calculate_right_scores(bolillas_int)]
  lr_scores = [round(left + right,4) for left, right in zip(left_scores, right_scores)]
  row_score = round(sum(lr_scores),4)

  shared_values = list(set(left_scores[0:5]) & set(right_scores[1:6]))
  shared_values = [round(s, 4) for s in shared_values]
  cols.extend([score for score in left_scores])
  cols.extend([score for score in right_scores])
  # cols.extend(lr_scores)
  cols.append(shared_values)
  cols.append(len(shared_values))
  cols.append(row_score)
  cols.append(rr.jumps_map(bolillas_int))
  cols.append(rr.unijump(bolillas_int))
  
  lst.append(cols)

with open('./gnche.json', 'w') as file: 
  json.dump(lst, file)

dfRows = pl.DataFrame(
  lst,
  schema=[
    'fecha', 'sorteo', 'bolillas',
    'id', 'combo', 'b1', 'b2', 'b3', 'b4', 'b5',
    'is_rare_combination', 'reason',
    "score_left_b1", "score_left_b2", "score_left_b3", "score_left_b4", "score_left_b5",
    "score_r8_b1", "score_r8_b2", "score_r8_b3", "score_r8_b4", "score_r8_b5",
    # "score_lr_b1", "score_lr_b2", "score_lr_b3", "score_lr_b4", "score_lr_b5", "score_lr_b6",
    "shared_values", "shared_values_count",
    "score_row",
    "jumps_map", "unijump"
  ],
  schema_overrides={
    'combo': pl.Array(pl.UInt8, 5),
    'b1': pl.UInt8,
    'b2': pl.UInt8,
    'b3': pl.UInt8,
    'b4': pl.UInt8,
    'b5': pl.UInt8,
    'b6': pl.UInt8,
    'shared_values_count': pl.UInt8,
    'reason': pl.UInt8
  },
  orient='row'
)

TOTAL_LINES = len(dfRows)
print(f'{TOTAL_LINES=}')
display(dfRows.head(10))


TOTAL_LINES=7677


fecha,sorteo,bolillas,id,combo,b1,b2,b3,b4,b5,is_rare_combination,reason,score_left_b1,score_left_b2,score_left_b3,score_left_b4,score_left_b5,score_r8_b1,score_r8_b2,score_r8_b3,score_r8_b4,score_r8_b5,shared_values,shared_values_count,score_row,jumps_map,unijump
str,str,str,str,"array[u8, 5]",u8,u8,u8,u8,u8,bool,u8,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,list[f64],u8,f64,str,i64
"""20/02/2025""","""4133""","""34 15 26 04 30""","""0415263034""","[4, 15, … 34]",4,15,26,30,34,True,8,0.0714,0.0357,0.0556,0.125,0.2,0.0714,0.0417,0.037,0.0333,0.2,[0.2],1,0.8711,"""11_11_04_04""",14
"""19/02/2025""","""4132""","""18 27 28 20 21""","""1820212728""","[18, 20, … 28]",18,20,21,27,28,True,16,0.0526,0.0714,0.0769,0.0769,0.125,0.0526,0.0526,0.0417,0.0417,0.125,"[0.0526, 0.125]",2,0.7164,"""02_01_06_01""",-16
"""18/02/2025""","""4131""","""26 13 05 20 01""","""0105132026""","[1, 5, … 26]",1,5,13,20,26,False,0,0.25,0.0323,0.0357,0.0476,0.0667,0.25,0.0909,0.0588,0.0455,0.0667,[0.0667],1,0.9442,"""04_08_07_06""",5
"""17/02/2025""","""4130""","""12 33 22 19 20""","""1219202233""","[12, 19, … 33]",12,19,20,22,33,False,0,0.0556,0.05,0.0714,0.0714,0.0769,0.0556,0.0556,0.0526,0.0345,0.0769,"[0.0556, 0.0769]",2,0.6005,"""07_01_02_11""",1
"""16/02/2025""","""4129""","""33 07 12 04 32""","""0407123233""","[4, 7, … 33]",4,7,12,32,33,False,0,0.1667,0.0357,0.0385,0.0455,0.3333,0.1667,0.1,0.0345,0.0345,0.3333,[0.3333],1,1.2887,"""03_05_20_01""",-47
"""15/02/2025""","""4128""","""21 09 26 31 32""","""0921263132""","[9, 21, … 32]",9,21,26,31,32,True,8,0.05,0.0435,0.0833,0.125,0.25,0.05,0.0417,0.0357,0.0357,0.25,[0.25],1,0.9649,"""12_05_05_01""",-11
"""14/02/2025""","""4127""","""15 09 28 26 14""","""0914152628""","[9, 14, … 28]",9,14,15,26,28,True,16,0.0769,0.0435,0.0526,0.0526,0.1111,0.0769,0.0769,0.0435,0.0417,0.1111,"[0.0769, 0.0435, 0.1111]",3,0.6868,"""05_01_11_02""",-33
"""13/02/2025""","""4126""","""17 30 21 33 15""","""1517213033""","[15, 17, … 33]",15,17,21,30,33,False,0,0.0625,0.0588,0.0625,0.0769,0.2,0.0625,0.0526,0.037,0.0345,0.2,[0.2],1,0.8473,"""02_04_09_03""",-14
"""12/02/2025""","""4125""","""31 33 20 12 13""","""1213203133""","[12, 13, … 33]",12,13,20,31,33,False,0,0.0833,0.05,0.05,0.0714,0.25,0.0833,0.0556,0.0357,0.0345,0.25,[0.25],1,0.9638,"""01_07_11_02""",-11
"""11/02/2025""","""4124""","""08 32 28 19 17""","""0817192832""","[8, 17, … 32]",8,17,19,28,32,False,0,0.0625,0.0417,0.0625,0.0667,0.1429,0.0625,0.0588,0.04,0.0357,0.1429,[0.1429],1,0.7162,"""09_02_09_04""",-26


# ANALYZE RANDOM!
You can't analyze random bc it's random! But you can find "order" in "entropy" I guess. Like when you have your room messed up but somehow you can find everything. Nevermid.
Just look at the results below

In [6]:
unijumps = dfRows.group_by(
  'unijump'
).agg(
  pl.count('unijump').alias('count')
).sort(
  "count", descending=True
)


In [7]:
unijumps.write_excel("unijump.xlsx")

<xlsxwriter.workbook.Workbook at 0x15e9ced5880>

In [8]:
import json
percent_one = math.ceil(TOTAL_LINES * 0.01)
print(f'{percent_one=}')
print(unijumps[0:percent_one]['unijump'].to_list())
lst_unijumps = unijumps[0:percent_one]['unijump'].to_list()

with open('unijump_gn.json', 'w') as f:
  f.write(json.dumps(lst_unijumps))
  

percent_one=77
[-1, 7, -2, -3, 3, -4, 0, 4, 5, 2, 6, 10, -5, -12, -6, 1, -9, -7, 11, 9, -11, 8, 12, -10, -8, -13, 14, -15, -14, 16, 13, 15, 19, -16, 21, -17, 18, 17, -18, -19, -20, -21, 23, -22, -23, 22, 20, 25, -24, 26, 24, 27, -28, -26, 29, -27, -25, -30, -33, -32, -29, 28, 35, 34, 31, 30, 33, 36, 32, -35, -38, -31, -34, 39, 37, -39, 40]


In [9]:
dfRows.group_by(
  'reason'
).agg(
  pl.count('reason').alias('count')
).sort(
  "count", descending=True
)


reason,count
u8,u32
0,3775
16,1925
8,930
24,552
1,97
…,…
18,19
11,13
19,5
27,3


In [10]:
dfRows.group_by(
  'shared_values_count'
).agg(pl.count('shared_values_count').alias('count'))

shared_values_count,count
u8,u32
3,283
1,4775
4,3
2,2616


In [11]:
df_jumps_map = dfRows.group_by(
  'jumps_map'
).agg(pl.count('jumps_map').alias('count')).sort("count", descending=True)

display(df_jumps_map[0:10])

repeated_jumps_map = df_jumps_map.filter(pl.col("count") > 1)["jumps_map"].to_list()
print(f'MORE THAN 1 {len(repeated_jumps_map)=}')


q = dfRows.filter(
  pl.col("jumps_map").is_in(repeated_jumps_map) 
).sort("jumps_map")

display(q)

with open ("./jumps_map_gn.json", "w") as file:
  json.dump(df_jumps_map["jumps_map"].to_list(), file)



jumps_map,count
str,u32
"""01_06_14_02""",4
"""03_06_02_03""",4
"""01_01_09_07""",4
"""01_05_02_03""",4
"""02_06_05_01""",4
"""02_02_03_03""",4
"""03_06_02_01""",4
"""01_03_02_09""",4
"""03_02_01_04""",4
"""03_01_05_07""",3


MORE THAN 1 len(repeated_jumps_map)=853


fecha,sorteo,bolillas,id,combo,b1,b2,b3,b4,b5,is_rare_combination,reason,score_left_b1,score_left_b2,score_left_b3,score_left_b4,score_left_b5,score_r8_b1,score_r8_b2,score_r8_b3,score_r8_b4,score_r8_b5,shared_values,shared_values_count,score_row,jumps_map,unijump
str,str,str,str,"array[u8, 5]",u8,u8,u8,u8,u8,bool,u8,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,list[f64],u8,f64,str,i64
"""02/06/2010""","""2431""","""04 05 06 07 13""","""0405060713""","[4, 5, … 13]",4,5,6,7,13,true,24,0.25,0.0357,0.0357,0.0357,0.0357,0.25,0.25,0.25,0.1111,0.0357,"[0.25, 0.0357]",2,1.2896,"""01_01_01_06""",5
"""14/12/2003""","""68""","""22 23 25 24 31""","""2223242531""","[22, 23, … 31]",22,23,24,25,31,true,8,0.0455,0.1,0.1,0.1,0.1,0.0455,0.0455,0.0455,0.037,0.1,"[0.0455, 0.1]",2,0.719,"""01_01_01_06""",5
"""21/05/2022""","""3127""","""26 19 22 18 20""","""1819202226""","[18, 19, … 26]",18,19,20,22,26,true,24,0.0556,0.0714,0.0714,0.0714,0.0769,0.0556,0.0556,0.0526,0.0455,0.0769,"[0.0556, 0.0769]",2,0.6329,"""01_01_02_04""",0
"""09/03/2012""","""3077""","""01 02 03 05 09""","""0102030509""","[1, 2, … 9]",1,2,3,5,9,true,8,1.0,0.0323,0.0323,0.0323,0.0333,1.0,1.0,0.5,0.2,0.0333,"[0.0333, 1.0]",2,3.8635,"""01_01_02_04""",0
"""17/05/2007""","""1318""","""12 14 16 20 13""","""1213141620""","[12, 13, … 20]",12,13,14,16,20,true,8,0.0833,0.05,0.05,0.05,0.0526,0.0833,0.0833,0.0769,0.0625,0.0526,"[0.0833, 0.0526]",2,0.6445,"""01_01_02_04""",0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""19/12/2007""","""1534""","""32 30 29 25 02""","""0225293032""","[2, 25, … 32]",2,25,29,30,32,true,16,0.0417,0.0333,0.125,0.2,0.2,0.0417,0.037,0.037,0.0357,0.2,[0.2],1,0.9514,"""23_04_01_02""",-12
"""29/01/2015""","""0535""","""03 26 30 32 33""","""0326303233""","[3, 26, … 33]",3,26,30,32,33,true,16,0.04,0.0345,0.1429,0.25,0.3333,0.04,0.0357,0.0345,0.0345,0.3333,"[0.0345, 0.3333]",2,1.2787,"""23_04_02_01""",-16
"""22/01/2013""","""3396""","""03 26 30 32 33""","""0326303233""","[3, 26, … 33]",3,26,30,32,33,true,16,0.04,0.0345,0.1429,0.25,0.3333,0.04,0.0357,0.0345,0.0345,0.3333,"[0.0345, 0.3333]",2,1.2787,"""23_04_02_01""",-16
"""08/03/2018""","""1669""","""25 30 01 28 32""","""0125283032""","[1, 25, … 32]",1,25,28,30,32,true,8,0.0417,0.0323,0.125,0.1667,0.2,0.0417,0.0385,0.037,0.0357,0.2,[0.2],1,0.9186,"""24_03_02_02""",-19


In [12]:
def show_b_stats (col: str):
  df_viz = dfRows.group_by(
    col
  ).agg(pl.count(col).alias('count'))

  chart = df_viz.plot.bar(
    x=col,
    y='count',
    color=col,
  )

  display(df_viz)
  display(chart)



In [13]:
show_b_stats('score_left_b1')
show_b_stats('score_r8_b1')


score_left_b1,count
f64,u32
0.0476,161
0.125,477
0.0588,294
0.0769,398
0.0333,3
…,…
0.1667,470
0.0909,500
0.0556,235
0.0526,198


score_r8_b1,count
f64,u32
0.125,477
0.0476,161
0.0333,3
0.0588,294
0.0769,398
…,…
0.1667,470
0.0909,500
0.0526,198
0.0556,235


In [14]:
show_b_stats('score_left_b2')
show_b_stats('score_r8_b2')

score_left_b2,count
f64,u32
0.0476,289
0.125,8
0.0769,43
0.0333,964
0.0588,109
…,…
0.0385,569
0.0909,24
0.0556,143
0.0526,180


score_r8_b2,count
f64,u32
0.125,216
0.0476,380
0.0769,417
0.0333,35
0.0588,405
…,…
0.0385,202
0.0909,387
0.0556,442
0.0526,390


In [15]:
show_b_stats('score_left_b3')
show_b_stats('score_r8_b3')

score_left_b3,count
f64,u32
0.0476,500
0.125,80
0.0333,236
0.0588,325
0.0769,198
…,…
0.1667,42
0.0909,161
0.0556,378
0.0526,398


score_r8_b3,count
f64,u32
0.125,72
0.0476,474
0.0588,330
0.0333,223
0.0769,182
…,…
0.1667,32
0.0909,157
0.0526,426
0.0556,386


In [16]:
show_b_stats('score_left_b4')
show_b_stats('score_r8_b4')

score_left_b4,count
f64,u32
0.125,251
0.0476,387
0.0588,424
0.0333,38
0.0769,390
…,…
0.0385,180
0.0909,380
0.0556,418
0.0526,417


score_r8_b4,count
f64,u32
0.125,11
0.0476,237
0.0588,106
0.0333,889
0.0769,36
…,…
0.1667,5
0.0909,18
0.0556,154
0.0526,158


In [17]:
show_b_stats('score_left_b5')
show_b_stats('score_r8_b5')

score_left_b5,count
f64,u32
0.125,486
0.0476,157
0.0588,270
0.0333,4
0.0769,426
…,…
0.1667,456
0.0909,474
0.0556,245
0.0526,182


score_r8_b5,count
f64,u32
0.0476,157
0.125,486
0.0588,270
0.0769,426
0.0333,4
…,…
0.1667,456
0.0909,474
0.0526,182
0.0556,245
