# SETUP

## IMPORTS

In [1]:
import pandas as pd
import numpy as np

from sqlalchemy import create_engine
from sqlalchemy import URL

from dbhelpers.config import build_config
from dbhelpers.db import get_db, test_connection, get_db_url


In [2]:
pd.options.display.width = 200

## DB CONFIG

In [3]:
DB_NAME = 'testing'
CACHE_TTL = None
config = build_config(database=DB_NAME)
config

{'host': 'localhost',
 'user': 'root',
 'password': '10010',
 'database': 'testing'}

# FUNCTIONS

## get_winners()

In [4]:
def get_winners() -> pd.DataFrame:
    db_url = get_db_url(**config)
    engine = create_engine(db_url)
    sql = 'select * from lotteria'
    params = ()
    df =  pd.read_sql(sql, engine, params=tuple(params), index_col='index')
    return df


## split_df()

In [5]:
def split_df(df:pd.DataFrame, rows:int) -> list[pd.DataFrame]:
    splitdf = [df.iloc[i: i+rows -1, :] for i in range(
                0, len(df), rows)
    ]
    
    return splitdf


# START
## get data

In [6]:
winners = get_winners()
winners.head()

Unnamed: 0_level_0,categoria,serie,numero,luogo,prov,premio
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1,T,173756,SOMAGLIA,LO,5000000
1,1,T,378442,PESARO,PU,2500000
2,1,G,330068,PALERMO,PA,2000000
3,1,G,173817,TORINO,TO,1500000
4,1,S,185025,DOLO,VE,1000000


## split_df
### no sort, filter

In [7]:
total_split = split_df(winners, 10)
total_split[0]

Unnamed: 0_level_0,categoria,serie,numero,luogo,prov,premio
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1,T,173756,SOMAGLIA,LO,5000000
1,1,T,378442,PESARO,PU,2500000
2,1,G,330068,PALERMO,PA,2000000
3,1,G,173817,TORINO,TO,1500000
4,1,S,185025,DOLO,VE,1000000
5,2,S,30585,DISTRIBUTORE LOCALE MONTENERO DI BISACCIA,CB,100000
6,2,C,439458,AVELLINO,AV,100000
7,2,M,217442,MASSAFRA,TA,100000
8,2,D,138065,FOLIGNO,PG,100000


In [8]:
len(total_split)

28

In [9]:
total_split[-1]

Unnamed: 0_level_0,categoria,serie,numero,luogo,prov,premio
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
270,4,B,189342,MESAGNE,BR,20000
271,4,Q,44926,DISTRIBUTORE LOCALE ROMA,RM,20000
272,4,N,445610,SALA CONSILINA,SA,20000
273,4,G,394402,BUSSOLENO,TO,20000
274,4,M,479982,CASTIGLIONE DEI PEPOLI,BO,20000
275,4,N,342401,TORINO,TO,20000
276,4,M,41952,DISTRIBUTORE LOCALE ROMA,RM,20000
277,4,T,404382,ROMA,RM,20000
278,4,Q,270923,BAGHERIA,PA,20000


### sort, no filter

In [10]:
sorted_split = split_df(
        winners.sort_values(by='categoria'), 
        10
)
sorted_split[0].head()


Unnamed: 0_level_0,categoria,serie,numero,luogo,prov,premio
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1,T,173756,SOMAGLIA,LO,5000000
1,1,T,378442,PESARO,PU,2500000
2,1,G,330068,PALERMO,PA,2000000
3,1,G,173817,TORINO,TO,1500000
4,1,S,185025,DOLO,VE,1000000


# the problem

In [11]:
ts0 = total_split[0]
ts1 = total_split[1]
ss0 = sorted_split[0]
ss1 = sorted_split[1]   # empty

# DEBUG

In [12]:
# splitdf = [df.loc[i: i+rows -1, :] for i in range(
#             0, len(df), rows)
# ]

rows = 10
indexes = [i for i in range(0, len(winners), rows)]
# print(indexes)

chunks = [(i, i+rows-1) for i in indexes]
# print(chunks)

print(chunks)

[(0, 9), (10, 19), (20, 29), (30, 39), (40, 49), (50, 59), (60, 69), (70, 79), (80, 89), (90, 99), (100, 109), (110, 119), (120, 129), (130, 139), (140, 149), (150, 159), (160, 169), (170, 179), (180, 189), (190, 199), (200, 209), (210, 219), (220, 229), (230, 239), (240, 249), (250, 259), (260, 269), (270, 279)]


In [13]:
source = winners
# source = winners.sort_values(by='categoria', ignore_index=False)

for i, i1 in chunks:
    tempdf = source.iloc[i:i1]
    print(tempdf.head(2))
    print('-' * 80)

       categoria serie  numero     luogo prov   premio
index                                                 
0              1     T  173756  SOMAGLIA   LO  5000000
1              1     T  378442    PESARO   PU  2500000
--------------------------------------------------------------------------------
       categoria serie  numero   luogo prov  premio
index                                              
10             2     Q  330398  FOGGIA   FG  100000
11             2     L  320913  ISPICA   RG  100000
--------------------------------------------------------------------------------
       categoria serie  numero      luogo prov  premio
index                                                 
20             2     Q  311109  VIDIGULFO   PV  100000
21             2     O  115657     CERVIA   RA  100000
--------------------------------------------------------------------------------
       categoria serie  numero                                 luogo prov  premio
index                      

In [14]:
pages = [source.iloc[i:i1, :] for i, i1 in chunks]
for page in pages:
    print(page.head(2))

       categoria serie  numero     luogo prov   premio
index                                                 
0              1     T  173756  SOMAGLIA   LO  5000000
1              1     T  378442    PESARO   PU  2500000
       categoria serie  numero   luogo prov  premio
index                                              
10             2     Q  330398  FOGGIA   FG  100000
11             2     L  320913  ISPICA   RG  100000
       categoria serie  numero      luogo prov  premio
index                                                 
20             2     Q  311109  VIDIGULFO   PV  100000
21             2     O  115657     CERVIA   RA  100000
       categoria serie  numero                                 luogo prov  premio
index                                                                            
30             3     A  151684  DISTRIBUTORE LOCALE SESTO FIORENTINO   FI   50000
31             3     C  044674         DISTRIBUTORE LOCALE FROSINONE   FR   50000
       categoria serie  

In [15]:
pages[1]

Unnamed: 0_level_0,categoria,serie,numero,luogo,prov,premio
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
10,2,Q,330398,FOGGIA,FG,100000
11,2,L,320913,ISPICA,RG,100000
12,2,D,52288,DISTRIBUTORE LOCALE GREVE IN CHIANTI,FI,100000
13,2,I,265285,ROMA,RM,100000
14,2,B,431066,SPINEA,VE,100000
15,2,B,475642,OMEGNA,VB,100000
16,2,I,479949,MONTEPULCIANO,SI,100000
17,2,I,336915,IVREA,TO,100000
18,2,E,322777,BORGOSESIA,VC,100000


In [16]:
# df = winners
df = winners.sort_values(by='categoria')

rows = 10
indexes = [i for i in range(0, len(winners), rows)]
# print(indexes)

chunks = [(i, i+rows-1) for i in indexes]
# print(chunks)


splitdf = [df.loc[i: i1] for i, i1 in chunks
]
splitdf[1]


Unnamed: 0_level_0,categoria,serie,numero,luogo,prov,premio
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


In [17]:
df = winners.sort_values(by='categoria')
# df = winners
df

Unnamed: 0_level_0,categoria,serie,numero,luogo,prov,premio
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1,T,173756,SOMAGLIA,LO,5000000
1,1,T,378442,PESARO,PU,2500000
2,1,G,330068,PALERMO,PA,2000000
3,1,G,173817,TORINO,TO,1500000
4,1,S,185025,DOLO,VE,1000000
...,...,...,...,...,...,...
149,4,R,12826,DISTRIBUTORE LOCALE ROMA,RM,20000
150,4,R,296645,SAN VITTORE OLONA,MI,20000
151,4,T,293112,MILANO,MI,20000
141,4,R,211535,ROMA,RM,20000


In [18]:
rows = 10
indexes = [i for i in range(0, len(winners), rows)]
chunks = [(i, i+rows-1) for i in indexes]

pages = []
for i, i1 in chunks:
    pages.append(
        (
            i, i1,
            df.iloc[i: i1]
        )
        
    )

pages[0]

(0,
 9,
        categoria serie  numero       luogo prov   premio
 index                                                   
 0              1     T  173756    SOMAGLIA   LO  5000000
 1              1     T  378442      PESARO   PU  2500000
 2              1     G  330068     PALERMO   PA  2000000
 3              1     G  173817      TORINO   TO  1500000
 4              1     S  185025        DOLO   VE  1000000
 19             2     I  336889  VALMADRERA   LC   100000
 20             2     Q  311109   VIDIGULFO   PV   100000
 21             2     O  115657      CERVIA   RA   100000
 22             2     I  459455    SORRENTO   NA   100000)

In [19]:
df.iloc[0:9]

Unnamed: 0_level_0,categoria,serie,numero,luogo,prov,premio
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1,T,173756,SOMAGLIA,LO,5000000
1,1,T,378442,PESARO,PU,2500000
2,1,G,330068,PALERMO,PA,2000000
3,1,G,173817,TORINO,TO,1500000
4,1,S,185025,DOLO,VE,1000000
19,2,I,336889,VALMADRERA,LC,100000
20,2,Q,311109,VIDIGULFO,PV,100000
21,2,O,115657,CERVIA,RA,100000
22,2,I,459455,SORRENTO,,100000
