In [1]:
from MySQLdb import connect
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [4]:
# Connect to your MySQLdb
#conn = connect(host='localhost', user='root')
conn = connect(host='127.0.0.1', user='root', password = 'root', port=8890)

In [5]:
# Open a cursor to perform database operations
cur = conn.cursor()

The SQL SELECT TOP Clause
- The SELECT TOP clause is used to specify the number of records to return.
- The SELECT TOP clause is useful on large tables with thousands of records. Returning a large number of records can impact performance.

In [6]:
# Select only the first 3 records of the Customers table:
#cur.execute('''SELECT TOP 3 * FROM sqlw3s.Customers;''')
cur.execute('''SELECT * FROM sqlw3s.Customers LIMIT 3''')
df = pd.DataFrame(tuple(t) for t in cur.fetchall())
df.columns = [i[0] for i in cur.description]
df

Unnamed: 0,CustomerID,CustomerName,ContactName,Address,City,PostalCode,Country
0,1,Alfreds Futterkiste,Maria Anders,Obere Str. 57,Berlin,12209,Germany
1,2,Ana Trujillo Emparedados y helados,Ana Trujillo,Avda. de la Constitución 2222,México D.F.,5021,Mexico
2,3,Antonio Moreno Taquería,Antonio Moreno,Mataderos 2312,México D.F.,5023,Mexico


Note: Not all database systems support the SELECT TOP clause. MySQL supports the LIMIT clause to select a limited number of records, while Oracle uses FETCH FIRST n ROWS ONLY and ROWNUM.


>SQL Server / MS Access Syntax:
>
>SELECT TOP number|percent column_name(s)
>FROM table_name
>WHERE condition;

>MySQL Syntax:
>
>SELECT column_name(s)
>FROM table_name
>WHERE condition
>LIMIT number;

>Oracle 12 Syntax:
>
>SELECT column_name(s)
>FROM table_name
>ORDER BY column_name(s)
>FETCH FIRST number ROWS ONLY;

>Older Oracle Syntax:
>
>SELECT column_name(s)
>FROM table_name
>WHERE ROWNUM <= number;

In [7]:
# Moje - funguje i OFFSET jako u postgre
cur.execute('''SELECT * FROM sqlw3s.Customers LIMIT 3 OFFSET 2''')
df = pd.DataFrame(tuple(t) for t in cur.fetchall())
df.columns = [i[0] for i in cur.description]
df

Unnamed: 0,CustomerID,CustomerName,ContactName,Address,City,PostalCode,Country
0,3,Antonio Moreno Taquería,Antonio Moreno,Mataderos 2312,México D.F.,05023,Mexico
1,4,Around the Horn,Thomas Hardy,120 Hanover Sq.,London,WA1 1DP,UK
2,5,Berglunds snabbköp,Christina Berglund,Berguvsvägen 8,Luleå,S-958 22,Sweden


SQL TOP PERCENT Example
- The following SQL statement selects the first 50% of the records from the "Customers" table (for SQL Server/MS Access):

Example:
SELECT TOP 50 PERCENT * FROM Customers;

The following SQL statement shows the equivalent example for Oracle:
SELECT * FROM Customers
FETCH FIRST 50 PERCENT ROWS ONLY;

In [8]:
# Moje - v MySQL nejde limit percent, musi se obejit:
cur.execute('''SELECT CAST(COUNT(*) * 0.1 AS UNSIGNED) FROM sqlw3s.Customers;''')
df = pd.DataFrame(tuple(t) for t in cur.fetchall())
df.columns = [i[0] for i in cur.description]
df

Unnamed: 0,CAST(COUNT(*) * 0.1 AS UNSIGNED)
0,9


In [9]:
# Moje - v MySQL nejde limit percent, musi se obejit:
cur.execute('''SELECT FLOOR(COUNT(*) * 0.1) FROM sqlw3s.Customers;''')
df = pd.DataFrame(tuple(t) for t in cur.fetchall())
df.columns = [i[0] for i in cur.description]
df

Unnamed: 0,FLOOR(COUNT(*) * 0.1)
0,9


In [10]:
# Moje - v MySQL nejde limit percent, musi se obejit, ale neslo ani
#SELECT *
#FROM table_name.
#ORDER BY id.
#LIMIT (SELECT COUNT(*) * 0.1 FROM table_name);:
cur.execute('''SELECT * FROM sqlw3s.Customers HAVING RAND() > 0.9;''')
df = pd.DataFrame(tuple(t) for t in cur.fetchall())
df.columns = [i[0] for i in cur.description]
df

Unnamed: 0,CustomerID,CustomerName,ContactName,Address,City,PostalCode,Country
0,5,Berglunds snabbköp,Christina Berglund,Berguvsvägen 8,Luleå,S-958 22,Sweden
1,12,Cactus Comidas para llevar,Patricio Simpson,Cerrito 333,Buenos Aires,1010,Argentina
2,27,Franchi S.p.A.,Paolo Accorti,Via Monte Bianco 34,Torino,10100,Italy
3,34,Hanari Carnes,Mario Pontes,"Rua do Paço, 67",Rio de Janeiro,05454-876,Brazil
4,35,HILARIÓN-Abastos,Carlos Hernández,Carrera 22 con Ave. Carlos Soublette #8-35,San Cristóbal,5022,Venezuela
5,42,Laughing Bacchus Wine Cellars,Yoshi Tannamuri,1900 Oak St.,Vancouver,V3F 2K1,Canada
6,53,North/South,Simon Crowther,South House 300 Queensbridge,London,SW7 1RZ,UK
7,64,Rancho grande,Sergio Gutiérrez,Av. del Libertador 900,Buenos Aires,1010,Argentina


ADD a WHERE CLAUSE
- The following SQL statement selects the first three records from the "Customers" table, where the country is "Germany" (for SQL Server/MS Access):

Example:
SELECT TOP 3 * FROM Customers
WHERE Country='Germany';

The following SQL statement shows the equivalent example for MySQL:
SELECT * FROM Customers
WHERE Country='Germany'
LIMIT 3;

The following SQL statement shows the equivalent example for Oracle:
SELECT * FROM Customers
WHERE Country='Germany'
FETCH FIRST 3 ROWS ONLY;

In [11]:
# The following SQL statement shows the equivalent example for MySQL:
cur.execute('''SELECT * FROM sqlw3s.Customers WHERE Country='Germany' LIMIT 3;''')
df = pd.DataFrame(tuple(t) for t in cur.fetchall())
df.columns = [i[0] for i in cur.description]
df

Unnamed: 0,CustomerID,CustomerName,ContactName,Address,City,PostalCode,Country
0,1,Alfreds Futterkiste,Maria Anders,Obere Str. 57,Berlin,12209,Germany
1,6,Blauer See Delikatessen,Hanna Moos,Forsterstr. 57,Mannheim,68306,Germany
2,17,Drachenblut Delikatessend,Sven Ottlieb,Walserweg 21,Aachen,52066,Germany


ADD the ORDER BY Keyword
- Add the ORDER BY keyword when you want to sort the result, and return the first 3 records of the sorted result.

For SQL Server and MS Access:

Sort the result reverse alphabetically by CustomerName, and return the first 3 records:
SELECT TOP 3 * FROM Customers
ORDER BY CustomerName DESC;


The following SQL statement shows the equivalent example for MySQL:
SELECT * FROM Customers
ORDER BY CustomerName DESC
LIMIT 3;

The following SQL statement shows the equivalent example for Oracle:
SELECT * FROM Customers
ORDER BY CustomerName DESC
FETCH FIRST 3 ROWS ONLY;

In [12]:
# The following SQL statement shows the equivalent example for MySQL:
cur.execute('''SELECT * FROM sqlw3s.Customers ORDER BY CustomerName DESC LIMIT 3;''')
df = pd.DataFrame(tuple(t) for t in cur.fetchall())
df.columns = [i[0] for i in cur.description]
df

Unnamed: 0,CustomerID,CustomerName,ContactName,Address,City,PostalCode,Country
0,91,Wolski,Zbyszek,ul. Filtrowa 68,Walla,01-012,Poland
1,90,Wilman Kala,Matti Karttunen,Keskuskatu 45,Helsinki,21240,Finland
2,89,White Clover Markets,Karl Jablonski,305 - 14th Ave. S. Suite 3B,Seattle,98128,USA


In [18]:
#nejde, zkus nejak
cur.execute('''SELECT * FROM sqlw3s.Customers ORDER BY CustomerName LIMIT FLOOR((SELECT COUNT(*) FROM sqlw3s.Customers)*0.1);''')
df = pd.DataFrame(tuple(t) for t in cur.fetchall())
df.columns = [i[0] for i in cur.description]
df

OperationalError: (1327, 'Undeclared variable: FLOOR')