## Bibliotecas Necessárias

In [1]:
import sqlite3

## Script de Criação da Tabela e DB

In [47]:
# OPENS A CONNECTION TO THE EXISTING SQLite3 DATABASE; IF IT DOES NOT EXIST, A NEW DB IS CREATED

def create_connection(db_file):
    """Open a SQLite connection to ``db_file``.

    Args:
        db_file: Database location handed straight to ``sqlite3.connect``.

    Returns:
        The object returned by ``sqlite3.connect`` on success. If connecting
        raises any ``Exception``, the exception is printed and ``None`` is
        returned instead, so callers must check for ``None``.
    """
    try:
        return sqlite3.connect(db_file)
    except Exception as error:
        # Best-effort by design: report the problem and signal failure with None.
        print(error)
        return None

# Open (or create) the scraping database and make sure the `pastebin` table exists.
conn = create_connection('scraping.db')
if conn is None:
    # create_connection prints the error and returns None on failure; stop here
    # with a clear message instead of an AttributeError on `conn.cursor()`.
    raise RuntimeError("could not open SQLite database 'scraping.db'")

try:
    cur = conn.cursor()
    # IF NOT EXISTS keeps this cell safe to re-run against an existing database.
    cur.execute('''CREATE TABLE IF NOT EXISTS pastebin 
                (id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
                page_link TEXT NOT NULL, word_count INT NOT NULL,
                word_diff_count INT NOT NULL, syntax TEXT NOT NULL,
                pickup_time TEXT NOT NULL);''')
    conn.commit()
except sqlite3.DatabaseError:
    conn.rollback()
    # Bare `raise` re-raises the active exception with its original traceback.
    raise
finally:
    # The connection is closed whether the table creation succeeded or failed.
    conn.close()

## Visualizando os Dados da Tabela

In [2]:
# SELECTS THE FIRST 'N' ROWS OF THE TABLE

def select_head(conn, table, n=5):
    """Print up to ``n`` rows of ``table``, one row tuple per line.

    Args:
        conn: Open connection object providing ``cursor()``.
        table: Table name; it is formatted into the SQL text as-is.
        n: Maximum number of rows to print (default 5).

    Returns:
        None. A ``sqlite3.DatabaseError`` raised by the query is printed
        instead of propagated, and no rows are printed in that case.
    """
    cur = conn.cursor()
    try:
        # NOTE(security): identifiers cannot be bound as SQL parameters, so
        # `table` is still interpolated -- only pass trusted table names.
        # The row limit is a value, so it is bound instead of formatted in.
        cur.execute(f'SELECT * FROM {table} LIMIT ?', (n,))
    except sqlite3.DatabaseError as e:
        print(e)
        # Nothing was selected; do not fetch from a cursor with no result set.
        return
    for row in cur.fetchall():
        print(row)

In [11]:
# Preview the first rows of the `pastebin` table.
conn = create_connection('scraping.db')

try:
    select_head(conn, 'pastebin', n = 10) # n is optional
finally:
    # Close the connection even if the preview raises; create_connection
    # returns None when it could not connect, so guard the close.
    if conn is not None:
        conn.close()

(1, 'https://pastebin.com/GhXcvGiJ', 8366, 609, 'Python', '2022-06-17 01:53:04')
(2, 'https://pastebin.com/gKUymEWP', 2893, 516, 'Python', '2022-06-17 01:53:06')
(3, 'https://pastebin.com/nVmcbXPZ', 8499, 698, 'Python', '2022-06-17 01:53:08')
(4, 'https://pastebin.com/8ZHLNL3r', 2374, 493, 'Python', '2022-06-17 01:53:09')
(5, 'https://pastebin.com/A4PychBN', 2498, 494, 'Python', '2022-06-17 01:53:10')
(6, 'https://pastebin.com/Vy4zRFDg', 2775, 484, 'Python', '2022-06-17 01:53:12')
(7, 'https://pastebin.com/pmJgeT2x', 2276, 471, 'Python', '2022-06-17 01:53:13')
(8, 'https://pastebin.com/w263dtGx', 2336, 478, 'Python', '2022-06-17 01:53:14')
(9, 'https://pastebin.com/5zq9KAPK', 2594, 500, 'Python', '2022-06-17 01:53:15')
(10, 'https://pastebin.com/46pAxJuV', 2106, 486, 'Python', '2022-06-17 01:53:17')


## Testando a Obtenção dos Dados da Tabela do DB

In [12]:
# FETCHING DATA FROM THE DATABASE

def get_data(conn, table):
    """Return every row of ``table`` as fetched by the cursor.

    Args:
        conn: Open connection object providing ``cursor()``.
        table: Table name; it is formatted into the SQL text as-is.

    Returns:
        The result of ``fetchall()`` on the cursor. A ``sqlite3.DatabaseError``
        raised by the query is printed rather than propagated, and the fetch
        is still performed afterwards.
    """
    query = f'SELECT * FROM {table}'
    cursor = conn.cursor()
    try:
        cursor.execute(query)
    except sqlite3.DatabaseError as error:
        print(error)
    return cursor.fetchall()

In [27]:
# Load every row of the `pastebin` table into `data`.
conn = create_connection('scraping.db')

try:
    data = get_data(conn, 'pastebin')
finally:
    # Close the connection even if the fetch raises; create_connection
    # returns None when it could not connect, so guard the close.
    if conn is not None:
        conn.close()

In [28]:
# Check which container type get_data handed back for the fetched rows.
type(data)

list

In [29]:
# Print the fetched rows to inspect their structure.
# NOTE(review): this prints every fetched row at once; a slice such as
# data[:5] may be enough for inspection -- confirm before changing.
print(data)

[(1, 'https://pastebin.com/GhXcvGiJ', 8366, 609, 'Python', '2022-06-17 01:53:04'), (2, 'https://pastebin.com/gKUymEWP', 2893, 516, 'Python', '2022-06-17 01:53:06'), (3, 'https://pastebin.com/nVmcbXPZ', 8499, 698, 'Python', '2022-06-17 01:53:08'), (4, 'https://pastebin.com/8ZHLNL3r', 2374, 493, 'Python', '2022-06-17 01:53:09'), (5, 'https://pastebin.com/A4PychBN', 2498, 494, 'Python', '2022-06-17 01:53:10'), (6, 'https://pastebin.com/Vy4zRFDg', 2775, 484, 'Python', '2022-06-17 01:53:12'), (7, 'https://pastebin.com/pmJgeT2x', 2276, 471, 'Python', '2022-06-17 01:53:13'), (8, 'https://pastebin.com/w263dtGx', 2336, 478, 'Python', '2022-06-17 01:53:14'), (9, 'https://pastebin.com/5zq9KAPK', 2594, 500, 'Python', '2022-06-17 01:53:15'), (10, 'https://pastebin.com/46pAxJuV', 2106, 486, 'Python', '2022-06-17 01:53:17'), (11, 'https://pastebin.com/8j17Vn8p', 2159, 499, 'Python', '2022-06-17 01:53:18'), (12, 'https://pastebin.com/04DFuCL2', 3408, 519, 'Python', '2022-06-17 01:53:19'), (13, 'https:

## Transformando a Lista em DF do Pandas

In [35]:
import pandas as pd

# Wrap the fetched rows in a DataFrame, giving a label to each of the six
# positions of a row tuple.
df = pd.DataFrame(
    data,
    columns=[
        'id',
        'link page',
        'word count',
        'different words',
        'syntax',
        'timestamp',
    ],
)

# A bare last expression lets the notebook render the frame.
df

Unnamed: 0,id,link page,word count,different words,syntax,timestamp
0,1,https://pastebin.com/GhXcvGiJ,8366,609,Python,2022-06-17 01:53:04
1,2,https://pastebin.com/gKUymEWP,2893,516,Python,2022-06-17 01:53:06
2,3,https://pastebin.com/nVmcbXPZ,8499,698,Python,2022-06-17 01:53:08
3,4,https://pastebin.com/8ZHLNL3r,2374,493,Python,2022-06-17 01:53:09
4,5,https://pastebin.com/A4PychBN,2498,494,Python,2022-06-17 01:53:10
...,...,...,...,...,...,...
345,346,https://pastebin.com/s5nRabz5,3538,534,Lua,2022-06-17 02:01:14
346,347,https://pastebin.com/GtrdYhkS,4761,580,Lua,2022-06-17 02:01:15
347,348,https://pastebin.com/g8gaSN7f,5094,542,Lua,2022-06-17 02:01:18
348,349,https://pastebin.com/dwe0b0iN,9624,596,Lua,2022-06-17 02:01:20
