# Заливка данных SQLite3

#### Импорт библиотек

In [1]:
import pandas as pd
import numpy as np
import sqlite3

##### Проверка версии sqlite3 (должна быть больше, чем 3.25)

In [2]:
sqlite3.sqlite_version

'3.31.1'

#### Подключение к БД

In [3]:
con = sqlite3.connect('db')

#### Чтение данных

https://drive.google.com/file/d/1cUYNwlHjUZ6gZZMSFBdX0NCnvgRTprsQ/view?usp=sharing

In [4]:
! gdown --id 1cUYNwlHjUZ6gZZMSFBdX0NCnvgRTprsQ

Downloading...
From: https://drive.google.com/uc?id=1cUYNwlHjUZ6gZZMSFBdX0NCnvgRTprsQ
To: /content/german_credit_augmented.csv
100% 69.6k/69.6k [00:00<00:00, 76.3MB/s]


In [5]:
df = pd.read_csv('german_credit_augmented.csv')

In [6]:
df.head()

Unnamed: 0,age,sex,job,housing,saving_accounts,checking_account,credit_amount,duration,purpose,default,contract_dt,client_id
0,33,male,2,own,,,3074,9,radio/TV,0,2008-06-29 18:52:00,210
1,43,male,1,own,little,little,1344,12,car,0,2007-05-20 18:30:19,929
2,52,male,2,own,quite rich,,936,9,education,0,2008-04-27 08:23:07,200
3,35,female,3,own,little,,1393,11,car,0,2007-05-06 10:58:22,45
4,28,male,2,own,little,,776,12,radio/TV,0,2007-07-21 13:22:14,358


#### Форматирование даты из csv

In [7]:
# Parse the contract timestamps read from the CSV into datetime values.
df['contract_dt'] = pd.to_datetime(df['contract_dt'], format='%Y-%m-%d %H:%M:%S')

In [8]:
df.dtypes

age                          int64
sex                         object
job                          int64
housing                     object
saving_accounts             object
checking_account            object
credit_amount                int64
duration                     int64
purpose                     object
default                      int64
contract_dt         datetime64[ns]
client_id                    int64
dtype: object

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   age               1000 non-null   int64         
 1   sex               1000 non-null   object        
 2   job               1000 non-null   int64         
 3   housing           1000 non-null   object        
 4   saving_accounts   817 non-null    object        
 5   checking_account  606 non-null    object        
 6   credit_amount     1000 non-null   int64         
 7   duration          1000 non-null   int64         
 8   purpose           1000 non-null   object        
 9   default           1000 non-null   int64         
 10  contract_dt       1000 non-null   datetime64[ns]
 11  client_id         1000 non-null   int64         
dtypes: datetime64[ns](1), int64(6), object(5)
memory usage: 93.9+ KB


#### Загрузка данных в БД

In [10]:
df.to_sql('german_credit', con, index=False, if_exists='replace')

1000

##### Вспомогательная функция для чтения из БД

In [11]:
def select(sql):
    """Run ``sql`` on the module-level connection ``con`` and return the result of ``pd.read_sql``."""
    result = pd.read_sql(sql, con)
    return result

## Селект к таблице

In [12]:
# Query that reads 5 rows from the german_credit table.
sql = "select t.* from german_credit t limit 5"

In [13]:
select(sql)
# Run the query

Unnamed: 0,age,sex,job,housing,saving_accounts,checking_account,credit_amount,duration,purpose,default,contract_dt,client_id
0,33,male,2,own,,,3074,9,radio/TV,0,2008-06-29 18:52:00,210
1,43,male,1,own,little,little,1344,12,car,0,2007-05-20 18:30:19,929
2,52,male,2,own,quite rich,,936,9,education,0,2008-04-27 08:23:07,200
3,35,female,3,own,little,,1393,11,car,0,2007-05-06 10:58:22,45
4,28,male,2,own,little,,776,12,radio/TV,0,2007-07-21 13:22:14,358


In [14]:
t = select(sql)

In [15]:
t

Unnamed: 0,age,sex,job,housing,saving_accounts,checking_account,credit_amount,duration,purpose,default,contract_dt,client_id
0,33,male,2,own,,,3074,9,radio/TV,0,2008-06-29 18:52:00,210
1,43,male,1,own,little,little,1344,12,car,0,2007-05-20 18:30:19,929
2,52,male,2,own,quite rich,,936,9,education,0,2008-04-27 08:23:07,200
3,35,female,3,own,little,,1393,11,car,0,2007-05-06 10:58:22,45
4,28,male,2,own,little,,776,12,radio/TV,0,2007-07-21 13:22:14,358


### Подключение к гугл драйву

In [16]:
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [17]:
! ls '/content/drive/MyDrive/JN/Data/'

german_credit.db


#### Сохранение БД на гугл диске

In [18]:
# Rebinds the global `con` to the database file under the mounted Drive path, so any
# later select() call reads from this file instead of the local 'db' file.
# NOTE(review): the earlier connection to 'db' is not closed before being replaced.
con = sqlite3.connect('/content/drive/MyDrive/JN/Data/german_credit.db')

In [19]:
df.to_sql('german_credit', con, index=False, if_exists='replace')

1000

# Заливка данных POSTGRESQL
### elephantsql.com + POSTGRESQL

Импортируем библиотеку

In [20]:
from sqlalchemy import create_engine
from sqlalchemy import text

1. Создаём бесплатный сервер на elephantsql.com и копируем его URL
2. Создаём подключение (conn) для хранения там БД
3. Вписываем URL по образцу ('postgresql+psycopg2://kcycbfwg:dY5daXDri_sESFSTu8zZKFufD0GN2EBY@balarama.db.elephantsql.com/kcycbfwg') 

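!!! Часть образца до двух слешей (`postgresql+psycopg2://`) не менять: в URL, скопированном с сайта, префикс `postgres://` заменяем на неё, а всё остальное вставляем как есть !!!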

postgres://bjmfiovz:zW0g7vZ8iC29JOykwyp-qvjWAa2_2WSc@balarama.db.elephantsql.com/bjmfiovz

#### Создаём подключение к elephantsql

In [21]:
# NOTE(review): the database user name and password are embedded in this URL in plain text;
# consider reading the URL from an environment variable or a secrets store instead.
engine = create_engine('postgresql+psycopg2://bjmfiovz:zW0g7vZ8iC29JOykwyp-qvjWAa2_2WSc@balarama.db.elephantsql.com/bjmfiovz')

In [22]:
conn = engine.connect()

postgres://kcycbfwg:dY5daXDri_sESFSTu8zZKFufD0GN2EBY@balarama.db.elephantsql.com/kcycbfwg

In [23]:
df.head()

Unnamed: 0,age,sex,job,housing,saving_accounts,checking_account,credit_amount,duration,purpose,default,contract_dt,client_id
0,33,male,2,own,,,3074,9,radio/TV,0,2008-06-29 18:52:00,210
1,43,male,1,own,little,little,1344,12,car,0,2007-05-20 18:30:19,929
2,52,male,2,own,quite rich,,936,9,education,0,2008-04-27 08:23:07,200
3,35,female,3,own,little,,1393,11,car,0,2007-05-06 10:58:22,45
4,28,male,2,own,little,,776,12,radio/TV,0,2007-07-21 13:22:14,358


In [24]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   age               1000 non-null   int64         
 1   sex               1000 non-null   object        
 2   job               1000 non-null   int64         
 3   housing           1000 non-null   object        
 4   saving_accounts   817 non-null    object        
 5   checking_account  606 non-null    object        
 6   credit_amount     1000 non-null   int64         
 7   duration          1000 non-null   int64         
 8   purpose           1000 non-null   object        
 9   default           1000 non-null   int64         
 10  contract_dt       1000 non-null   datetime64[ns]
 11  client_id         1000 non-null   int64         
dtypes: datetime64[ns](1), int64(6), object(5)
memory usage: 93.9+ KB


In [25]:
df.dtypes

age                          int64
sex                         object
job                          int64
housing                     object
saving_accounts             object
checking_account            object
credit_amount                int64
duration                     int64
purpose                     object
default                      int64
contract_dt         datetime64[ns]
client_id                    int64
dtype: object

#### Для ускорения загрузки БД в Postgresql ↓

In [26]:
import csv
from io import StringIO

from sqlalchemy import create_engine

def _quote_ident(identifier):
    """Return ``identifier`` as a double-quoted SQL identifier (embedded quotes doubled)."""
    return '"{}"'.format(str(identifier).replace('"', '""'))


def psql_insert_copy(table, conn, keys, data_iter):
    """Bulk-insert rows with ``COPY ... FROM STDIN WITH CSV``.

    Intended to be passed as ``method=`` to ``DataFrame.to_sql``.

    Parameters
    ----------
    table : object with ``name`` and ``schema`` attributes
        Target table; ``schema`` may be empty/None for an unqualified name.
    conn : object with a ``connection`` attribute
        ``conn.connection`` must provide ``cursor()`` usable as a context
        manager, and the cursor must implement ``copy_expert(sql=, file=)``.
    keys : iterable
        Column names, in the same order as the values in each row.
    data_iter : iterable of row sequences
        Rows to write.

    Notes
    -----
    Schema, table and column names are always double-quoted, so mixed-case
    names, names containing spaces and reserved words are passed to COPY
    exactly as given instead of being case-folded or mis-parsed.
    """
    # gets a DBAPI connection that can provide a cursor
    dbapi_conn = conn.connection
    with dbapi_conn.cursor() as cur:
        # Serialize all rows as CSV into an in-memory buffer for COPY to read.
        s_buf = StringIO()
        csv.writer(s_buf).writerows(data_iter)
        s_buf.seek(0)

        columns = ', '.join(_quote_ident(k) for k in keys)
        if table.schema:
            table_name = '{}.{}'.format(
                _quote_ident(table.schema), _quote_ident(table.name))
        else:
            table_name = _quote_ident(table.name)

        sql = 'COPY {} ({}) FROM STDIN WITH CSV'.format(table_name, columns)
        cur.copy_expert(sql=sql, file=s_buf)

Перед полной заливкой загружаем только часть данных, чтобы проверить, что всё в порядке

(В данном случае 50 случайных строк)

In [27]:
# Trial load: write a random sample of 50 rows using the COPY-based insert method.
df.sample(50).to_sql(
    name='german_credit',
    con=engine,
    index=False,
    if_exists='replace',
    method=psql_insert_copy,
)

Если после проверки всё ок, то можно заливать

In [28]:
# Full load: replace the german_credit table with the whole DataFrame.
df.to_sql(
    name='german_credit',
    con=engine,
    index=False,
    if_exists='replace',
    method=psql_insert_copy,
)

## Селект к таблице

In [29]:
def select_conn(query):
    """Run ``query`` on the module-level connection ``conn`` and return the result of ``pd.read_sql``."""
    result = pd.read_sql(query, conn)
    return result

In [30]:
query = text('''select * from german_credit t''')

In [31]:
df1 = pd.read_sql_query(query, conn)

In [32]:
df1

Unnamed: 0,age,sex,job,housing,saving_accounts,checking_account,credit_amount,duration,purpose,default,contract_dt,client_id
0,43,male,1,own,little,,1533,18,furniture/equipment,1,2008-06-09 20:14:01,948
1,38,male,1,own,little,moderate,1308,15,repairs,0,2007-08-12 03:28:06,641
2,24,male,2,own,moderate,moderate,6560,48,car,1,2007-07-11 18:49:28,706
3,30,male,3,own,quite rich,,2333,30,radio/TV,0,2007-05-05 01:25:39,40
4,28,female,2,own,little,little,3660,24,radio/TV,0,2008-01-26 12:52:50,630
5,56,male,2,own,little,,618,12,radio/TV,0,2007-06-16 17:42:15,90
6,20,female,2,own,rich,moderate,1577,11,furniture/equipment,0,2007-07-03 09:23:24,167
7,37,female,2,own,little,moderate,3612,18,furniture/equipment,0,2007-12-31 02:58:16,537
8,31,male,2,own,,rich,3148,24,radio/TV,0,2008-04-06 05:34:53,718
9,28,male,2,own,little,,9572,36,business,1,2008-01-24 23:02:24,180


In [33]:
df2 = select_conn(query)

In [34]:
df2

Unnamed: 0,age,sex,job,housing,saving_accounts,checking_account,credit_amount,duration,purpose,default,contract_dt,client_id
0,43,male,1,own,little,,1533,18,furniture/equipment,1,2008-06-09 20:14:01,948
1,38,male,1,own,little,moderate,1308,15,repairs,0,2007-08-12 03:28:06,641
2,24,male,2,own,moderate,moderate,6560,48,car,1,2007-07-11 18:49:28,706
3,30,male,3,own,quite rich,,2333,30,radio/TV,0,2007-05-05 01:25:39,40
4,28,female,2,own,little,little,3660,24,radio/TV,0,2008-01-26 12:52:50,630
5,56,male,2,own,little,,618,12,radio/TV,0,2007-06-16 17:42:15,90
6,20,female,2,own,rich,moderate,1577,11,furniture/equipment,0,2007-07-03 09:23:24,167
7,37,female,2,own,little,moderate,3612,18,furniture/equipment,0,2007-12-31 02:58:16,537
8,31,male,2,own,,rich,3148,24,radio/TV,0,2008-04-06 05:34:53,718
9,28,male,2,own,little,,9572,36,business,1,2008-01-24 23:02:24,180


In [35]:
df3 = select_conn(query)

In [36]:
df3

Unnamed: 0,age,sex,job,housing,saving_accounts,checking_account,credit_amount,duration,purpose,default,contract_dt,client_id
0,43,male,1,own,little,,1533,18,furniture/equipment,1,2008-06-09 20:14:01,948
1,38,male,1,own,little,moderate,1308,15,repairs,0,2007-08-12 03:28:06,641
2,24,male,2,own,moderate,moderate,6560,48,car,1,2007-07-11 18:49:28,706
3,30,male,3,own,quite rich,,2333,30,radio/TV,0,2007-05-05 01:25:39,40
4,28,female,2,own,little,little,3660,24,radio/TV,0,2008-01-26 12:52:50,630
5,56,male,2,own,little,,618,12,radio/TV,0,2007-06-16 17:42:15,90
6,20,female,2,own,rich,moderate,1577,11,furniture/equipment,0,2007-07-03 09:23:24,167
7,37,female,2,own,little,moderate,3612,18,furniture/equipment,0,2007-12-31 02:58:16,537
8,31,male,2,own,,rich,3148,24,radio/TV,0,2008-04-06 05:34:53,718
9,28,male,2,own,little,,9572,36,business,1,2008-01-24 23:02:24,180
