Ingest Data

In [7]:
import pandas as pd

# Load the four source workbooks, one DataFrame per sheet file.
transactions = pd.read_excel(r'./transactions.xlsx')
collaterals = pd.read_excel(r'./collaterals.xlsx')
customers = pd.read_excel(r'./customers.xlsx')
sectors = pd.read_excel(r'./sectors.xlsx')

# Replace the datetime columns with 'YYYY-MM-DD' strings.
# The `.dt` accessor only works on datetime-like columns, so this relies on
# read_excel having parsed these columns as dates.
for frame, date_columns in (
    (transactions, ("REF_DATE", "END_DATE")),
    (collaterals, ("REF_DATE",)),
    (customers, ("REF_DATE",)),
):
    for column in date_columns:
        frame[column] = frame[column].dt.strftime('%Y-%m-%d')

In [8]:
# import pandas as pd

# transactions = pd.read_csv(r'./transactions.csv', sep=';')
# collaterals = pd.read_csv(r'./collateral.csv', sep=';')
# customers = pd.read_csv(r'./customers.csv', sep=';')
# sectors = pd.read_csv(r'./sectors.csv', sep=';')

# transactions["REF_DATE"] = pd.to_datetime(transactions["REF_DATE"], format = '%Y-%m-%d', errors = 'coerce')
# transactions["END_DATE"] = pd.to_datetime(transactions["END_DATE"], format = '%Y-%m-%d', errors = 'coerce')
# transactions["REF_DATE"] = transactions["REF_DATE"].dt.strftime('%Y-%m-%d') 
# transactions["END_DATE"] = transactions["END_DATE"].dt.strftime('%Y-%m-%d') 

# collaterals["REF_DATE"] = pd.to_datetime(collaterals["REF_DATE"], format = '%Y-%m-%d', errors = 'coerce')
# collaterals["REF_DATE"] = collaterals["REF_DATE"].dt.strftime('%Y-%m-%d') 

# customers["REF_DATE"] = pd.to_datetime(customers["REF_DATE"], format = '%Y-%m-%d', errors = 'coerce')
# customers["REF_DATE"] = customers["REF_DATE"].dt.strftime('%Y-%m-%d')

# transactions.to_sql('transactions', conn, if_exists='append', index = False)
# collaterals.to_sql('collaterals', conn, if_exists='append', index = False)
# customers.to_sql('customers', conn, if_exists='append', index = False)
# sectors.to_sql('sectors', conn, if_exists='append', index = False)

Database preparation (SQLite)

In [9]:
# DDL for the four tables. Every statement is CREATE TABLE IF NOT EXISTS, so
# a table that already exists in the database file is left as it is.
# None of the tables declares a primary key or a foreign key.

# One row per loan; carries PARTNER_ID and COLL_ID columns that also appear
# in the customers and collaterals tables.
TRANSACTIONS_DDL = """CREATE TABLE IF NOT EXISTS transactions (
            REF_DATE DATE NOT NULL,
            LOAN_ID TEXT NOT NULL,
            PARTNER_ID TEXT NOT NULL,
            COLL_ID TEXT,
            COLL_TYPE TEXT NOT NULL,
            PROCESSING TEXT NOT NULL,   
            CURRENCY TEXT,         
            REM_MTR INT NOT NULL,
            ORIG_MTR INT NOT NULL,
            END_DATE DATE,
            PRODUCT TEXT,
            EXPOSURE REAL,
            EAD REAL,
            LGD REAL,
            EL REAL,
            RWA REAL
        );"""

# Collateral value per REF_DATE / COLL_ID.
COLLATERALS_DDL = """CREATE TABLE IF NOT EXISTS collaterals (
            REF_DATE DATE NOT NULL, 
            COLL_ID TEXT NOT NULL,    
            MKT_VALUE REAL NOT NULL
        );"""

# Customer attributes per REF_DATE / PARTNER_ID, including the NACE code
# that also appears in the sectors table.
CUSTOMERS_DDL = """CREATE TABLE IF NOT EXISTS customers (
            REF_DATE DATE NOT NULL,
            PARTNER_ID TEXT NOT NULL,
            STATUS TEXT NOT NULL,
            PD REAL NOT NULL,
            COUNTRY TEXT,
            RATING_MODEL TEXT NOT NULL,
            NACE TEXT NOT NULL,
            COMPANY_SIZE REAL
        );"""

# Lookup from NACE code to sector name.
SECTORS_DDL = """CREATE TABLE IF NOT EXISTS sectors (
            NACE TEXT,
            SECTOR TEXT NOT NULL
        );"""

# Order is kept as transactions, collaterals, customers, sectors.
sql_statements = [
    TRANSACTIONS_DDL,
    COLLATERALS_DDL,
    CUSTOMERS_DDL,
    SECTORS_DDL,
]

In [10]:
import sqlite3

# Open the SQLite database file. The detect_types flags switch on sqlite3's
# type detection from declared column types and from column names.
conn = sqlite3.connect(
    'credit-risk.db',
    detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES,
)
cursor = conn.cursor()

# Run the DDL statements in list order, then commit.
for ddl in sql_statements:
    cursor.execute(ddl)

conn.commit()

In [11]:
# Write each DataFrame into the table of the same name, without the index.
# NOTE(review): if_exists='append' adds to whatever rows the table already
# holds, so executing this cell again against the same database file inserts
# the same rows a second time.
transactions.to_sql(name='transactions', con=conn, if_exists='append', index=False)
collaterals.to_sql(name='collaterals', con=conn, if_exists='append', index=False)
customers.to_sql(name='customers', con=conn, if_exists='append', index=False)
# Last expression of the cell: its return value is what the cell displays.
sectors.to_sql(name='sectors', con=conn, if_exists='append', index=False)

823

In [12]:
# Commit first, then release the cursor and finally the connection.
conn.commit()
for handle in (cursor, conn):
    handle.close()

In [13]:
from sqlite3 import connect
from langchain_community.utilities import SQLDatabase

# Wrap the SQLite file in LangChain's SQLDatabase helper, asking for two
# sample rows per table in the generated table description.
db = SQLDatabase.from_uri(
    "sqlite:///credit-risk.db",
    sample_rows_in_table_info=2,
)

# Print the table description as plain text.
table_description = db.table_info
print(table_description)


CREATE TABLE collaterals (
	"REF_DATE" DATE NOT NULL, 
	"COLL_ID" TEXT NOT NULL, 
	"MKT_VALUE" REAL NOT NULL
)

/*
2 rows from collaterals table:
REF_DATE	COLL_ID	MKT_VALUE
2023-09-29	oehym174le7795wipiwu	7123.64
2023-09-29	cojvx420gk3315hmyhyc	2000.0
*/


CREATE TABLE customers (
	"REF_DATE" DATE NOT NULL, 
	"PARTNER_ID" TEXT NOT NULL, 
	"STATUS" TEXT NOT NULL, 
	"PD" REAL NOT NULL, 
	"COUNTRY" TEXT, 
	"RATING_MODEL" TEXT NOT NULL, 
	"NACE" TEXT NOT NULL, 
	"COMPANY_SIZE" REAL
)

/*
2 rows from customers table:
REF_DATE	PARTNER_ID	STATUS	PD	COUNTRY	RATING_MODEL	NACE	COMPANY_SIZE
2023-09-29	qvajo192sx6083	D	1.0	BG	FKBG21	4649	2.85765402
2023-09-29	jvxeh774ov6289	D	1.0	BG	FKBG20	4711	2.31925543
*/


CREATE TABLE sectors (
	"NACE" TEXT, 
	"SECTOR" TEXT NOT NULL
)

/*
2 rows from sectors table:
NACE	SECTOR
100	Agriculture, Forestry
110	Agriculture, Forestry
*/


CREATE TABLE transactions (
	"REF_DATE" DATE NOT NULL, 
	"LOAN_ID" TEXT NOT NULL, 
	"PARTNER_ID" TEXT NOT NULL, 
	"COLL_ID" TEX

Checks

In [14]:
import sqlite3
import pandas as pd

# pd.options.display.float_format = '{:.0f}'.format
# pd.options.display.float_format = '{:,}'.format
# No cap on the number of DataFrame columns shown when a frame is displayed.
pd.set_option("display.max_columns", None)

# Connection and cursor used by the check queries in the following cells.
conn = sqlite3.connect(database='credit-risk.db')
c = conn.cursor()

In [15]:
# Ask SQLite's catalogue (sqlite_master) for the names of all tables.
list_tables = """
    SELECT name FROM sqlite_master
    WHERE type='table';
"""
# Cursor.execute returns the cursor itself, so the fetch can be chained.
print(c.execute(list_tables).fetchall())

[('transactions',), ('collaterals',), ('customers',), ('sectors',)]


In [16]:
# Pull the whole transactions table into a DataFrame and preview it.
table = pd.read_sql(
    sql='''
            SELECT * FROM transactions
            ''',
    con=conn,
)
table.head()

Unnamed: 0,REF_DATE,LOAN_ID,PARTNER_ID,COLL_ID,COLL_TYPE,PROCESSING,CURRENCY,REM_MTR,ORIG_MTR,END_DATE,PRODUCT,EXPOSURE,EAD,LGD,EL,RWA
0,2023-09-29,8798820044shar41,pdzti292tt5069,,UNS,CE,BGN,671,741,2025-07-31,Credit Cards,1827.65,1827.65,0.691078,490.08,7736.18
1,2023-09-29,9668394468ofqc94,pdzti292tt5069,,UNS,OB,BGN,671,741,2025-07-31,Credit Cards,3352.67,1247.25,0.691078,334.45,5279.43
2,2023-09-29,6697399302ozuo33,tfxwh972by3061,,UNS,CE,BGN,123,365,2024-01-30,Overdraft,0.5,0.5,0.538128,0.27,0.42
3,2023-09-29,6865171360ufsw44,tfxwh972by3061,,UNS,OB,BGN,123,365,2024-01-30,Overdraft,0.0,0.0,0.538128,0.0,0.01
4,2023-09-29,2542071158vsau86,divst614td7275,,UNS,CE,BGN,274,365,2024-06-29,Overdraft,3.69,3.69,0.505866,1.87,1.28


In [None]:
# Print the head() preview of `table` as a Markdown-formatted text table.
preview_markdown = table.head().to_markdown()
print(preview_markdown)

|    | DATE       | LOAN_ID          | DATE_PARTNER         | PARTNER_ID     | DATE_COLLATERAL   | COLLATERAL_ID   | EXPOSURE_TYPE   | COLLATERAL_TYPE   |   REMAINING_MATURITY_DAYS | CURRENCY   |   ORIGINAL_MATURITY_DAYS | END_DATE   | PRODUCT                     |   EXPOSURE |     EAD |      LGD |     RWA |   EXPECTED_LOSS |
|---:|:-----------|:-----------------|:---------------------|:---------------|:------------------|:----------------|:----------------|:------------------|--------------------------:|:-----------|-------------------------:|:-----------|:----------------------------|-----------:|--------:|---------:|--------:|----------------:|
|  0 | 2023-09-29 | 8798820044shar41 | 45198_pdzti292tt5069 | pdzti292tt5069 |                   |                 | CREDIT          | UNSECURED         |                       671 | BGN        |                      741 | 2025-07-31 | Credit Cards                |    1827.65 | 1827.65 | 0.691078 | 7736.18 |          490.08 |
|  1 | 2023-09-29 | 9668394468ofqc94 | 45198_pdzti292tt5069 | pdzti292tt5069 |                   |                 | OFF_BALANCE     | UNSECURED         |                       671 | BGN        |                      741 | 2025-07-31 | Credit Cards                |    3352.67 | 1247.25 | 0.691078 | 5279.43 |          334.45 |
|  2 | 2023-09-29 | 6697399302ozuo33 | 45198_tfxwh972by3061 | tfxwh972by3061 |                   |                 | CREDIT          | UNSECURED         |                       123 | BGN        |                      365 | 2024-01-30 | Current accounts corporates |       0.5  |    0.5  | 0.538128 |    0.42 |            0.27 |
|  3 | 2023-09-29 | 6865171360ufsw44 | 45198_tfxwh972by3061 | tfxwh972by3061 |                   |                 | OFF_BALANCE     | UNSECURED         |                       123 | BGN        |                      365 | 2024-01-30 | Current accounts corporates |       0    |    0    | 0.538128 |    0.01 |            0    |
|  4 | 2023-09-29 | 2542071158vsau86 | 45198_divst614td7275 | divst614td7275 |                   |                 | CREDIT          | UNSECURED         |                       274 | BGN        |                      365 | 2024-06-29 | Current accounts corporates |       3.69 |    3.69 | 0.505866 |    1.28 |            1.87 |

In [None]:
# Print the head() preview of `table` as raw HTML markup.
preview_html = table.head().to_html()
print(preview_html)

In [None]:
# One row per distinct PRODUCT value in transactions (GROUP BY de-duplicates).
q = """
    SELECT PRODUCT FROM transactions GROUP BY PRODUCT;
"""
# Cursor.execute returns the cursor itself, so the fetch can be chained.
print(c.execute(q).fetchall())

In [None]:
# Total EXPOSURE for snapshot dates in June 2023, split into three buckets:
# 'Consumer loan', 'Real estate loan', and 'Other' for every remaining product.
#
# The snapshot-date column of `transactions` is REF_DATE; the table has no
# DATE column, so filtering on DATE makes SQLite fail with "no such column".
# REF_DATE is stored as 'YYYY-MM-DD' text, hence the LIKE prefix match.
#
# NOTE(review): the sample rows shown in this notebook are dated 2023-09-29;
# confirm that June 2023 data is loaded, otherwise this returns no rows.
q = """
    SELECT 
		CASE WHEN PRODUCT = 'Consumer loan' THEN 'Consumer loan' 
		WHEN PRODUCT = 'Real estate loan' THEN 'Real estate loan' 
		ELSE 'Other' 
		END AS Category, 
		SUM(EXPOSURE) AS Total_Exposure 
		FROM transactions 
		WHERE REF_DATE LIKE '2023-06%' 
		GROUP BY Category;
"""
c.execute(q)
print(c.fetchall())

In [None]:
# Total EXPOSURE of September 2023 transactions with COLL_TYPE 'RRE' whose
# customer belongs to the 'Consumer Goods' sector.
#
# Table and column names follow the schema created in this notebook:
#   - sector lookup table is `sectors` (column SECTOR), not `industries`
#   - snapshot date is REF_DATE in both `transactions` and `customers`
#   - collateral type column is COLL_TYPE
# REF_DATE is stored as 'YYYY-MM-DD' text, hence the LIKE prefix match.
#
# NOTE(review): the literals 'RRE' and 'Consumer Goods' are kept as written;
# confirm they match the codes actually present in COLL_TYPE and SECTOR.
result = pd.read_sql(
    '''
            SELECT SUM(t.EXPOSURE) AS total_exposure
            FROM transactions t
            JOIN customers c ON t.PARTNER_ID = c.PARTNER_ID
            JOIN sectors s ON c.NACE = s.NACE
            WHERE t.REF_DATE LIKE '2023-09%'
            AND c.REF_DATE LIKE '2023-09%'
            AND t.COLL_TYPE = 'RRE'
            AND s.SECTOR = 'Consumer Goods'
            ''',
    conn,
)
result