In [1]:
import sqlite3

# Open the SQLite database file that backs this notebook
db_file = 'eiti_data.db'
conn = sqlite3.connect(db_file)

# Obtain a cursor from the connection for issuing SQL statements
cursor = conn.cursor()

In [2]:
import pandas as pd

# Read the consolidated "Part 5 - Company data" CSV into a DataFrame
data_df = pd.read_csv('data/consolidated/Part 5 - Company data.csv')

# Write the DataFrame to the ledger_detailed_data table.
# if_exists='replace' rebuilds the table on every run, so re-executing this cell
# cannot stack duplicate copies of the CSV rows (which 'append' would do); it also
# matches how the other ledger_* tables in this notebook are loaded.
# index=False keeps the DataFrame index out of the table.
data_df.to_sql('ledger_detailed_data', conn, if_exists='replace', index=False)

31882

In [3]:
# Peek at the first five rows of ledger_detailed_data as a quick sanity check
results = cursor.execute('SELECT * FROM ledger_detailed_data LIMIT 5;').fetchall()

# Each fetched row is a tuple; print them one per line
for row in results:
    print(row)

('North Coal Enterprise (NCE)', 'Ministry of Mines and Petroleum (Revenue Department)', 'Royalties', 'Yes', 'Yes', 'EXP 1/2014', 'AFN', '442801100', 'No', 'Not applicable', 'Not applicable', '2018-09-22', 'Afghanistan', 'AFG', 2018, '2017-12-21', '2018-12-20')
('North Coal Enterprise (NCE)', 'Ministry of Mines and Petroleum (Revenue Department)', 'Royalties', 'Yes', 'Yes', 'EXP 1/2014', 'AFN', '386169944', 'No', 'Not applicable', 'Not applicable', '2018-06-24', 'Afghanistan', 'AFG', 2018, '2017-12-21', '2018-12-20')
('North Coal Enterprise (NCE)', 'Ministry of Mines and Petroleum (Revenue Department)', 'Royalties', 'Yes', 'Yes', 'EXP 1/2014', 'AFN', '336623658', 'No', 'Not applicable', 'Not applicable', '2018-04-18', 'Afghanistan', 'AFG', 2018, '2017-12-21', '2018-12-20')
('North Coal Enterprise (NCE)', 'Ministry of Mines and Petroleum (Revenue Department)', 'Royalties', 'Yes', 'Yes', 'EXP 1/2014', 'AFN', '300000000', 'No', 'Not applicable', 'Not applicable', '2018-11-19', 'Afghanistan

In [4]:
# Government revenues: read the CSV, then (re)build its SQLite table
gov_revenue_df = pd.read_csv('data/consolidated/Part 4 - Government revenues.csv')
gov_revenue_df.to_sql(
    name='ledger_government_revenue',
    con=conn,
    if_exists='replace',  # drop and recreate the table if it already exists
    index=False,          # do not write the DataFrame index as a column
)

2319

In [9]:
# Reporting projects: read the CSV, then (re)build its SQLite table
projects_df = pd.read_csv("data/consolidated/Part 3 - Reporting projects' list.csv")
projects_df.to_sql(
    name='ledger_reporting_projects',
    con=conn,
    if_exists='replace',  # drop and recreate the table if it already exists
    index=False,          # do not write the DataFrame index as a column
)

4979

In [10]:
# Reporting government entities: read the CSV, then (re)build its SQLite table
gov_entities_df = pd.read_csv('data/consolidated/Part 3 - Reporting government entities list.csv')
gov_entities_df.to_sql(
    name='ledger_reporting_government_entities',
    con=conn,
    if_exists='replace',  # drop and recreate the table if it already exists
    index=False,          # do not write the DataFrame index as a column
)

547

In [13]:
# Reporting companies: read the CSV, then (re)build its SQLite table
companies_df = pd.read_csv("data/consolidated/Part 3 - Reporting companies' list.csv")
companies_df.to_sql(
    name='ledger_reporting_companies',
    con=conn,
    if_exists='replace',  # drop and recreate the table if it already exists
    index=False,          # do not write the DataFrame index as a column
)

3792

In [19]:
# SQL for a view that joins every row of ledger_detailed_data (alias d) to the
# matching row(s) of ledger_reporting_companies (alias c) on the company name,
# keeping only companies whose type is 'State-owned enterprises & public corporations'.
# Columns taken from c are aliased so they do not collide with the identically
# named Country / ISO Code / Year / Start Date / End Date columns pulled in by d.*.
# NOTE(review): the join key is the company name only; if a company occurs more
# than once in ledger_reporting_companies, each payment row will be repeated once
# per match — confirm this fan-out is intended.
sql_create_view = '''
CREATE VIEW IF NOT EXISTS ledger_full_company_data AS
SELECT 
    d.*,
    c."Company type" AS Company_type,
    c."Company ID number" AS Company_ID_number,
    c.Sector,
    c."Commodities (comma-seperated)" AS Commodities,
    c."Stock exchange listing or company website" AS Stock_exchange_listing_or_company_website,
    c."Audited financial statement (or balance sheet, cash flows, profit/loss statement if unavailable)" AS Audited_financial_statement,
    c."Payments to Governments Report" AS Payments_to_Governments_Report,
    c.Country AS Reporting_Country,
    c."ISO Code" AS Reporting_ISO_Code,
    c.Year AS Reporting_Year,
    c."Start Date" AS Reporting_Start_Date,
    c."End Date" AS Reporting_End_Date
FROM 
    ledger_detailed_data d
JOIN 
    ledger_reporting_companies c ON d.Company = c."Full company name"
WHERE 
    c."Company type" = 'State-owned enterprises & public corporations';
'''

# Run the DDL; because of IF NOT EXISTS this does nothing when a view with this
# name is already present in the database
cursor.execute(sql_create_view)
conn.commit()  # Commit the transaction

In [40]:
# Ask the schema catalogue (sqlite_master) for the name of every table
query = "SELECT name FROM sqlite_master WHERE type='table';"
tables = cursor.execute(query).fetchall()

# Every row is a 1-tuple holding the table name; unpack and print one per line
for (table_name,) in tables:
    print(table_name)

ledger_detailed_data
ledger_government_revenue
ledger_reporting_projects
ledger_reporting_government_entities
ledger_reporting_companies


In [24]:
# Inspect the schema of ledger_reporting_companies: PRAGMA table_info returns
# one row per column of the table
columns = cursor.execute("PRAGMA table_info(ledger_reporting_companies);").fetchall()
for column_info in columns:
    print(column_info)

(0, 'Full company name', 'TEXT', 0, None, 0)
(1, 'Company type', 'TEXT', 0, None, 0)
(2, 'Company ID number', 'TEXT', 0, None, 0)
(3, 'Sector', 'TEXT', 0, None, 0)
(4, 'Commodities (comma-seperated)', 'TEXT', 0, None, 0)
(5, 'Stock exchange listing or company website', 'TEXT', 0, None, 0)
(6, 'Audited financial statement (or balance sheet, cash flows, profit/loss statement if unavailable)', 'TEXT', 0, None, 0)
(7, 'Payments to Governments Report', 'TEXT', 0, None, 0)
(8, 'Country', 'TEXT', 0, None, 0)
(9, 'ISO Code', 'TEXT', 0, None, 0)
(10, 'Year', 'INTEGER', 0, None, 0)
(11, 'Start Date', 'TEXT', 0, None, 0)
(12, 'End Date', 'TEXT', 0, None, 0)


In [25]:
# Inspect the schema of ledger_detailed_data: PRAGMA table_info returns
# one row per column of the table
columns = cursor.execute("PRAGMA table_info(ledger_detailed_data);").fetchall()
for column_info in columns:
    print(column_info)

(0, 'Company', 'TEXT', 0, None, 0)
(1, 'Government entity', 'TEXT', 0, None, 0)
(2, 'Revenue stream name', 'TEXT', 0, None, 0)
(3, 'Levied on project (Y/N)', 'TEXT', 0, None, 0)
(4, 'Reported by project (Y/N)', 'TEXT', 0, None, 0)
(5, 'Project name', 'TEXT', 0, None, 0)
(6, 'Reporting currency', 'TEXT', 0, None, 0)
(7, 'Revenue value', 'TEXT', 0, None, 0)
(8, 'Payment made in-kind (Y/N)', 'TEXT', 0, None, 0)
(9, 'In-kind volume (if applicable)', 'TEXT', 0, None, 0)
(10, 'Unit (if applicable)', 'TEXT', 0, None, 0)
(11, 'Comments', 'TEXT', 0, None, 0)
(12, 'Country', 'TEXT', 0, None, 0)
(13, 'ISO Code', 'TEXT', 0, None, 0)
(14, 'Year', 'INTEGER', 0, None, 0)
(15, 'Start Date', 'TEXT', 0, None, 0)
(16, 'End Date', 'TEXT', 0, None, 0)


In [37]:
# Probe one value to confirm that the quoted, space-containing column name
# "Company type" is recognised; any failure is reported instead of raised
try:
    probe = cursor.execute('SELECT "Company type" FROM ledger_reporting_companies LIMIT 1')
    print("Column found:", probe.fetchone())
except Exception as exc:
    print("Error:", exc)

Column found: ('Private',)


In [38]:
# Diagnostic: build a minimal three-column version of the view to check that the
# join and the quoted column names are accepted before adding the full column list.
sql_create_view = '''
CREATE VIEW IF NOT EXISTS ledger_full_company_data AS
SELECT 
    d.Company,
    c."Full company name",
    c."Company type"
FROM 
    ledger_detailed_data d
JOIN 
    ledger_reporting_companies c ON d.Company = c."Full company name"
WHERE 
    c."Company type" = 'State-owned enterprises & public corporations';
'''

try:
    # Drop any earlier definition first: CREATE VIEW IF NOT EXISTS silently does
    # nothing when the name is already taken, which would leave the old
    # definition in place and make the success message below misleading.
    cursor.execute('DROP VIEW IF EXISTS ledger_full_company_data;')
    cursor.execute(sql_create_view)
    conn.commit()
    print("View created successfully.")
except Exception as e:
    # Report the failure rather than aborting the notebook run
    print("Error creating view:", e)


View created successfully.


In [41]:
# Drop the existing view if it exists, so the definition below always takes effect
sql_drop_view = '''
DROP VIEW IF EXISTS ledger_full_company_data;
'''

# Full view definition: every column of ledger_detailed_data (d) plus the
# descriptive columns of the matching ledger_reporting_companies row (c),
# restricted to state-owned enterprises & public corporations.
# The commodities column is spelled "comma-seperated" in the table itself (see the
# PRAGMA table_info output for ledger_reporting_companies), so that exact spelling
# must be used here; "comma-separated" names a column that does not exist.
sql_create_view = '''
CREATE VIEW IF NOT EXISTS ledger_full_company_data AS
SELECT 
    d.*,
    c."Company type",
    c."Company ID number",
    c.Sector,
    -- Add more columns as needed
    c."Commodities (comma-seperated)" AS Commodities,
    c."Stock exchange listing or company website" AS Stock_exchange_listing_or_company_website,
    c."Audited financial statement (or balance sheet, cash flows, profit/loss statement if unavailable)" AS Audited_financial_statement,
    c."Payments to Governments Report" AS Payments_to_Governments_Report,
    c.Country AS Reporting_Country,
    c."ISO Code" AS Reporting_ISO_Code,
    c.Year AS Reporting_Year,
    c."Start Date" AS Reporting_Start_Date,
    c."End Date" AS Reporting_End_Date
FROM 
    ledger_detailed_data d
JOIN 
    ledger_reporting_companies c ON d.Company = c."Full company name"
WHERE 
    c."Company type" = 'State-owned enterprises & public corporations';
'''

# Execute the drop and recreate commands
try:
    cursor.execute(sql_drop_view)
    cursor.execute(sql_create_view)
    conn.commit()
    # Creating the view succeeded in this notebook even with a misspelled column
    # name, so run a zero-row query against it: this makes a bad column or table
    # reference raise here instead of at first real use.
    cursor.execute('SELECT * FROM ledger_full_company_data LIMIT 0;')
    print("View recreated successfully with updated columns.")
except Exception as e:
    # Report the failure rather than aborting the notebook run
    print("Error updating view:", e)

View recreated successfully with updated columns.


In [42]:
# Look the view up in the schema catalogue by object type and name
sql_check_view = "SELECT name FROM sqlite_master WHERE type='view' AND name='ledger_full_company_data';"
result = cursor.execute(sql_check_view).fetchone()

# fetchone() gives None when no row matched, i.e. the view is absent
print(
    "The view 'ledger_full_company_data' exists."
    if result
    else "The view 'ledger_full_company_data' does not exist."
)

The view 'ledger_full_company_data' exists.


In [43]:
# Release database resources: the cursor first, then the connection
for handle in (cursor, conn):
    handle.close()