In [19]:
from pathlib import Path
# Project layout, anchored at the directory the notebook was started from.
ROOT = Path.cwd()
DATA = ROOT / "data"
QUERY_DIR = ROOT / "sql" / "queries"
RESULT_DIR = ROOT / "sql" / "results"

# Create whatever is missing. Only the nested sql/* folders need parents=True;
# DATA sits directly under ROOT, which already exists.
DATA.mkdir(exist_ok=True)
QUERY_DIR.mkdir(parents=True, exist_ok=True)
RESULT_DIR.mkdir(parents=True, exist_ok=True)

print("Dirs:", DATA, QUERY_DIR, RESULT_DIR, sep="\n")

Dirs:
D:\projects\ds-bootcamp\data
D:\projects\ds-bootcamp\sql\queries
D:\projects\ds-bootcamp\sql\results


In [20]:
import urllib.request, shutil

db_url = "https://github.com/lerocha/chinook-database/raw/master/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite"
db_path = DATA / "Chinook_Sqlite.sqlite"

# Every SQLite 3 database file starts with this 16-byte magic string.
SQLITE_MAGIC = b"SQLite format 3\x00"

if not db_path.exists():
    # Download into a sibling ".part" file and rename only after the copy has
    # finished. Otherwise an interrupted download would leave a truncated file
    # at db_path, and the exists() guard above would skip the download forever.
    tmp_path = db_path.with_name(db_path.name + ".part")
    try:
        with urllib.request.urlopen(db_url, timeout=60) as r, open(tmp_path, "wb") as f:
            shutil.copyfileobj(r, f)
        tmp_path.replace(db_path)
    finally:
        # No-op after a successful rename; removes the leftover on failure.
        tmp_path.unlink(missing_ok=True)

# Quick integrity check: the header must be "SQLite format 3\0".
with open(db_path, "rb") as f:
    header = f.read(16)
if header != SQLITE_MAGIC:
    raise ValueError(
        f"{db_path} does not look like a SQLite 3 database (header={header!r}); "
        "delete the file and re-run this cell to download it again."
    )
print("DB:", db_path, "| Header:", header)

DB: D:\projects\ds-bootcamp\data\Chinook_Sqlite.sqlite | Header: b'SQLite format 3\x00'


In [25]:
import sqlite3, pandas as pd

def run_sql(sql: str, params=None, db=None):
    """Run a SQL query against a SQLite database and return the rows as a DataFrame.

    Parameters
    ----------
    sql : str
        SQL text to execute.
    params : sequence or mapping, optional
        Bind parameters, forwarded unchanged to ``pandas.read_sql_query``.
    db : str or path-like, optional
        Database to open. When omitted, the notebook-level ``db_path`` is used,
        which is the original behaviour.

    Returns
    -------
    pandas.DataFrame
        The query's result set.
    """
    from contextlib import closing  # local import keeps this cell self-contained

    target = db_path if db is None else db
    # A sqlite3 connection used directly as a context manager only commits or
    # rolls back; it does NOT close. closing() guarantees the handle is released
    # when the block exits, so repeated calls do not leak connections.
    with closing(sqlite3.connect(target)) as con:
        return pd.read_sql_query(sql, con, params=params)

def run_sql_file_to_csv(sql_file, out_csv):
    """Execute the query stored in ``sql_file`` and save the result to ``out_csv``.

    The SQL file is read as UTF-8 and handed to ``run_sql``. The parent
    directory of ``out_csv`` is created when missing, and the frame is written
    without its index column.

    Returns the DataFrame produced by the query.
    """
    query = Path(sql_file).read_text(encoding="utf-8")
    result = run_sql(query)

    target_dir = Path(out_csv).parent
    target_dir.mkdir(parents=True, exist_ok=True)

    result.to_csv(out_csv, index=False)
    return result

# Smoke test for the helper: list the tables recorded in sqlite_master.
tables_sql = "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;"
run_sql(tables_sql)

Unnamed: 0,name
0,Album
1,Artist
2,Customer
3,Employee
4,Genre
5,Invoice
6,InvoiceLine
7,MediaType
8,Playlist
9,PlaylistTrack


In [22]:
# Query catalogue: file name -> SQL text. Declaring the names once here keeps
# the "write" step and the "run" step below from drifting apart.
QUERIES = {
    # Q1
    "q01_top_artists.sql": """-- Top 5 artists by revenue
SELECT ar.Name AS artist,
       SUM(il.UnitPrice * il.Quantity) AS revenue
FROM InvoiceLine il
JOIN Track t   ON t.TrackId = il.TrackId
JOIN Album al  ON al.AlbumId = t.AlbumId
JOIN Artist ar ON ar.ArtistId = al.ArtistId
GROUP BY 1
ORDER BY revenue DESC
LIMIT 5;""",
    # Q2
    "q02_rev_by_country.sql": """-- Revenue and order count by country
SELECT BillingCountry AS country,
       COUNT(DISTINCT InvoiceId) AS orders,
       SUM(Total) AS revenue
FROM Invoice
GROUP BY 1
ORDER BY revenue DESC;""",
    # Q3
    "q03_monthly_revenue.sql": """-- Monthly revenue trend (SQLite)
SELECT strftime('%Y-%m-01', InvoiceDate) AS month,
       SUM(Total) AS revenue
FROM Invoice
GROUP BY 1
ORDER BY 1;""",
}

# Persist every query as a UTF-8 .sql file under QUERY_DIR.
for sql_name, sql_text in QUERIES.items():
    (QUERY_DIR / sql_name).write_text(sql_text, encoding="utf-8")

# Run each saved query and export its result as <query stem>.csv in RESULT_DIR.
for sql_name in QUERIES:
    sql_file = QUERY_DIR / sql_name
    out_csv = RESULT_DIR / f"{sql_file.stem}.csv"
    df = run_sql_file_to_csv(sql_file, out_csv)
    print("Saved:", out_csv, "| rows:", len(df))

Saved: D:\projects\ds-bootcamp\sql\results\q01_top_artists.csv | rows: 5
Saved: D:\projects\ds-bootcamp\sql\results\q02_rev_by_country.csv | rows: 24
Saved: D:\projects\ds-bootcamp\sql\results\q03_monthly_revenue.csv | rows: 60


In [23]:
import pandas as pd
# Read the exported Q2 CSV back from disk and preview its first rows.
q02_csv = RESULT_DIR / "q02_rev_by_country.csv"
pd.read_csv(q02_csv).head()

Unnamed: 0,country,orders,revenue
0,USA,91,523.06
1,Canada,56,303.96
2,France,35,195.1
3,Brazil,35,190.1
4,Germany,28,156.48


In [24]:
import sqlite3, pandas as pd
from pathlib import Path

# Re-run Q2 straight from its saved .sql file.
#
# This cell deliberately does NOT rebind `db_path`: run_sql() reads that global,
# so replacing the path built from DATA with a cwd-relative string would
# silently change which database every later run_sql() call opens. The query
# file is likewise located through QUERY_DIR instead of a cwd-relative path.
sql_file = QUERY_DIR / "q02_rev_by_country.sql"

# Go through the shared helper rather than opening a second ad-hoc connection.
df = run_sql(sql_file.read_text(encoding="utf-8"))

df

Unnamed: 0,country,orders,revenue
0,USA,91,523.06
1,Canada,56,303.96
2,France,35,195.1
3,Brazil,35,190.1
4,Germany,28,156.48
5,United Kingdom,21,112.86
6,Czech Republic,14,90.24
7,Portugal,14,77.24
8,India,13,75.26
9,Chile,7,46.62
