# Chapter 7: Reading and Writing Data

In [1]:
import polars as pl
# Print the Polars build and the versions of its optional dependencies so the
# outputs in this notebook can be tied to a known environment.
pl.show_versions()

--------Version info---------
Polars:               0.20.7
Index type:           UInt32
Platform:             macOS-12.5-arm64-arm-64bit
Python:               3.11.7 (main, Jan 16 2024, 14:42:22) [Clang 14.0.0 (clang-1400.0.29.202)]

----Optional dependencies----
adbc_driver_manager:  0.8.0
cloudpickle:          3.0.0
connectorx:           0.3.2
deltalake:            0.15.0
fsspec:               2023.12.2
gevent:               23.9.1
hvplot:               0.9.2
matplotlib:           3.8.2
numpy:                1.26.3
openpyxl:             3.1.2
pandas:               2.1.4
pyarrow:              14.0.2
pydantic:             2.5.3
pyiceberg:            0.5.1
pyxlsb:               <not installed>
sqlalchemy:           2.0.25
xlsx2csv:             0.8.2
xlsxwriter:           3.1.9


## Reading CSV Files

In [3]:
# Read with default settings: nothing tells Polars that "NA" marks a missing
# value, so every column that contains it is inferred as a string column.
penguins = pl.read_csv("data/penguins.csv")
penguins

rowid,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year
i64,str,str,str,str,str,str,str,i64
1,"""Adelie""","""Torgersen""","""39.1""","""18.7""","""181""","""3750""","""male""",2007
2,"""Adelie""","""Torgersen""","""39.5""","""17.4""","""186""","""3800""","""female""",2007
3,"""Adelie""","""Torgersen""","""40.3""","""18""","""195""","""3250""","""female""",2007
4,"""Adelie""","""Torgersen""","""NA""","""NA""","""NA""","""NA""","""NA""",2007
5,"""Adelie""","""Torgersen""","""36.7""","""19.3""","""193""","""3450""","""female""",2007
6,"""Adelie""","""Torgersen""","""39.3""","""20.6""","""190""","""3650""","""male""",2007
7,"""Adelie""","""Torgersen""","""38.9""","""17.8""","""181""","""3625""","""female""",2007
8,"""Adelie""","""Torgersen""","""39.2""","""19.6""","""195""","""4675""","""male""",2007
9,"""Adelie""","""Torgersen""","""34.1""","""18.1""","""193""","""3475""","""NA""",2007
10,"""Adelie""","""Torgersen""","""42""","""20.2""","""190""","""4250""","""NA""",2007


## Parsing Missing Values Correctly

In [5]:
# Treat the literal string "NA" as null so that the measurement columns are
# inferred as numeric (f64/i64) instead of str.
penguins = pl.read_csv("data/penguins.csv", null_values="NA")
penguins

rowid,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year
i64,str,str,f64,f64,i64,i64,str,i64
1,"""Adelie""","""Torgersen""",39.1,18.7,181,3750,"""male""",2007
2,"""Adelie""","""Torgersen""",39.5,17.4,186,3800,"""female""",2007
3,"""Adelie""","""Torgersen""",40.3,18.0,195,3250,"""female""",2007
4,"""Adelie""","""Torgersen""",,,,,,2007
5,"""Adelie""","""Torgersen""",36.7,19.3,193,3450,"""female""",2007
6,"""Adelie""","""Torgersen""",39.3,20.6,190,3650,"""male""",2007
7,"""Adelie""","""Torgersen""",38.9,17.8,181,3625,"""female""",2007
8,"""Adelie""","""Torgersen""",39.2,19.6,195,4675,"""male""",2007
9,"""Adelie""","""Torgersen""",34.1,18.1,193,3475,,2007
10,"""Adelie""","""Torgersen""",42.0,20.2,190,4250,,2007


In [6]:
# Count the nulls per column (a single-row frame), then flip it so that each
# original column becomes one row: include_header keeps the column names as a
# "column" field, and column_names labels the counts.
(
    penguins
    .null_count()
    .transpose(include_header=True, column_names=["null_count"]) # <1>
)

column,null_count
str,u32
"""rowid""",0
"""species""",0
"""island""",0
"""bill_length_mm…",2
"""bill_depth_mm""",2
"""flipper_length…",2
"""body_mass_g""",2
"""sex""",11
"""year""",0


## Reading Files with Encodings Other than UTF-8

In [8]:
# Expected to fail: no encoding is given, and the file contains byte sequences
# that are not valid UTF-8.
pl.read_csv("data/directors.csv")

ComputeError: could not parse `����` as dtype `str` at column 'name' (column number 1)

The current offset in the file is 19 bytes.

You might want to try:
- increasing `infer_schema_length` (e.g. `infer_schema_length=10000`),
- specifying correct dtype with the `dtypes` argument
- setting `ignore_errors` to `True`,
- adding `����` to the `null_values` list.

Original error: ```invalid utf-8 sequence```

In [9]:
# A guessed encoding: decoding as EUC-CN does not raise, but the non-Latin
# names and countries come out as the wrong characters.
pl.read_csv("data/directors.csv", encoding="EUC-CN")

name,born,country
str,i64,str
"""考侯""",1930,"""泣塑"""
"""Verhoeven""",1938,"""オランダ"""
"""弟宏""",1942,"""泣塑"""
"""Tarantino""",1963,"""势柜"""


In [10]:
import chardet

def detect_encoding(filename: str) -> str:
    """Guess the character encoding of a file using chardet.

    Args:
        filename: Path of the file to inspect.

    Returns:
        The value chardet reports under the ``"encoding"`` key of its result.
    """
    # Binary mode: the encoding is exactly what is unknown at this point.
    with open(filename, "rb") as handle:
        payload = handle.read()

    # The complete file contents are handed to chardet in a single call.
    return chardet.detect(payload)["encoding"]


# Ask chardet which encoding the directors file most likely uses.
detect_encoding("data/directors.csv")

'EUC-JP'

In [11]:
# Re-read the file with the encoding reported by detect_encoding.
pl.read_csv("data/directors.csv", encoding="EUC-JP")

name,born,country
str,i64,str
"""深作""",1930,"""日本"""
"""Verhoeven""",1938,"""オランダ"""
"""宮崎""",1942,"""日本"""
"""Tarantino""",1963,"""米国"""


## Reading Excel Spreadsheets

In [13]:
# read_options is handed on to the reader that parses the sheet data;
# skip_rows_after_header=1 drops the first row that follows the header row.
# NOTE(review): presumably that row is not real data in this workbook — confirm.
songs_fixed = pl.read_excel(
    "data/top2000-2023.xlsx", read_options={"skip_rows_after_header": 1}
)
songs_fixed

positie,titel,artiest,jaar
i64,str,str,i64
1,"""Bohemian Rhaps…","""Queen""",1975
2,"""Roller Coaster…","""Danny Vera""",2019
3,"""Hotel Californ…","""Eagles""",1977
4,"""Piano Man""","""Billy Joel""",1974
5,"""Fix You""","""Coldplay""",2005
6,"""Stairway To He…","""Led Zeppelin""",1971
7,"""Black""","""Pearl Jam""",1992
8,"""Avond""","""Boudewijn de G…",1997
9,"""Nothing Else M…","""Metallica""",1992
10,"""Love Of My Lif…","""Queen""",1975


## Working with Multiple Files

In [15]:
# Glob pattern: the character class [0-9] matches 2010.csv through 2019.csv;
# every matching file is read and the results are returned as one DataFrame.
pl.read_csv("data/stock/nvda/201[0-9].csv")

symbol,date,open,high,low,close,adj close,volume
str,str,f64,f64,f64,f64,f64,i64
"""NVDA""","""2010-01-04""",4.6275,4.655,4.5275,4.6225,4.240429,80020400
"""NVDA""","""2010-01-05""",4.605,4.74,4.605,4.69,4.30235,72864800
"""NVDA""","""2010-01-06""",4.6875,4.73,4.6425,4.72,4.32987,64916800
"""NVDA""","""2010-01-07""",4.695,4.715,4.5925,4.6275,4.245015,54779200
"""NVDA""","""2010-01-08""",4.59,4.67,4.5625,4.6375,4.254189,47816800
"""NVDA""","""2010-01-11""",4.6625,4.6825,4.5075,4.5725,4.194561,55661200
"""NVDA""","""2010-01-12""",4.505,4.5225,4.3225,4.4175,4.052372,62743200
"""NVDA""","""2010-01-13""",4.4475,4.4925,4.275,4.4775,4.107413,50886800
"""NVDA""","""2010-01-14""",4.4225,4.455,4.3325,4.4075,4.0432,60852400
"""NVDA""","""2010-01-15""",4.375,4.4125,4.22,4.2775,3.923946,81819200


In [16]:
# Two wildcards: the first matches every sub-directory of data/stock, the
# second every CSV file inside it; all matches end up in one DataFrame.
all_stocks = pl.read_csv("data/stock/*/*.csv")
all_stocks

symbol,date,open,high,low,close,adj close,volume
str,str,f64,f64,f64,f64,f64,i64
"""ASML""","""1999-01-04""",11.765625,12.28125,11.765625,12.140625,7.522523,1801867
"""ASML""","""1999-01-05""",11.859375,14.25,11.71875,13.96875,8.655257,8241600
"""ASML""","""1999-01-06""",14.25,17.601563,14.203125,16.875,10.456018,16400267
"""ASML""","""1999-01-07""",14.742188,17.8125,14.53125,16.851563,10.441495,17722133
"""ASML""","""1999-01-08""",16.078125,16.289063,15.023438,15.796875,9.787995,10696000
"""ASML""","""1999-01-11""",15.046875,15.515625,14.671875,15.234375,9.439458,4037867
"""ASML""","""1999-01-12""",14.953125,15.234375,14.753906,14.765625,9.149015,1118400
"""ASML""","""1999-01-13""",13.6875,16.007813,13.640625,15.796875,9.787995,5788267
"""ASML""","""1999-01-14""",15.609375,15.796875,14.976563,15.28125,9.468504,4803733
"""ASML""","""1999-01-15""",15.070313,16.40625,15.046875,16.21875,10.049394,4847733


In [17]:
import calendar

# Paths of the ASML files for leap years only, scanning 1999 through 2023
# (the upper bound of range() is exclusive).
filenames = [
    f"data/stock/asml/{leap_year}.csv"
    for leap_year in filter(calendar.isleap, range(1999, 2024))
]

filenames

['data/stock/asml/2000.csv',
 'data/stock/asml/2004.csv',
 'data/stock/asml/2008.csv',
 'data/stock/asml/2012.csv',
 'data/stock/asml/2016.csv',
 'data/stock/asml/2020.csv']

In [18]:
# Read each listed file into its own DataFrame, then stack the frames.
pl.concat([pl.read_csv(path) for path in filenames])

symbol,date,open,high,low,close,adj close,volume
str,str,f64,f64,f64,f64,f64,i64
"""ASML""","""2000-01-03""",43.875,43.875,41.90625,43.640625,27.040424,1121600
"""ASML""","""2000-01-04""",41.953125,42.5625,40.59375,40.734375,25.239666,968800
"""ASML""","""2000-01-05""",39.28125,39.703125,37.757813,39.609375,24.542597,1458133
"""ASML""","""2000-01-06""",36.75,37.59375,35.226563,37.171875,23.032274,3517867
"""ASML""","""2000-01-07""",36.867188,38.0625,36.65625,38.015625,23.555077,1631200
"""ASML""","""2000-01-10""",40.125,41.671875,40.125,41.625,25.791506,1341867
"""ASML""","""2000-01-11""",40.59375,42.1875,39.796875,40.265625,24.949213,1095467
"""ASML""","""2000-01-12""",40.875,41.296875,40.171875,41.015625,25.413929,778933
"""ASML""","""2000-01-13""",41.625,42.9375,40.78125,42.820313,26.532146,1834933
"""ASML""","""2000-01-14""",45.046875,48.375,45.0,46.921875,29.07354,5256533


## Reading Parquet

In [20]:
# The * wildcard matches every yellow_tripdata_*.parquet file in data/taxi;
# the matching files are read into a single DataFrame.
trips = pl.read_parquet("data/taxi/yellow_tripdata_*.parquet")
trips

VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,RatecodeID,store_and_fwd_flag,PULocationID,DOLocationID,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount,congestion_surcharge,airport_fee
i64,datetime[ns],datetime[ns],f64,f64,f64,str,i64,i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1,2022-01-01 00:35:40,2022-01-01 00:53:29,2.0,3.8,1.0,"""N""",142,236,1,14.5,3.0,0.5,3.65,0.0,0.3,21.95,2.5,0.0
1,2022-01-01 00:33:43,2022-01-01 00:42:07,1.0,2.1,1.0,"""N""",236,42,1,8.0,0.5,0.5,4.0,0.0,0.3,13.3,0.0,0.0
2,2022-01-01 00:53:21,2022-01-01 01:02:19,1.0,0.97,1.0,"""N""",166,166,1,7.5,0.5,0.5,1.76,0.0,0.3,10.56,0.0,0.0
2,2022-01-01 00:25:21,2022-01-01 00:35:23,1.0,1.09,1.0,"""N""",114,68,2,8.0,0.5,0.5,0.0,0.0,0.3,11.8,2.5,0.0
2,2022-01-01 00:36:48,2022-01-01 01:14:20,1.0,4.3,1.0,"""N""",68,163,1,23.5,0.5,0.5,3.0,0.0,0.3,30.3,2.5,0.0
1,2022-01-01 00:40:15,2022-01-01 01:09:48,1.0,10.3,1.0,"""N""",138,161,1,33.0,3.0,0.5,13.0,6.55,0.3,56.35,2.5,0.0
2,2022-01-01 00:20:50,2022-01-01 00:34:58,1.0,5.07,1.0,"""N""",233,87,1,17.0,0.5,0.5,5.2,0.0,0.3,26.0,2.5,0.0
2,2022-01-01 00:13:04,2022-01-01 00:22:45,1.0,2.02,1.0,"""N""",238,152,2,9.0,0.5,0.5,0.0,0.0,0.3,12.8,2.5,0.0
2,2022-01-01 00:30:02,2022-01-01 00:44:49,1.0,2.71,1.0,"""N""",166,236,1,12.0,0.5,0.5,2.25,0.0,0.3,18.05,2.5,0.0
2,2022-01-01 00:48:52,2022-01-01 00:53:28,1.0,0.78,1.0,"""N""",236,141,2,5.0,0.5,0.5,0.0,0.0,0.3,8.8,2.5,0.0


## Reading JSON and NDJSON

### JSON

In [23]:
# The result has a single column, "pokemon", whose one value is a list of
# structs; it still has to be reshaped before it is useful as a table.
pokedex = pl.read_json("data/pokedex.json")
pokedex

pokemon
list[struct[17]]
"[{1,""001"",""Bulbasaur"",""http://www.serebii.net/pokemongo/pokemon/001.png"",[""Grass"", ""Poison""],""0.71 m"",""6.9 kg"",""Bulbasaur Candy"",25,""2 km"",0.69,69.0,""20:00"",[1.58],[""Fire"", ""Ice"", … ""Psychic""],null,[{""002"",""Ivysaur""}, {""003"",""Venusaur""}]}, {2,""002"",""Ivysaur"",""http://www.serebii.net/pokemongo/pokemon/002.png"",[""Grass"", ""Poison""],""0.99 m"",""13.0 kg"",""Bulbasaur Candy"",100,""Not in Eggs"",0.042,4.2,""07:00"",[1.2, 1.6],[""Fire"", ""Ice"", … ""Psychic""],[{""001"",""Bulbasaur""}],[{""003"",""Venusaur""}]}, … {151,""151"",""Mew"",""http://www.serebii.net/pokemongo/pokemon/151.png"",[""Psychic""],""0.41 m"",""4.0 kg"",""None"",null,""Not in Eggs"",0.0,0.0,""N/A"",null,[""Bug"", ""Ghost"", ""Dark""],null,null}]"


In [24]:
(
    # One row per list element instead of one row holding the whole list.
    pokedex.explode("pokemon")
    # Promote each struct field to its own column.
    .unnest("pokemon")
    # Keep only a handful of the resulting columns.
    .select("id", "name", "type", "height", "weight")
)

id,name,type,height,weight
i64,str,list[str],str,str
1,"""Bulbasaur""","[""Grass"", ""Poison""]","""0.71 m""","""6.9 kg"""
2,"""Ivysaur""","[""Grass"", ""Poison""]","""0.99 m""","""13.0 kg"""
3,"""Venusaur""","[""Grass"", ""Poison""]","""2.01 m""","""100.0 kg"""
4,"""Charmander""","[""Fire""]","""0.61 m""","""8.5 kg"""
5,"""Charmeleon""","[""Fire""]","""1.09 m""","""19.0 kg"""
6,"""Charizard""","[""Fire"", ""Flying""]","""1.70 m""","""90.5 kg"""
7,"""Squirtle""","[""Water""]","""0.51 m""","""9.0 kg"""
8,"""Wartortle""","[""Water""]","""0.99 m""","""22.5 kg"""
9,"""Blastoise""","[""Water""]","""1.60 m""","""85.5 kg"""
10,"""Caterpie""","[""Bug""]","""0.30 m""","""2.9 kg"""


### NDJSON

In [26]:
from json import loads
from pprint import pprint

# Peek at the first record only: NDJSON stores one JSON document per line, so
# a single readline() yields a complete, parseable object.
# The file is decoded explicitly as UTF-8 instead of relying on the platform's
# default text encoding, which differs between systems.
with open("data/wikimedia.ndjson", encoding="utf-8") as f:
    pprint(loads(f.readline()))

{'$schema': '/mediawiki/recentchange/1.0.0',
 'bot': False,
 'comment': '/* League champions, runners-up and play-off finalists */',
 'id': 1659529639,
 'length': {'new': 91166, 'old': 91108},
 'meta': {'domain': 'en.wikipedia.org',
          'dt': '2023-07-29T07:51:39Z',
          'id': '0416300b-980c-45bb-b0a2-c9d7a9e2b7eb',
          'offset': 4820784717,
          'partition': 0,
          'request_id': 'ea0541fb-4e72-4fc3-82f0-6c26651b2043',
          'stream': 'mediawiki.recentchange',
          'topic': 'eqiad.mediawiki.recentchange',
          'uri': 'https://en.wikipedia.org/wiki/EFL_Championship'},
 'minor': False,
 'namespace': 0,
 'notify_url': 'https://en.wikipedia.org/w/index.php?diff=1167689309&oldid=1166824248',
 'parsedcomment': '<span dir="auto"><span class="autocomment"><a '
                  'href="/wiki/EFL_Championship#League_champions,_runners-up_and_play-off_finalists" '
                  'title="EFL Championship">→\u200eLeague champions, '
                  'ru

In [27]:
# Each line of the file becomes one row; the nested JSON objects (meta,
# length, revision) are read as struct columns.
wikimedia = pl.read_ndjson("data/wikimedia.ndjson")
wikimedia

$schema,meta,id,type,namespace,title,title_url,comment,timestamp,user,bot,notify_url,minor,length,revision,server_url,server_name,server_script_path,wiki,parsedcomment
str,struct[9],i64,str,i64,str,str,str,i64,str,bool,str,bool,struct[2],struct[2],str,str,str,str,str
"""/mediawiki/rec…","{""https://en.wikipedia.org/wiki/EFL_Championship"",""ea0541fb-4e72-4fc3-82f0-6c26651b2043"",""0416300b-980c-45bb-b0a2-c9d7a9e2b7eb"",""2023-07-29T07:51:39Z"",""en.wikipedia.org"",""mediawiki.recentchange"",""eqiad.mediawiki.recentchange"",0,4820784717}",1659529639,"""edit""",0,"""EFL Championsh…","""https://en.wik…","""/* League cham…",1690617099,"""87.12.215.232""",false,"""https://en.wik…",false,"{91108,91166}","{1166824248,1167689309}","""https://en.wik…","""en.wikipedia.o…","""/w""","""enwiki""","""<span dir=""aut…"
"""/mediawiki/rec…","{""https://en.wikipedia.org/wiki/Lim_Sang-choon"",""01a0f468-7553-48db-b553-7ac392b2187c"",""97e4dc39-fb32-4774-9c9a-b2caea391c9e"",""2023-07-29T07:51:42Z"",""en.wikipedia.org"",""mediawiki.recentchange"",""eqiad.mediawiki.recentchange"",0,4820784731}",1659529640,"""edit""",0,"""Lim Sang-choon…","""https://en.wik…","""""",1690617102,"""Preferwiki""",false,"""https://en.wik…",false,"{9807,10480}","{1167689034,1167689310}","""https://en.wik…","""en.wikipedia.o…","""/w""","""enwiki""",""""""
"""/mediawiki/rec…","{""https://en.wikipedia.org/wiki/Higher"",""5f053899-a2ab-4dec-8e98-1d01cd86093d"",""0da41aa2-ceb6-443f-8a0a-0633f83de6ec"",""2023-07-29T07:51:44Z"",""en.wikipedia.org"",""mediawiki.recentchange"",""eqiad.mediawiki.recentchange"",0,4820784748}",1659529642,"""edit""",0,"""Higher""","""https://en.wik…","""/* Albums */ a…",1690617104,"""Ss112""",false,"""https://en.wik…",false,"{5452,5548}","{1162509981,1167689312}","""https://en.wik…","""en.wikipedia.o…","""/w""","""enwiki""","""<span dir=""aut…"
"""/mediawiki/rec…","{""https://en.wikipedia.org/wiki/International_Poker_Rules"",""10c4886c-95f5-4cd2-8db9-333cb45f041b"",""af9cc405-26b2-485d-b20c-edd21b2a2a4c"",""2023-07-29T07:51:44Z"",""en.wikipedia.org"",""mediawiki.recentchange"",""eqiad.mediawiki.recentchange"",0,4820784763}",1659529643,"""edit""",0,"""International …","""https://en.wik…","""Nominated for …",1690617104,"""Piotrus""",false,"""https://en.wik…",false,"{2452,2896}","{1055827921,1167689313}","""https://en.wik…","""en.wikipedia.o…","""/w""","""enwiki""","""Nominated for …"
"""/mediawiki/rec…","{""https://en.wikipedia.org/wiki/Abdul_Hamid_Khan_Bhashani"",""198e47ae-fa33-4059-970a-550536e7bc7c"",""2012e36f-9fa1-49dc-ba68-5946de740cd4"",""2023-07-29T07:51:45Z"",""en.wikipedia.org"",""mediawiki.recentchange"",""eqiad.mediawiki.recentchange"",0,4820784800}",1659529653,"""edit""",0,"""Abdul Hamid Kh…","""https://en.wik…","""Rescuing 1 sou…",1690617105,"""InternetArchiv…",true,"""https://en.wik…",false,"{31503,31687}","{1163358967,1167689318}","""https://en.wik…","""en.wikipedia.o…","""/w""","""enwiki""","""Rescuing 1 sou…"
"""/mediawiki/rec…","{""https://en.wikipedia.org/wiki/Wikipedia:WikiProject_Deletion_sorting/Games"",""8ab3db5c-746e-4633-92ba-0af44ffcff23"",""b25cf398-bd6d-431c-b9c4-7e577adc7857"",""2023-07-29T07:51:46Z"",""en.wikipedia.org"",""mediawiki.recentchange"",""eqiad.mediawiki.recentchange"",0,4820784825}",1659529654,"""edit""",4,"""Wikipedia:Wiki…","""https://en.wik…","""Listing [[:Wik…",1690617106,"""Piotrus""",false,"""https://en.wik…",false,"{965,1027}","{1167689146,1167689319}","""https://en.wik…","""en.wikipedia.o…","""/w""","""enwiki""","""Listing <a hre…"
"""/mediawiki/rec…","{""https://en.wikipedia.org/wiki/Wikipedia:Articles_for_deletion/Log/2023_July_29"",""16fe5145-a76a-4d36-8ea6-f4464567f51f"",""caa52460-e60f-429e-bbf0-af22676e3421"",""2023-07-29T07:51:45Z"",""en.wikipedia.org"",""mediawiki.recentchange"",""eqiad.mediawiki.recentchange"",0,4820784827}",1659529655,"""edit""",4,"""Wikipedia:Arti…","""https://en.wik…","""Adding [[:Wiki…",1690617105,"""Piotrus""",false,"""https://en.wik…",false,"{2723,2785}","{1167689143,1167689317}","""https://en.wik…","""en.wikipedia.o…","""/w""","""enwiki""","""Adding <a href…"
"""/mediawiki/rec…","{""https://en.wikipedia.org/wiki/Brett_Eibner"",""24740db6-6e84-476f-897f-7ff5ec64e91e"",""31a5220e-e3a0-4741-9b6f-6f345fdb0eed"",""2023-07-29T07:51:49Z"",""en.wikipedia.org"",""mediawiki.recentchange"",""eqiad.mediawiki.recentchange"",0,4820784885}",1659529659,"""edit""",0,"""Brett Eibner""","""https://en.wik…","""""",1690617109,"""Stonecold415""",false,"""https://en.wik…",false,"{15597,15595}","{1167689278,1167689320}","""https://en.wik…","""en.wikipedia.o…","""/w""","""enwiki""",""""""
"""/mediawiki/rec…","{""https://en.wikipedia.org/wiki/Cuckoo_wrasse"",""858ad8d4-346d-4880-8cd1-a62a5ddcab41"",""65dcd928-bc2d-4472-a936-f0ec3550e563"",""2023-07-29T07:51:51Z"",""en.wikipedia.org"",""mediawiki.recentchange"",""eqiad.mediawiki.recentchange"",0,4820784903}",1659529660,"""edit""",0,"""Cuckoo wrasse""","""https://en.wik…","""""",1690617111,"""Couiros22""",false,"""https://en.wik…",false,"{7918,7877}","{1127956235,1167689322}","""https://en.wik…","""en.wikipedia.o…","""/w""","""enwiki""",""""""
"""/mediawiki/rec…","{""https://en.wikipedia.org/wiki/Billy_Strachan"",""32865489-ab95-4365-8161-09d13af11174"",""75f43217-dedd-469d-b9ed-301fdbafb10f"",""2023-07-29T07:51:51Z"",""en.wikipedia.org"",""mediawiki.recentchange"",""eqiad.mediawiki.recentchange"",0,4820784946}",1659529665,"""edit""",0,"""Billy Strachan…","""https://en.wik…","""added [[Catego…",1690617111,"""The History Wi…",false,"""https://en.wik…",false,"{68830,68878}","{1167689304,1167689326}","""https://en.wik…","""en.wikipedia.o…","""/w""","""enwiki""","""added <a href=…"


In [28]:
(
    # "meta" carries its own "id" field; rename the top-level "id" first so
    # that unnesting does not produce two columns with the same name.
    wikimedia.rename({"id": "edit_id"})
    .unnest("meta")
    .select("timestamp", "title", "user", "comment")
)

timestamp,title,user,comment
i64,str,str,str
1690617099,"""EFL Championsh…","""87.12.215.232""","""/* League cham…"
1690617102,"""Lim Sang-choon…","""Preferwiki""",""""""
1690617104,"""Higher""","""Ss112""","""/* Albums */ a…"
1690617104,"""International …","""Piotrus""","""Nominated for …"
1690617105,"""Abdul Hamid Kh…","""InternetArchiv…","""Rescuing 1 sou…"
1690617106,"""Wikipedia:Wiki…","""Piotrus""","""Listing [[:Wik…"
1690617105,"""Wikipedia:Arti…","""Piotrus""","""Adding [[:Wiki…"
1690617109,"""Brett Eibner""","""Stonecold415""",""""""
1690617111,"""Cuckoo wrasse""","""Couiros22""",""""""
1690617111,"""Billy Strachan…","""The History Wi…","""added [[Catego…"


## Other File Formats

In [30]:
import pandas as pd

# pandas.read_html returns one DataFrame per HTML table it finds on the page;
# take the first one and convert it to a Polars DataFrame.
# NOTE: this cell fetches the page over the network.
url = "https://en.wikipedia.org/wiki/List_of_Latin_abbreviations"
pl.from_pandas(pd.read_html(url)[0])

abbreviation,Latin,translation,usage and notes
str,str,str,str
"""A.D.""","""anno Domini""","""""in the year o…","""Used to label …"
"""A.I.""","""ad interim""","""""temporarily""""","""Used in busine…"
"""a.m.""","""ante meridiem""","""""before midday…","""Used on the tw…"
"""ca./c.""","""circa""","""""around"", ""abo…","""Used with date…"
"""Cap.""","""capitulus""","""""chapter""""","""Used before a …"
"""cf.""","""confer""","""""bring togethe…","""Confer is an i…"
"""C.P.""","""ceteris paribu…","""""other things …","""Commonly used …"
"""C.V.""","""curriculum vit…","""""course of lif…","""A document con…"
"""cwt.""","""centum weight""","""""hundredweight…","""This is a mixt…"
"""D.V.""","""Deo volente""","""""God willing""""",


## Querying Databases

In [32]:
# Let the database do the joins and return only the ten rows that are needed.
# ORDER BY makes the LIMIT deterministic: without it the database is free to
# return any ten matching rows, in any order.
pl.read_database_uri(
    query="""
    SELECT
        f.film_id,
        f.title,
        c.name AS category,
        f.rating,
        f.length / 60.0 AS length
    FROM
        film AS f,
        film_category AS fc,
        category AS c
    WHERE
        fc.film_id = f.film_id
        AND fc.category_id = c.category_id
    ORDER BY
        f.film_id
    LIMIT 10
    """,
    uri="sqlite:::data/sakila.db",
)

film_id,title,category,rating,length
i64,str,str,str,f64
1,"""ACADEMY DINOSA…","""Documentary""","""PG""",1.433333
2,"""ACE GOLDFINGER…","""Horror""","""G""",0.8
3,"""ADAPTATION HOL…","""Documentary""","""NC-17""",0.833333
4,"""AFFAIR PREJUDI…","""Horror""","""G""",1.95
5,"""AFRICAN EGG""","""Family""","""G""",2.166667
6,"""AGENT TRUMAN""","""Foreign""","""PG""",2.816667
7,"""AIRPLANE SIERR…","""Comedy""","""PG-13""",1.033333
8,"""AIRPORT POLLOC…","""Horror""","""R""",0.9
9,"""ALABAMA DEVIL""","""Horror""","""PG-13""",1.9
10,"""ALADDIN CALEND…","""Sports""","""NC-17""",1.05


In [33]:
# Same result as the SQL query, but with the joins done in Polars: all three
# tables are loaded in full first, then combined in memory.
db = "sqlite:::data/sakila.db"
films = pl.read_database_uri("SELECT * FROM film", db)
film_categories = pl.read_database_uri("SELECT * FROM film_category", db)
categories = pl.read_database_uri("SELECT * FROM category", db)

(
    # The suffixes disambiguate any non-key columns the tables have in common.
    films.join(film_categories, on="film_id", suffix="_fc")
    .join(categories, on="category_id", suffix="_c")
    .select(
        "film_id",
        "title",
        pl.col("name").alias("category"),
        "rating",
        pl.col("length") / 60,
    )
    # Sort explicitly before truncating so the ten rows kept do not depend on
    # whatever row order the joins happen to produce.
    .sort("film_id")
    .limit(10)
)

film_id,title,category,rating,length
i64,str,str,str,f64
1,"""ACADEMY DINOSA…","""Documentary""","""PG""",1.433333
2,"""ACE GOLDFINGER…","""Horror""","""G""",0.8
3,"""ADAPTATION HOL…","""Documentary""","""NC-17""",0.833333
4,"""AFFAIR PREJUDI…","""Horror""","""G""",1.95
5,"""AFRICAN EGG""","""Family""","""G""",2.166667
6,"""AGENT TRUMAN""","""Foreign""","""PG""",2.816667
7,"""AIRPLANE SIERR…","""Comedy""","""PG-13""",1.033333
8,"""AIRPORT POLLOC…","""Horror""","""R""",0.9
9,"""ALABAMA DEVIL""","""Horror""","""PG-13""",1.9
10,"""ALADDIN CALEND…","""Sports""","""NC-17""",1.05


## Writing Data

### CSV Format

### Excel Format

### Parquet Format

### Other Considerations

## Conclusion