# The Ally - DuckDB

![DuckDB logo](images/duckdb_logo.png)

In [1]:
import duckdb

In [41]:
# Shared demo query reused by the cells below: it reads the CSV file directly
# by path in the FROM clause and keeps only the first two rows.
sql = "SELECT * FROM '../data/csv/10.csv' LIMIT 2"

# DB-API 2.0 style

In [42]:
# DB-API 2.0 flow: open a connection to the 'demo.ddb' database file, execute
# the statement, and fetch every row while the connection is still open.
with duckdb.connect('demo.ddb') as conn:
    r = conn.execute(sql)
    # fetchall() already returns a list of row tuples, so wrapping it in
    # list() only made a redundant copy.
    results = r.fetchall()

In [43]:
# Display the fetched rows: a plain Python list with one tuple per row.
results

[(147937429,
  'english',
  1696875102,
  1717510986,
  1,
  3,
  0,
  0.5267999172210693,
  0,
  1,
  0,
  0,
  1,
  None,
  76561199550893216,
  35,
  23,
  59161,
  4738,
  58753,
  1717541057),
 (166664841,
  'russian',
  1717510100,
  1717510100,
  1,
  0,
  0,
  0.0,
  0,
  1,
  0,
  0,
  1,
  None,
  76561199161536896,
  24,
  11,
  436,
  71,
  385,
  1717512997)]

## To Pandas

In [51]:
# Same query as above, but the result is converted to a pandas DataFrame
# inside the `with` block, before the connection goes out of scope.
with duckdb.connect('demo.ddb') as conn:
    df = (
        conn
        .sql(sql)
        .df()
    )

In [52]:
# Display the pandas DataFrame built from the DuckDB query result.
df

Unnamed: 0,recommendationid,language,timestamp_created,timestamp_updated,voted_up,votes_up,votes_funny,weighted_vote_score,comment_count,steam_purchase,...,written_during_early_access,hidden_in_steam_china,steam_china_location,author_steamid,author_num_games_owned,author_num_reviews,author_playtime_forever,author_playtime_last_two_weeks,author_playtime_at_review,author_last_played
0,147937429,english,1696875102,1717510986,1,3,0,0.5268,0,1,...,0,1,,76561199550893216,35,23,59161,4738,58753,1717541057
1,166664841,russian,1717510100,1717510100,1,0,0,0.0,0,1,...,0,1,,76561199161536896,24,11,436,71,385,1717512997


## To Polars

In [53]:
# Same query again, this time converting the result to a Polars DataFrame
# via `.pl()` while the connection is still open.
with duckdb.connect('demo.ddb') as conn:
    polars_df = (
        conn
        .sql(sql)
        .pl()
    )

In [54]:
# Display the Polars DataFrame (column names, dtypes, and the two rows).
polars_df

recommendationid,language,timestamp_created,timestamp_updated,voted_up,votes_up,votes_funny,weighted_vote_score,comment_count,steam_purchase,received_for_free,written_during_early_access,hidden_in_steam_china,steam_china_location,author_steamid,author_num_games_owned,author_num_reviews,author_playtime_forever,author_playtime_last_two_weeks,author_playtime_at_review,author_last_played
i64,str,i64,i64,i64,i64,i64,f64,i64,i64,i64,i64,i64,str,i64,i64,i64,i64,i64,i64,i64
147937429,"""english""",1696875102,1717510986,1,3,0,0.5268,0,1,0,0,1,,76561199550893216,35,23,59161,4738,58753,1717541057
166664841,"""russian""",1717510100,1717510100,1,0,0,0.0,0,1,0,0,1,,76561199161536896,24,11,436,71,385,1717512997


## To Arrow

In [55]:
# Same query once more, exporting the result in Arrow format via `.arrow()`
# while the connection is still open.
with duckdb.connect('demo.ddb') as conn:
    arrow_table = (
        conn
        .sql(sql)
        .arrow()
    )

In [56]:
# Display the Arrow result: its schema followed by the column values.
arrow_table

pyarrow.Table
recommendationid: int64
language: string
timestamp_created: int64
timestamp_updated: int64
voted_up: int64
votes_up: int64
votes_funny: int64
weighted_vote_score: double
comment_count: int64
steam_purchase: int64
received_for_free: int64
written_during_early_access: int64
hidden_in_steam_china: int64
steam_china_location: string
author_steamid: int64
author_num_games_owned: int64
author_num_reviews: int64
author_playtime_forever: int64
author_playtime_last_two_weeks: int64
author_playtime_at_review: int64
author_last_played: int64
----
recommendationid: [[147937429,166664841]]
language: [["english","russian"]]
timestamp_created: [[1696875102,1717510100]]
timestamp_updated: [[1717510986,1717510100]]
voted_up: [[1,1]]
votes_up: [[3,0]]
votes_funny: [[0,0]]
weighted_vote_score: [[0.5267999172210693,0]]
comment_count: [[0,0]]
steam_purchase: [[1,1]]
...

# Relation style

In [21]:
# Build a DuckDB relation over the CSV file using the module-level API
# (no explicit connection object). Later cells query it through `rel`.
rel = duckdb.read_csv("../data/csv/10.csv")

In [37]:
# Count the rows of the relation; `rel` in the SQL text refers to the Python
# variable of the same name.
# The alias must follow COUNT(*) to name the result column; placed after
# `rel` it would only alias the table and the column would stay `count_star()`.
duckdb.sql('select COUNT(*) as num_reviews from rel').show()

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│       238828 │
└──────────────┘



In [29]:
# Preview the first ten rows of the relation as a text table.
(
    rel
    .limit(10)
    .show()
)

┌──────────────────┬───────────┬───┬──────────────────────┬──────────────────────┬────────────────────┐
│ recommendationid │ language  │ … │ author_playtime_la…  │ author_playtime_at…  │ author_last_played │
│      int64       │  varchar  │   │        int64         │        int64         │       int64        │
├──────────────────┼───────────┼───┼──────────────────────┼──────────────────────┼────────────────────┤
│        147937429 │ english   │ … │                 4738 │                58753 │         1717541057 │
│        166664841 │ russian   │ … │                   71 │                  385 │         1717512997 │
│        166664763 │ russian   │ … │                    7 │                23743 │         1717510490 │
│        166663001 │ turkish   │ … │                   19 │                  356 │         1717508513 │
│        166658743 │ brazilian │ … │                    0 │                 1497 │         1478272196 │
│        161810892 │ schinese  │ … │                  524 │     

In [59]:
# Relational-API pipeline: project three columns, keep only the English
# reviews, cap the result at ten rows, and print it.
(
    rel
    .select('recommendationid', "language", 'voted_up')
    .filter("language = 'english'")
    .limit(10)
    .show()
)

┌──────────────────┬──────────┬──────────┐
│ recommendationid │ language │ voted_up │
│      int64       │ varchar  │  int64   │
├──────────────────┼──────────┼──────────┤
│        147937429 │ english  │        1 │
│        166652969 │ english  │        1 │
│        166652933 │ english  │        1 │
│        137537621 │ english  │        1 │
│        154253089 │ english  │        1 │
│        166638678 │ english  │        1 │
│        166638235 │ english  │        1 │
│        127794149 │ english  │        1 │
│        166629955 │ english  │        1 │
│        166623062 │ english  │        1 │
├──────────────────┴──────────┴──────────┤
│ 10 rows                      3 columns │
└────────────────────────────────────────┘



## In-memory sharing

In [60]:
import pandas as pd

In [61]:
# Load the same CSV with pandas. Note that this rebinds `df`, replacing the
# two-row DataFrame produced from DuckDB in the "To Pandas" section.
df = pd.read_csv('../data/csv/10.csv')

In [65]:
# Inspect the last ten rows of the pandas-loaded frame.
# NOTE(review): in the rendered output row 238823 has missing/shifted values,
# which may be why `recommendationid` shows up as a float column — confirm
# against the raw CSV.
df.tail(10)

Unnamed: 0,recommendationid,language,timestamp_created,timestamp_updated,voted_up,votes_up,votes_funny,weighted_vote_score,comment_count,steam_purchase,...,written_during_early_access,hidden_in_steam_china,steam_china_location,author_steamid,author_num_games_owned,author_num_reviews,author_playtime_forever,author_playtime_last_two_weeks,author_playtime_at_review,author_last_played
238818,15043110.0,english,1427259949,1571474648,1,0,0,0.0,0,0,...,0.0,0,,76561198072567552,38,34,7302,0,4661.0,1571062000.0
238819,13639509.0,russian,1419695652,1419695652,1,0,0,0.0,0,0,...,0.0,0,,76561198124826128,176,4,5124,0,4463.0,1631001000.0
238820,13140541.0,portuguese,1417265132,1417265132,1,0,0,0.0,0,0,...,0.0,0,,76561198055582432,0,3,4013,0,1998.0,1587926000.0
238821,10349773.0,russian,1401920934,1623438450,1,0,1,0.0,0,0,...,0.0,0,,76561198036529024,0,12,20892,592,16575.0,1698087000.0
238822,8227337.0,polish,1387815443,1387815443,1,0,0,0.0,0,0,...,0.0,0,,76561198054766880,114,2,68669,0,47901.0,1524517000.0
238823,,1351380029,1,0,0,0,0,1.0,0,0,...,,76561198008269840,0.0,2,95521,0,37952,1547225350,,
238824,149330962.0,russian,1698868683,1698868683,1,0,0,0.0,0,1,...,0.0,1,,76561199093871104,62,26,29,10,29.0,1698427000.0
238825,149284037.0,english,1698800321,1698800321,1,0,0,0.0,0,0,...,0.0,1,,76561199052025216,46,3,3367,2694,3016.0,1698892000.0
238826,127959835.0,schinese,1670214704,1698915106,1,0,0,0.0,0,1,...,0.0,1,,76561199209656688,47,11,1179,0,1179.0,1694818000.0
238827,1092034.0,english,1291918689,1291918689,1,0,0,0.0,0,0,...,0.0,0,,76561197986485904,0,2,119070,0,116910.0,1396685000.0


In [63]:
# In-memory sharing: DuckDB queries the pandas DataFrame directly, with `df`
# in the SQL text referring to the Python variable of that name.
(
    duckdb
    .sql('select recommendationid, voted_up, timestamp_created from df limit 2')
    .show()
)

┌──────────────────┬──────────┬───────────────────┐
│ recommendationid │ voted_up │ timestamp_created │
│      double      │  int64   │       int64       │
├──────────────────┼──────────┼───────────────────┤
│      147937429.0 │        1 │        1696875102 │
│      166664841.0 │        1 │        1717510100 │
└──────────────────┴──────────┴───────────────────┘



In [64]:
# The pandas equivalent of the DuckDB query above: select the same three
# columns and show the first two rows for comparison.
df.loc[:, ['recommendationid', 'voted_up', 'timestamp_created']].head(2)

Unnamed: 0,recommendationid,voted_up,timestamp_created
0,147937429.0,1,1696875102
1,166664841.0,1,1717510100
