In [None]:
import polars as pl

from src.config import DATA_PATH

In [9]:
CANADA_USD_RATIO = 0.76

In [None]:
canada_sales = pl.read_csv(DATA_PATH / "canada_sales.csv", infer_schema_length=10000)
usa_sales = pl.read_csv(DATA_PATH / "usa_sales.csv", infer_schema_length=10000)

In [23]:
canada_sales

Order ID,Product,Quantity Ordered,Price Each,Order Date,Purchase Address
str,str,str,str,str,str
"""176558""","""USB-C Charging Cable""","""2""","""11.95""","""04/19/19 08:46""","""128 Elm St, Quebec City"""
,,,,,"""113 Oak St, Ottawa"""
"""176559""","""Bose SoundSport Headphones""","""1""","""99.99""","""04/07/19 22:30""","""116 Elm St, Winnipeg"""
"""176560""","""Google Phone""","""1""","""600""","""04/12/19 14:38""","""149 Broadway, Edmonton"""
"""176560""","""Wired Headphones""","""1""","""11.99""","""04/12/19 14:38""","""121 Maple St, Vancouver"""
…,…,…,…,…,…
"""194090""","""Google Phone""","""1""","""600""","""04/08/19 17:11""","""124 Market St, Quebec City"""
"""194091""","""AA Batteries (4-pack)""","""1""","""3.84""","""04/15/19 16:02""","""130 Cedar Ave, Ottawa"""
"""194092""","""AAA Batteries (4-pack)""","""2""","""2.99""","""04/28/19 14:36""","""137 Maple St, Vancouver"""
"""194093""","""AA Batteries (4-pack)""","""1""","""3.84""","""04/14/19 15:09""","""133 Elm St, Vancouver"""


In [53]:
canada_sales_fix = (
    canada_sales
    .drop_nulls()
    .filter(pl.col("Price Each") != "Price Each")
    .with_columns([
        pl.col("Price Each").cast(pl.Float64),
    ])
    .with_columns([
        pl.col("Price Each") * CANADA_USD_RATIO,
        pl.col("Order ID").cast(pl.Int64),
        pl.col("Quantity Ordered").cast(pl.Int64),
        pl.col("Order Date").str.strptime(pl.Datetime, "%m/%d/%y %H:%M"),
    ])
)

canada_sales_fix

Order ID,Product,Quantity Ordered,Price Each,Order Date,Purchase Address
i64,str,i64,f64,datetime[μs],str
176558,"""USB-C Charging Cable""",2,9.082,2019-04-19 08:46:00,"""128 Elm St, Quebec City"""
176559,"""Bose SoundSport Headphones""",1,75.9924,2019-04-07 22:30:00,"""116 Elm St, Winnipeg"""
176560,"""Google Phone""",1,456.0,2019-04-12 14:38:00,"""149 Broadway, Edmonton"""
176560,"""Wired Headphones""",1,9.1124,2019-04-12 14:38:00,"""121 Maple St, Vancouver"""
176561,"""Wired Headphones""",1,9.1124,2019-04-30 09:27:00,"""123 Cedar Ave, Vancouver"""
…,…,…,…,…,…
194090,"""Google Phone""",1,456.0,2019-04-08 17:11:00,"""124 Market St, Quebec City"""
194091,"""AA Batteries (4-pack)""",1,2.9184,2019-04-15 16:02:00,"""130 Cedar Ave, Ottawa"""
194092,"""AAA Batteries (4-pack)""",2,2.2724,2019-04-28 14:36:00,"""137 Maple St, Vancouver"""
194093,"""AA Batteries (4-pack)""",1,2.9184,2019-04-14 15:09:00,"""133 Elm St, Vancouver"""


In [44]:
canada_sales["Order Date"]

Order Date
str
"""04/19/19 08:46"""
""
"""04/07/19 22:30"""
"""04/12/19 14:38"""
"""04/12/19 14:38"""
…
"""04/08/19 17:11"""
"""04/15/19 16:02"""
"""04/28/19 14:36"""
"""04/14/19 15:09"""


In [51]:
usa_sales_fix = (
    usa_sales
    .drop_nulls()
    .filter(pl.col("Price Each") != "Price Each")
    .with_columns([
        pl.col("Price Each").cast(pl.Float64),
        pl.col("Order ID").cast(pl.Int64),
        pl.col("Quantity Ordered").cast(pl.Int64),
        pl.col("Order Date").str.strptime(pl.Datetime, "%m/%d/%y %H:%M"),
    ])
)

usa_sales_fix

Order ID,Product,Quantity Ordered,Price Each,Order Date,Purchase Address
i64,str,i64,f64,datetime[μs],str
236670,"""Wired Headphones""",2,11.99,2019-08-31 22:21:00,"""359 Spruce St, Seattle, WA 981…"
236671,"""Bose SoundSport Headphones""",1,99.99,2019-08-15 15:11:00,"""492 Ridge St, Dallas, TX 75001"""
236672,"""iPhone""",1,700.0,2019-08-06 14:40:00,"""149 7th St, Portland, OR 97035"""
236673,"""AA Batteries (4-pack)""",2,3.84,2019-08-29 20:59:00,"""631 2nd St, Los Angeles, CA 90…"
236674,"""AA Batteries (4-pack)""",2,3.84,2019-08-15 19:53:00,"""736 14th St, New York City, NY…"
…,…,…,…,…,…
248146,"""Bose SoundSport Headphones""",1,99.99,2019-08-29 22:19:00,"""868 Hickory St, San Francisco,…"
248147,"""AAA Batteries (4-pack)""",3,2.99,2019-08-31 16:26:00,"""206 Lakeview St, Boston, MA 02…"
248148,"""AA Batteries (4-pack)""",1,3.84,2019-08-02 07:25:00,"""568 13th St, Seattle, WA 98101"""
248149,"""USB-C Charging Cable""",1,11.95,2019-08-08 12:10:00,"""495 Walnut St, San Francisco, …"


In [52]:
combined_sales = pl.concat([canada_sales_fix, usa_sales_fix], how="vertical")
combined_sales

Order ID,Product,Quantity Ordered,Price Each,Order Date,Purchase Address
i64,str,i64,f64,datetime[μs],str
176558,"""USB-C Charging Cable""",2,9.082,2019-04-19 08:46:00,"""128 Elm St, Quebec City"""
176559,"""Bose SoundSport Headphones""",1,75.9924,2019-04-07 22:30:00,"""116 Elm St, Winnipeg"""
176560,"""Google Phone""",1,456.0,2019-04-12 14:38:00,"""149 Broadway, Edmonton"""
176560,"""Wired Headphones""",1,9.1124,2019-04-12 14:38:00,"""121 Maple St, Vancouver"""
176561,"""Wired Headphones""",1,9.1124,2019-04-30 09:27:00,"""123 Cedar Ave, Vancouver"""
…,…,…,…,…,…
248146,"""Bose SoundSport Headphones""",1,99.99,2019-08-29 22:19:00,"""868 Hickory St, San Francisco,…"
248147,"""AAA Batteries (4-pack)""",3,2.99,2019-08-31 16:26:00,"""206 Lakeview St, Boston, MA 02…"
248148,"""AA Batteries (4-pack)""",1,3.84,2019-08-02 07:25:00,"""568 13th St, Seattle, WA 98101"""
248149,"""USB-C Charging Cable""",1,11.95,2019-08-08 12:10:00,"""495 Walnut St, San Francisco, …"


In [59]:
combined_sales.unique(subset=["Order ID"])

Order ID,Product,Quantity Ordered,Price Each,Order Date,Purchase Address
i64,str,i64,f64,datetime[μs],str
182449,"""Apple Airpods Headphones""",1,114.0,2019-04-29 00:18:00,"""115 Pine St, Toronto"""
177405,"""Lightning Charging Cable""",1,11.362,2019-04-09 16:38:00,"""128 Market St, Hamilton"""
240282,"""Wired Headphones""",1,11.99,2019-08-31 19:33:00,"""288 Maple St, Los Angeles, CA …"
244698,"""Apple Airpods Headphones""",1,150.0,2019-08-30 13:43:00,"""725 Walnut St, Seattle, WA 981…"
180535,"""iPhone""",1,532.0,2019-04-27 21:51:00,"""132 Cedar Ave, Toronto"""
…,…,…,…,…,…
180452,"""Lightning Charging Cable""",1,11.362,2019-04-24 14:47:00,"""137 Maple St, Quebec City"""
245503,"""Lightning Charging Cable""",1,14.95,2019-08-28 18:01:00,"""453 9th St, New York City, NY …"
186604,"""Lightning Charging Cable""",2,11.362,2019-04-22 14:36:00,"""115 Oak St, Ottawa"""
185038,"""AAA Batteries (4-pack)""",1,2.2724,2019-04-25 08:10:00,"""137 Elm St, Toronto"""
