In [10]:
import csv
import math
import re

import pandas as pd
from dateutil import parser

In [11]:
def strip_all_quotes(val: object) -> str:
    """Return *val* as text with all double quotes removed and edges trimmed.

    Both literal ``"`` characters and the HTML entity ``&quot;`` are dropped,
    then leading/trailing whitespace is stripped.

    Args:
        val: Any value; it is converted with ``str()`` before cleaning.
            ``None`` and float NaN are treated as missing.

    Returns:
        The cleaned text, or ``''`` when *val* is missing.
    """
    # pandas represents empty CSV fields as float NaN even with dtype=str;
    # str() would turn that into the literal text "nan" (and None into
    # "None"). NaN is the only float that is not equal to itself.
    if val is None or (isinstance(val, float) and val != val):
        return ''
    # Decode the entity first so a single removal pass covers both spellings.
    s = str(val).replace('&quot;', '"')
    return s.replace('"', '').strip()

In [12]:
def to_int(val):
    """Convert a raw cell value to an int, yielding 0 for non-integer text.

    The value is normalized with ``strip_all_quotes`` first. Only text made of
    an optional leading minus sign followed by digits is accepted; anything
    else (empty text, decimals such as ``2.5``, a leading ``+``, words)
    produces 0.
    """
    text = strip_all_quotes(val)
    looks_integral = bool(text) and re.fullmatch(r'-?\d+', text) is not None
    return int(text) if looks_integral else 0

In [13]:
def to_float(val):
    """Convert a raw cell value to a finite float, falling back to 0.0.

    The value is normalized with ``strip_all_quotes`` first.

    Args:
        val: Raw cell value (any type accepted by ``strip_all_quotes``).

    Returns:
        The parsed number, or ``0.0`` when the text is empty, cannot be
        parsed, or parses to NaN / infinity.
    """
    s = strip_all_quotes(val)
    if not s:
        return 0.0
    try:
        number = float(s)
    except ValueError:
        return 0.0
    # float() accepts "nan", "inf" and "infinity" (any case) without raising,
    # and overflowing literals such as "1e999" become inf. Treat all of them
    # like any other unusable value so they cannot leak into later arithmetic.
    return number if math.isfinite(number) else 0.0

In [14]:
def to_datetime(val):
    """Convert a raw cell value to an ISO date string (``YYYY-MM-DD``).

    The value is normalized with ``strip_all_quotes`` and parsed with
    ``dateutil.parser.parse`` using ``dayfirst=False``. Only the date part of
    the parsed value is kept. An empty string is returned when the text is
    empty or when parsing raises ``ValueError`` or ``OverflowError``.
    """
    cleaned = strip_all_quotes(val)
    if cleaned:
        try:
            parsed = parser.parse(cleaned, dayfirst=False)
            return parsed.date().isoformat()
        except (ValueError, OverflowError):
            # Unparseable input falls through to the empty-string result.
            pass
    return ""

In [15]:
def compute_total_price(row):
    """Return the line total: ``quantity`` multiplied by ``price_per_unit``.

    *row* is any object supporting lookup by the keys ``'quantity'`` and
    ``'price_per_unit'`` (for example a dict or a DataFrame row).
    """
    units = row['quantity']
    unit_price = row['price_per_unit']
    return units * unit_price

In [16]:
def clean_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of a raw sales frame with a ``total_price`` column.

    The input is not modified. ``order_id``, ``product_id`` and ``quantity``
    are converted with ``to_int``, ``price_per_unit`` with ``to_float`` and
    ``order_date`` with ``to_datetime``; ``total_price`` is then computed from
    the cleaned ``quantity`` and ``price_per_unit``.

    Args:
        df: Frame containing at least the five columns named above.

    Returns:
        A new DataFrame with the converted columns and ``total_price``.

    Raises:
        KeyError: If one of the required columns is absent.
    """
    df = df.copy()
    for column in ('order_id', 'product_id', 'quantity'):
        df[column] = df[column].apply(to_int)
    df['price_per_unit'] = df['price_per_unit'].apply(to_float)
    df['order_date'] = df['order_date'].apply(to_datetime)
    # compute_total_price only does key lookup and multiplication, so handing
    # it the whole frame multiplies the two columns in one vectorized step
    # instead of invoking Python once per row via df.apply(axis=1).
    df['total_price'] = compute_total_price(df)
    return df

In [17]:
def main(input_path='raw_sales.csv', output_path='cleaned_sales.csv'):
    """Read the raw sales CSV, clean it, write the result and print it.

    Args:
        input_path: CSV file to read. Defaults to ``'raw_sales.csv'``.
        output_path: Destination for the cleaned CSV (written without the
            index). Defaults to ``'cleaned_sales.csv'``.
    """
    # QUOTE_NONE keeps quote characters as literal data and dtype=str disables
    # type inference, so every cell reaches the converters as raw text.
    df_raw = pd.read_csv(
        input_path,
        dtype=str,
        engine='python',
        quoting=csv.QUOTE_NONE,
    )
    # Normalize headers with the same helper used for cell values, so stray
    # whitespace or &quot; entities around a column name do not cause a
    # KeyError when the columns are looked up during cleaning.
    df_raw.columns = df_raw.columns.map(strip_all_quotes)
    df_clean = clean_dataframe(df_raw)
    df_clean.to_csv(output_path, index=False)
    print(df_clean)

In [18]:
# Run the cleaning pipeline only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

   order_id  product_id  quantity  price_per_unit  order_date  total_price
0         1         101         2           20.00  2025-06-01        40.00
1         2         102        -1           15.50  2025-06-01       -15.50
2         3         103         1           35.00  2025-06-01        35.00
3         4         104         3           20.00  2025-06-02        60.00
4         5         105         0           99.00  2025-06-03         0.00
5         6         106         2           25.99  2025-06-03        51.98
