# 1. Convert Excel Files to CSV Files

In [3]:
import polars as pl
import glob
import os

# Convert every .xlsx workbook located directly inside `input_folder` into a
# CSV file with the same base name inside `output_folder`.
# NOTE(review): these are absolute, user-specific paths; adjust them before
# running the notebook on another machine.
input_folder = r"C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet"
output_folder = r"C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\excel_to_csv"

# Create output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Sort the matches: glob's result order depends on the file system, and a
# stable order keeps the conversion log reproducible between runs.
for excel_file in sorted(glob.glob(os.path.join(input_folder, "*.xlsx"))):
    # While a workbook is open, Excel keeps a "~$<name>.xlsx" owner/lock file
    # next to it. That file matches "*.xlsx" but is not a readable workbook,
    # so skip it instead of letting the read abort the whole loop.
    if os.path.basename(excel_file).startswith("~$"):
        print(f"Skipped Excel lock file: {excel_file}")
        continue

    # Read excel into Polars DataFrame.
    # NOTE(review): read_excel is called with its defaults; if a workbook has
    # more than one sheet, confirm which sheet(s) end up in the CSV.
    df = pl.read_excel(excel_file)

    # Build output filename with .csv extension
    base_name = os.path.splitext(os.path.basename(excel_file))[0]
    csv_file = os.path.join(output_folder, f"{base_name}.csv")

    # Write to csv (an existing file with the same name is replaced)
    df.write_csv(csv_file)
    print(f"Converted: {excel_file} -> {csv_file}")


Converted: C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\april-04.xlsx -> C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\excel_to_csv\april-04.csv
Converted: C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\february-02.xlsx -> C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\excel_to_csv\february-02.csv
Converted: C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\january-01.xlsx -> C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\excel_to_csv\january-01.csv
Converted: C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\march-03.xlsx -> C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\excel_to_csv\march-03.csv
Converted: C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\may-05.xlsx -> C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\excel_to_csv\may-05.csv


# 2. Convert CSV Files to Parquet Files

In [5]:
# Source directory with the CSV files and target directory for the Parquet output.
input_folder = r"C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\excel_to_csv"
output_folder = r"C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\csv_to_parquet"

# Make sure the target directory is there (no error when it already exists).
os.makedirs(output_folder, exist_ok=True)

# Every CSV located directly inside the source directory gets converted.
csv_pattern = os.path.join(input_folder, "*.csv")
for csv_file in glob.glob(csv_pattern):
    # Load the CSV with Polars' default settings.
    # NOTE(review): column types are re-inferred from the text here; confirm
    # that date-like columns arrive in the Parquet file with the intended type.
    df = pl.read_csv(csv_file)

    # Target path: <output_folder>/<source base name>.parquet
    file_name = os.path.basename(csv_file)
    base_name = os.path.splitext(file_name)[0]
    parquet_file = os.path.join(output_folder, base_name + ".parquet")

    # Persist the frame as Parquet and log the conversion.
    df.write_parquet(parquet_file)
    print(f"Converted: {csv_file} -> {parquet_file}")

Converted: C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\excel_to_csv\april-04.csv -> C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\csv_to_parquet\april-04.parquet
Converted: C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\excel_to_csv\february-02.csv -> C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\csv_to_parquet\february-02.parquet
Converted: C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\excel_to_csv\january-01.csv -> C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\csv_to_parquet\january-01.parquet
Converted: C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\excel_to_csv\march-03.csv -> C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\csv_to_parquet\march-03.parquet
Converted: C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\excel_to_csv\may-05.csv -> C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet\csv_to_parquet\may-05.parquet


# 3. Convert Excel Files in Multiple Folders to CSV Files, Saving Each Result Next to Its Source File

In [7]:
# Convert every .xlsx workbook found anywhere under `root_folder` into a CSV
# file written next to the workbook (same folder, same base name).
# NOTE(review): absolute, user-specific path; adjust it before running the
# notebook on another machine.
root_folder = r"C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet_multiple_folders"

# "**" together with recursive=True descends into all subfolders. The result
# is sorted because glob's order depends on the file system, and Excel's
# "~$<name>.xlsx" owner/lock files (present while a workbook is open) are
# dropped because they match "*.xlsx" but are not readable workbooks.
excel_files = sorted(
    path
    for path in glob.glob(os.path.join(root_folder, "**", "*.xlsx"), recursive=True)
    if not os.path.basename(path).startswith("~$")
)

for excel_file in excel_files:
    # Read excel into Polars DataFrame.
    # NOTE(review): read_excel is called with its defaults; if a workbook has
    # more than one sheet, confirm which sheet(s) end up in the CSV.
    df = pl.read_excel(excel_file)

    # Build csv file path in the same folder as excel
    base_name = os.path.splitext(excel_file)[0]   # removes .xlsx extension
    csv_file = f"{base_name}.csv"

    # Write to csv (overwrite if exists)
    df.write_csv(csv_file)
    print(f"Converted: {excel_file} -> {csv_file}")


Converted: C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet_multiple_folders\april_2019\taxi_a_april_2019.xlsx -> C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet_multiple_folders\april_2019\taxi_a_april_2019.csv
Converted: C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet_multiple_folders\april_2019\taxi_b_april_2019.xlsx -> C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet_multiple_folders\april_2019\taxi_b_april_2019.csv
Converted: C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet_multiple_folders\april_2019\taxi_c_april_2019.xlsx -> C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet_multiple_folders\april_2019\taxi_c_april_2019.csv
Converted: C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet_multiple_folders\february_2019\taxi_a_february_2019.xlsx -> C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet_multiple_folders\february_2019\taxi_a_february_2019.csv
Converted: C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet_multiple_folders\february_2019\taxi_b_fe

# 4. Convert CSV Files in Multiple Folders to Parquet Files, Saving Each Result Next to Its Source File

In [8]:
# Root of the folder tree to scan; CSV files at any depth below it are converted.
root_folder = r"C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet_multiple_folders"

# "**" combined with recursive=True makes glob descend into every subfolder.
csv_pattern = os.path.join(root_folder, "**", "*.csv")
csv_files = glob.glob(csv_pattern, recursive=True)

for csv_file in csv_files:
    # Load the CSV with Polars' default settings.
    # NOTE(review): column types are re-inferred from the text here; confirm
    # that date-like columns arrive in the Parquet file with the intended type.
    df = pl.read_csv(csv_file)

    # Same folder and base name as the CSV, only the extension changes.
    base_name, extension = os.path.splitext(csv_file)
    parquet_file = base_name + ".parquet"

    # Write the Parquet file (an existing one is overwritten) and log it.
    df.write_parquet(parquet_file)
    print(f"Converted: {csv_file} -> {parquet_file}")

Converted: C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet_multiple_folders\april_2019\taxi_a_april_2019.csv -> C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet_multiple_folders\april_2019\taxi_a_april_2019.parquet
Converted: C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet_multiple_folders\april_2019\taxi_b_april_2019.csv -> C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet_multiple_folders\april_2019\taxi_b_april_2019.parquet
Converted: C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet_multiple_folders\april_2019\taxi_c_april_2019.csv -> C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet_multiple_folders\april_2019\taxi_c_april_2019.parquet
Converted: C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet_multiple_folders\february_2019\taxi_a_february_2019.csv -> C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet_multiple_folders\february_2019\taxi_a_february_2019.parquet
Converted: C:\Users\ekadw\Documents\DATA\Example\csv_to_parquet_multiple_folders\february_20