In [1]:
import pandas as pd
import os
from ydata_profiling import ProfileReport
from pandas.tseries.offsets import MonthBegin

# Converting Excel files to CSV

In [2]:
# Folder with all your forex .xlsx files
# NOTE(review): no later cell shown here reads `forex_folder`; the conversion
# cell defines its own `input_folder` with the same path — confirm whether
# this variable is still needed.
forex_folder = "raw_data/Forex Rates/"

In [3]:
# Map each source workbook filename ("USD-<code>.xlsx") to the clean column
# label for that pair ("USD/<code>"). XAU is gold.
_currency_codes = ("AUD", "CAD", "CHF", "CNY", "EUR", "GBP", "HKD", "JPY", "NZD", "XAU")
forex_files = {f"USD-{code}.xlsx": f"USD/{code}" for code in _currency_codes}

In [4]:
# Placeholder for a merged forex DataFrame.
# NOTE(review): the merge cell further down accumulates into `forex_df`, and
# no cell shown here reads `merged_forex_df` — likely a leftover; confirm
# before removing.
merged_forex_df = None

In [5]:
import pandas as pd
import os

# Convert every Excel workbook in `input_folder` into a CSV with the same
# base name inside `output_folder`.
input_folder = "raw_data/Forex Rates/"
output_folder = "raw_data/Forex_csv_clean/"

# Create the destination folder on first run; a no-op when it already exists.
os.makedirs(output_folder, exist_ok=True)

# sorted() makes the processing order deterministic (os.listdir order is arbitrary).
for filename in sorted(os.listdir(input_folder)):
    # Excel leaves "~$<name>.xlsx" lock files next to any open workbook; they
    # are not real workbooks and make pd.read_excel fail, so skip them.
    if not filename.endswith(".xlsx") or filename.startswith("~$"):
        continue

    df = pd.read_excel(os.path.join(input_folder, filename))

    # Swap only the trailing extension; str.replace would also rewrite any
    # ".xlsx" occurring earlier in the file name.
    output_file = os.path.splitext(filename)[0] + ".csv"
    df.to_csv(os.path.join(output_folder, output_file), index=False, encoding="utf-8")


# Merging the forex CSV files

In [6]:
import os
import pandas as pd
from pandas.tseries.offsets import MonthBegin

# Merge the per-pair CSVs into one table keyed by DATE and write it to
# "forex_merged_cleaned.csv".

# Setup
data_folder = "raw_data/Forex_csv_clean/"
# CSV file name -> column name that pair's rate gets in the merged table
forex_files = {
    "USD-AUD.csv": "USD-AUD",
    "USD-CAD.csv": "USD-CAD",
    "USD-CHF.csv": "USD-CHF",
    "USD-CNY.csv": "USD-CNY",
    "USD-EUR.csv": "USD-EUR",
    "USD-GBP.csv": "USD-GBP",
    "USD-HKD.csv": "USD-HKD",
    "USD-JPY.csv": "USD-JPY",
    "USD-NZD.csv": "USD-NZD",
    "USD-XAU.csv": "USD-XAU"
}

forex_df = None

for file, colname in forex_files.items():
    filepath = os.path.join(data_folder, file)

    # Skip first row, second row is the header
    df = pd.read_csv(filepath, header=1)

    # Clean column names
    df.columns = df.columns.str.strip().str.lower()

    # Rename known columns
    df = df.rename(columns={"date": "DATE", "last price": colname})

    # Fail loudly if a file lacks the expected headers; otherwise the pair
    # would silently be missing (or mislabeled) in the merged output.
    missing = [col for col in ("DATE", colname) if col not in df.columns]
    if missing:
        raise KeyError(
            f"{filepath}: expected column(s) {missing} not found; got {list(df.columns)}"
        )

    # Keep only the date and this pair's rate. This drops blank / "unnamed: N"
    # columns and any other extras, which would otherwise share names across
    # files and collide (suffixes, then duplicate columns) in the outer merges.
    # Convert to datetime and shift to 1st of next month: with MonthBegin(1),
    # every date in a month (including the 1st itself) maps to the 1st of the
    # following month.
    df = df[["DATE", colname]].assign(
        DATE=lambda frame: pd.to_datetime(frame["DATE"]) + MonthBegin(1)
    )

    # Merge into master DataFrame; the outer join keeps dates that appear in
    # only some of the files.
    if forex_df is None:
        forex_df = df
    else:
        forex_df = pd.merge(forex_df, df, on="DATE", how="outer")

# Final steps
forex_df = forex_df.sort_values("DATE").reset_index(drop=True)
forex_df.to_csv("forex_merged_cleaned.csv", index=False)

### No need to run the cell below — the HTML report it generates should already be uploaded in the repo

In [7]:
# Read the merged forex table back in, parsing DATE and using it as the index
df = pd.read_csv("forex_merged_cleaned.csv", parse_dates=["DATE"]).set_index("DATE")

# Options for the profiling report, collected in one place
report_options = {
    "title": "Forex EDA Report",
    "html": {"style": {"full_width": True}},
    "minimal": False,
}
profile = ProfileReport(df, **report_options)

# Write the report out as HTML
profile.to_file("forex_eda_report.html")


HBox(children=(FloatProgress(value=0.0, description='Summarize dataset', max=5.0, style=ProgressStyle(descript…

100%|████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 1249.79it/s]





HBox(children=(FloatProgress(value=0.0, description='Generate report structure', max=1.0, style=ProgressStyle(…




HBox(children=(FloatProgress(value=0.0, description='Render HTML', max=1.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Export report to file', max=1.0, style=ProgressStyle(desc…


