In [1]:
import pandas as pd
import numpy as np
import os

# Data setup: a small in-memory sales table. It intentionally contains one
# negative Quantity and one missing Price so the cleaning steps below have
# something to act on.
data = {
    "Transaction_ID": [1, 2, 3, 4, 5],
    "Product": ["A", "B", "C", "A", "B"],
    "Quantity": [10, -5, 20, 15, 8],
    "Price": [15.0, 20.0, 25.0, None, 30.0],
    "Date": ["2023-01-01", "2023-01-02", "2023-01-03", "2023-01-04", "2023-01-05"],
}

# Step 1: Load the dataset
df = pd.DataFrame(data)
print(f"Task 1: Loaded Dataset\n{df.head()}")

# Step 2: Handle missing values (fill NaN values with 0)
# `missing_cols` lists the columns that actually contain NaN; only those are
# filled. The frame is reassigned instead of mutated with `inplace=True`, which
# yields the same values while keeping the step free of in-place side effects.
missing_cols = df.columns[df.isnull().any()]
df = df.fillna(dict.fromkeys(missing_cols, 0))
print(f"\nTask 2: Missing Values - After Handling\n{df}")

# Step 3: Handle negative values in numeric columns
# Every numeric column is replaced by its absolute value. Note that this
# selection also includes Transaction_ID and Price, not just Quantity.
numeric_cols = df.select_dtypes(include=[np.number]).columns
df[numeric_cols] = df[numeric_cols].abs()
print(f"\nTask 3: Negative Values - After Handling\n{df}")

# Step 4: Convert 'Date' column to DateTime format
# Required before Step 5, because the `.dt` accessor only works on datetimes.
df["Date"] = pd.to_datetime(df["Date"])
print(f"\nTask 4: Converted 'Date' column to DateTime format\n{df}")

# Step 5: Extract day, month, and year information from 'Date' column
df["Day"] = df["Date"].dt.day
df["Month"] = df["Date"].dt.month
df["Year"] = df["Date"].dt.year
print(f"\nTask 5: Extracted day, month, and year information from 'Date'\n{df}")

# Step 6: Calculate 'Total Sales'
# Rows whose Price was missing were filled with 0 in Step 2, so their
# Total Sales is 0 as well.
df["Total Sales"] = df["Quantity"] * df["Price"]
print(f"\nTask 6: Calculated 'Total Sales'\n{df}")

# Step 7: Group and aggregate data by 'Product'
# One row per product with summed Quantity and Total Sales; `reset_index`
# turns the 'Product' group key back into a regular column.
grouped_df = (
    df.groupby("Product")
    .agg({"Quantity": "sum", "Total Sales": "sum"})
    .reset_index()
)
print(f"\nTask 7: Grouped and Aggregated Data by 'Product'\n{grouped_df}")

# Step 8: Save the wrangled dataset to a CSV file
# The path is resolved against the current working directory.
wrangled_file_path = os.path.abspath("wrangled_dataset.csv")
df.to_csv(wrangled_file_path, index=False)
print("\nTask 8: Saved the wrangled Dataset to", wrangled_file_path)

# Step 9: Save the transformed dataset (same as wrangled) to a CSV file
# This writes the same frame as Step 8 under a second file name.
transformed_file_path = os.path.abspath("transformed_dataset.csv")
df.to_csv(transformed_file_path, index=False)
print("\nTask 9: Saved the Transformed Dataset to", transformed_file_path)

# Step 10: Transpose the dataset and save it to a CSV file
# After transposing, the original column names become the row index. The index
# is written, while `header=False` omits the row of positional labels.
transposed_df = df.transpose()
transposed_file_path = os.path.abspath("transposed_dataset.csv")
transposed_df.to_csv(transposed_file_path, header=False)
print("\nTask 10: Saved the Transposed Dataset to", transposed_file_path)


Task 1: Loaded Dataset
   Transaction_ID Product  Quantity  Price        Date
0               1       A        10   15.0  2023-01-01
1               2       B        -5   20.0  2023-01-02
2               3       C        20   25.0  2023-01-03
3               4       A        15    NaN  2023-01-04
4               5       B         8   30.0  2023-01-05

Task 2: Missing Values - After Handling
   Transaction_ID Product  Quantity  Price        Date
0               1       A        10   15.0  2023-01-01
1               2       B        -5   20.0  2023-01-02
2               3       C        20   25.0  2023-01-03
3               4       A        15    0.0  2023-01-04
4               5       B         8   30.0  2023-01-05

Task 3: Negative Values - After Handling
   Transaction_ID Product  Quantity  Price        Date
0               1       A        10   15.0  2023-01-01
1               2       B         5   20.0  2023-01-02
2               3       C        20   25.0  2023-01-03
3             