In [7]:
# Mount Google Drive into the Colab filesystem so later cells can read and
# write files under /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
import pandas as pd

# Load the raw dataset from the mounted Drive folder. The raw frame is kept
# under its own name so it stays available for inspection after cleaning.
raw_df = pd.read_csv("/content/drive/MyDrive/Student_Marks.csv")

# Human-readable record of the cleaning steps. Every entry ends with "\n" so
# the list can be written out directly with writelines().
log = []

# Record per-column missing-value counts before any row is dropped.
missing = raw_df.isnull().sum()
log.append(f"Missing values before cleaning:\n{missing}\n")

# Standardize column names first, so the casts below do not depend on the
# capitalization used in the CSV header (e.g. "Marks" vs "marks").
df = raw_df.rename(columns=str.lower)
log.append("Column names standardized to lowercase.\n")

# Drop rows containing any missing value and record how many were removed.
rows_loaded = len(df)
df = df.dropna()
log.append(f"Rows dropped due to missing values: {rows_loaded - len(df)}\n")

# Fix data types in a single pass (done after dropna so the int cast cannot
# hit NaN values).
df = df.astype({"number_courses": int, "time_study": float, "marks": float})
log.append("Data types fixed.\n")

# Remove exact duplicate rows and record how many were removed.
before = len(df)
df = df.drop_duplicates()
after = len(df)
log.append(f"Duplicates removed: {before - after}\n")

# Save the cleaned dataset to the current working directory, without the index.
df.to_csv("cleaned_dataset.csv", index=False)

# Save the cleaning log next to it; explicit encoding keeps the file
# independent of the platform default.
with open("cleaning_log.txt", "w", encoding="utf-8") as f:
    f.writelines(log)

print("✅ Data cleaning completed successfully!")


✅ Data cleaning completed successfully!


In [10]:
# Write the cleaned frame to Google Drive as CSV, omitting the index column.
cleaned_path = '/content/drive/MyDrive/cleaned_dataset.csv'
df.to_csv(path_or_buf=cleaned_path, index=False)

In [11]:
# Persist the cleaning log to Google Drive.
log_path = '/content/drive/MyDrive/cleaning_log.txt'

with open(log_path, 'w', encoding='utf-8') as f:
    # Log entries normally already end with "\n"; add a terminator only when
    # one is missing, so no blank line is written after each entry.
    f.writelines(item if item.endswith('\n') else item + '\n' for item in log)

print("Cleaning completed successfully!")

Cleaning completed successfully!
