In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset. The relative path is resolved against the kernel's
# current working directory.
file_path = "Test.csv"
df = pd.read_csv(file_path)

# Display basic information about the dataset.
# DataFrame.info() writes its report straight to stdout and returns None, so it
# is called bare; wrapping it in print() would emit an extra "None" line.
df.info()
print(df.head())


In [None]:
# I. Descriptive statistics per column (count, mean, std, min, quartiles, max).
# Note: with default arguments, describe() on a frame of mixed dtypes reports
# the numeric columns only.
summary_statistics = df.describe()
print(summary_statistics)

In [None]:
# II. Count the missing entries in every column.
# isna() flags each missing cell; summing that boolean mask column-wise yields
# the number of missing values per column.
missing_values = df.isna().sum()
print("Missing values:\n", missing_values)

In [None]:
# III. Replacing missing values in 'F6' with the mean of the column.
# The filled Series is assigned back to the column instead of calling
# fillna(..., inplace=True) on df['F6']: that chained in-place form operates on
# an intermediate object, raises a FutureWarning in pandas 2.x, and no longer
# updates df at all under Copy-on-Write (the default from pandas 3.0).
df['F6'] = df['F6'].fillna(df['F6'].mean())

# Verify if missing values are handled
print("Missing values after filling:\n", df.isnull().sum())

In [None]:
# IV. Frequency table of "Class" vs. F6: one row per Class value, one column
# per distinct F6 value, each cell holding the number of matching rows.
# fill_value=0 makes Class/F6 combinations that never occur show a count of 0;
# without it unstack() leaves them as NaN, which also turns the integer counts
# into floats.
frequency_table = df.groupby("Class")["F6"].value_counts().unstack(fill_value=0)
print("Frequency Table:\n", frequency_table)

In [None]:
# V. Scatter plots for every unordered pair of the columns F1 to F6,
# one figure per pair
features = ['F1', 'F2', 'F3', 'F4', 'F5', 'F6']

for pos, x_col in enumerate(features):
    # Pair x_col only with the columns listed after it, so each pair is
    # plotted exactly once and no column is plotted against itself.
    for y_col in features[pos + 1:]:
        fig, ax = plt.subplots(figsize=(6, 4))
        sns.scatterplot(x=df[x_col], y=df[y_col], ax=ax)
        ax.set_xlabel(x_col)
        ax.set_ylabel(y_col)
        ax.set_title(f'Scatter Plot of {x_col} vs {y_col}')
        plt.show()

In [None]:
# VI. Histogram and box plot, side by side, for each of the columns F7 to F9
features_f7_f9 = ['F7', 'F8', 'F9']

for feature in features_f7_f9:
    # One figure per column with two panels in a single row
    fig, (hist_ax, box_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: 10-bin histogram with a KDE curve overlaid
    sns.histplot(df[feature], bins=10, kde=True, ax=hist_ax)
    hist_ax.set_title(f'Histogram of {feature}')

    # Right panel: vertical box plot of the same column
    sns.boxplot(y=df[feature], ax=box_ax)
    box_ax.set_title(f'Box Plot of {feature}')

    plt.show()

In [None]:
# 2- Delete the working DataFrame from the Python environment.
# Only df is removed by this statement; any other names defined earlier stay
# bound. df is discarded because its F6 column was mean-filled above, and this
# step has to start again from the unmodified file contents.
del df

# Reload "Test.csv" into Python
df = pd.read_csv(file_path)

# Remove any row with a missing value in any of the columns.
# dropna() returns a new frame, so df keeps the raw reloaded data.
df_cleaned = df.dropna()

# Display the cleaned dataset info.
# DataFrame.info() prints its report itself and returns None, so it is called
# bare; wrapping it in print() would emit an extra "None" line.
df_cleaned.info()
print(df_cleaned.head())