In [1]:
from pathlib import Path

import pandas as pd

# Read the source CSV into a DataFrame; the file separates fields with
# semicolons rather than commas.
file_path = 'bank-full.csv'
data = pd.read_csv(file_path, sep=';')

In [None]:
# Whole-row uniqueness check: if no two records agree on every column, the
# full record can serve as a fallback key when no narrower key is found.
if data.duplicated().any():
    print("Some rows are duplicates and should be investigated or removed.")
else:
    print("Each row is unique, indicating that the whole record set may act as a primary key if no simpler key is found.")

In [None]:
# Test whether a subset of columns identifies every row, i.e. whether it is a
# candidate primary key.
# `duplicated(subset=...)` compares the column values directly. This avoids
# building one joined string per row in Python, and it cannot report a false
# collision the way joined strings can when a value contains the separator.
candidate_keys = ['day', 'month', 'duration', 'campaign', 'pdays', 'previous']
if not data.duplicated(subset=candidate_keys).any():
    print("The combination of day, month, duration, campaign, pdays, and previous can be a candidate for primary key.")
else:
    print("The chosen combination is not unique.")


In [4]:
# 3NF step: move poutcome into a lookup table keyed by (pdays, previous).
# The split is only lossless when every (pdays, previous) pair maps to exactly
# one poutcome, so that dependency is verified before the column is removed.
# If it does not hold, poutcome stays in the main table so no information is
# lost.
# The column-presence guard keeps the cell re-runnable: once poutcome has been
# dropped, selecting it again would raise a KeyError.
if 'poutcome' in data.columns:
    poutcome_mapping = data[['pdays', 'previous', 'poutcome']].drop_duplicates()
    # After drop_duplicates, a repeated (pdays, previous) key means the same
    # pair is associated with more than one poutcome value.
    if poutcome_mapping.duplicated(subset=['pdays', 'previous']).any():
        print("poutcome is not determined by (pdays, previous); keeping it in the main table to avoid losing information.")
    else:
        # Rebind instead of mutating in place so the drop is explicit.
        data = data.drop(columns='poutcome')

In [5]:
# Save the normalized tables.
# The output location is built with pathlib instead of a backslash-separated
# literal, so the files land inside the "Final tables" folder on every OS
# (on POSIX a backslash is an ordinary filename character).
# The folder is created first because to_csv does not create missing
# directories.
output_dir = Path('Final tables')
output_dir.mkdir(parents=True, exist_ok=True)
data.to_csv(output_dir / 'main-table.csv', index=False)
poutcome_mapping.to_csv(output_dir / 'p-outcome-table.csv', index=False)

In [None]:
# Closing status line for the notebook run.
print(
    "Normalization steps completed. Main data and poutcome mapping have been saved separately."
)