In [None]:
import numpy as np
import pandas as pd

In [None]:
# Step 1: Build the sample employee table.
# `data` maps each column name to its list of values (five rows in total).
data = dict(
    ID=[101, 102, 103, 104, 105],
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    Age=[25, 30, 35, 40, 45],
    Salary=[50000, 60000, 70000, 80000, 90000],
    Department=['HR', 'IT', 'Finance', 'Marketing', 'Operations'],
)

df = pd.DataFrame(data)
df

In [None]:
# Step 2: Adding new columns with more variability
# A seeded local generator makes the random columns reproducible under
# "Restart Kernel -> Run All" and keeps this cell idempotent when re-run.
rng = np.random.default_rng(42)

# Experience = years since age 22, jittered by an integer in [-2, 2]
# (the upper bound of integers() is exclusive, hence 3).
df['Experience'] = df['Age'] - 22 + rng.integers(-2, 3, size=len(df))

# Random bonus between 5% and 15% of salary.
df['Bonus'] = df['Salary'] * (0.05 + rng.random(len(df)) * 0.10)

# The last bin is open-ended. With a hard upper edge of 20 and right=False,
# Age 45 (experience 21..25) and Age 40 when it lands on exactly 20 fell
# outside every bin and were labelled NaN instead of 'Lead'.
df['Seniority Level'] = pd.cut(
    df['Experience'],
    bins=[0, 5, 10, 15, np.inf],
    labels=['Junior', 'Mid', 'Senior', 'Lead'],
    right=False,
)
df

In [None]:
# Step 3: Investigating the DataFrame
# Print the heading and the frame separately: passing both to a single print()
# inserts a separator space that shifts the frame's header row by one column.
print("Full DataFrame:")
print(df)
print("\nDataFrame Info:")
# DataFrame.info() writes its summary itself and returns None, so wrapping it
# in print() would append a stray "None" line to the output.
df.info()
print("\nStatistical Summary:")
print(df.describe())
print("\nMissing Values:")
print(df.isnull().sum())

# Select only numeric columns for the correlation matrix
numeric_df = df.select_dtypes(include=['number'])
print("\nCorrelation Matrix:")
print(numeric_df.corr())


In [None]:
# NOTE(review): nothing in the visible notebook reads this flag; presumably it
# is consumed by external checkpointing tooling — confirm before removing.
save_checkpoint = True

In [None]:
# Step 4: Full Investigation Report
def full_investigation_report(df):
    """Build a plain-text investigation report for a DataFrame.

    The report contains, in order: the full frame, the ``DataFrame.info()``
    summary, the ``describe()`` statistics, the per-column count of missing
    values, and the correlation matrix of the numeric columns.

    Args:
        df: The pandas DataFrame to summarise.

    Returns:
        str: The assembled report. Nothing is written to stdout.
    """
    import io  # local import: the notebook's import cell does not provide io

    # DataFrame.info() writes to a buffer and returns None, so str(df.info())
    # would embed the text "None" and leak the summary to stdout. Capture it.
    info_buffer = io.StringIO()
    df.info(buf=info_buffer)
    # info() output already ends with a newline; strip it so every section is
    # separated by exactly one blank line.
    info_text = info_buffer.getvalue().rstrip("\n")

    report = ""
    report += "Full DataFrame:\n" + df.to_string() + "\n\n"
    report += "DataFrame Info:\n" + info_text + "\n\n"
    report += "Statistical Summary:\n" + df.describe().to_string() + "\n\n"
    report += "Missing Values:\n" + df.isnull().sum().to_string() + "\n\n"
    report += "Correlation Matrix:\n" + df.select_dtypes(include=['number']).corr().to_string() + "\n"
    return report

# Assemble the text report for the current frame, then echo it to the cell output
report = full_investigation_report(df=df)
print(report)

In [None]:
# NOTE(review): re-assigns the same value the flag was given in an earlier cell;
# no visible code reads it — presumably used by external tooling, confirm.
save_checkpoint = True

In [None]:
# Step 5: Persist the final DataFrame as CSV (row index is not written)
df.to_csv(
    path_or_buf='final_dataframe.csv',
    index=False,
)