In [6]:
import pandas as pd
import numpy as np

def create_dataframe(size=100, null_percentage=0.1,
                     file_path='sample_eda_dataframe_with_nulls.csv', seed=None):
    """Build a synthetic EDA DataFrame, inject nulls, and save it as CSV.

    The frame has the columns ``ID``, ``Temperature``, ``City``, ``Date``,
    ``Is_Raining``, ``Visitors``, ``Sales`` and ``Category``. Every column
    except ``ID`` and ``Date`` receives the same number of randomly placed
    nulls.

    Args:
        size: Number of rows to generate. Must be non-negative.
        null_percentage: Fraction (0 to 1 inclusive) of rows to nullify in
            each nullable column.
        file_path: Destination of the CSV file. Defaults to the historical
            hard-coded name in the current working directory.
        seed: Optional seed for reproducible output. When ``None`` the global
            ``np.random`` state is used, exactly as before.

    Returns:
        A ``(message, head)`` tuple: a confirmation string naming the file
        and the first five rows of the generated DataFrame.

    Raises:
        ValueError: If ``null_percentage`` is outside [0, 1] (or NaN), or if
            ``size`` is negative.
    """
    # Written as a chained comparison so that NaN is rejected as well.
    if not 0 <= null_percentage <= 1:
        raise ValueError("null_percentage must be between 0 and 1.")
    if size < 0:
        raise ValueError("size must be non-negative.")

    # RandomState exposes the same uniform/choice/randint API as the np.random
    # module, so callers relying on np.random.seed() keep their behaviour.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Creating the DataFrame
    data = {
        'ID': range(1, size + 1),
        'Temperature': rng.uniform(20, 35, size),
        'City': rng.choice(['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix'], size=size),
        'Date': pd.date_range(start='2022-01-01', periods=size, freq='D'),
        'Is_Raining': rng.choice([True, False], size=size),
        'Visitors': rng.randint(100, 1000, size=size),
        'Sales': rng.uniform(1000.5, 5000.5, size=size),
        'Category': rng.choice(['A', 'B', 'C', 'D'], size=size),
    }

    df = pd.DataFrame(data)

    # Columns that receive nulls; ID (and Date) are left intact.
    columns_to_nullify = ['Temperature', 'City', 'Is_Raining', 'Visitors', 'Sales', 'Category']

    # The count is the same for every column, so compute it once. The small
    # epsilon absorbs float artefacts such as 100 * 0.29 == 28.999999999999996,
    # which plain int() would truncate to 28.
    num_nulls = min(size, int(size * null_percentage + 1e-9))

    if num_nulls > 0:
        for column in columns_to_nullify:
            # Integer and boolean columns cannot store NaN. Upcast explicitly
            # instead of relying on implicit upcasting during assignment,
            # which newer pandas versions deprecate.
            if pd.api.types.is_bool_dtype(df[column]):
                df[column] = df[column].astype(object)
            elif pd.api.types.is_integer_dtype(df[column]):
                df[column] = df[column].astype(float)

            # Randomly select distinct rows to be nullified in this column.
            null_indices = rng.choice(df.index, num_nulls, replace=False)
            df.loc[null_indices, column] = np.nan

    # Save the DataFrame to a CSV file
    df.to_csv(file_path, index=False)

    return f"DataFrame saved to '{file_path}'", df.head()

# Example usage: build a 200-row DataFrame; null_percentage=0 means no nulls are injected
create_dataframe(size=200, null_percentage=0)


("DataFrame saved to 'sample_eda_dataframe_with_nulls.csv'",
    ID  Temperature         City       Date  Is_Raining  Visitors        Sales  \
 0   1    22.946837  Los Angeles 2022-01-01        True       617  4821.087864   
 1   2    24.674964      Phoenix 2022-01-02       False       135  4774.444448   
 2   3    27.606562      Chicago 2022-01-03        True       589  2754.554135   
 3   4    29.729000      Houston 2022-01-04        True       901  4906.395432   
 4   5    24.242245      Phoenix 2022-01-05       False       112  4649.048857   
 
   Category  
 0        C  
 1        D  
 2        C  
 3        D  
 4        B  )