In [1]:
import pandas as pd
import numpy as np

In [2]:
# Existing data
data = {
    'Dependents': [1.0, 0.0, 0.0, 0.0, 2.0],
    'ApplicantIncome': [4583, 3000, 2583, 6000, 5417],
    'CoapplicantIncome': [1508.0, 0.0, 2358.0, 0.0, 4196.0],
    'LoanAmount': [128000, 66000, 120000, 141000, 267000],
    'Loan_Amount_Term': [360.0, 360.0, 360.0, 360.0, 360.0],
    'Credit_History': [1.0, 1.0, 1.0, 1.0, 1.0],
    'Gender_Female': [False, False, False, False, False],
    'Gender_Male': [True, True, True, True, True],
    'Married_No': [False, False, False, True, False],
    'Married_Yes': [True, True, True, False, True],
    'Education_Graduate': [True, True, False, True, True],
    'Education_Not Graduate': [False, False, True, False, False],
    'Self_Employed_No': [True, False, True, True, False],
    'Self_Employed_Yes': [False, True, False, False, True],
    'Property_Area_Rural': [False, False, False, False, False],
    'Property_Area_Semiurban': [False, False, False, False, False],
    'Property_Area_Urban': [True, True, True, True, True]
}

In [3]:
# Create DataFrame
df = pd.DataFrame(data)
df

Unnamed: 0,Dependents,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Gender_Female,Gender_Male,Married_No,Married_Yes,Education_Graduate,Education_Not Graduate,Self_Employed_No,Self_Employed_Yes,Property_Area_Rural,Property_Area_Semiurban,Property_Area_Urban
0,1.0,4583,1508.0,128000,360.0,1.0,False,True,False,True,True,False,True,False,False,False,True
1,0.0,3000,0.0,66000,360.0,1.0,False,True,False,True,True,False,False,True,False,False,True
2,0.0,2583,2358.0,120000,360.0,1.0,False,True,False,True,False,True,True,False,False,False,True
3,0.0,6000,0.0,141000,360.0,1.0,False,True,True,False,True,False,True,False,False,False,True
4,2.0,5417,4196.0,267000,360.0,1.0,False,True,False,True,True,False,False,True,False,False,True


In [4]:
# Add 10 rows with balanced gender distribution and randomized values
additional_data = {
    'Dependents': np.random.choice([0.0, 1.0, 2.0, 3.0], size=10),
    'ApplicantIncome': np.random.randint(2000, 20000, size=10),
    'CoapplicantIncome': np.random.randint(0, 10000, size=10),
    'LoanAmount': np.random.randint(50000, 300000, size=10),
    'Loan_Amount_Term': np.random.choice([360.0, 180.0, 240.0, 120.0], size=10),
    'Credit_History': np.random.choice([0.0, 1.0], size=10),
    'Gender_Female': np.random.choice([True, False], size=10, p=[0.5, 0.5]),
    'Gender_Male': np.random.choice([True, False], size=10, p=[0.5, 0.5]),
    'Married_No': np.random.choice([True, False], size=10),
    'Married_Yes': np.random.choice([True, False], size=10),
    'Education_Graduate': np.random.choice([True, False], size=10),
    'Education_Not Graduate': np.random.choice([True, False], size=10),
    'Self_Employed_No': np.random.choice([True, False], size=10),
    'Self_Employed_Yes': np.random.choice([True, False], size=10),
    'Property_Area_Rural': np.random.choice([True, False], size=10),
    'Property_Area_Semiurban': np.random.choice([True, False], size=10),
    'Property_Area_Urban': np.random.choice([True, False], size=10)
}

In [5]:
# Concatenate additional data to the existing DataFrame
df_additional = pd.DataFrame(additional_data)
df = pd.concat([df, df_additional], ignore_index=True)
df

Unnamed: 0,Dependents,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Gender_Female,Gender_Male,Married_No,Married_Yes,Education_Graduate,Education_Not Graduate,Self_Employed_No,Self_Employed_Yes,Property_Area_Rural,Property_Area_Semiurban,Property_Area_Urban
0,1.0,4583,1508.0,128000,360.0,1.0,False,True,False,True,True,False,True,False,False,False,True
1,0.0,3000,0.0,66000,360.0,1.0,False,True,False,True,True,False,False,True,False,False,True
2,0.0,2583,2358.0,120000,360.0,1.0,False,True,False,True,False,True,True,False,False,False,True
3,0.0,6000,0.0,141000,360.0,1.0,False,True,True,False,True,False,True,False,False,False,True
4,2.0,5417,4196.0,267000,360.0,1.0,False,True,False,True,True,False,False,True,False,False,True
5,1.0,14251,497.0,74788,240.0,1.0,True,True,False,True,True,True,False,False,True,False,True
6,0.0,5080,2135.0,262433,360.0,1.0,False,True,False,True,True,True,False,True,True,True,True
7,1.0,13186,4717.0,204606,240.0,0.0,False,False,True,False,False,True,False,False,False,False,False
8,3.0,8283,2750.0,272350,120.0,1.0,True,False,False,True,False,False,False,False,False,True,False
9,3.0,5078,2066.0,261516,360.0,1.0,True,True,False,False,False,True,False,True,False,True,True


In [6]:
# Randomize the DataFrame
np.random.seed(42)  # Set seed for reproducibility
df = df.apply(np.random.permutation, axis=0)
df

Unnamed: 0,Dependents,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Gender_Female,Gender_Male,Married_No,Married_Yes,Education_Graduate,Education_Not Graduate,Self_Employed_No,Self_Employed_Yes,Property_Area_Rural,Property_Area_Semiurban,Property_Area_Urban
0,3.0,7534,0.0,120000,240.0,1.0,False,True,False,False,False,True,True,True,False,True,True
1,0.0,4378,4096.0,204606,120.0,1.0,False,True,False,True,True,False,True,False,True,False,True
2,1.0,4583,4196.0,144107,360.0,0.0,True,True,False,False,False,False,False,True,False,True,True
3,3.0,8283,6952.0,128000,360.0,1.0,False,True,False,True,True,False,False,False,False,True,True
4,1.0,5078,4173.0,161148,360.0,1.0,False,True,True,False,True,True,True,False,False,False,False
5,3.0,5080,4717.0,84900,120.0,1.0,False,True,False,True,True,True,True,False,False,False,False
6,0.0,7423,2135.0,74788,360.0,1.0,True,False,False,False,False,True,False,False,True,True,True
7,0.0,6000,0.0,261516,240.0,1.0,False,True,True,True,True,False,False,False,False,False,True
8,0.0,18353,2358.0,83112,360.0,0.0,True,True,False,True,False,False,False,False,False,False,True
9,2.0,3000,1508.0,141000,240.0,1.0,True,False,True,True,True,False,False,False,False,True,True


In [7]:
# Save DataFrame to csv
df.to_csv('test.csv', index=False)