## Dummy dataframe generator

Using Faker package to create a dummy data filled dataframe based off of predetermined columns

In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime
from faker import Faker
from faker.providers import BaseProvider
import json
import streamlit as st
import base64

In [2]:
fake = Faker('en_GB')  

In [3]:
class BaseProvider:
    pass

class Provider(BaseProvider):
    def __init__(self, faker):
        self.fake = faker
        self.start_date = datetime(2023, 1, 1)
        self.end_date = datetime(2024, 12, 1)
        self.cars = ['Audi', 'BMW', 'Ford', 'Honda', 'Jaguar', 'Land Rover', 'Mercedes-Benz', 'Nissan', 'Toyota', 'Volkswagen']
        self.cost_start = 50
        self.cost_end = 200
        self.lobs = ['Hotel', 'Motel', 'Holiday_Inn']
        self.code_start = 2000
        self.code_end = 2200
        self.C_Brand = ['AA', 'AB', 'AC', 'AD']
        self.C_Inter = ['ZA', 'FD', 'VW']
        self.C_Class = [1, 2, 3, 4]
        self.P_Pre = ['A_', 'B_', 'C_', 'D_', 'E_', 'F_', 'G_']
        self.postcode_areas = ['AB', 'AL', 'B', 'BA', 'BB', 'BD', 'BH', 'BL', 'BN', 'BR', 'BS', 'BT', 'CA', 'CB', 'CF', 'CH', 'CM', 'CO', 'CR', 'CT', 'CV', 'CW', 'DA', 'DD', 'DE', 'DG', 'DH', 'DL', 'DN', 'DT', 'DY', 'E', 'EC', 'EH', 'EN', 'EX', 'FK', 'FY', 'G', 'GL', 'GY', 'GU', 'HA', 'HD', 'HG', 'HP', 'HR', 'HS', 'HU', 'HX', 'IG', 'IM', 'IP', 'IV', 'JE', 'KA', 'KT', 'KW', 'KY', 'L', 'LA', 'LD', 'LE', 'LL', 'LN', 'LS', 'LU', 'M', 'ME', 'MK', 'ML', 'N', 'NE', 'NG', 'NN', 'NP', 'NR', 'NW', 'OL', 'OX', 'PA', 'PE', 'PH', 'PL', 'PO', 'PR', 'RG', 'RH', 'RM', 'S', 'SA', 'SE', 'SG', 'SK', 'SL', 'SM', 'SN', 'SO', 'SP', 'SR', 'SS', 'ST', 'SW', 'SY', 'TA', 'TD', 'TF', 'TN', 'TQ', 'TR', 'TS', 'TW', 'UB', 'W', 'WA', 'WC', 'WD', 'WF', 'WN', 'WR', 'WS', 'WV', 'YO', 'ZE']
        self.postcode_units = ['AA', 'AB', 'AD', 'AE', 'AF', 'AG', 'AH', 'AJ', 'AL', 'AN', 'AP', 'AQ', 'AR', 'AS', 'AT', 'AU', 'AW', 'AX', 'AY', 'AZ', 'BA', 'BB', 'BD', 'BE', 'BF', 'BG', 'BH', 'BJ', 'BL', 'BN', 'BP', 'BQ', 'BR', 'BS', 'BT', 'BU', 'BW', 'BX', 'BY', 'BZ', 'DA', 'DB', 'DD', 'DE', 'DF', 'DG', 'DH', 'DJ', 'DL', 'DN', 'DP', 'DQ', 'DR', 'DS', 'DT', 'DU', 'DW', 'DX', 'DY', 'DZ', 'EA', 'EB', 'ED', 'EE', 'EF', 'EG', 'EH', 'EJ', 'EL', 'EN', 'EP', 'EQ', 'ER', 'ES', 'ET', 'EU', 'EW', 'EX', 'EY', 'EZ', 'FA', 'FB', 'FD', 'FE', 'FF', 'FG', 'FH', 'FJ', 'FL', 'FN', 'FP', 'FQ', 'FR', 'FS', 'FT', 'FU', 'FW', 'FX', 'FY', 'FZ', 'GA', 'GB', 'GD', 'GE', 'GF', 'GG', 'GH', 'GJ', 'GL', 'GN', 'GP', 'GQ', 'GR', 'GS', 'GT', 'GU', 'GW', 'GX', 'GY', 'GZ', 'HA', 'HB', 'HD', 'HE', 'HF', 'HG', 'HH', 'HJ', 'HL', 'HN', 'HP', 'HQ', 'HR', 'HS', 'HT', 'HU', 'HW', 'HX', 'HY', 'HZ', 'JA', 'JB', 'JD', 'JE', 'JF', 'JG', 'JH', 'JJ', 'JL', 'JN', 'JP', 'JQ', 'JR', 'JS', 'JT', 'JU', 'JW', 'JX', 'JY', 'JZ', 'LA', 'LB', 'LD', 'LE', 'LF', 'LG', 'LH', 'LJ', 'LL', 'LN', 'LP', 'LQ', 'LR', 'LS', 'LT', 'LU', 'LW', 'LX', 'LY', 'LZ', 'NA', 'NB', 'ND']

    def date(self):        
            """Return random date between the start and end dates."""        
            return self.fake.date_between_dates(
                date_start=self.start_date, date_end=self.end_date).strftime('%Y/%m/%d')

    def car(self):
        """Return a random car from cars."""        
        return random.choice(self.cars)

    def cost(self):
        """Return a random cost between the start and end range."""                
        return random.randrange(self.cost_start, self.cost_end)

    def lob(self):
        """Return a random line of business."""        
        return random.choice(self.lobs)

    def code(self):
        """Return a random code between the start and end range."""        
        return random.randrange(self.code_start, self.code_end)        

    def Generate_C_key(self):
        T_Brand_Code = random.choice(self.C_Brand)
        T_C_Class = random.choice(self.C_Class)
        T_C_Inter = random.choice(self.C_Inter)
        T_C_Digit = random.randrange(100000, 999999)
        Claim_No = f"{T_Brand_Code}/{T_C_Class}/{T_C_Inter}/{T_C_Digit}"
        return Claim_No

    def Generate_P_key(self):
        T_P_Inter = random.choice(self.P_Pre)
        T_P_Digit = random.randrange(10000000, 99999999)
        Policy_No = f"{T_P_Inter}{T_P_Digit}"
        return Policy_No

    def Generate_Postcode(self):
        Postcode_Area = random.choice(self.postcode_areas)
        Postcode_Area_In = random.randrange(0, 10)
        Postcode_Unit_In = random.randrange(0, 9)
        Postcode_Unit = random.choice(self.postcode_units)
        pc = f"{Postcode_Area}{Postcode_Area_In} {Postcode_Unit_In}{Postcode_Unit}"
        return pc


In [4]:
fake = Faker()
fake.add_provider(Provider(fake))

In [5]:
# Generate fake data
fakedata = []
for _ in range(50):
    datasample = {
        'C_Flag': fake.Generate_C_key(),
        'P_Flag': fake.Generate_P_key(),
        'Timestamp': fake.date_time_between(start_date='-1y', end_date='now', tzinfo=None),
        'Name': fake.name(),
        'Address': fake.address(),
        'Postcode': fake.Generate_Postcode(),
        'Car Make': fake.car(),
        'Phone': fake.phone_number(),
        'VRN': fake.license_plate(),
        'Company': fake.company(),
        'Cost': fake.cost(),
        'LoB': fake.lob(),
        'Location': fake.local_latlng(country_code='GB', coords_only=True),
        'NI_Key': fake.ssn(),
    }
    fakedata.append(datasample)

In [6]:
# Convert to DataFrame
fakedata = pd.DataFrame(fakedata)

In [7]:
fakedata.head(5)

Unnamed: 0,C_Flag,P_Flag,Timestamp,Name,Address,Postcode,Car Make,Phone,VRN,Company,Cost,LoB,Location,NI_Key
0,AD/2/FD/225870,A_99411506,2023-06-27 02:49:00,Kathleen Todd DVM,"5155 Lewis Center\nStephenland, OK 20130",TQ6 4GW,Mercedes-Benz,569-375-5927x7501,WV0 S6G,Clark-Johnson,113,Motel,"(51.38673, 0.30367)",305-43-2383
1,AC/2/FD/110232,B_29452959,2024-05-26 04:36:11,Daniel Terry,"9855 Melissa Ports Suite 474\nBlackview, DE 18992",NP5 5FW,Toyota,(998)522-9989,312-UJM,Cruz LLC,133,Hotel,"(53.7446, -0.33525)",491-48-3369
2,AA/3/ZA/399060,A_62357376,2023-08-10 04:55:58,Paige Pearson,"2167 Robert Run Apt. 223\nWest Rodneyborough, ...",BA0 1EN,Audi,(594)219-8787,84O Y35,Schultz PLC,190,Motel,"(50.75767, -1.5443)",248-65-1746
3,AD/2/ZA/300303,A_54245055,2023-10-18 21:15:54,Michael Kemp,"297 Green Station Apt. 639\nAllenberg, NH 97840",DN3 5GX,Audi,432-861-7243x334,LCK 238,"Mathis, Young and Freeman",158,Holiday_Inn,"(51.39148, -0.29825)",827-79-3610
4,AA/4/ZA/351810,G_40117449,2023-12-10 18:57:23,Terry Clark,"184 William Club Suite 223\nGarciafort, NV 14470",CT3 5DN,Land Rover,+1-569-958-5441,440J,Boyd PLC,98,Motel,"(53.81667, -3.05)",453-79-8740


In [8]:
fake_select = fakedata.columns.to_list()

In [9]:
fake_select

['C_Flag',
 'P_Flag',
 'Timestamp',
 'Name',
 'Address',
 'Postcode',
 'Car Make',
 'Phone',
 'VRN',
 'Company',
 'Cost',
 'LoB',
 'Location',
 'NI_Key']