In [14]:
import numpy as np
from faker import Faker,Factory
import pandas as pd
import os
from datetime import datetime
import random

#Shared Faker generator for the German ("de_DE") locale.
#create_fake_data reseeds it via seed_instance() and draws first names from it.
fake = Factory.create("de_DE")

In [15]:
def generate_seed(now = None):
    """Derive an integer seed from a point in time.

    The seed is the product of day, minute, second, month, year and hour,
    integer-divided by the microsecond component.

    Args:
        now: Optional ``datetime`` to derive the seed from. Defaults to
            ``datetime.now()``; passing a fixed value makes the result
            reproducible.

    Returns:
        int: A seed in the range ``0 .. 2**32 - 1``, i.e. a value that
        ``np.random.seed`` accepts.
    """
    if now is None:
        now = datetime.now()
    #Calculate seed number from a few time data
    product = now.day * now.minute * now.second * now.month * now.year * now.hour
    #microsecond may be exactly 0; dividing by at least 1 avoids a ZeroDivisionError.
    seed = product // max(now.microsecond, 1)
    #If the result is 0 (minute, second or hour is 0, or the product is smaller
    #than the microsecond value), a "simpler" replacement seed is generated.
    if(seed <= 0):
        seed = now.day * (now.minute + 1)
    #The quotient can exceed 32 bits when microsecond is small; wrap it so that
    #np.random.seed does not reject the value.
    return seed % (2 ** 32)

In [16]:
def create_fake_data(num = 10, seed = 123):
    """Create ``num`` synthetic person records as a list of dicts.

    All random sources used here are seeded with ``seed``: NumPy's global
    generator, the standard-library ``random`` module and the module-level
    ``fake`` generator. Note that this reseeds those global generators as a
    side effect.

    Args:
        num: Number of person records to generate.
        seed: Seed passed to ``np.random.seed``, ``random.seed`` and
            ``fake.seed_instance``.

    Returns:
        list[dict]: One dict per person with the keys "Name", "Alter",
        "Politische Orientierung", "Bildungsabschluss", "Soziales",
        "Wohnlage" and "Co2-Fußabdruck".
    """
    #Setting the seed for the probability functions.
    #The stdlib random module supplies the age and the CO2 value, so it has to
    #be seeded as well; otherwise those two columns differ on every run.
    np.random.seed(seed)
    random.seed(seed)
    fake.seed_instance(seed)

    #Inclusive age range for each age group index drawn below
    age_ranges = [(20,39), (40,59), (60,79)]

    degrees = ["Ausbildung", "Fachschulabschluss", "Bachelor", "Master", "Diplom", "Promotion","ohne"]
    #(inclusive upper age bound, degree probabilities), checked in ascending order
    degree_probs_by_max_age = [
        (29, [0.425,0.081,0.117,0.071,0.059,0.003,0.244]),
        (39, [0.452,0.091,0.059,0.052,0.157,0.016,0.173]),
        (49, [0.516,0.097,0.014,0.011,0.179,0.017,0.166]),
        (59, [0.559,0.113,0.005,0.003,0.159,0.014,0.147]),
    ]
    #Degree probabilities for everyone older than the last bound above
    degree_probs_oldest = [0.544,0.096,0.002,0.001,0.132,0.012,0.213]

    #Distribution of the "Soziales" value, depending on political orientation
    social_probs = {
        "Mitte": [0.6,0.2,0.15,0.05],
        "Links": [0.4,0.25,0.25,0.1],
        "Rechts": [0.75,0.15,0.09,0.01],
    }

    #Defining the Output Array
    output = []
    #Loop over the number of personal data to be created.
    #The order of the draws is kept fixed so a given seed yields a stable sequence.
    for _ in range(num):
        age_part = np.random.choice([0,1,2], p=[0.33,0.38,0.29])
        age_low, age_high = age_ranges[age_part]
        age = random.randint(age_low, age_high)
        politics = np.random.choice(["Links", "Mitte", "Rechts"], p=[0.1545, 0.722, 0.1235])
        grad_probs = next(
            (probs for max_age, probs in degree_probs_by_max_age if age <= max_age),
            degree_probs_oldest,
        )
        grad = np.random.choice(degrees, p=grad_probs)
        social = np.random.choice([0,1,2,3], p=social_probs[str(politics)])
        location = np.random.choice(["Großstadt","Kleinstadt","Vorort","Ländlich"], p=[0.55,0.25,0.05,0.15])
        co2 = round(random.uniform(4,12),2)
        output.append(
            {
                #Name of the person
                "Name": fake.first_name(),
                "Alter": age,
                "Politische Orientierung": politics,
                "Bildungsabschluss": grad,
                "Soziales": social,
                "Wohnlage": location,
                "Co2-Fußabdruck": co2
            }
        )
    #Return of the output array with one person per entry
    return output

In [17]:
def generate_data(data_count, seed = None):
    """Build a DataFrame of ``data_count`` synthetic person records.

    Args:
        data_count: Number of records (rows) to generate.
        seed: Optional seed for the generators. When omitted, a time-based
            seed from ``generate_seed()`` is used, so every call produces a
            different dataset. Pass a fixed value to reproduce a dataset.

    Returns:
        pandas.DataFrame: One row per record returned by ``create_fake_data``.
    """
    if seed is None:
        seed = generate_seed()
    #The seed is truncated to an integer before it is handed to create_fake_data.
    records = create_fake_data(data_count, int(seed))
    return pd.DataFrame(records)

In [18]:
#Generate 10000 synthetic records and export them as a semicolon-separated,
#UTF-8 encoded CSV file without the DataFrame index column.
data = generate_data(data_count=10000)
data.to_csv(
    "Daten_Szenario2.csv",
    index=False,
    encoding='utf-8',
    sep=';',
)