In [12]:
import numpy as np
from faker import Faker,Factory
import pandas as pd
import os
from datetime import datetime
import random

#Shared Faker generator for the German locale ("de_DE"); create_fake_data uses it to draw first names
fake = Factory.create("de_DE")

In [13]:
def generate_seed(now=None):
    """Derive an integer seed from the components of a timestamp.

    The seed is the product of day, minute, second, month, year and hour,
    integer-divided by the microsecond component. If that yields 0 (for
    example because minute, second, hour or microsecond is 0), the simpler
    replacement ``day * (minute + 1)`` is used instead.

    Args:
        now: Optional ``datetime`` to derive the seed from. Defaults to
            ``datetime.now()``.

    Returns:
        int: A seed in the range 1 .. 2**32 - 1.
    """
    if now is None:
        now = datetime.now()
    #Calculate seed number from a few time data
    product = now.day * now.minute * now.second * now.month * now.year * now.hour
    #A microsecond of 0 would raise ZeroDivisionError; treat it like an unusable result instead
    seed = product // now.microsecond if now.microsecond else 0
    #With a tiny microsecond value the quotient exceeds 32 bits; wrap it so 32-bit seeders accept it
    seed %= 2**32
    #If 0 is the result, a "simpler" replacement seed is generated.
    if seed <= 0:
        seed = now.day * (now.minute + 1)
    return seed

In [14]:
def create_fake_data(num = 10, seed = 123):
    """Create ``num`` synthetic person profiles, reproducibly for a given seed.

    Seeds numpy's global generator and the module-level Faker instance
    ``fake`` with ``seed``. Age and CO2 footprint are drawn from a private
    ``random.Random(seed)`` so they are reproducible as well and the global
    ``random`` state is left untouched.

    Args:
        num: Number of person profiles to create.
        seed: Integer seed used for all random sources of this function.

    Returns:
        list[dict]: One dict per person with the keys "Name", "Alter",
        "Politische Orientierung", "Bildungsabschluss", "Soziales",
        "Wohnlage" and "CO2-Fußabdruck".
    """
    #Setting the seed for the probability functions
    np.random.seed(seed)
    fake.seed_instance(seed)
    #Private generator for the stdlib draws; the global `random` module is never seeded here
    rng = random.Random(seed)

    #Inclusive age bounds of the three age groups
    age_ranges = [(20, 39), (40, 59), (60, 79)]
    grades = ["Ausbildung", "Fachschulabschluss", "Bachelor", "Master", "Diplom", "Promotion","ohne"]
    #(inclusive upper age, probabilities aligned with `grades`), checked in ascending order
    grade_probs_by_age = [
        (29, [0.425,0.081,0.117,0.071,0.059,0.003,0.244]),
        (39, [0.452,0.091,0.059,0.052,0.157,0.016,0.173]),
        (49, [0.516,0.097,0.014,0.011,0.179,0.017,0.166]),
        (59, [0.559,0.113,0.005,0.003,0.159,0.014,0.147]),
    ]
    #Probabilities for everyone older than 59
    grade_probs_oldest = [0.544,0.096,0.002,0.001,0.132,0.012,0.213]
    #Distribution of the "Soziales" score per political orientation
    social_probs = {
        "Mitte": [0.6,0.2,0.15,0.05],
        "Links": [0.4,0.25,0.25,0.1],
    }
    #Used for every orientation not listed above (i.e. "Rechts")
    social_probs_other = [0.75,0.15,0.09,0.01]

    #Defining the Output Array
    output = []
    #Loop over the number of personal data to be created
    for _ in range(num):
        #The numpy draws keep their original order: age group, politics, degree, social, location
        age_part = np.random.choice([0,1,2], p=[0.33,0.38,0.29])
        age = rng.randint(*age_ranges[age_part])
        politics = np.random.choice(["Links", "Mitte", "Rechts"], p=[0.1545, 0.722, 0.1235])
        grade_probs = next(
            (probs for upper, probs in grade_probs_by_age if age <= upper),
            grade_probs_oldest,
        )
        grad = np.random.choice(grades, p=grade_probs)
        social = np.random.choice([0,1,2,3], p=social_probs.get(str(politics), social_probs_other))
        location = np.random.choice(["Großstadt","Kleinstadt","Vorort","Ländlich"], p=[0.55,0.25,0.05,0.15])
        co2 = round(rng.uniform(4,12),2)
        output.append(
            {
                #Name of the person
                "Name": fake.first_name(),
                "Alter": age,
                "Politische Orientierung": politics,
                "Bildungsabschluss": grad,
                "Soziales": social,
                "Wohnlage": location,
                "CO2-Fußabdruck": co2
            }
        )
    #Return of the output array with one person per entry
    return output

In [15]:
def generate_data(data_count):
    """Return a DataFrame with ``data_count`` synthetic persons.

    The seed for the fake data is taken from generate_seed(), so every call
    usually produces a different data set.
    """
    #int() guarantees an integer seed regardless of the numeric type generate_seed returns
    time_seed = int(generate_seed())
    records = create_fake_data(data_count, time_seed)
    return pd.DataFrame(records)

In [16]:
#Values in the personprofile which only serve as filler
additional = dict(Name="Random", Alter="20-79")

#Values in the personprofile which influence the evaluation, with all values they can take
influential = {
    "Politische Orientierung": ["Links", "Mitte", "Rechts"],
    "Bildungsabschluss": [
        "Ausbildung",
        "Fachschulabschluss",
        "Bachelor",
        "Master",
        "Diplom",
        "Promotion",
        "ohne",
    ],
    "Soziales": list(range(4)),
    "Wohnlage": ["Großstadt", "Kleinstadt", "Vorort", "Ländlich"],
    "CO2-Fußabdruck": list(range(4, 13)),
}

#Values in the personprofile which can act as a bias: the age range plus every influential field
#(each list is copied so that the two dictionaries never share a list object)
bias = {
    "Alter": "20-79",
    **{field: list(values) for field, values in influential.items()},
}
def create_Rules():
    """Return a fresh dict with the rating points of every influential field.

    Each list holds the points that are added to a rating, one entry per
    possible value of the field; the Evaluator looks an entry up by the
    position of the person's value in the matching list of possible values.
    """
    politics_points = [-50, 50, -50]
    degree_points = [20, 40, 60, 80, 100, 120, -120]
    social_points = [-70, 0, 40, 100]
    location_points = [-30, 20, 20, -30]
    co2_points = [110, 70, 40, 10, 0, -20, -40, -70, -110]
    return {
        "Politische Orientierung": politics_points,
        "Bildungsabschluss": degree_points,
        "Soziales": social_points,
        "Wohnlage": location_points,
        "CO2-Fußabdruck": co2_points,
    }

In [17]:
class Evaluator:
    """Rates person profiles by a rule table, optionally with a bias penalty.

    Attributes are plain and may be changed after construction:
        rules: dict mapping a profile field to a list of points, aligned by
            position with the list of possible values of that field.
        bias: truthy if this evaluator applies the bias penalty.
        bias_neg: points subtracted for every bias entry a person matches.
    """

    #Rating every person starts with before the rules are applied
    BASE_RATING = 1000
    #A bias penalty never pushes the rating below this value
    MIN_BIASED_RATING = 600
    #Field whose float value is truncated to a whole number before it is compared
    BUCKETED_KEY = "CO2-Fußabdruck"

    #Creating a evaluator with its own rules and a bias or not
    def __init__(self, rules, bias, bias_neg=200):
        self.rules = rules
        self.bias = bias
        self.bias_neg = bias_neg

    @classmethod
    def _comparable(cls, key, value):
        """Return ``value`` in the form used for lookups (CO2 truncated to int)."""
        return int(value) if key == cls.BUCKETED_KEY else value

    @staticmethod
    def _age_matches(age, ranges):
        """Return True if ``age`` lies inside one of the inclusive "low-high" ranges.

        ``ranges`` is either a single string such as "30-40" or an iterable
        of such strings.
        """
        if isinstance(ranges, str):
            ranges = [ranges]
        for age_range in ranges:
            bounds = age_range.split('-')
            if int(bounds[0]) <= age <= int(bounds[1]):
                return True
        return False

    #Function to evaluate a submitted person with or without bias
    def rate(self, influential, person, bias):
        """Rate ``person`` and store the result under the key "Bewertung".

        Args:
            influential: dict mapping every field in ``self.rules`` to the
                list of its possible values.
            person: mapping with the profile values; it is modified in place.
            bias: dict mapping a field to the values (for "Alter": age range
                string or list of such strings) that trigger the penalty.
                Only used if ``self.bias`` is truthy; ``None`` means no entry.

        Returns:
            The same ``person`` object with "Bewertung" set.

        Raises:
            ValueError: if a person's value is not in ``influential[key]``.
        """
        score = self.BASE_RATING
        for key, points in self.rules.items():
            position = influential[key].index(self._comparable(key, person[key]))
            score += points[position]
        if self.bias:
            for key, targets in (bias or {}).items():
                if key == "Alter":
                    matched = self._age_matches(person[key], targets)
                else:
                    #Same truncation as in the rule lookup, otherwise a float CO2 value never matches
                    matched = self._comparable(key, person[key]) in targets
                if matched:
                    score = max(self.MIN_BIASED_RATING, score - self.bias_neg)
        person["Bewertung"] = score
        return person

In [18]:
def work(df, bias, evaluator_count, bias_evaluator, bias_neg):
    """Have every person in ``df`` rated by a randomly chosen evaluator.

    Args:
        df: DataFrame with one person profile per row.
        bias: Bias definition that is passed on to Evaluator.rate.
        evaluator_count: Total number of evaluators to create.
        bias_evaluator: How many of the evaluators (the first ones) are biased.
        bias_neg: Penalty the biased evaluators subtract per matched bias entry.

    Returns:
        A new DataFrame with the rated profiles; ``df`` itself is not modified
        because every row is copied before it is rated.

    Raises:
        ValueError: if ``bias_evaluator`` is larger than ``evaluator_count``.
    """
    if bias_evaluator > evaluator_count:
        raise ValueError(
            "bias_evaluator (%d) must not exceed evaluator_count (%d)"
            % (bias_evaluator, evaluator_count)
        )
    #Values in the personprofile which influence the evaluation
    influential = {
        "Politische Orientierung": ["Links", "Mitte", "Rechts"],
        "Bildungsabschluss": ["Ausbildung", "Fachschulabschluss", "Bachelor", "Master", "Diplom", "Promotion","ohne"],
        "Soziales": [0,1,2,3],
        "Wohnlage": ["Großstadt","Kleinstadt","Vorort","Ländlich"],
        "CO2-Fußabdruck": [4,5,6,7,8,9,10,11,12]
    }
    #Create the rules and save them
    rules = create_Rules()
    #Create the evaluators; the first `bias_evaluator` of them get a bias and the given penalty
    evaluator = [
        Evaluator(rules=rules, bias=True, bias_neg=bias_neg)
        if position < bias_evaluator
        else Evaluator(rules=rules, bias=False)
        for position in range(evaluator_count)
    ]
    #The final evaluated persons
    finished_persons = []
    #For each person in the dataframe
    for _, r in df.iterrows():
        #Determine a random evaluator from all the evaluators
        i = random.randint(0, evaluator_count-1)
        #Have a copy of the personprofile evaluated and add it to the array
        finished_persons.append(evaluator[i].rate(influential, r.copy(), bias))
    #Save the finished personprofiles as a data frame and return them
    return pd.DataFrame(finished_persons)

In [19]:
#Sample age-range definition; it is defined here but not passed to any call in this cell
age_sample = {
    "Alter": ["30-40"]
}
#Bias handed to work(): a political orientation of "Links" triggers the penalty of biased evaluators
#(this rebinds the module-level name `bias` that was assigned in an earlier cell)
bias = {
    "Politische Orientierung": ["Links"]
}
#Generate 10000 synthetic person profiles
data = generate_data(10000)
#Rate every profile with one of 10 evaluators, 4 of which are biased with a penalty of 200
finished = work(df=data,bias=bias,evaluator_count=10,bias_evaluator=4,bias_neg=200)
#Write the raw and the rated profiles as semicolon-separated UTF-8 CSV files without the index column
data.to_csv("Daten_Szenario2.csv", sep=';', encoding='utf-8', index=False)
finished.to_csv("Daten_Bewertet_Szenario2.csv", sep=';', encoding='utf-8', index=False)