# Piotr Kaczmarczyk - factory generator project

# Imports and global objects for main.py.

In [6]:
from faker import Faker
import pandas as pd
import numpy as np

fake = Faker(['it_IT'])

# Data generators.

In [4]:
def workers(iterations):
    groups = [fake.color_name(), fake.color_name(), fake.color_name(), fake.color_name(), fake.color_name()]
    full_time = round(iterations / 2)
    half_time = round(iterations / 3)
    part_time = round(iterations - full_time - half_time)
    workers_dictionary = []

    for i in range(iterations):
        if full_time > 0:
            work_type = "Full time"
            full_time = full_time - 1
        elif half_time > 0:
            work_type = "Half time"
            half_time = half_time - 1
        elif part_time > 0:
            work_type = "Part time"
            part_time = part_time - 1

        workers_dictionary.append([fake.random_int() * fake.random_int(1, 5),
                                   fake.name(),
                                   groups[fake.random_int(0, 4)],
                                   fake.date(),  # date default is -30 years
                                   work_type
                                   ])

    workers_df = pd.DataFrame(workers_dictionary,
                              columns=['id', 'name', 'team', 'date_of_employment', 'employment_type'])
    return workers_df


def parts(iterations):
    steps = {}
    for i in range(iterations):
        steps[i] = {id(fake.random_int()): [np.random.gamma(2, 2, 1)[0],
                                            np.random.normal(2, 2, 1)[0],
                                            np.random.exponential(2, 1)[0]
                                            ]}
    return steps


def item_list(worker, part):
    items = []

    for index_worker in range(len(worker)):
        index_item = 1
        if worker['employment_type'][index_worker] == "Full time":
            amount = fake.random_int(500, 1000)
        elif worker['employment_type'][index_worker] == "Half time":
            amount = fake.random_int(100, 500)
        else:
            amount = fake.random_int(1, 1000)
        # index_item = 1
        for index_item in range(amount):
            items.append([
                str(part[index_item].keys())[11:24] + " - " + str(index_worker),
                part[index_item][int(str(part[index_item].keys())[11:24])][0],
                part[index_item][int(str(part[index_item].keys())[11:24])][1],
                part[index_item][int(str(part[index_item].keys())[11:24])][2],
                index_worker
            ])
            if index_item >= len(part) - 1:
                break

    amount_df = pd.DataFrame(items, columns=['item_number', 'step_1', 'step_2', 'step_3', 'employee_id'])
    return amount_df


def parameters_of_data(data_set):
    print("\nHead function ")
    print(data_set.head())
    print("\nNunique function")
    print(data_set.nunique())
    print("\nIsnull function")
    print(data_set.isnull())
    print("\nDescribe function")
    print(data_set.describe())
    print("\nInfo function")
    print(data_set.info())

# Main function, execution of data creators

In [5]:
if __name__ == '__main__':
    company = workers(1000)
    parts = parts(1000)
    itemProduction = item_list(company, parts)

    company.to_csv("workers.csv")
    itemProduction.to_csv("widgets.csv")

    workers = pd.read_csv("workers.csv")
    widgets = pd.read_csv("widgets.csv")

    parameters_of_data(workers)


Head function 
   Unnamed: 0     id                     name         team date_of_employment  \
0           0  27088       Pomponio Boccaccio    DarkGreen         2004-05-02   
1           1   1442     Costantino Giradello  FloralWhite         1983-08-13   
2           2  19836  Tiziana Martucci-Florio    DarkGreen         1990-02-23   
3           3  25299   Dott. Carolina Ferrara       Maroon         1980-04-06   
4           4   5780       Silvio Bonaventura   BlueViolet         2006-09-05   

  employment_type  
0       Full time  
1       Full time  
2       Full time  
3       Full time  
4       Full time  

Nunique function
Unnamed: 0            1000
id                     966
name                  1000
team                     5
date_of_employment     973
employment_type          3
dtype: int64

Isnull function
     Unnamed: 0     id   name   team  date_of_employment  employment_type
0         False  False  False  False               False            False
1         False  Fa

# Part 2 - charts

In [7]:
import pandas as pd
import altair as alt

workers = pd.read_csv('workers.csv')
widgets = pd.DataFrame(pd.read_csv('widgets.csv'))

# chart1

In [9]:
data = pd.DataFrame(workers, columns=['date_of_employment'])
data = pd.DataFrame(workers.value_counts(ascending=False).iloc[:25])
alt.Chart(data.reset_index().rename(columns={0: "counts"})).mark_bar(orient='vertical').encode(
    x='date_of_employment',
    y='counts', color='counts'
)

  for col_name, dtype in df.dtypes.iteritems():


# chart2

In [10]:
data = pd.DataFrame(workers, columns=['date_of_employment'])
data = data.groupby(data['date_of_employment'].map(lambda x: pd.Timestamp(x).year)).count()
data = data.rename(columns={'date_of_employment': 'counts'})
data["counts"] = data["counts"].astype('int32')
data = data.reset_index(level=0)
alt.Chart(data).mark_bar(orient='vertical').encode(
    alt.X('date_of_employment'), y='counts', color='counts'
)

  for col_name, dtype in df.dtypes.iteritems():


# chart3

In [11]:
data = pd.DataFrame(workers, columns=['id', 'employment_type'])
data = data.groupby(['employment_type']).count()
data = data.rename(columns={'id': 'counts'})
data["counts"] = data["counts"].astype('int32')
data = data.reset_index(level=0)
alt.Chart(data).mark_arc().encode(
    color='employment_type', theta='counts'
)

  for col_name, dtype in df.dtypes.iteritems():


# chart4

In [12]:
data = pd.DataFrame(workers, columns=['id', 'team'])
data = data.groupby(['team']).count()
data = data.rename(columns={'id': 'counts'})
data["counts"] = data["counts"].astype('int32')
data = data.reset_index(level=0)
alt.Chart(data).mark_bar(orient='horizontal').encode(
    x='counts', y='team', color='counts'
)

  for col_name, dtype in df.dtypes.iteritems():


# chart5

In [13]:
data = pd.DataFrame(workers, columns=['id', 'team', 'employment_type'])
data = data.groupby(['team', 'employment_type']).count()
data = data.rename(columns={'id': 'counts'})
data["counts"] = data["counts"].astype('int32')
data = data.reset_index(level=0)
data = data.reset_index(level=0)
alt.Chart(data).mark_bar(orient='horizontal').encode(
    column="team",
    x='employment_type', y='counts', color='counts'
)

  for col_name, dtype in df.dtypes.iteritems():


# chart6

In [14]:
df_elements = pd.DataFrame(pd.read_csv('widgets.csv'), columns=['step_1', 'step_2', 'step_3'])[:5000]
df_elements = df_elements.reset_index(level=0)
alt.Chart(df_elements).mark_bar(orient='horizontal').encode(
    x=alt.X('step_1', bin=True), y='count()'
)

  for col_name, dtype in df.dtypes.iteritems():


In [15]:
df_elements = pd.DataFrame(pd.read_csv('widgets.csv'), columns=['step_1', 'step_2', 'step_3'])[:5000]
df_elements = df_elements.reset_index(level=0)
alt.Chart(df_elements).mark_bar(orient='horizontal').encode(
    alt.X('step_2', bin=True), y='count()'
)

  for col_name, dtype in df.dtypes.iteritems():


In [16]:
df_elements = pd.DataFrame(pd.read_csv('widgets.csv'), columns=['step_1', 'step_2', 'step_3'])[:5000]
df_elements = df_elements.reset_index(level=0)
alt.Chart(df_elements).mark_bar(orient='horizontal').encode(
    alt.X('step_3', bin=True), alt.Y('count()'),
)

  for col_name, dtype in df.dtypes.iteritems():


# chart7

In [17]:
data = pd.DataFrame(pd.read_csv('workers.csv'))
widgets = pd.DataFrame(pd.read_csv('widgets.csv'))
data_merged = data.merge(widgets, left_on='id', right_on='employee_id')
data_merged = data_merged.groupby(['team']).count()
data_merged = data_merged.rename(columns={'id': 'counts'})
data_merged["counts"] = data_merged["counts"].astype('int32')
data_merged = data_merged.reset_index(level=0)
alt.Chart(data_merged).mark_bar(orient='horizontal').encode(
    x='team', y='counts', color='counts'
)

  for col_name, dtype in df.dtypes.iteritems():


# chart8

In [18]:
data = pd.DataFrame(pd.read_csv('workers.csv'))
widgets = pd.DataFrame(pd.read_csv('widgets.csv'))
data_merged = data.merge(widgets, left_on='id', right_on='employee_id')
data_merged = data_merged.groupby(['employment_type']).count()
data_merged = data_merged.rename(columns={'id': 'counts'})
data_merged["counts"] = data_merged["counts"].astype('int32')
data_merged = data_merged.reset_index(level=0)
alt.Chart(data_merged).mark_bar(orient='horizontal').encode(
    x='employment_type', y='counts', color='counts'
)

  for col_name, dtype in df.dtypes.iteritems():


# chart9

In [19]:
data = pd.DataFrame(pd.read_csv('workers.csv'))
widgets = pd.DataFrame(pd.read_csv('widgets.csv'))
data_merged = data.merge(widgets, left_on='id', right_on='employee_id')
data_merged = data_merged.groupby(['name']).count()
data_merged = data_merged.reset_index(level=0)
data_merged = pd.DataFrame(data_merged, columns=['name', 'id'])
data_merged = data_merged.rename(columns={'id': 'counts'})
data_merged["counts"] = data_merged["counts"].astype('int32')
alt.Chart(data_merged.nsmallest(20, columns=['counts'])).mark_bar(orient='horizontal').encode(
    x='counts', y='name', color='counts'
)

  for col_name, dtype in df.dtypes.iteritems():
