In [1]:
import pandas as pd
import numpy as np

In [2]:
# Raise pandas' row display limit so frames of up to 500 rows render in full.
pd.set_option("display.max_rows", 500)

In [3]:
def first_letter(series):
    """Return the upper-cased first character of every string in ``series``.

    Args:
        series: A pandas Series of strings.

    Returns:
        A Series with the same index as ``series`` whose values are the
        first character of each entry, converted to upper case.
    """
    initials = series.str[0]
    return initials.str.upper()

In [4]:
def make_first_letter_index(df, col):
    """Return a copy of ``df`` indexed by the first letter of column ``col``.

    Args:
        df: Source DataFrame.
        col: Name of the string column whose first letters become the index.

    Returns:
        A new DataFrame whose index, named ``first_letter``, holds the
        upper-cased first character of ``df[col]``; the original columns
        are kept.
    """
    keyed = df.assign(first_letter=first_letter(df[col]))
    return keyed.set_index("first_letter")

In [5]:
# Load the chosen countries and index them by the first letter of the country name.
countries_df = make_first_letter_index(
    pd.read_csv("countries_chosen.csv"),
    'country',
)

In [6]:
# NATO phonetic alphabet as one comma-separated string (split into a list below).
nato_string = (
    "Alpha, Bravo, Charlie, Delta, Echo, Foxtrot, Golf, Hotel, India, "
    "Juliett, Kilo, Lima, Mike, November, Oscar, Papa, Quebec, Romeo, "
    "Sierra, Tango, Uniform, Victor, Whiskey, X-ray, Yankee, Zulu"
)

In [7]:
# Split on commas and trim the surrounding whitespace from each code word.
nato_alphabet = list(map(str.strip, nato_string.split(',')))

In [8]:
# One row per NATO code word, indexed by the word's first letter.
alpha_df = make_first_letter_index(
    pd.DataFrame({"alphabet": nato_alphabet}),
    "alphabet",
)

In [9]:
# The file is read without a header row, so the column names are supplied here.
baby_names_raw = pd.read_csv(
    "yob2018.txt",
    header=None,
    names=["Name", "Gender", "Count"],
)

In [10]:
# Sort so the highest Count comes first within each (Gender, first_letter)
# group, then take the first entry of every group.
# NOTE(review): drop_duplicates runs before the sort, so a name listed under
# both genders survives only in whichever of its rows appears first in the
# file — confirm that this is intended.
names_df = (
    baby_names_raw
    .drop_duplicates(["Name"])
    .assign(first_letter=lambda frame: first_letter(frame.Name))
    .sort_values(["Gender", "first_letter", "Count"], ascending=[True, True, False])
    .groupby(["Gender", "first_letter"])
    .first()
)

In [11]:
# Slice the (Gender, first_letter)-indexed table into one frame per gender.
male_names_df, female_names_df = (names_df.loc[gender] for gender in ("M", "F"))

In [12]:
def join_rename(df, new_df, rename_dict):
    """Join selected, renamed columns of ``new_df`` onto ``df`` by index.

    Args:
        df: Left-hand DataFrame; its index is the join key.
        new_df: DataFrame supplying the extra columns.
        rename_dict: Mapping of ``new_df`` column name -> name the column
            should carry in the result. Only the columns named by its keys
            are taken from ``new_df``.

    Returns:
        The result of ``df.join`` with the selected columns of ``new_df``
        under their new names.
    """
    picked = new_df[rename_dict.keys()]
    renamed = picked.rename(columns=rename_dict)
    return df.join(renamed)

In [13]:
# Category values that are cycled over the rows of the generated systems table.
hosting_types = [
    "On-prem",
    "GDC",
    "CC",
]
class_types = [
    "Unclassified (Official Open)",
    "Unclassified (Official Closed)",
    "Restricted",
    "Confidential",
]
status_types = [
    "In progress",
    "POC",
    "deployed",
]

In [14]:
def create_cycle(iterable, length):
    """Build a list of ``length`` items by repeating ``iterable`` in order.

    Args:
        iterable: A sequence supporting ``len()`` and integer indexing.
        length: Number of items to produce.

    Returns:
        A list whose item at position ``i`` is
        ``iterable[i % len(iterable)]``.
    """
    cycled = []
    for position in range(length):
        cycled.append(iterable[position % len(iterable)])
    return cycled

In [15]:
def create_cycle_col(df, iterable, name):
    """Return ``df`` with a new column that cycles through ``iterable``.

    Args:
        df: DataFrame to extend.
        iterable: Sequence of values repeated in order down the rows.
        name: Name of the column to add.

    Returns:
        A new DataFrame with column ``name`` holding ``len(df)`` values
        produced by ``create_cycle``.
    """
    return df.assign(**{name: create_cycle(iterable, len(df))})

In [16]:
# Assemble the systems table: country columns renamed, one female and one male
# name joined on the first-letter index, that index exposed as the ID column,
# and the category columns filled by cycling through their value lists.
system_df = (
    countries_df
    .rename(columns={"country": "FULL_NAME", "code": "SHORT_NAME"})
    .pipe(join_rename, female_names_df, {"Name": "DEPT_OWNER"})
    .pipe(join_rename, male_names_df, {"Name": "IDTD_REP"})
    .reset_index()
    .rename(columns={"first_letter": "ID"})
    .pipe(create_cycle_col, nato_alphabet[:5], "DEPT")
    .pipe(create_cycle_col, hosting_types, "HOSTING_MODEL")
    .pipe(create_cycle_col, class_types, "CLASSIFICATION")
    .pipe(create_cycle_col, status_types, "STATUS")
)
 

In [17]:
# Display the assembled systems table.
system_df

Unnamed: 0,ID,FULL_NAME,SHORT_NAME,DEPT_OWNER,IDTD_REP,DEPT,HOSTING_MODEL,CLASSIFICATION,STATUS
0,A,Australia,AUS,Ava,Abraham,Alpha,On-prem,Unclassified (Official Open),In progress
1,B,Belgium,BEL,Brooklyn,Bryan,Bravo,GDC,Unclassified (Official Closed),POC
2,C,Canada,CAN,Charlotte,Calvin,Charlie,CC,Restricted,deployed
3,D,Denmark,DNK,Delilah,Dominic,Delta,On-prem,Confidential,In progress
4,E,Estonia,EST,Emma,Ezekiel,Echo,GDC,Unclassified (Official Open),POC
5,F,France,FRA,Faith,Francisco,Alpha,CC,Unclassified (Official Closed),deployed
6,G,Greece,GRC,Grace,George,Bravo,On-prem,Restricted,In progress
7,H,Hungary,HUN,Harper,Hector,Charlie,GDC,Confidential,POC
8,I,Italy,ITA,Isabella,Ivan,Delta,CC,Unclassified (Official Open),deployed
9,J,Japan,JPN,Josephine,Joshua,Echo,On-prem,Unclassified (Official Closed),In progress


In [18]:
# Write the systems table to the parent directory, leaving out the index column.
system_df.to_csv("../systems_gen.csv", index=False)

In [129]:
# First character of each NATO code word.
letters = [word[0] for word in nato_alphabet]

In [130]:
# Protocol labels cycled over the rows of the generated links table.
protocol_types = [
    "REST API",
    "SFTP",
    "DB Query",
]

In [131]:
def make_edges(letters, size, protocol_types=None):
    """Draw ``size`` random directed edges between distinct node labels.

    Each edge is built by picking a source with ``np.random.choice`` and then
    picking a destination from the labels that differ from the source, so an
    edge never points back at its own source. Draws come from NumPy's global
    random state; call ``np.random.seed`` beforehand for reproducible output.

    Args:
        letters: Sequence of node labels to draw from.
        size: Number of edges to generate.
        protocol_types: Unused by this function; accepted (and optional) only
            so existing three-argument calls keep working.

    Returns:
        A list of ``size`` ``(from, to)`` tuples.

    Raises:
        ValueError: If ``size`` is positive and ``letters`` holds fewer than
            two distinct labels, because no destination different from the
            source could be chosen.
    """
    # Fail early with a clear message instead of the opaque error that
    # np.random.choice raises when handed an empty candidate list.
    if size > 0 and len(set(letters)) < 2:
        raise ValueError("letters must contain at least two distinct values to build an edge")

    edge_list = []
    for _ in range(size):
        source = np.random.choice(letters)
        # Exclude the source so the edge cannot be a self-loop.
        target = np.random.choice([letter for letter in letters if letter != source])
        edge_list.append((source, target))
    return edge_list

In [139]:
# Seed NumPy's global random state so the generated edges are repeatable.
np.random.seed(1)

# 50 random FROM/TO pairs, each labelled with a protocol taken in rotation.
edge_df = create_cycle_col(
    pd.DataFrame(make_edges(letters, 50, protocol_types), columns=["FROM", "TO"]),
    protocol_types,
    "PROTOCOL",
)

edge_df.to_csv("../links_gen.csv", index=False)