# LOCKED POSITION RANDOMIZED DATA

FILENAME: budget_randomized.ipynb
    
PROJECT: Randomized Data Model

DATE CREATED: 27-MAY-20

DATE UPDATED: 27-MAY-20

## PHASE 1: PROJECT SETUP

Import the necessary libraries needed for ETL, engineering, and export efforts

In [7]:
import pandas as pd
import csv
import random
import sqlite3
import itertools
import numpy as np
import datetime
from itertools import repeat
import time as t
import getpass as gp

In [2]:
def init_array(df_length):
    '''
    DESCRIPTION: A function to create and return a two_dimensional array with randomized float values
    '''
    
    length = df_length
    
    num_of_projects = 20 # can change the amount of projects
    num_of_subprojects = 40 # can change the amount of subprojects
    
    proj_list = ['project' + str(i+1) for i in range(num_of_projects)] # create a list of project names
    
    num_of_proj_records = int(length / num_of_projects)
    proj_values = [x for item in proj_list for x in repeat(item, num_of_project_records)]
    
    subproject_list = ['subproject' + str(i+1) for i in range(num_of_subprojects)]
    
    project_values = []
    subproject_values = []

    for index in range(length):
        proj_val = random.choice(proj_list)
        sb_val = (random.choice(subproject_list))
        project_values.append(proj_val)
        subproject_values.append(subproject_val)
    
    # create randomized budget data
    yr3_forecast= np.random.randint(low = 100000, high = 30000000, size = df_length) 
    yr2_random = np.random.uniform(low=0.5, high=1.3, size=df_length)
    yr2_forecast = np.round(yr3_forecast * yr2_random,2)
    
    yr1_random = np.random.uniform(low=0.8, high=1.2, size=df_length)
    yr1_forecast = np.round(yr2_forecast * yr1_random,2)
    
    plan_random = np.random.uniform(low=0.6, high=1.3, size=df_length)
    plan_val = np.round(yr1_forecast * plan_random,2)
    
    approp_random = np.random.uniform(low=0.6, high=1.2, size=df_length)
    approp_val = np.round(plan_val * approp_random,2)
    
    oblig_random = np.random.uniform(low=0.8, high=1.0, size=df_length)
    oblig_val = np.round(approp_val * oblig_random,2)
    
    raw_df = pd.DataFrame(columns=['project', 'subproject', 'yr+3_forecast','yr+2_forecast','yr+1_forecast','yr0_plan','yr-1_approp','yr-2_oblig'])

    raw_df['project'] = project_values
    raw_df['subproject'] = subproject_values
    raw_df['yr+3_forecast'] = yr3_forecast
    raw_df['yr+2_forecast'] = yr2_forecast
    raw_df['yr+1_forecast'] = yr1_forecast
    raw_df['yr0_plan'] = plan_val
    raw_df['yr-1_approp'] = approp_val
    raw_df['yr-2_oblig'] = oblig_val
    
    return raw_df

# PHASE 2: Function Test

In [3]:
train_df = init_array(10000)
train_df.tail(10)

Unnamed: 0,yr+3_forecast,yr+2_forecast,yr+1_forecast,yr0_plan,yr-1_approp,yr-2_oblig
9990,19711090,20464449.14,17612465.44,21949409.2,23379762.6,21258410.93
9991,3724388,2587960.66,3042774.76,2370460.24,2026023.91,1966468.79
9992,18103875,9231943.01,9916064.31,12449215.62,11181516.05,8946238.91
9993,10846761,13784132.17,14952376.79,10272437.17,9667122.42,8084412.07
9994,835537,497227.14,439529.0,527385.07,363005.97,343002.47
9995,1777124,1660467.1,1703155.16,1401518.69,1302580.5,1177079.0
9996,18497426,15640661.76,17015836.34,10876658.76,9905386.98,8207588.51
9997,4483627,3355848.59,2710380.01,2609478.34,1996525.27,1601747.59
9998,20578065,16161554.3,16445346.95,20308664.42,14807202.67,13934505.52
9999,15277590,17243459.99,16537638.27,14910851.06,10583186.9,9958957.17


# PHASE 3: EXPORT DATA

In [4]:
train_df.to_csv(r'locked_random_data_v1', index = False)

# TEST

In [17]:
length = 10000
num_of_projects = 20
num_of_subprojects = 40

In [18]:
proj_list = ['project' + str(i+1) for i in range(num_of_projects)]
proj_list[-5:]

['project15', 'project16', 'project17', 'project18', 'project19']

In [19]:
proj_values = [x for item in proj_list for x in repeat(item, 20)]
proj_repeat

['project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',

In [5]:
subproject_list = ['subproject' + str(i+1) for i in range(num_of_subprojects)]
subproject_list

['subproject0',
 'subproject1',
 'subproject2',
 'subproject3',
 'subproject4',
 'subproject5',
 'subproject6',
 'subproject7',
 'subproject8',
 'subproject9',
 'subproject10',
 'subproject11',
 'subproject12',
 'subproject13',
 'subproject14',
 'subproject15',
 'subproject16',
 'subproject17',
 'subproject18',
 'subproject19',
 'subproject20',
 'subproject21',
 'subproject22',
 'subproject23',
 'subproject24',
 'subproject25',
 'subproject26',
 'subproject27',
 'subproject28',
 'subproject29',
 'subproject30',
 'subproject31',
 'subproject32',
 'subproject33',
 'subproject34',
 'subproject35',
 'subproject36',
 'subproject37',
 'subproject38',
 'subproject39',
 'subproject40',
 'subproject41',
 'subproject42',
 'subproject43',
 'subproject44',
 'subproject45',
 'subproject46',
 'subproject47',
 'subproject48',
 'subproject49',
 'subproject50',
 'subproject51',
 'subproject52',
 'subproject53',
 'subproject54',
 'subproject55',
 'subproject56',
 'subproject57',
 'subproject58',
 'subpr

In [20]:
subproject_values = []

for index in range(length):
    val = (random.choice(subproject_list))
    subproject_values.append(val)

In [21]:
subproject_values

['subproject26',
 'subproject170',
 'subproject106',
 'subproject102',
 'subproject106',
 'subproject237',
 'subproject37',
 'subproject149',
 'subproject238',
 'subproject7',
 'subproject80',
 'subproject91',
 'subproject163',
 'subproject36',
 'subproject59',
 'subproject184',
 'subproject65',
 'subproject159',
 'subproject52',
 'subproject169',
 'subproject31',
 'subproject208',
 'subproject3',
 'subproject56',
 'subproject61',
 'subproject6',
 'subproject140',
 'subproject233',
 'subproject123',
 'subproject50',
 'subproject118',
 'subproject168',
 'subproject228',
 'subproject117',
 'subproject90',
 'subproject189',
 'subproject203',
 'subproject29',
 'subproject96',
 'subproject164',
 'subproject17',
 'subproject182',
 'subproject164',
 'subproject92',
 'subproject74',
 'subproject5',
 'subproject110',
 'subproject47',
 'subproject187',
 'subproject248',
 'subproject138',
 'subproject157',
 'subproject159',
 'subproject158',
 'subproject72',
 'subproject173',
 'subproject163',
 '