# LOCKED POSITION RANDOMIZED DATA

FILENAME: budget_randomized.ipynb
    
PROJECT: Randomized Data Model

DATE CREATED: 27-MAY-20

DATE UPDATED: 27-MAY-20

## PHASE 1: PROJECT SETUP

Import the necessary libraries needed for ETL, engineering, and export efforts

In [7]:
import pandas as pd
import csv
import random
import sqlite3
import itertools
import numpy as np
import datetime
from itertools import repeat
import time as t
import getpass as gp

In [26]:
def init_array(df_length):
    '''
    DESCRIPTION: A function to create and return a two_dimensional array with randomized float values
    '''
    
    length = df_length
    
    num_of_projects = 20 # can change the amount of projects
    num_of_subprojects = 40 # can change the amount of subprojects
    
    proj_list = ['project' + str(i+1) for i in range(num_of_projects)] # create a list of project names
    
    num_of_proj_records = int(length / num_of_projects)
    
    subproject_list = ['subproject' + str(i+1) for i in range(num_of_subprojects)]
    
    project_values = []
    subproject_values = []

    for index in range(length):
        proj_val = random.choice(proj_list)
        sb_val = (random.choice(subproject_list))
        project_values.append(proj_val)
        subproject_values.append(sb_val)
    
    # create randomized budget data
    yr3_forecast= np.random.randint(low = 100000, high = 30000000, size = df_length) 
    yr2_random = np.random.uniform(low=0.5, high=1.3, size=df_length)
    yr2_forecast = np.round(yr3_forecast * yr2_random,2)
    
    yr1_random = np.random.uniform(low=0.8, high=1.2, size=df_length)
    yr1_forecast = np.round(yr2_forecast * yr1_random,2)
    
    plan_random = np.random.uniform(low=0.6, high=1.3, size=df_length)
    plan_val = np.round(yr1_forecast * plan_random,2)
    
    approp_random = np.random.uniform(low=0.6, high=1.2, size=df_length)
    approp_val = np.round(plan_val * approp_random,2)
    
    oblig_random = np.random.uniform(low=0.8, high=1.0, size=df_length)
    oblig_val = np.round(approp_val * oblig_random,2)
    
    raw_df = pd.DataFrame(columns=['project', 'subproject', 'yr+3_forecast','yr+2_forecast','yr+1_forecast','yr0_plan','yr-1_approp','yr-2_oblig'])

    raw_df['project'] = project_values
    raw_df['subproject'] = subproject_values
    raw_df['yr+3_forecast'] = yr3_forecast
    raw_df['yr+2_forecast'] = yr2_forecast
    raw_df['yr+1_forecast'] = yr1_forecast
    raw_df['yr0_plan'] = plan_val
    raw_df['yr-1_approp'] = approp_val
    raw_df['yr-2_oblig'] = oblig_val
    
    return raw_df

# PHASE 2: Function Test

In [27]:
train_df = init_array(10000)
train_df.tail(10)

Unnamed: 0,project,subproject,yr+3_forecast,yr+2_forecast,yr+1_forecast,yr0_plan,yr-1_approp,yr-2_oblig
9990,project15,subproject12,26861083,21595010.55,20230335.28,23310510.6,25840071.04,21136076.72
9991,project3,subproject16,19917730,16266221.19,17306539.72,21727514.08,18418477.41,15854571.4
9992,project7,subproject22,19760325,20856724.45,18934503.86,20464819.22,14077432.33,13607214.98
9993,project4,subproject24,14782333,13950676.61,12386980.59,11936565.46,12626263.07,12345587.01
9994,project16,subproject16,9230533,4837457.92,4817563.41,5843725.98,4971351.03,4564988.3
9995,project10,subproject30,2842457,3120296.08,3617607.43,3272428.52,2711422.96,2320867.54
9996,project12,subproject8,11833728,12151063.08,11133108.91,6698224.31,6954235.69,6612260.15
9997,project1,subproject4,15813214,10059412.11,8318155.12,7123565.32,5412655.12,4713015.47
9998,project1,subproject19,19587843,12077430.71,10487298.25,11055815.78,7586974.81,7237450.56
9999,project19,subproject36,3830931,3631258.06,4258315.76,3658271.9,3239950.12,2773654.04


# PHASE 3: EXPORT DATA

In [4]:
train_df.to_csv(r'locked_random_data_v1', index = False)

# TEST

In [17]:
length = 10000
num_of_projects = 20
num_of_subprojects = 40

In [18]:
proj_list = ['project' + str(i+1) for i in range(num_of_projects)]
proj_list[-5:]

['project15', 'project16', 'project17', 'project18', 'project19']

In [19]:
proj_values = [x for item in proj_list for x in repeat(item, 20)]
proj_repeat

['project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project0',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project1',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project2',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',
 'project3',

In [5]:
subproject_list = ['subproject' + str(i+1) for i in range(num_of_subprojects)]
subproject_list

['subproject0',
 'subproject1',
 'subproject2',
 'subproject3',
 'subproject4',
 'subproject5',
 'subproject6',
 'subproject7',
 'subproject8',
 'subproject9',
 'subproject10',
 'subproject11',
 'subproject12',
 'subproject13',
 'subproject14',
 'subproject15',
 'subproject16',
 'subproject17',
 'subproject18',
 'subproject19',
 'subproject20',
 'subproject21',
 'subproject22',
 'subproject23',
 'subproject24',
 'subproject25',
 'subproject26',
 'subproject27',
 'subproject28',
 'subproject29',
 'subproject30',
 'subproject31',
 'subproject32',
 'subproject33',
 'subproject34',
 'subproject35',
 'subproject36',
 'subproject37',
 'subproject38',
 'subproject39',
 'subproject40',
 'subproject41',
 'subproject42',
 'subproject43',
 'subproject44',
 'subproject45',
 'subproject46',
 'subproject47',
 'subproject48',
 'subproject49',
 'subproject50',
 'subproject51',
 'subproject52',
 'subproject53',
 'subproject54',
 'subproject55',
 'subproject56',
 'subproject57',
 'subproject58',
 'subpr

In [20]:
subproject_values = []

for index in range(length):
    val = (random.choice(subproject_list))
    subproject_values.append(val)

In [21]:
subproject_values

['subproject26',
 'subproject170',
 'subproject106',
 'subproject102',
 'subproject106',
 'subproject237',
 'subproject37',
 'subproject149',
 'subproject238',
 'subproject7',
 'subproject80',
 'subproject91',
 'subproject163',
 'subproject36',
 'subproject59',
 'subproject184',
 'subproject65',
 'subproject159',
 'subproject52',
 'subproject169',
 'subproject31',
 'subproject208',
 'subproject3',
 'subproject56',
 'subproject61',
 'subproject6',
 'subproject140',
 'subproject233',
 'subproject123',
 'subproject50',
 'subproject118',
 'subproject168',
 'subproject228',
 'subproject117',
 'subproject90',
 'subproject189',
 'subproject203',
 'subproject29',
 'subproject96',
 'subproject164',
 'subproject17',
 'subproject182',
 'subproject164',
 'subproject92',
 'subproject74',
 'subproject5',
 'subproject110',
 'subproject47',
 'subproject187',
 'subproject248',
 'subproject138',
 'subproject157',
 'subproject159',
 'subproject158',
 'subproject72',
 'subproject173',
 'subproject163',
 '