# LOCKED POSITION RANDOMIZED DATA

FILENAME: budget_randomized.ipynb
    
PROJECT: Randomized Data Model

DATE CREATED: 27-MAY-20

DATE UPDATED: 27-MAY-20

## PHASE 1: PROJECT SETUP

Import the libraries needed for ETL, engineering, and export efforts.

In [1]:
import pandas as pd
import csv
import random
import sqlite3
import itertools
import numpy as np
import datetime
import time as t
import getpass as gp

In [2]:
def init_array(df_length, seed=None):
    '''
    DESCRIPTION: Create and return a pandas DataFrame of randomized budget values.

    Each row is a chain of derived amounts: a random integer year+3 forecast is
    drawn first, and every following column is the previous column multiplied by
    a per-row uniform random factor and rounded to 2 decimal places.

    PARAMETERS:
        df_length (int): number of rows to generate (0 yields an empty frame).
        seed (int, optional): when given, all values are drawn from a private
            np.random.RandomState(seed), so the result is reproducible and the
            global NumPy random state is not consumed. When None (the default),
            the global np.random state is used.

    RETURNS:
        pandas.DataFrame with df_length rows and the columns
        'yr+3_forecast', 'yr+2_forecast', 'yr+1_forecast', 'yr0_plan',
        'yr-1_approp', 'yr-2_oblig' (in that order).
    '''
    # The np.random module and a RandomState instance expose the same
    # randint/uniform API, so one code path serves both cases.
    rng = np.random if seed is None else np.random.RandomState(seed)

    def _scaled(base, low, high):
        # Multiply `base` by a per-row uniform factor in [low, high), round to cents.
        factor = rng.uniform(low=low, high=high, size=df_length)
        return np.round(base * factor, 2)

    # Draw order matters: it is kept as randint, then one uniform draw per
    # derived column, so a globally seeded run yields the same stream as before.
    yr3_forecast = rng.randint(low=100000, high=30000000, size=df_length)
    yr2_forecast = _scaled(yr3_forecast, 0.5, 1.3)
    yr1_forecast = _scaled(yr2_forecast, 0.8, 1.2)
    plan_val = _scaled(yr1_forecast, 0.6, 1.3)
    approp_val = _scaled(plan_val, 0.6, 1.2)
    oblig_val = _scaled(approp_val, 0.8, 1.0)

    # Build the frame in one step; dict insertion order fixes the column order.
    return pd.DataFrame({
        'yr+3_forecast': yr3_forecast,
        'yr+2_forecast': yr2_forecast,
        'yr+1_forecast': yr1_forecast,
        'yr0_plan': plan_val,
        'yr-1_approp': approp_val,
        'yr-2_oblig': oblig_val,
    })

## PHASE 2: FUNCTION TEST

In [3]:
# Generate a 10,000-row randomized frame and show its last ten rows
train_df = init_array(df_length=10000)
train_df.tail(n=10)

Unnamed: 0,yr+3_forecast,yr+2_forecast,yr+1_forecast,yr0_plan,yr-1_approp,yr-2_oblig
9990,19711090,20464449.14,17612465.44,21949409.2,23379762.6,21258410.93
9991,3724388,2587960.66,3042774.76,2370460.24,2026023.91,1966468.79
9992,18103875,9231943.01,9916064.31,12449215.62,11181516.05,8946238.91
9993,10846761,13784132.17,14952376.79,10272437.17,9667122.42,8084412.07
9994,835537,497227.14,439529.0,527385.07,363005.97,343002.47
9995,1777124,1660467.1,1703155.16,1401518.69,1302580.5,1177079.0
9996,18497426,15640661.76,17015836.34,10876658.76,9905386.98,8207588.51
9997,4483627,3355848.59,2710380.01,2609478.34,1996525.27,1601747.59
9998,20578065,16161554.3,16445346.95,20308664.42,14807202.67,13934505.52
9999,15277590,17243459.99,16537638.27,14910851.06,10583186.9,9958957.17


## PHASE 3: EXPORT DATA

In [4]:
# Write the generated frame as CSV text, omitting the row index.
# NOTE(review): the target name has no '.csv' extension — confirm this is intended.
train_df.to_csv('locked_random_data_v1', index=False)

# TEST

In [8]:
# Total row count, and how many project / subproject labels to derive from it.
length = 10000
# Floor division keeps these counts as ints: in Python 3 `/` yields a float,
# and range() rejects floats with a TypeError.
num_of_projects = length // 20
num_of_subprojects = length // 40


['project0',
 'project1',
 'project2',
 'project3',
 'project4',
 'project5',
 'project6',
 'project7',
 'project8',
 'project9',
 'project10',
 'project11',
 'project12',
 'project13',
 'project14',
 'project15',
 'project16',
 'project17',
 'project18',
 'project19',
 'project20',
 'project21',
 'project22',
 'project23',
 'project24',
 'project25',
 'project26',
 'project27',
 'project28',
 'project29',
 'project30',
 'project31',
 'project32',
 'project33',
 'project34',
 'project35',
 'project36',
 'project37',
 'project38',
 'project39',
 'project40',
 'project41',
 'project42',
 'project43',
 'project44',
 'project45',
 'project46',
 'project47',
 'project48',
 'project49',
 'project50',
 'project51',
 'project52',
 'project53',
 'project54',
 'project55',
 'project56',
 'project57',
 'project58',
 'project59',
 'project60',
 'project61',
 'project62',
 'project63',
 'project64',
 'project65',
 'project66',
 'project67',
 'project68',
 'project69',
 'project70',
 'project71',
 '

In [None]:
# One label per project: 'project0', 'project1', ... (range() needs an integer count)
proj_list = ['project{}'.format(idx) for idx in range(num_of_projects)]
proj_list

In [None]:
# One label per subproject: 'subproject0', 'subproject1', ... (range() needs an integer count)
subproject_list = ['subproject{}'.format(idx) for idx in range(num_of_subprojects)]
subproject_list