In [163]:
# Disabled IPython magic: `%store -r df` would restore a `df` previously saved
# with `%store df` in another kernel session.
# NOTE(review): presumably left off so that `df` is always rebuilt from the CSV -- confirm.
#%store -r df

In [1]:
# used for manipulating directory paths
import os

# Scientific and vector computation for python
import numpy as np
import pandas as pd

# Plotting library
from matplotlib import pyplot

# Optimization module in scipy
from scipy import optimize

In [2]:
# tells matplotlib to embed plots within the notebook
#%matplotlib inline

In [3]:
# Move to the project root first, then into its `data` sub-folder, so the CSV
# in the next cell can be read by bare filename.
# NOTE(review): this is a machine-specific absolute Windows path; it only
# resolves on a machine that has this exact directory.
os.chdir('C:\\Users\\belincoln\\repos\\BudgetPredict')
# Set working directory to the data folder so you can correctly read in the csv files
# (`%cd` is an IPython magic; it also echoes the new working directory.)
%cd data

C:\Users\belincoln\repos\BudgetPredict\data


In [4]:
# Load the DHS contract transactions, keeping only the transaction key and the
# six amount columns used in this analysis.
key_column = 'contract_transaction_unique_key'
amount_columns = [
    'federal_action_obligation',
    'total_dollars_obligated',
    'base_and_exercised_options_value',
    'current_total_value_of_award',
    'base_and_all_options_value',
    'potential_total_value_of_award',
]

# The key is parsed as text; every amount column is parsed as a float.
column_dtypes = {key_column: 'str'}
column_dtypes.update({column: 'float' for column in amount_columns})

df = pd.read_csv(
    'FY2019_070_Contracts_Full_20200110_1.csv',
    header=0,
    usecols=[key_column] + amount_columns,
    dtype=column_dtypes,
)

In [5]:
# Create 3 new ratio features for analysis.
# NOTE: the denominator columns can contain 0, in which case pandas yields
# inf (x / 0) or NaN (0 / 0) instead of raising, so these ratios may hold
# non-finite values.
df['Percent awarded over potential total awarded'] = (
    df['current_total_value_of_award'] / df['potential_total_value_of_award']
)
df['Percent Cumulatively Obligated over potential total value of award'] = (
    df['total_dollars_obligated'] / df['potential_total_value_of_award']
)
df['Percent Cumulatively Obligated over total value already awarded'] = (
    df['total_dollars_obligated'] / df['current_total_value_of_award']
)

# Create Indicator Variable: True when this transaction's action obligation is
# below -1000 (a negative action obligation is a de-obligation).
df['Indicator'] = df['federal_action_obligation'] < -1000

# Set index to each transaction key.
# Guarded so the cell can be re-run: after the first run the key column has
# already become the index and an unguarded set_index would raise KeyError.
if 'contract_transaction_unique_key' in df.columns:
    df.set_index('contract_transaction_unique_key', inplace=True)

# Data Dictionary for Column Headers from USASPENDING.GOV

#### federal_action_obligation: 

Amount of Federal government’s obligation, de-obligation, or liability for an award transaction.

#### total_dollars_obligated (TODO: this definition doesn't make sense to me; needs clarification)

This is a system-generated element providing the sum of all the amounts entered in the "Action Obligation" field for a particular PIID and Agency. Example: a contract has 9 modifications under "Transaction Number" '1' and 9 modifications with the same PIID under "Transaction Number" '2'. The base contracts and all the modifications each have an "Action Obligation" of $10, so each transaction number accounts for (1 base + 9 modifications) × $10 = $100. When either of the bases or any of the modifications is retrieved through atom feeds, the value of the field "Total Obligated Amount" will be $200 ($100 under Transaction Number 1 + $100 under Transaction Number 2). "Total Obligated Amount" is generated irrespective of the "Transaction Number" on the awards.

#### base_and_exercised_options_value
The change (from this transaction only) to the current contract value (i.e., the base contract and any options that have been exercised).

#### current_total_value_of_award

Total amount obligated to date on an award. For a contract, this amount includes the base and exercised options. For a non-loan financial assistance award (AssistanceType ≠ 07 or 08), this is the sum of all the FederalActionObligation values in transactions with the same AwardingSubTierAgencyCode and FAIN (for RecordType = 2 or 3) or AwardingSubTierAgencyCode and URI (for RecordType = 1). For a loan award (AssistanceType = 07 or 08), this is the sum of all OriginalLoanSubsidyCost values in transactions with the same AwardingSubTierAgencyCode and FAIN (for RecordType = 2 or 3) or AwardingSubTierAgencyCode and URI (for RecordType = 1). In the subaward data context, this element refers to the total amount obligated to date on the prime award.

#### base_and_all_options_value

The change (from this transaction only) to the potential contract value (i.e., the base contract and any exercised or unexercised options).

#### potential_total_value_of_award

Total amount that could be obligated on a contract, if the base and all options are exercised.


In [169]:
# Smallest value found in each column
column_minimums = df.min()
print(column_minimums)

# Why do some of the amount columns have negative minimums?
    # Because those transactions were de-obligations!

# Disabled inspection of the negative rows:
# df['base_and_all_options_value'][df['base_and_all_options_value'] < 0]


federal_action_obligation                                             -75532042.2
total_dollars_obligated                                                       0.0
base_and_exercised_options_value                                      -75532042.2
current_total_value_of_award                                                  0.0
base_and_all_options_value                                           -108425811.2
potential_total_value_of_award                                                0.0
Percent awarded over potential total awarded                                  0.0
Percent Cumulatively Obligated over potential total value of award            0.0
Percent Cumulatively Obligated over total value already awarded               0.0
Indicator                                                                     0.0
dtype: float64


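### Note
Possibly: remove the transactions that do not satisfy the expected inequality `total_dollars_obligated <= current_total_value_of_award <= potential_total_value_of_award` (tested in the cells below).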

In [7]:
# First half of the expected ordering
#   total_dollars_obligated <= current_total_value_of_award <= potential_total_value_of_award
# Is total_dollars_obligated <= current_total_value_of_award?
# (A row with a missing value on either side compares as False, i.e. it is
# counted as a violation.)
test = df['total_dollars_obligated'] <= df['current_total_value_of_award']

# True only if the inequality holds for every transaction.
# (The previous `not any(test)` was True only when it held for NO transaction.)
print(test.all())

# Shows the fraction of transactions where this inequality does not hold
print((~test).mean())

# Show index labels where this inequality does not hold
df[~test].index

False
0.04829182510934424


Index(['7008_-NONE-_HSCG3816DL00004_P00009_-NONE-_-NONE-',
       '7013_-NONE-_HSTS0514AMED077_P00001_-NONE-_-NONE-',
       '7008_-NONE-_70Z03818DB2000003_P00002_-NONE-_-NONE-',
       '7008_-NONE-_70Z03819DB2000001_0_-NONE-_-NONE-',
       '7008_-NONE-_70Z03819DM0000001_0_-NONE-_-NONE-',
       '7012_-NONE-_HSCEDM17D00009_P00008_-NONE-_-NONE-',
       '7008_-NONE-_HSCG2317DPXC002_P00003_-NONE-_-NONE-',
       '7008_-NONE-_HSCG3815D202051_P00005_-NONE-_-NONE-',
       '7008_-NONE-_HSCG4016D60308_P00007_-NONE-_-NONE-',
       '7015_-NONE-_HSFLAR16D00001_P00006_-NONE-_-NONE-',
       ...
       '7012_-NONE-_70CDCR18D00000004_P00002_-NONE-_-NONE-',
       '7008_-NONE-_HSCG8817DPMV093_P00004_-NONE-_-NONE-',
       '7008_-NONE-_HSCG8817DPMV093_P00005_-NONE-_-NONE-',
       '7008_-NONE-_70Z08419DBHQ00400_P00001_-NONE-_-NONE-',
       '7022_-NONE-_HSFE4014A0167_P00005_-NONE-_-NONE-',
       '7022_-NONE-_HSFE4014A0168_P00005_-NONE-_-NONE-',
       '7022_-NONE-_HSFE4014A0184_P00005_-NONE-_-NON

In [11]:
# Second half of the expected ordering
#   total_dollars_obligated <= current_total_value_of_award <= potential_total_value_of_award
# Is current_total_value_of_award <= potential_total_value_of_award?
# (A row with a missing value on either side compares as False, i.e. it is
# counted as a violation.)
test2 = df['current_total_value_of_award'] <= df['potential_total_value_of_award']

# True only if the inequality holds for every transaction.
# (The previous `not any(test2)` was True only when it held for NO transaction.)
print(test2.all())

# Shows the fraction of transactions where this inequality does not hold
print((~test2).mean())

# Show index labels where this inequality does not hold
df[~test2].index

False
0.04829182510934424


Index(['7008_-NONE-_HSCG3816DL00004_P00009_-NONE-_-NONE-',
       '7013_-NONE-_HSTS0514AMED077_P00001_-NONE-_-NONE-',
       '7008_-NONE-_70Z03818DB2000003_P00002_-NONE-_-NONE-',
       '7008_-NONE-_70Z03819DB2000001_0_-NONE-_-NONE-',
       '7008_-NONE-_70Z03819DM0000001_0_-NONE-_-NONE-',
       '7012_-NONE-_HSCEDM17D00009_P00008_-NONE-_-NONE-',
       '7008_-NONE-_HSCG2317DPXC002_P00003_-NONE-_-NONE-',
       '7008_-NONE-_HSCG3815D202051_P00005_-NONE-_-NONE-',
       '7008_-NONE-_HSCG4016D60308_P00007_-NONE-_-NONE-',
       '7015_-NONE-_HSFLAR16D00001_P00006_-NONE-_-NONE-',
       ...
       '7012_-NONE-_70CDCR18D00000004_P00002_-NONE-_-NONE-',
       '7008_-NONE-_HSCG8817DPMV093_P00004_-NONE-_-NONE-',
       '7008_-NONE-_HSCG8817DPMV093_P00005_-NONE-_-NONE-',
       '7008_-NONE-_70Z08419DBHQ00400_P00001_-NONE-_-NONE-',
       '7022_-NONE-_HSFE4014A0167_P00005_-NONE-_-NONE-',
       '7022_-NONE-_HSFE4014A0168_P00005_-NONE-_-NONE-',
       '7022_-NONE-_HSFE4014A0184_P00005_-NONE-_-NON

### Note
**TODO:** Check whether the transactions that fail the first test are the same ones that fail the second test.